Romio Refresh from mpich2-1.3.1. Work by Pascal Deveze, tested through bitbucket by Jeff Squyres (https://bitbucket.org/devezep/new-romio-for-openmpi).
This commit was SVN r24264.
Этот коммит содержится в:
родитель
b2f3a5b7c2
Коммит
0e921bba7f
@ -35,4 +35,5 @@ __sgi_mpi
|
||||
__hp_mpi
|
||||
__cray_mpi
|
||||
__lam_mpi
|
||||
__Darwin
|
||||
__open_mpi
|
||||
|
@ -1,58 +0,0 @@
|
||||
<dir>
|
||||
<file name="ad_bgl_getsh.c" info="1205188711"/>
|
||||
<file name="ad_bgl_fcntl.c" info="1205188711"/>
|
||||
<file name="ad_bgl_tuning.c" info="1205188711"/>
|
||||
<file name="ad_bgl_pset.h" info="1205188711"/>
|
||||
<file name="ad_bgl_aggrs.c" info="1205188711"/>
|
||||
<file name="ad_bgl_wrcoll.c" info="1205188711"/>
|
||||
<file name="ad_bgl_aggrs.h" info="1205188711"/>
|
||||
<file name="ad_bgl_pset.c" info="1205188711"/>
|
||||
<file name="ad_bgl_setsh.c" info="1205188711"/>
|
||||
<file name="ad_bgl_close.c" info="1206398065"/>
|
||||
<file name="ad_bgl.h" info="1205188711"/>
|
||||
<file name="ad_bgl_read.c" info="1205188711"/>
|
||||
<file name="ad_bgl_rdcoll.c" info="1205188711"/>
|
||||
<file name="ad_bgl_open.c" info="1205188711"/>
|
||||
<file name="ad_bgl_tuning.h" info="1205188711"/>
|
||||
<file name="ad_bgl_write.c" info="1205188711"/>
|
||||
<file name="ad_bgl_hints.c" info="1205188711"/>
|
||||
<file name="ad_bgl.c" info="1205188711"/>
|
||||
</dir>
|
||||
<data>
|
||||
<fileinfo name="ad_bgl_getsh.c">
|
||||
</fileinfo>
|
||||
<fileinfo name="ad_bgl_fcntl.c">
|
||||
</fileinfo>
|
||||
<fileinfo name="ad_bgl_tuning.c">
|
||||
</fileinfo>
|
||||
<fileinfo name="ad_bgl_pset.h">
|
||||
</fileinfo>
|
||||
<fileinfo name="ad_bgl_aggrs.c">
|
||||
</fileinfo>
|
||||
<fileinfo name="ad_bgl_wrcoll.c">
|
||||
</fileinfo>
|
||||
<fileinfo name="ad_bgl_aggrs.h">
|
||||
</fileinfo>
|
||||
<fileinfo name="ad_bgl_pset.c">
|
||||
</fileinfo>
|
||||
<fileinfo name="ad_bgl_setsh.c">
|
||||
</fileinfo>
|
||||
<fileinfo name="ad_bgl_close.c">
|
||||
</fileinfo>
|
||||
<fileinfo name="ad_bgl.h">
|
||||
</fileinfo>
|
||||
<fileinfo name="ad_bgl_read.c">
|
||||
</fileinfo>
|
||||
<fileinfo name="ad_bgl_rdcoll.c">
|
||||
</fileinfo>
|
||||
<fileinfo name="ad_bgl_open.c">
|
||||
</fileinfo>
|
||||
<fileinfo name="ad_bgl_tuning.h">
|
||||
</fileinfo>
|
||||
<fileinfo name="ad_bgl_write.c">
|
||||
</fileinfo>
|
||||
<fileinfo name="ad_bgl_hints.c">
|
||||
</fileinfo>
|
||||
<fileinfo name="ad_bgl.c">
|
||||
</fileinfo>
|
||||
</data>
|
@ -26,6 +26,7 @@ libadio_bgl_la_SOURCES = \
|
||||
ad_bgl.c \
|
||||
ad_bgl_close.c \
|
||||
ad_bgl_fcntl.c \
|
||||
ad_bgl_flush.c \
|
||||
ad_bgl_getsh.c \
|
||||
ad_bgl.h \
|
||||
ad_bgl_hints.c \
|
||||
|
@ -1,5 +1,6 @@
|
||||
/* ---------------------------------------------------------------- */
|
||||
/* (C)Copyright IBM Corp. 2007, 2008 */
|
||||
/* ---------------------------------------------------------------- */
|
||||
/**
|
||||
* \file ad_bgl.c
|
||||
* \brief ???
|
||||
@ -18,6 +19,7 @@
|
||||
|
||||
struct ADIOI_Fns_struct ADIO_BGL_operations = {
|
||||
ADIOI_BGL_Open, /* Open */
|
||||
ADIOI_GEN_OpenColl, /* Collective open */
|
||||
ADIOI_BGL_ReadContig, /* ReadContig */
|
||||
ADIOI_BGL_WriteContig, /* WriteContig */
|
||||
#if BGL_OPTIM_STEP1_2
|
||||
@ -51,7 +53,8 @@ struct ADIOI_Fns_struct ADIO_BGL_operations = {
|
||||
ADIOI_GEN_IOComplete, /* WriteComplete */
|
||||
ADIOI_GEN_IreadStrided, /* IreadStrided */
|
||||
ADIOI_GEN_IwriteStrided, /* IwriteStrided */
|
||||
ADIOI_GEN_Flush, /* Flush */
|
||||
ADIOI_BGL_Flush, /* Flush */
|
||||
ADIOI_GEN_Resize, /* Resize */
|
||||
ADIOI_GEN_Delete, /* Delete */
|
||||
ADIOI_GEN_Feature, /* Features */
|
||||
};
|
||||
|
@ -28,8 +28,10 @@
|
||||
#include <aio.h>
|
||||
#endif
|
||||
|
||||
#if 0
|
||||
int ADIOI_BGL_aio(ADIO_File fd, void *buf, int len, ADIO_Offset offset,
|
||||
int wr, void *handle);
|
||||
#endif
|
||||
|
||||
void ADIOI_BGL_Open(ADIO_File fd, int *error_code);
|
||||
|
||||
@ -87,6 +89,7 @@ void ADIOI_BGL_WriteStridedColl(ADIO_File fd, void *buf, int count,
|
||||
void ADIOI_BGL_Get_shared_fp(ADIO_File fd, int size, ADIO_Offset *shared_fp, int *error_code);
|
||||
void ADIOI_BGL_Set_shared_fp(ADIO_File fd, ADIO_Offset offset, int *error_code);
|
||||
|
||||
void ADIOI_BGL_Flush(ADIO_File fd, int *error_code);
|
||||
|
||||
#include "ad_bgl_tuning.h"
|
||||
|
||||
|
@ -1,5 +1,6 @@
|
||||
/* ---------------------------------------------------------------- */
|
||||
/* (C)Copyright IBM Corp. 2007, 2008 */
|
||||
/* ---------------------------------------------------------------- */
|
||||
/**
|
||||
* \file ad_bgl_aggrs.c
|
||||
* \brief The externally used function from this file is declared in ad_bgl_aggrs.h
|
||||
@ -7,7 +8,7 @@
|
||||
|
||||
/* -*- Mode: C; c-basic-offset:4 ; -*- */
|
||||
/*
|
||||
* Copyright (C) 1997 University of Chicago.
|
||||
* Copyright (C) 1997-2001 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
@ -16,10 +17,49 @@
|
||||
#include "ad_bgl.h"
|
||||
#include "ad_bgl_pset.h"
|
||||
#include "ad_bgl_aggrs.h"
|
||||
#ifdef AGGREGATION_PROFILE
|
||||
#include "mpe.h"
|
||||
#endif
|
||||
|
||||
#ifdef USE_DBG_LOGGING
|
||||
#define AGG_DEBUG 1
|
||||
#endif
|
||||
|
||||
|
||||
int aggrsInPsetSize=0;
|
||||
int *aggrsInPset=NULL;
|
||||
|
||||
static int aggrsInPsetSize=0;
|
||||
static int *aggrsInPset=NULL;
|
||||
|
||||
/* Comments copied from common:
|
||||
* This file contains four functions:
|
||||
*
|
||||
* ADIOI_Calc_aggregator()
|
||||
* ADIOI_Calc_file_domains()
|
||||
* ADIOI_Calc_my_req()
|
||||
* ADIOI_Calc_others_req()
|
||||
*
|
||||
* The last three of these were originally in ad_read_coll.c, but they are
|
||||
* also shared with ad_write_coll.c. I felt that they were better kept with
|
||||
* the rest of the shared aggregation code.
|
||||
*/
|
||||
|
||||
/* Discussion of values available from above:
|
||||
*
|
||||
* ADIO_Offset st_offsets[0..nprocs-1]
|
||||
* ADIO_Offset end_offsets[0..nprocs-1]
|
||||
* These contain a list of start and end offsets for each process in
|
||||
* the communicator. For example, an access at loc 10, size 10 would
|
||||
* have a start offset of 10 and end offset of 19.
|
||||
* int nprocs
|
||||
* number of processors in the collective I/O communicator
|
||||
* ADIO_Offset min_st_offset
|
||||
* ADIO_Offset fd_start[0..nprocs_for_coll-1]
|
||||
* starting location of "file domain"; region that a given process will
|
||||
* perform aggregation for (i.e. actually do I/O)
|
||||
* ADIO_Offset fd_end[0..nprocs_for_coll-1]
|
||||
* start + size - 1 roughly, but it can be less, or 0, in the case of
|
||||
* uneven distributions
|
||||
*/
|
||||
|
||||
/* forward declaration */
|
||||
static void
|
||||
@ -219,8 +259,7 @@ ADIOI_BGL_compute_agg_ranklist_serial ( ADIO_File fd,
|
||||
ADIOI_BGL_ProcInfo_t *all_procInfo,
|
||||
int *aggrsInPset )
|
||||
{
|
||||
# define DEBUG 0
|
||||
# if DEBUG
|
||||
# if AGG_DEBUG
|
||||
int i;
|
||||
# endif
|
||||
int naggs;
|
||||
@ -229,9 +268,10 @@ ADIOI_BGL_compute_agg_ranklist_serial ( ADIO_File fd,
|
||||
/* compute the ranklist of IO aggregators and put into tmp_ranklist */
|
||||
tmp_ranklist = (int *) ADIOI_Malloc (confInfo->nProcs * sizeof(int));
|
||||
|
||||
# if DEBUG
|
||||
for (i=0; i<confInfo->nProcs; i++)
|
||||
printf( "\tcpuid %1d, rank = %6d\n", all_procInfo[i].cpuid, all_procInfo[i].rank );
|
||||
# if AGG_DEBUG
|
||||
for (i=0; i<confInfo->nProcs; i++) {
|
||||
DBG_FPRINTF(stderr, "\tcpuid %1d, rank = %6d\n", all_procInfo[i].cpuid, all_procInfo[i].rank );
|
||||
}
|
||||
# endif
|
||||
|
||||
naggs =
|
||||
@ -239,7 +279,7 @@ ADIOI_BGL_compute_agg_ranklist_serial ( ADIO_File fd,
|
||||
|
||||
# define VERIFY 0
|
||||
# if VERIFY
|
||||
printf( "\tconfInfo = %3d,%3d,%3d,%3d,%3d,%3d,%.4f; naggs = %d\n",
|
||||
DBG_FPRINTF(stderr, "\tconfInfo = %3d,%3d,%3d,%3d,%3d,%3d,%.4f; naggs = %d\n",
|
||||
confInfo->PsetSize ,
|
||||
confInfo->numPsets ,
|
||||
confInfo->isVNM ,
|
||||
@ -250,9 +290,10 @@ ADIOI_BGL_compute_agg_ranklist_serial ( ADIO_File fd,
|
||||
naggs );
|
||||
# endif
|
||||
|
||||
# if DEBUG
|
||||
for (i=0; i<naggs; i++)
|
||||
printf( "\taggr %-4d = %6d\n", i, tmp_ranklist[i] );
|
||||
# if AGG_DEBUG
|
||||
for (i=0; i<naggs; i++) {
|
||||
DBG_FPRINTF(stderr, "\taggr %-4d = %6d\n", i, tmp_ranklist[i] );
|
||||
}
|
||||
# endif
|
||||
|
||||
/* copy the ranklist of IO aggregators to fd->hints */
|
||||
@ -267,293 +308,34 @@ ADIOI_BGL_compute_agg_ranklist_serial ( ADIO_File fd,
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Compute a dynamic access range based file domain partition among I/O aggregators,
|
||||
* which align to the GPFS block size
|
||||
* Divide the I/O workload among "nprocs_for_coll" processes. This is
|
||||
* done by (logically) dividing the file into file domains (FDs); each
|
||||
* process may directly access only its own file domain.
|
||||
* Additional effort is to make sure that each I/O aggregator gets
|
||||
* a file domain that aligns to the GPFS block size. So, there will
|
||||
* not be any false sharing of GPFS file blocks among multiple I/O nodes.
|
||||
*/
|
||||
void ADIOI_BGL_GPFS_Calc_file_domains(ADIO_Offset *st_offsets,
|
||||
ADIO_Offset *end_offsets,
|
||||
int nprocs,
|
||||
int nprocs_for_coll,
|
||||
ADIO_Offset *min_st_offset_ptr,
|
||||
ADIO_Offset **fd_start_ptr,
|
||||
ADIO_Offset **fd_end_ptr,
|
||||
ADIO_Offset *fd_size_ptr,
|
||||
void *fs_ptr)
|
||||
{
|
||||
ADIO_Offset min_st_offset, max_end_offset, *fd_start, *fd_end, *fd_size;
|
||||
int i, aggr;
|
||||
static char myname[] = "ADIOI_BGL_GPFS_Calc_file_domains";
|
||||
__blksize_t blksize = 1048576; /* default to 1M */
|
||||
if(fs_ptr && ((ADIOI_BGL_fs*)fs_ptr)->blksize) /* ignore null ptr or 0 blksize */
|
||||
blksize = ((ADIOI_BGL_fs*)fs_ptr)->blksize;
|
||||
/* FPRINTF(stderr,"%s(%d): Blocksize=%ld\n",myname,__LINE__,blksize);*/
|
||||
|
||||
/* find the range of all the requests */
|
||||
min_st_offset = st_offsets [0];
|
||||
max_end_offset = end_offsets[0];
|
||||
for (i=1; i<nprocs; i++) {
|
||||
min_st_offset = ADIOI_MIN(min_st_offset, st_offsets[i]);
|
||||
max_end_offset = ADIOI_MAX(max_end_offset, end_offsets[i]);
|
||||
}
|
||||
|
||||
// printf( "_calc_file_domains, min_st_offset, max_ = %qd, %qd\n", min_st_offset, max_end_offset );
|
||||
|
||||
/* determine the "file domain (FD)" of each process, i.e., the portion of
|
||||
the file that will be "owned" by each process */
|
||||
|
||||
ADIO_Offset gpfs_ub = (max_end_offset +blksize-1) / blksize * blksize - 1;
|
||||
ADIO_Offset gpfs_lb = min_st_offset / blksize * blksize;
|
||||
ADIO_Offset gpfs_ub_rdoff = (max_end_offset +blksize-1) / blksize * blksize - 1 - max_end_offset;
|
||||
ADIO_Offset gpfs_lb_rdoff = min_st_offset - min_st_offset / blksize * blksize;
|
||||
ADIO_Offset fd_gpfs_range = gpfs_ub - gpfs_lb + 1;
|
||||
|
||||
int naggs = nprocs_for_coll;
|
||||
fd_size = (ADIO_Offset *) ADIOI_Malloc(nprocs_for_coll * sizeof(ADIO_Offset));
|
||||
*fd_start_ptr = (ADIO_Offset *) ADIOI_Malloc(nprocs_for_coll * sizeof(ADIO_Offset));
|
||||
*fd_end_ptr = (ADIO_Offset *) ADIOI_Malloc(nprocs_for_coll * sizeof(ADIO_Offset));
|
||||
fd_start = *fd_start_ptr;
|
||||
fd_end = *fd_end_ptr;
|
||||
|
||||
ADIO_Offset n_gpfs_blk = fd_gpfs_range / blksize;
|
||||
ADIO_Offset nb_cn_small = n_gpfs_blk/naggs;
|
||||
ADIO_Offset naggs_large = n_gpfs_blk - naggs * (n_gpfs_blk/naggs);
|
||||
ADIO_Offset naggs_small = naggs - naggs_large;
|
||||
|
||||
for (i=0; i<naggs; i++)
|
||||
if (i < naggs_small) fd_size[i] = nb_cn_small * blksize;
|
||||
else fd_size[i] = (nb_cn_small+1) * blksize;
|
||||
|
||||
/* FPRINTF(stderr,"%s(%d): "
|
||||
"gpfs_ub %llu, "
|
||||
"gpfs_lb %llu, "
|
||||
"gpfs_ub_rdoff %llu, "
|
||||
"gpfs_lb_rdoff %llu, "
|
||||
"fd_gpfs_range %llu, "
|
||||
"n_gpfs_blk %llu, "
|
||||
"nb_cn_small %llu, "
|
||||
"naggs_large %llu, "
|
||||
"naggs_small %llu, "
|
||||
"\n",
|
||||
myname,__LINE__,
|
||||
gpfs_ub ,
|
||||
gpfs_lb ,
|
||||
gpfs_ub_rdoff,
|
||||
gpfs_lb_rdoff,
|
||||
fd_gpfs_range,
|
||||
n_gpfs_blk ,
|
||||
nb_cn_small ,
|
||||
naggs_large ,
|
||||
naggs_small
|
||||
);
|
||||
*/
|
||||
fd_size[0] -= gpfs_lb_rdoff;
|
||||
fd_size[naggs-1] -= gpfs_ub_rdoff;
|
||||
|
||||
/* compute the file domain for each aggr */
|
||||
ADIO_Offset offset = min_st_offset;
|
||||
for (aggr=0; aggr<naggs; aggr++) {
|
||||
fd_start[aggr] = offset;
|
||||
fd_end [aggr] = offset + fd_size[aggr] - 1;
|
||||
offset += fd_size[aggr];
|
||||
}
|
||||
|
||||
*fd_size_ptr = fd_size[0];
|
||||
*min_st_offset_ptr = min_st_offset;
|
||||
|
||||
ADIOI_Free (fd_size);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* deprecated
|
||||
/* Description from common/ad_aggregate.c. (Does it completely apply to bgl?)
|
||||
* ADIOI_Calc_aggregator()
|
||||
*
|
||||
void ADIOI_BGL_GPFS_Calc_file_domain0(ADIO_Offset *st_offsets,
|
||||
ADIO_Offset *end_offsets,
|
||||
int nprocs,
|
||||
int nprocs_for_coll,
|
||||
ADIO_Offset *min_st_offset_ptr,
|
||||
ADIO_Offset **fd_start_ptr,
|
||||
ADIO_Offset **fd_end_ptr,
|
||||
ADIO_Offset *fd_size_ptr)
|
||||
{
|
||||
ADIO_Offset min_st_offset, max_end_offset, *fd_start, *fd_end, *fd_size;
|
||||
int i;
|
||||
static int GPFS_BSIZE=1048576;
|
||||
* find the range of all the requests *
|
||||
min_st_offset = st_offsets [0];
|
||||
max_end_offset = end_offsets[0];
|
||||
for (i=1; i<nprocs; i++) {
|
||||
min_st_offset = ADIOI_MIN(min_st_offset, st_offsets[i]);
|
||||
max_end_offset = ADIOI_MAX(max_end_offset, end_offsets[i]);
|
||||
}
|
||||
|
||||
* determine the "file domain (FD)" of each process, i.e., the portion of
|
||||
the file that will be "owned" by each process *
|
||||
|
||||
* GPFS specific, pseudo starting/end point has to round to GPFS_BSIZE *
|
||||
ADIO_Offset gpfs_ub = (max_end_offset +GPFS_BSIZE-1) / GPFS_BSIZE * GPFS_BSIZE - 1;
|
||||
ADIO_Offset gpfs_lb = min_st_offset / GPFS_BSIZE * GPFS_BSIZE;
|
||||
ADIO_Offset gpfs_ub_rdoff = (max_end_offset +GPFS_BSIZE-1) / GPFS_BSIZE * GPFS_BSIZE - 1 - max_end_offset;
|
||||
ADIO_Offset gpfs_lb_rdoff = min_st_offset - min_st_offset / GPFS_BSIZE * GPFS_BSIZE;
|
||||
ADIO_Offset fd_gpfs_range = gpfs_ub - gpfs_lb + 1;
|
||||
|
||||
* all computation of partition is based on the rounded pseudo-range *
|
||||
ADIO_Offset fds_ub = (fd_gpfs_range +nprocs_for_coll-1) / nprocs_for_coll;
|
||||
ADIO_Offset fds_lb = fd_gpfs_range / nprocs_for_coll;
|
||||
int naggs = nprocs_for_coll;
|
||||
int npsets = aggrsInPset[0]; * special meaning for element 0 *
|
||||
fd_size = (ADIO_Offset *) ADIOI_Malloc(naggs * sizeof(ADIO_Offset));
|
||||
*fd_start_ptr = (ADIO_Offset *) ADIOI_Malloc(naggs * sizeof(ADIO_Offset));
|
||||
*fd_end_ptr = (ADIO_Offset *) ADIOI_Malloc(naggs * sizeof(ADIO_Offset));
|
||||
fd_start = *fd_start_ptr;
|
||||
fd_end = *fd_end_ptr;
|
||||
|
||||
* some pre-computation to determine rough ratio of when to up-fit, when to low-fit *
|
||||
* 1. get the estimated data per pset *
|
||||
* 2. determine a factor between up and down *
|
||||
int avg_aggrsInPset = (naggs +npsets-1)/npsets;
|
||||
ADIO_Offset avg_bytes_perPset = fd_gpfs_range / npsets;
|
||||
ADIO_Offset resid = avg_bytes_perPset % GPFS_BSIZE;
|
||||
ADIO_Offset downr = GPFS_BSIZE - resid;
|
||||
int small = (resid < downr);
|
||||
int ratio = downr == 0 ? npsets + 2 : (resid +downr-1)/downr;
|
||||
if (small) ratio = resid == 0 ? npsets + 2 : (downr +resid-1)/resid;
|
||||
|
||||
|
||||
* go through aggrsInfo of all PSETs *
|
||||
ADIO_Offset fd_range = fd_gpfs_range;
|
||||
int aggr = 0, pset;
|
||||
for (pset=0; pset<npsets; pset++) {
|
||||
|
||||
ADIO_Offset fds_try = fds_lb;
|
||||
int my_naggs = aggrsInPset[pset+1];
|
||||
ADIO_Offset fds_pset;
|
||||
|
||||
* Last pset will deal with the residuals *
|
||||
if (pset == npsets-1)
|
||||
fds_pset = fd_range;
|
||||
else
|
||||
{
|
||||
int cond1 = ((pset+1) % ratio == 0);
|
||||
int cond2 = ((pset+1) % ratio != 0);
|
||||
|
||||
if (small) {
|
||||
int temp = cond1; cond1 = cond2; cond2 = temp;
|
||||
}
|
||||
|
||||
if (cond1) {
|
||||
fds_pset = fds_try * my_naggs;
|
||||
if (fds_pset % GPFS_BSIZE) // align to GPFS_BSIZE
|
||||
fds_pset = ((fds_pset +GPFS_BSIZE-1)/GPFS_BSIZE) * GPFS_BSIZE;
|
||||
}
|
||||
if (cond2)
|
||||
{
|
||||
fds_try = fds_ub;
|
||||
fds_pset = fds_try * my_naggs;
|
||||
if (fds_pset % GPFS_BSIZE) // align to GPFS_BSIZE
|
||||
fds_pset = (fds_pset / GPFS_BSIZE) * GPFS_BSIZE;
|
||||
}
|
||||
}
|
||||
|
||||
* for aggrs in each PSET, divide evenly the data range *
|
||||
#define CN_ALIGN 1
|
||||
#if !CN_ALIGN
|
||||
fd_range -= fds_pset;
|
||||
if ( pset == 0 ) fds_pset -= gpfs_lb_rdoff;
|
||||
if ( pset == npsets-1 ) fds_pset -= gpfs_ub_rdoff;
|
||||
int p;
|
||||
for (p=0; p<my_naggs; p++) {
|
||||
fd_size[aggr] = (fds_pset +my_naggs-1) / my_naggs;
|
||||
if (p== my_naggs-1)
|
||||
fd_size[aggr] -= (fd_size[aggr]*my_naggs - fds_pset);
|
||||
|
||||
aggr++;
|
||||
}
|
||||
#else
|
||||
ADIO_Offset avg_bytes_perP = fds_pset / my_naggs;
|
||||
ADIO_Offset resid2 = avg_bytes_perP % GPFS_BSIZE;
|
||||
ADIO_Offset downr2 = GPFS_BSIZE - resid2;
|
||||
int small2 = (resid2 < downr2);
|
||||
int ratio2 = downr2 == 0 ? my_naggs + 2 : (resid2 +downr2-1)/downr2;
|
||||
if (small2) ratio2 = resid2 == 0 ? my_naggs + 2 : (downr2 +resid2-1)/resid2;
|
||||
ADIO_Offset accu = 0;
|
||||
int p;
|
||||
for (p=0; p<my_naggs; p++) {
|
||||
int cond1 = ((p+1) % ratio2 == 0);
|
||||
int cond2 = ((p+1) % ratio2 != 0);
|
||||
if (small2) {
|
||||
int temp = cond1; cond1 = cond2; cond2 = temp;
|
||||
}
|
||||
fd_size[aggr] = avg_bytes_perP;
|
||||
if (cond2) fd_size[aggr] = ((fd_size[aggr] +GPFS_BSIZE-1)/GPFS_BSIZE) * GPFS_BSIZE;
|
||||
if (cond1) fd_size[aggr] = ((fd_size[aggr] )/GPFS_BSIZE) * GPFS_BSIZE;
|
||||
if (p== my_naggs-1)
|
||||
fd_size[aggr] = (fds_pset - accu);
|
||||
|
||||
accu += fd_size[aggr];
|
||||
fd_range -= fd_size[aggr];
|
||||
aggr++;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
* after scheduling, the first and the last region has to remove the round-off effect *
|
||||
|
||||
#if CN_ALIGN
|
||||
fd_size[0] -= gpfs_lb_rdoff;
|
||||
fd_size[naggs-1] -= gpfs_ub_rdoff;
|
||||
#endif
|
||||
|
||||
* compute the file domain for each aggr *
|
||||
ADIO_Offset offset = min_st_offset;
|
||||
for (aggr=0; aggr<naggs; aggr++) {
|
||||
fd_start[aggr] = offset;
|
||||
fd_end [aggr] = offset + fd_size[aggr] - 1;
|
||||
offset += fd_size[aggr];
|
||||
}
|
||||
|
||||
* The intention here is to implement a function which provides basically
|
||||
* the same functionality as in Rajeev's original version of
|
||||
* ADIOI_Calc_my_req(). He used a ceiling division approach to assign the
|
||||
* file domains, and we use the same approach here when calculating the
|
||||
* location of an offset/len in a specific file domain. Further we assume
|
||||
* this same distribution when calculating the rank_index, which is later
|
||||
* used to map to a specific process rank in charge of the file domain.
|
||||
*
|
||||
printf( "\t%6d : %12qd:%12qd, %12qd:%12qd:%12qd, %12qd:%12qd:%12qd\n",
|
||||
naggs,
|
||||
min_st_offset,
|
||||
max_end_offset,
|
||||
fd_start[0],
|
||||
fd_end [0],
|
||||
fd_size [0],
|
||||
fd_start[naggs-1],
|
||||
fd_end [naggs-1],
|
||||
fd_size [naggs-1] );
|
||||
* A better (i.e. more general) approach would be to use the list of file
|
||||
* domains only. This would be slower in the case where the
|
||||
* original ceiling division was used, but it would allow for arbitrary
|
||||
* distributions of regions to aggregators. We'd need to know the
|
||||
* nprocs_for_coll in that case though, which we don't have now.
|
||||
*
|
||||
|
||||
|
||||
*fd_size_ptr = fd_size[0];
|
||||
*min_st_offset_ptr = min_st_offset;
|
||||
|
||||
ADIOI_Free (fd_size);
|
||||
}
|
||||
* Note a significant difference between this function and Rajeev's old code:
|
||||
* this code doesn't necessarily return a rank in the range
|
||||
* 0..nprocs_for_coll; instead you get something in 0..nprocs. This is a
|
||||
* result of the rank mapping; any set of ranks in the communicator could be
|
||||
* used now.
|
||||
*
|
||||
* Returns an integer representing a rank in the collective I/O communicator.
|
||||
*
|
||||
* The "len" parameter is also modified to indicate the amount of data
|
||||
* actually available in this file domain.
|
||||
*/
|
||||
|
||||
/*
|
||||
* When a process is an IO aggregator, this will return its index in the aggrs list.
|
||||
* Otherwise, this will return -1
|
||||
*/
|
||||
int ADIOI_BGL_Aggrs_index( ADIO_File fd, int myrank )
|
||||
{
|
||||
int i;
|
||||
for (i=0; i<fd->hints->cb_nodes; i++)
|
||||
if (fd->hints->ranklist[i] == myrank) return i;
|
||||
return -1;
|
||||
}
|
||||
|
||||
/*
|
||||
* This is a more general aggregator search function which does not rely on the assumption
|
||||
* that each aggregator hosts the file domain with the same size
|
||||
@ -574,6 +356,21 @@ int ADIOI_BGL_Calc_aggregator(ADIO_File fd,
|
||||
/* binary search --> rank_index is returned */
|
||||
int ub = fd->hints->cb_nodes;
|
||||
int lb = 0;
|
||||
/* get an index into our array of aggregators */
|
||||
/* Common code for striping - bgl doesn't use it but it's
|
||||
here to make diff'ing easier.
|
||||
rank_index = (int) ((off - min_off + fd_size)/ fd_size - 1);
|
||||
|
||||
if (fd->hints->striping_unit > 0) {
|
||||
* wkliao: implementation for file domain alignment
|
||||
fd_start[] and fd_end[] have been aligned with file lock
|
||||
boundaries when returned from ADIOI_Calc_file_domains() so cannot
|
||||
just use simple arithmetic as above *
|
||||
rank_index = 0;
|
||||
while (off > fd_end[rank_index]) rank_index++;
|
||||
}
|
||||
bgl does its own striping below
|
||||
*/
|
||||
rank_index = fd->hints->cb_nodes / 2;
|
||||
while ( off < fd_start[rank_index] || off > fd_end[rank_index] ) {
|
||||
if ( off > fd_end [rank_index] ) {
|
||||
@ -586,8 +383,15 @@ int ADIOI_BGL_Calc_aggregator(ADIO_File fd,
|
||||
rank_index = (rank_index + lb) / 2;
|
||||
}
|
||||
}
|
||||
|
||||
// printf ("ADIOI_BGL_Calc_aggregator: rank_index = %d\n", rank_index );
|
||||
/* we index into fd_end with rank_index, and fd_end was allocated to be no
|
||||
* bigger than fd->hints->cb_nodes. If we ever violate that, we're
|
||||
* overrunning arrays. Obviously, we should never ever hit this abort */
|
||||
if (rank_index >= fd->hints->cb_nodes || rank_index < 0) {
|
||||
FPRINTF(stderr, "Error in ADIOI_Calc_aggregator(): rank_index(%d) >= fd->hints->cb_nodes (%d) fd_size=%lld off=%lld\n",
|
||||
rank_index,fd->hints->cb_nodes,fd_size,off);
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
// DBG_FPRINTF ("ADIOI_BGL_Calc_aggregator: rank_index = %d\n", rank_index );
|
||||
|
||||
/*
|
||||
* remember here that even in Rajeev's original code it was the case that
|
||||
@ -611,16 +415,161 @@ int ADIOI_BGL_Calc_aggregator(ADIO_File fd,
|
||||
return rank;
|
||||
}
|
||||
|
||||
/*
|
||||
* Compute a dynamic access range based file domain partition among I/O aggregators,
|
||||
* which align to the GPFS block size
|
||||
* Divide the I/O workload among "nprocs_for_coll" processes. This is
|
||||
* done by (logically) dividing the file into file domains (FDs); each
|
||||
* process may directly access only its own file domain.
|
||||
* Additional effort is to make sure that each I/O aggregator gets
|
||||
* a file domain that aligns to the GPFS block size. So, there will
|
||||
* not be any false sharing of GPFS file blocks among multiple I/O nodes.
|
||||
*
|
||||
* The common version of this now accepts a min_fd_size and striping_unit.
|
||||
* It doesn't seem necessary here (using GPFS block sizes) but keep it in mind
|
||||
* (e.g. we could pass striping unit instead of using fs_ptr->blksize).
|
||||
*/
|
||||
void ADIOI_BGL_GPFS_Calc_file_domains(ADIO_Offset *st_offsets,
|
||||
ADIO_Offset *end_offsets,
|
||||
int nprocs,
|
||||
int nprocs_for_coll,
|
||||
ADIO_Offset *min_st_offset_ptr,
|
||||
ADIO_Offset **fd_start_ptr,
|
||||
ADIO_Offset **fd_end_ptr,
|
||||
ADIO_Offset *fd_size_ptr,
|
||||
void *fs_ptr)
|
||||
{
|
||||
ADIO_Offset min_st_offset, max_end_offset, *fd_start, *fd_end, *fd_size;
|
||||
int i, aggr;
|
||||
|
||||
#ifdef AGGREGATION_PROFILE
|
||||
MPE_Log_event (5004, 0, NULL);
|
||||
#endif
|
||||
|
||||
# if AGG_DEBUG
|
||||
static char myname[] = "ADIOI_BGL_GPFS_Calc_file_domains";
|
||||
DBG_FPRINTF(stderr, "%s(%d): %d aggregator(s)\n",
|
||||
myname,__LINE__,nprocs_for_coll);
|
||||
# endif
|
||||
__blksize_t blksize = 1048576; /* default to 1M */
|
||||
if(fs_ptr && ((ADIOI_BGL_fs*)fs_ptr)->blksize) /* ignore null ptr or 0 blksize */
|
||||
blksize = ((ADIOI_BGL_fs*)fs_ptr)->blksize;
|
||||
# if AGG_DEBUG
|
||||
DBG_FPRINTF(stderr,"%s(%d): Blocksize=%ld\n",myname,__LINE__,blksize);
|
||||
# endif
|
||||
/* find min of start offsets and max of end offsets of all processes */
|
||||
min_st_offset = st_offsets [0];
|
||||
max_end_offset = end_offsets[0];
|
||||
for (i=1; i<nprocs; i++) {
|
||||
min_st_offset = ADIOI_MIN(min_st_offset, st_offsets[i]);
|
||||
max_end_offset = ADIOI_MAX(max_end_offset, end_offsets[i]);
|
||||
}
|
||||
|
||||
// DBG_FPRINTF(stderr, "_calc_file_domains, min_st_offset, max_ = %qd, %qd\n", min_st_offset, max_end_offset );
|
||||
|
||||
/* determine the "file domain (FD)" of each process, i.e., the portion of
|
||||
the file that will be "owned" by each process */
|
||||
|
||||
ADIO_Offset gpfs_ub = (max_end_offset +blksize-1) / blksize * blksize - 1;
|
||||
ADIO_Offset gpfs_lb = min_st_offset / blksize * blksize;
|
||||
ADIO_Offset gpfs_ub_rdoff = (max_end_offset +blksize-1) / blksize * blksize - 1 - max_end_offset;
|
||||
ADIO_Offset gpfs_lb_rdoff = min_st_offset - min_st_offset / blksize * blksize;
|
||||
ADIO_Offset fd_gpfs_range = gpfs_ub - gpfs_lb + 1;
|
||||
|
||||
int naggs = nprocs_for_coll;
|
||||
|
||||
/* Tweak the file domains so that no fd is smaller than a threshold. We
|
||||
* have to strike a balance between efficiency and parallelism: somewhere
|
||||
* between 10k processes sending 32-byte requests and one process sending a
|
||||
* 320k request is a (system-dependent) sweet spot
|
||||
|
||||
This is from the common code - the new min_fd_size parm that we didn't implement.
|
||||
(And common code uses a different declaration of fd_size so beware)
|
||||
|
||||
if (fd_size < min_fd_size)
|
||||
fd_size = min_fd_size;
|
||||
*/
|
||||
fd_size = (ADIO_Offset *) ADIOI_Malloc(nprocs_for_coll * sizeof(ADIO_Offset));
|
||||
*fd_start_ptr = (ADIO_Offset *) ADIOI_Malloc(nprocs_for_coll * sizeof(ADIO_Offset));
|
||||
*fd_end_ptr = (ADIO_Offset *) ADIOI_Malloc(nprocs_for_coll * sizeof(ADIO_Offset));
|
||||
fd_start = *fd_start_ptr;
|
||||
fd_end = *fd_end_ptr;
|
||||
|
||||
ADIO_Offset n_gpfs_blk = fd_gpfs_range / blksize;
|
||||
ADIO_Offset nb_cn_small = n_gpfs_blk/naggs;
|
||||
ADIO_Offset naggs_large = n_gpfs_blk - naggs * (n_gpfs_blk/naggs);
|
||||
ADIO_Offset naggs_small = naggs - naggs_large;
|
||||
|
||||
for (i=0; i<naggs; i++)
|
||||
if (i < naggs_small) fd_size[i] = nb_cn_small * blksize;
|
||||
else fd_size[i] = (nb_cn_small+1) * blksize;
|
||||
|
||||
# if AGG_DEBUG
|
||||
DBG_FPRINTF(stderr,"%s(%d): "
|
||||
"gpfs_ub %llu, "
|
||||
"gpfs_lb %llu, "
|
||||
"gpfs_ub_rdoff %llu, "
|
||||
"gpfs_lb_rdoff %llu, "
|
||||
"fd_gpfs_range %llu, "
|
||||
"n_gpfs_blk %llu, "
|
||||
"nb_cn_small %llu, "
|
||||
"naggs_large %llu, "
|
||||
"naggs_small %llu, "
|
||||
"\n",
|
||||
myname,__LINE__,
|
||||
gpfs_ub ,
|
||||
gpfs_lb ,
|
||||
gpfs_ub_rdoff,
|
||||
gpfs_lb_rdoff,
|
||||
fd_gpfs_range,
|
||||
n_gpfs_blk ,
|
||||
nb_cn_small ,
|
||||
naggs_large ,
|
||||
naggs_small
|
||||
);
|
||||
# endif
|
||||
|
||||
fd_size[0] -= gpfs_lb_rdoff;
|
||||
fd_size[naggs-1] -= gpfs_ub_rdoff;
|
||||
|
||||
/* compute the file domain for each aggr */
|
||||
ADIO_Offset offset = min_st_offset;
|
||||
for (aggr=0; aggr<naggs; aggr++) {
|
||||
fd_start[aggr] = offset;
|
||||
fd_end [aggr] = offset + fd_size[aggr] - 1;
|
||||
offset += fd_size[aggr];
|
||||
}
|
||||
|
||||
*fd_size_ptr = fd_size[0];
|
||||
*min_st_offset_ptr = min_st_offset;
|
||||
|
||||
#ifdef AGGREGATION_PROFILE
|
||||
MPE_Log_event (5005, 0, NULL);
|
||||
#endif
|
||||
ADIOI_Free (fd_size);
|
||||
}
|
||||
|
||||
/*
|
||||
* When a process is an IO aggregator, this will return its index in the aggrs list.
|
||||
* Otherwise, this will return -1
|
||||
*/
|
||||
int ADIOI_BGL_Aggrs_index( ADIO_File fd, int myrank )
|
||||
{
|
||||
int i;
|
||||
for (i=0; i<fd->hints->cb_nodes; i++)
|
||||
if (fd->hints->ranklist[i] == myrank) return i;
|
||||
return -1;
|
||||
}
|
||||
|
||||
/*
|
||||
* ADIOI_BGL_Calc_my_req() overrides ADIOI_Calc_my_req because the default implementation
|
||||
* is specific to static file domain partitioning.
|
||||
*
|
||||
* ADIOI_Calc_my_req() calculate what portions of the access requests
|
||||
* ADIOI_Calc_my_req() - calculate what portions of the access requests
|
||||
* of this process are located in the file domains of various processes
|
||||
* (including this one)
|
||||
*/
|
||||
void ADIOI_BGL_Calc_my_req(ADIO_File fd, ADIO_Offset *offset_list, int *len_list,
|
||||
void ADIOI_BGL_Calc_my_req(ADIO_File fd, ADIO_Offset *offset_list, ADIO_Offset *len_list,
|
||||
int contig_access_count, ADIO_Offset
|
||||
min_st_offset, ADIO_Offset *fd_start,
|
||||
ADIO_Offset *fd_end, ADIO_Offset fd_size,
|
||||
@ -629,12 +578,17 @@ void ADIOI_BGL_Calc_my_req(ADIO_File fd, ADIO_Offset *offset_list, int *len_list
|
||||
int **count_my_req_per_proc_ptr,
|
||||
ADIOI_Access **my_req_ptr,
|
||||
int **buf_idx_ptr)
|
||||
/* Possibly reconsider if buf_idx's are ok as int's, or should they be aints/offsets?
|
||||
They are used as memory buffer indices so it seems like the 2G limit is in effect */
|
||||
{
|
||||
int *count_my_req_per_proc, count_my_req_procs, *buf_idx;
|
||||
int i, l, proc;
|
||||
ADIO_Offset fd_len, rem_len, curr_idx, off;
|
||||
ADIOI_Access *my_req;
|
||||
|
||||
#ifdef AGGREGATION_PROFILE
|
||||
MPE_Log_event (5024, 0, NULL);
|
||||
#endif
|
||||
|
||||
*count_my_req_per_proc_ptr = (int *) ADIOI_Calloc(nprocs,sizeof(int));
|
||||
count_my_req_per_proc = *count_my_req_per_proc_ptr;
|
||||
@ -656,10 +610,10 @@ void ADIOI_BGL_Calc_my_req(ADIO_File fd, ADIO_Offset *offset_list, int *len_list
|
||||
* contig_access_count was calculated way back in ADIOI_Calc_my_off_len()
|
||||
*/
|
||||
for (i=0; i < contig_access_count; i++) {
|
||||
|
||||
/* When there is no data being processed, bypass this loop */
|
||||
if (len_list[i] == 0) continue;
|
||||
|
||||
/* short circuit offset/len processing if len == 0
|
||||
* (zero-byte read/write) */
|
||||
if (len_list[i] == 0)
|
||||
continue;
|
||||
off = offset_list[i];
|
||||
fd_len = len_list[i];
|
||||
/* note: we set fd_len to be the total size of the access. then
|
||||
@ -710,20 +664,24 @@ void ADIOI_BGL_Calc_my_req(ADIO_File fd, ADIO_Offset *offset_list, int *len_list
|
||||
/* now fill in my_req */
|
||||
curr_idx = 0;
|
||||
for (i=0; i<contig_access_count; i++) {
|
||||
|
||||
/* When there is no data being processed, bypass this loop */
|
||||
if (len_list[i] == 0) continue;
|
||||
|
||||
/* short circuit offset/len processing if len == 0
|
||||
* (zero-byte read/write) */
|
||||
if (len_list[i] == 0)
|
||||
continue;
|
||||
off = offset_list[i];
|
||||
fd_len = len_list[i];
|
||||
proc = ADIOI_BGL_Calc_aggregator(fd, off, min_st_offset, &fd_len, fd_size,
|
||||
fd_start, fd_end);
|
||||
|
||||
/* for each separate contiguous access from this process */
|
||||
if (buf_idx[proc] == -1) buf_idx[proc] = (int) curr_idx;
|
||||
if (buf_idx[proc] == -1)
|
||||
{
|
||||
ADIOI_Assert(curr_idx == (int) curr_idx);
|
||||
buf_idx[proc] = (int) curr_idx;
|
||||
}
|
||||
|
||||
l = my_req[proc].count;
|
||||
curr_idx += (int) fd_len; /* NOTE: Why is curr_idx an int? Fix? */
|
||||
curr_idx += fd_len;
|
||||
|
||||
rem_len = len_list[i] - fd_len;
|
||||
|
||||
@ -733,6 +691,7 @@ void ADIOI_BGL_Calc_my_req(ADIO_File fd, ADIO_Offset *offset_list, int *len_list
|
||||
* and the associated count.
|
||||
*/
|
||||
my_req[proc].offsets[l] = off;
|
||||
ADIOI_Assert(fd_len == (int) fd_len);
|
||||
my_req[proc].lens[l] = (int) fd_len;
|
||||
my_req[proc].count++;
|
||||
|
||||
@ -742,13 +701,18 @@ void ADIOI_BGL_Calc_my_req(ADIO_File fd, ADIO_Offset *offset_list, int *len_list
|
||||
proc = ADIOI_BGL_Calc_aggregator(fd, off, min_st_offset, &fd_len,
|
||||
fd_size, fd_start, fd_end);
|
||||
|
||||
if (buf_idx[proc] == -1) buf_idx[proc] = (int) curr_idx;
|
||||
if (buf_idx[proc] == -1)
|
||||
{
|
||||
ADIOI_Assert(curr_idx == (int) curr_idx);
|
||||
buf_idx[proc] = (int) curr_idx;
|
||||
}
|
||||
|
||||
l = my_req[proc].count;
|
||||
curr_idx += fd_len;
|
||||
rem_len -= fd_len;
|
||||
|
||||
my_req[proc].offsets[l] = off;
|
||||
ADIOI_Assert(fd_len == (int) fd_len);
|
||||
my_req[proc].lens[l] = (int) fd_len;
|
||||
my_req[proc].count++;
|
||||
}
|
||||
@ -757,27 +721,26 @@ void ADIOI_BGL_Calc_my_req(ADIO_File fd, ADIO_Offset *offset_list, int *len_list
|
||||
#ifdef AGG_DEBUG
|
||||
for (i=0; i<nprocs; i++) {
|
||||
if (count_my_req_per_proc[i] > 0) {
|
||||
FPRINTF(stdout, "data needed from %d (count = %d):\n", i,
|
||||
DBG_FPRINTF(stderr, "data needed from %d (count = %d):\n", i,
|
||||
my_req[i].count);
|
||||
for (l=0; l < my_req[i].count; l++) {
|
||||
FPRINTF(stdout, " off[%d] = %Ld, len[%d] = %d\n", l,
|
||||
DBG_FPRINTF(stderr, " off[%d] = %lld, len[%d] = %d\n", l,
|
||||
my_req[i].offsets[l], l, my_req[i].lens[l]);
|
||||
}
|
||||
}
|
||||
DBG_FPRINTF(stderr, "buf_idx[%d] = 0x%x\n", i, buf_idx[i]);
|
||||
}
|
||||
#if 0
|
||||
for (i=0; i<nprocs; i++) {
|
||||
FPRINTF(stdout, "buf_idx[%d] = 0x%x\n", i, buf_idx[i]);
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
*count_my_req_procs_ptr = count_my_req_procs;
|
||||
*buf_idx_ptr = buf_idx;
|
||||
#ifdef AGGREGATION_PROFILE
|
||||
MPE_Log_event (5025, 0, NULL);
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
* ADIOI_Calc_others_req
|
||||
* ADIOI_Calc_others_req (copied to bgl and switched to all to all for performance)
|
||||
*
|
||||
* param[in] count_my_req_procs Number of processes whose file domain my
|
||||
* request touches.
|
||||
@ -826,7 +789,9 @@ void ADIOI_BGL_Calc_others_req(ADIO_File fd, int count_my_req_procs,
|
||||
*recvBufForLens =(void*)0xFFFFFFFF;
|
||||
|
||||
/* first find out how much to send/recv and from/to whom */
|
||||
|
||||
#ifdef AGGREGATION_PROFILE
|
||||
MPE_Log_event (5026, 0, NULL);
|
||||
#endif
|
||||
/* Send 1 int to each process. count_my_req_per_proc[i] is the number of
|
||||
* requests that my process will do to the file domain owned by process[i].
|
||||
* Receive 1 int from each process. count_others_req_per_proc[i] is the number of
|
||||
@ -866,9 +831,9 @@ void ADIOI_BGL_Calc_others_req(ADIO_File fd, int count_my_req_procs,
|
||||
others_req[i].lens = (int *)
|
||||
ADIOI_Malloc(count_others_req_per_proc[i]*sizeof(int));
|
||||
|
||||
if ( (unsigned)others_req[i].offsets < (unsigned)recvBufForOffsets )
|
||||
if ( (MPIR_Upint)others_req[i].offsets < (MPIR_Upint)recvBufForOffsets )
|
||||
recvBufForOffsets = others_req[i].offsets;
|
||||
if ( (unsigned)others_req[i].lens < (unsigned)recvBufForLens )
|
||||
if ( (MPIR_Upint)others_req[i].lens < (MPIR_Upint)recvBufForLens )
|
||||
recvBufForLens = others_req[i].lens;
|
||||
|
||||
others_req[i].mem_ptrs = (MPI_Aint *)
|
||||
@ -883,6 +848,9 @@ void ADIOI_BGL_Calc_others_req(ADIO_File fd, int count_my_req_procs,
|
||||
others_req[i].lens = NULL;
|
||||
}
|
||||
}
|
||||
/* If no recv buffer was allocated in the loop above, make it NULL */
|
||||
if ( recvBufForOffsets == (void*)0xFFFFFFFF) recvBufForOffsets = NULL;
|
||||
if ( recvBufForLens == (void*)0xFFFFFFFF) recvBufForLens = NULL;
|
||||
|
||||
/* Now send the calculated offsets and lengths to respective processes */
|
||||
|
||||
@ -894,14 +862,18 @@ void ADIOI_BGL_Calc_others_req(ADIO_File fd, int count_my_req_procs,
|
||||
for (i=0; i<nprocs; i++)
|
||||
{
|
||||
if ( (my_req[i].count) &&
|
||||
((unsigned)my_req[i].offsets <= (unsigned)sendBufForOffsets) )
|
||||
((MPIR_Upint)my_req[i].offsets <= (MPIR_Upint)sendBufForOffsets) )
|
||||
sendBufForOffsets = my_req[i].offsets;
|
||||
|
||||
if ( (my_req[i].count) &&
|
||||
((unsigned)my_req[i].lens <= (unsigned)sendBufForLens) )
|
||||
((MPIR_Upint)my_req[i].lens <= (MPIR_Upint)sendBufForLens) )
|
||||
sendBufForLens = my_req[i].lens;
|
||||
}
|
||||
|
||||
/* If no send buffer was found in the loop above, make it NULL */
|
||||
if ( sendBufForOffsets == (void*)0xFFFFFFFF) sendBufForOffsets = NULL;
|
||||
if ( sendBufForLens == (void*)0xFFFFFFFF) sendBufForLens = NULL;
|
||||
|
||||
/* Calculate the displacements from the sendBufForOffsets/Lens */
|
||||
for (i=0; i<nprocs; i++)
|
||||
{
|
||||
@ -910,16 +882,20 @@ void ADIOI_BGL_Calc_others_req(ADIO_File fd, int count_my_req_procs,
|
||||
if ( scounts[i] == 0 )
|
||||
sdispls[i] = 0;
|
||||
else
|
||||
sdispls[i] = ( (unsigned)my_req[i].offsets -
|
||||
(unsigned)sendBufForOffsets ) / sizeof(ADIO_Offset);
|
||||
sdispls[i] = (int)
|
||||
( ( (MPIR_Upint)my_req[i].offsets -
|
||||
(MPIR_Upint)sendBufForOffsets ) /
|
||||
(MPIR_Upint)sizeof(ADIO_Offset) );
|
||||
|
||||
// Receive these offsets from process i.
|
||||
rcounts[i] = count_others_req_per_proc[i];
|
||||
if ( rcounts[i] == 0 )
|
||||
rdispls[i] = 0;
|
||||
else
|
||||
rdispls[i] = ( (unsigned)others_req[i].offsets -
|
||||
(unsigned)recvBufForOffsets ) / sizeof(ADIO_Offset);
|
||||
rdispls[i] = (int)
|
||||
( ( (MPIR_Upint)others_req[i].offsets -
|
||||
(MPIR_Upint)recvBufForOffsets ) /
|
||||
(MPIR_Upint)sizeof(ADIO_Offset) );
|
||||
}
|
||||
|
||||
/* Exchange the offsets */
|
||||
@ -940,16 +916,20 @@ void ADIOI_BGL_Calc_others_req(ADIO_File fd, int count_my_req_procs,
|
||||
if ( scounts[i] == 0 )
|
||||
sdispls[i] = 0;
|
||||
else
|
||||
sdispls[i] = ( (unsigned)my_req[i].lens -
|
||||
(unsigned)sendBufForLens ) / sizeof(int);
|
||||
sdispls[i] = (int)
|
||||
( ( (MPIR_Upint)my_req[i].lens -
|
||||
(MPIR_Upint)sendBufForLens ) /
|
||||
(MPIR_Upint) sizeof(int) );
|
||||
|
||||
// Receive these offsets from process i.
|
||||
rcounts[i] = count_others_req_per_proc[i];
|
||||
if ( rcounts[i] == 0 )
|
||||
rdispls[i] = 0;
|
||||
else
|
||||
rdispls[i] = ( (unsigned)others_req[i].lens -
|
||||
(unsigned)recvBufForLens ) / sizeof(int);
|
||||
rdispls[i] = (int)
|
||||
( ( (MPIR_Upint)others_req[i].lens -
|
||||
(MPIR_Upint)recvBufForLens ) /
|
||||
(MPIR_Upint) sizeof(int) );
|
||||
}
|
||||
|
||||
/* Exchange the lengths */
|
||||
@ -967,4 +947,7 @@ void ADIOI_BGL_Calc_others_req(ADIO_File fd, int count_my_req_procs,
|
||||
ADIOI_Free (rdispls);
|
||||
|
||||
*count_others_req_procs_ptr = count_others_req_procs;
|
||||
#ifdef AGGREGATION_PROFILE
|
||||
MPE_Log_event (5027, 0, NULL);
|
||||
#endif
|
||||
}
|
||||
|
@ -1,5 +1,6 @@
|
||||
/* ---------------------------------------------------------------- */
|
||||
/* (C)Copyright IBM Corp. 2007, 2008 */
|
||||
/* ---------------------------------------------------------------- */
|
||||
/**
|
||||
* \file ad_bgl_aggrs.h
|
||||
* \brief ???
|
||||
@ -22,13 +23,22 @@
|
||||
#include "adio.h"
|
||||
#include <sys/stat.h>
|
||||
|
||||
extern int *aggrsInPset; /* defined in ad_bgl_aggrs.c */
|
||||
#if !defined(GPFS_SUPER_MAGIC)
|
||||
#define GPFS_SUPER_MAGIC (0x47504653)
|
||||
#endif
|
||||
|
||||
#if !defined(PVFS2_SUPER_MAGIC)
|
||||
#define PVFS2_SUPER_MAGIC (0x20030528)
|
||||
#endif
|
||||
|
||||
/* Bit flags stored in ADIOI_BGL_fs.fsync_aggr ("fsync aggregation" state). */
#define ADIOI_BGL_FSYNC_AGGREGATION_DISABLED 0x00
#define ADIOI_BGL_FSYNC_AGGREGATION_ENABLED  0x01
#define ADIOI_BGL_FSYNC_AGGREGATOR           0x10 /* This rank is an aggregator */

/* Per-file, BGL-specific file system state.
 * An instance is attached to the ADIOI_FileD file descriptor through
 * fd->fs_ptr when the file is opened. */
typedef struct ADIOI_BGL_fs_s {
    /* Block size associated with the file.
     * NOTE(review): presumably the stat() block size gathered at open
     * time -- confirm against the open path. */
    __blksize_t blksize;

    /* OR-combination of the ADIOI_BGL_FSYNC_* flags defined above. */
    int fsync_aggr;
} ADIOI_BGL_fs;
|
||||
|
||||
/* generate a list of I/O aggregators that utilizes BGL-PSET organization. */
|
||||
@ -60,7 +70,7 @@
|
||||
|
||||
/* overriding ADIOI_Calc_my_req for the default implementation is specific for
|
||||
static file domain partitioning */
|
||||
void ADIOI_BGL_Calc_my_req ( ADIO_File fd, ADIO_Offset *offset_list, int *len_list,
|
||||
void ADIOI_BGL_Calc_my_req ( ADIO_File fd, ADIO_Offset *offset_list, ADIO_Offset *len_list,
|
||||
int contig_access_count, ADIO_Offset
|
||||
min_st_offset, ADIO_Offset *fd_start,
|
||||
ADIO_Offset *fd_end, ADIO_Offset fd_size,
|
||||
|
@ -1,7 +1,8 @@
|
||||
/* ---------------------------------------------------------------- */
|
||||
/* (C)Copyright IBM Corp. 2007, 2008 */
|
||||
/* ---------------------------------------------------------------- */
|
||||
/**
|
||||
* \file ad_bgl_open.c
|
||||
* \file ad_bgl_close.c
|
||||
* \brief ???
|
||||
*/
|
||||
|
||||
|
@ -1,5 +1,6 @@
|
||||
/* ---------------------------------------------------------------- */
|
||||
/* (C)Copyright IBM Corp. 2007, 2008 */
|
||||
/* ---------------------------------------------------------------- */
|
||||
/**
|
||||
* \file ad_bgl_fcntl.c
|
||||
* \brief ???
|
||||
|
90
ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_flush.c
Обычный файл
90
ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_flush.c
Обычный файл
@ -0,0 +1,90 @@
|
||||
/* ---------------------------------------------------------------- */
|
||||
/* (C)Copyright IBM Corp. 2007, 2008 */
|
||||
/* ---------------------------------------------------------------- */
|
||||
/**
|
||||
* \file ad_bgl_flush.c
|
||||
* \brief Scalable flush based on underlying filesystem and psets
|
||||
*/
|
||||
|
||||
/* -*- Mode: C; c-basic-offset:4 ; -*- */
|
||||
/*
|
||||
*
|
||||
* Copyright (C) 1997 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
#include "ad_bgl.h"
|
||||
#include "ad_bgl_aggrs.h"
|
||||
|
||||
/**
 * Flush a file to storage with fsync(), optionally aggregating the calls.
 *
 * When the ADIOI_BGL_FSYNC_AGGREGATION_ENABLED flag is set in the
 * ADIOI_BGL_fs structure hung off fd->fs_ptr, the call is collective over
 * fd->comm: all ranks synchronize on a barrier, only ranks flagged as
 * ADIOI_BGL_FSYNC_AGGREGATOR call fsync(), and the outcome is shared with
 * every rank through an all-reduce.  Otherwise every caller simply issues
 * its own fsync().
 *
 * \param[in]  fd          Open ADIO file; fd->fd_sys is the descriptor
 *                         passed to fsync().
 * \param[out] error_code  MPI_SUCCESS, or an MPI_ERR_IO-class code built
 *                         from the fsync() errno on failure.
 *
 * On failure errno is left set to the errno reported by the failing fsync().
 */
void ADIOI_BGL_Flush(ADIO_File fd, int *error_code)
{
    int err = 0;
    /* errno of the fsync() call, captured right after the call so that
     * later library calls (debug logging, MPI) cannot overwrite it. */
    int fsync_errno = 0;
    static char myname[] = "ADIOI_BGL_FLUSH";

    if (((ADIOI_BGL_fs *) fd->fs_ptr)->fsync_aggr & ADIOI_BGL_FSYNC_AGGREGATION_ENABLED)
    {
        int rank;

        /* Barrier so we can collectively do fewer fsync's */
        MPI_Barrier(fd->comm);

        MPI_Comm_rank(fd->comm, &rank);

        /* All ranks marked as "fsync aggregators" should fsync.
           (We currently only do one fsync on rank 0 but this is general
           enough to support >1 aggregator using allreduce to get the
           results instead of simply bcast'ing the results from rank 0.) */
        if (((ADIOI_BGL_fs *) fd->fs_ptr)->fsync_aggr & ADIOI_BGL_FSYNC_AGGREGATOR)
        {
            err = fsync(fd->fd_sys);
            fsync_errno = errno;   /* save before anything can clobber it */
            DBG_FPRINTF(stderr,"aggregation:fsync %s, err=%#X, errno=%#X\n",fd->filename, err, fsync_errno);
            /* We want errno, not the return code if it failed */
            err = (err == -1) ? fsync_errno : 0;
        }

        /* Just pick an errno (using unsigned MPI_MAX) from any failures.
         * Ranks that did not fsync, or whose fsync succeeded, contribute 0,
         * so a non-zero result means at least one aggregator failed. */
        MPI_Allreduce(MPI_IN_PLACE, (unsigned *) &err, 1, MPI_UNSIGNED, MPI_MAX, fd->comm);
        DBGV_FPRINTF(stderr,"aggregation result:fsync %s, errno %#X,\n",fd->filename, err);

        if (err)   /* if it's non-zero, it must be an errno */
        {
            fsync_errno = err;
            err = -1;
        }
    }
    else   /* Non-aggregated fsync */
    {
#ifdef USE_DBG_LOGGING
        int rank;
#endif
        err = fsync(fd->fd_sys);
        fsync_errno = errno;       /* save before logging / MPI calls */
#ifdef USE_DBG_LOGGING
        MPI_Comm_rank(fd->comm, &rank);

        if (rank == 0)
        {
            DBG_FPRINTF(stderr,"no aggregation:fsync %s, err=%#X, errno=%#X\n",fd->filename, err, fsync_errno);
        }
        else
        {
            DBGV_FPRINTF(stderr,"no aggregation:fsync %s, err=%#X, errno=%#X\n",fd->filename, err, fsync_errno);
        }
#endif
    }

    /* --BEGIN ERROR HANDLING-- */
    if (err == -1)
    {
        /* Re-publish the fsync errno for callers that inspect errno. */
        errno = fsync_errno;
        *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
                                           myname, __LINE__, MPI_ERR_IO,
                                           "**io",
                                           "**io %s", strerror(fsync_errno));
        DBGT_FPRINTF(stderr,"fsync %s, err=%#X, errno=%#X\n",fd->filename, err, fsync_errno);
        return;
    }
    /* --END ERROR HANDLING-- */

    *error_code = MPI_SUCCESS;
}
|
||||
|
@ -38,8 +38,8 @@ void ADIOI_BGL_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
|
||||
|
||||
MPI_Info info;
|
||||
char *value;
|
||||
int flag, intval, tmp_val, nprocs, nprocs_is_valid = 0;
|
||||
static char myname[] = "ADIOI_GEN_SETINFO";
|
||||
int flag, intval, tmp_val, nprocs=0, nprocs_is_valid = 0;
|
||||
static char myname[] = "ADIOI_BGL_SETINFO";
|
||||
|
||||
int did_anything = 0;
|
||||
|
||||
@ -61,15 +61,15 @@ void ADIOI_BGL_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
|
||||
did_anything = 1;
|
||||
|
||||
/* buffer size for collective I/O */
|
||||
MPI_Info_set(info, "cb_buffer_size", ADIOI_BGL_CB_BUFFER_SIZE_DFLT);
|
||||
ADIOI_Info_set(info, "cb_buffer_size", ADIOI_BGL_CB_BUFFER_SIZE_DFLT);
|
||||
fd->hints->cb_buffer_size = atoi(ADIOI_BGL_CB_BUFFER_SIZE_DFLT);
|
||||
|
||||
/* default is to let romio automatically decide when to use
|
||||
* collective buffering
|
||||
*/
|
||||
MPI_Info_set(info, "romio_cb_read", "enable");
|
||||
ADIOI_Info_set(info, "romio_cb_read", "enable");
|
||||
fd->hints->cb_read = ADIOI_HINT_ENABLE;
|
||||
MPI_Info_set(info, "romio_cb_write", "enable");
|
||||
ADIOI_Info_set(info, "romio_cb_write", "enable");
|
||||
fd->hints->cb_write = ADIOI_HINT_ENABLE;
|
||||
|
||||
if ( fd->hints->cb_config_list != NULL ) ADIOI_Free (fd->hints->cb_config_list);
|
||||
@ -78,30 +78,54 @@ void ADIOI_BGL_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
|
||||
/* number of processes that perform I/O in collective I/O */
|
||||
MPI_Comm_size(fd->comm, &nprocs);
|
||||
nprocs_is_valid = 1;
|
||||
sprintf(value, "%d", nprocs);
|
||||
MPI_Info_set(info, "cb_nodes", value);
|
||||
ADIOI_Snprintf(value, MPI_MAX_INFO_VAL+1, "%d", nprocs);
|
||||
ADIOI_Info_set(info, "cb_nodes", value);
|
||||
fd->hints->cb_nodes = -1;
|
||||
|
||||
/* hint indicating that no indep. I/O will be performed on this file */
|
||||
MPI_Info_set(info, "romio_no_indep_rw", "false");
|
||||
ADIOI_Info_set(info, "romio_no_indep_rw", "false");
|
||||
fd->hints->no_indep_rw = 0;
|
||||
/* deferred_open derrived from no_indep_rw and cb_{read,write} */
|
||||
|
||||
/* bgl is not implementing file realms (ADIOI_IOStridedColl),
|
||||
initialize to disabled it. */
|
||||
/* hint instructing the use of persistent file realms */
|
||||
ADIOI_Info_set(info, "romio_cb_pfr", "disable");
|
||||
fd->hints->cb_pfr = ADIOI_HINT_DISABLE;
|
||||
|
||||
/* hint guiding the assignment of persistent file realms */
|
||||
ADIOI_Info_set(info, "romio_cb_fr_types", "aar");
|
||||
fd->hints->cb_fr_type = ADIOI_FR_AAR;
|
||||
|
||||
/* hint to align file realms with a certain byte value */
|
||||
ADIOI_Info_set(info, "romio_cb_fr_alignment", "1");
|
||||
fd->hints->cb_fr_alignment = 1;
|
||||
|
||||
/* hint to set a threshold percentage for a datatype's size/extent at
|
||||
* which data sieving should be done in collective I/O */
|
||||
ADIOI_Info_set(info, "romio_cb_ds_threshold", "0");
|
||||
fd->hints->cb_ds_threshold = 0;
|
||||
|
||||
/* hint to switch between point-to-point or all-to-all for two-phase */
|
||||
ADIOI_Info_set(info, "romio_cb_alltoall", "automatic");
|
||||
fd->hints->cb_alltoall = ADIOI_HINT_AUTO;
|
||||
|
||||
/* deferred_open derived from no_indep_rw and cb_{read,write} */
|
||||
fd->hints->deferred_open = 0;
|
||||
|
||||
/* buffer size for data sieving in independent reads */
|
||||
MPI_Info_set(info, "ind_rd_buffer_size", ADIOI_BGL_IND_RD_BUFFER_SIZE_DFLT);
|
||||
ADIOI_Info_set(info, "ind_rd_buffer_size", ADIOI_BGL_IND_RD_BUFFER_SIZE_DFLT);
|
||||
fd->hints->ind_rd_buffer_size = atoi(ADIOI_BGL_IND_RD_BUFFER_SIZE_DFLT);
|
||||
|
||||
/* buffer size for data sieving in independent writes */
|
||||
MPI_Info_set(info, "ind_wr_buffer_size", ADIOI_BGL_IND_WR_BUFFER_SIZE_DFLT);
|
||||
ADIOI_Info_set(info, "ind_wr_buffer_size", ADIOI_BGL_IND_WR_BUFFER_SIZE_DFLT);
|
||||
fd->hints->ind_wr_buffer_size = atoi(ADIOI_BGL_IND_WR_BUFFER_SIZE_DFLT);
|
||||
|
||||
if(fd->file_system == ADIO_UFS)
|
||||
{
|
||||
/* default for ufs/pvfs is to disable data sieving */
|
||||
MPI_Info_set(info, "romio_ds_read", "disable");
|
||||
ADIOI_Info_set(info, "romio_ds_read", "disable");
|
||||
fd->hints->ds_read = ADIOI_HINT_DISABLE;
|
||||
MPI_Info_set(info, "romio_ds_write", "disable");
|
||||
ADIOI_Info_set(info, "romio_ds_write", "disable");
|
||||
fd->hints->ds_write = ADIOI_HINT_DISABLE;
|
||||
}
|
||||
else
|
||||
@ -109,18 +133,23 @@ void ADIOI_BGL_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
|
||||
/* default is to let romio automatically decide when to use data
|
||||
* sieving
|
||||
*/
|
||||
MPI_Info_set(info, "romio_ds_read", "automatic");
|
||||
ADIOI_Info_set(info, "romio_ds_read", "automatic");
|
||||
fd->hints->ds_read = ADIOI_HINT_AUTO;
|
||||
MPI_Info_set(info, "romio_ds_write", "automatic");
|
||||
ADIOI_Info_set(info, "romio_ds_write", "automatic");
|
||||
fd->hints->ds_write = ADIOI_HINT_AUTO;
|
||||
}
|
||||
|
||||
/* still to do: tune this a bit for a variety of file systems. there's
|
||||
* no good default value so just leave it unset */
|
||||
fd->hints->min_fdomain_size = 0;
|
||||
fd->hints->striping_unit = 0;
|
||||
|
||||
fd->hints->initialized = 1;
|
||||
}
|
||||
|
||||
/* add in user's info if supplied */
|
||||
if (users_info != MPI_INFO_NULL) {
|
||||
MPI_Info_get(users_info, "cb_buffer_size", MPI_MAX_INFO_VAL,
|
||||
ADIOI_Info_get(users_info, "cb_buffer_size", MPI_MAX_INFO_VAL,
|
||||
value, &flag);
|
||||
if (flag && ((intval=atoi(value)) > 0)) {
|
||||
tmp_val = intval;
|
||||
@ -135,30 +164,106 @@ void ADIOI_BGL_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
|
||||
}
|
||||
/* --END ERROR HANDLING-- */
|
||||
|
||||
MPI_Info_set(info, "cb_buffer_size", value);
|
||||
ADIOI_Info_set(info, "cb_buffer_size", value);
|
||||
fd->hints->cb_buffer_size = intval;
|
||||
|
||||
}
|
||||
#if 0
|
||||
/* bgl is not implementing file realms (ADIOI_IOStridedColl) ... */
|
||||
/* aligning file realms to certain sizes (e.g. stripe sizes)
|
||||
* may benefit I/O performance */
|
||||
ADIOI_Info_get(users_info, "romio_cb_fr_alignment", MPI_MAX_INFO_VAL,
|
||||
value, &flag);
|
||||
if (flag && ((intval=atoi(value)) > 0)) {
|
||||
tmp_val = intval;
|
||||
|
||||
MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
|
||||
/* --BEGIN ERROR HANDLING-- */
|
||||
if (tmp_val != intval) {
|
||||
MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
|
||||
"romio_cb_fr_alignment",
|
||||
error_code);
|
||||
return;
|
||||
}
|
||||
/* --END ERROR HANDLING-- */
|
||||
|
||||
ADIOI_Info_set(info, "romio_cb_fr_alignment", value);
|
||||
fd->hints->cb_fr_alignment = intval;
|
||||
|
||||
}
|
||||
|
||||
/* for collective I/O, try to be smarter about when to do data sieving
|
||||
* using a specific threshold for the datatype size/extent
|
||||
* (percentage 0-100%) */
|
||||
ADIOI_Info_get(users_info, "romio_cb_ds_threshold", MPI_MAX_INFO_VAL,
|
||||
value, &flag);
|
||||
if (flag && ((intval=atoi(value)) > 0)) {
|
||||
tmp_val = intval;
|
||||
|
||||
MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
|
||||
/* --BEGIN ERROR HANDLING-- */
|
||||
if (tmp_val != intval) {
|
||||
MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
|
||||
"romio_cb_ds_threshold",
|
||||
error_code);
|
||||
return;
|
||||
}
|
||||
/* --END ERROR HANDLING-- */
|
||||
|
||||
ADIOI_Info_set(info, "romio_cb_ds_threshold", value);
|
||||
fd->hints->cb_ds_threshold = intval;
|
||||
|
||||
}
|
||||
ADIOI_Info_get(users_info, "romio_cb_alltoall", MPI_MAX_INFO_VAL, value,
|
||||
&flag);
|
||||
if (flag) {
|
||||
if (!strcmp(value, "enable") || !strcmp(value, "ENABLE")) {
|
||||
ADIOI_Info_set(info, "romio_cb_alltoall", value);
|
||||
fd->hints->cb_read = ADIOI_HINT_ENABLE;
|
||||
}
|
||||
else if (!strcmp(value, "disable") || !strcmp(value, "DISABLE")) {
|
||||
ADIOI_Info_set(info, "romio_cb_alltoall", value);
|
||||
fd->hints->cb_read = ADIOI_HINT_DISABLE;
|
||||
}
|
||||
else if (!strcmp(value, "automatic") || !strcmp(value, "AUTOMATIC"))
|
||||
{
|
||||
ADIOI_Info_set(info, "romio_cb_alltoall", value);
|
||||
fd->hints->cb_read = ADIOI_HINT_AUTO;
|
||||
}
|
||||
|
||||
tmp_val = fd->hints->cb_alltoall;
|
||||
|
||||
MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
|
||||
/* --BEGIN ERROR HANDLING-- */
|
||||
if (tmp_val != fd->hints->cb_alltoall) {
|
||||
MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
|
||||
"romio_cb_alltoall",
|
||||
error_code);
|
||||
return;
|
||||
}
|
||||
/* --END ERROR HANDLING-- */
|
||||
}
|
||||
#endif
|
||||
/* new hints for enabling/disabling coll. buffering on
|
||||
* reads/writes
|
||||
*/
|
||||
MPI_Info_get(users_info, "romio_cb_read", MPI_MAX_INFO_VAL, value, &flag);
|
||||
ADIOI_Info_get(users_info, "romio_cb_read", MPI_MAX_INFO_VAL, value,
|
||||
&flag);
|
||||
if (flag) {
|
||||
if (!strcmp(value, "enable") || !strcmp(value, "ENABLE")) {
|
||||
MPI_Info_set(info, "romio_cb_read", value);
|
||||
ADIOI_Info_set(info, "romio_cb_read", value);
|
||||
fd->hints->cb_read = ADIOI_HINT_ENABLE;
|
||||
}
|
||||
else if (!strcmp(value, "disable") || !strcmp(value, "DISABLE")) {
|
||||
/* romio_cb_read overrides no_indep_rw */
|
||||
MPI_Info_set(info, "romio_cb_read", value);
|
||||
MPI_Info_set(info, "romio_no_indep_rw", "false");
|
||||
ADIOI_Info_set(info, "romio_cb_read", value);
|
||||
ADIOI_Info_set(info, "romio_no_indep_rw", "false");
|
||||
fd->hints->cb_read = ADIOI_HINT_DISABLE;
|
||||
fd->hints->no_indep_rw = ADIOI_HINT_DISABLE;
|
||||
}
|
||||
else if (!strcmp(value, "automatic") || !strcmp(value, "AUTOMATIC"))
|
||||
{
|
||||
MPI_Info_set(info, "romio_cb_read", value);
|
||||
ADIOI_Info_set(info, "romio_cb_read", value);
|
||||
fd->hints->cb_read = ADIOI_HINT_AUTO;
|
||||
}
|
||||
|
||||
@ -174,24 +279,25 @@ void ADIOI_BGL_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
|
||||
}
|
||||
/* --END ERROR HANDLING-- */
|
||||
}
|
||||
MPI_Info_get(users_info, "romio_cb_write", MPI_MAX_INFO_VAL, value, &flag);
|
||||
ADIOI_Info_get(users_info, "romio_cb_write", MPI_MAX_INFO_VAL, value,
|
||||
&flag);
|
||||
if (flag) {
|
||||
if (!strcmp(value, "enable") || !strcmp(value, "ENABLE")) {
|
||||
MPI_Info_set(info, "romio_cb_write", value);
|
||||
ADIOI_Info_set(info, "romio_cb_write", value);
|
||||
fd->hints->cb_write = ADIOI_HINT_ENABLE;
|
||||
}
|
||||
else if (!strcmp(value, "disable") || !strcmp(value, "DISABLE"))
|
||||
{
|
||||
/* romio_cb_write overrides no_indep_rw, too */
|
||||
MPI_Info_set(info, "romio_cb_write", value);
|
||||
MPI_Info_set(info, "romio_no_indep_rw", "false");
|
||||
ADIOI_Info_set(info, "romio_cb_write", value);
|
||||
ADIOI_Info_set(info, "romio_no_indep_rw", "false");
|
||||
fd->hints->cb_write = ADIOI_HINT_DISABLE;
|
||||
fd->hints->no_indep_rw = ADIOI_HINT_DISABLE;
|
||||
}
|
||||
else if (!strcmp(value, "automatic") ||
|
||||
!strcmp(value, "AUTOMATIC"))
|
||||
{
|
||||
MPI_Info_set(info, "romio_cb_write", value);
|
||||
ADIOI_Info_set(info, "romio_cb_write", value);
|
||||
fd->hints->cb_write = ADIOI_HINT_AUTO;
|
||||
}
|
||||
|
||||
@ -208,23 +314,81 @@ void ADIOI_BGL_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
|
||||
/* --END ERROR HANDLING-- */
|
||||
}
|
||||
|
||||
#if 0
|
||||
/* bgl is not implementing file realms (ADIOI_IOStridedColl) ... */
|
||||
/* enable/disable persistent file realms for collective I/O */
|
||||
/* may want to check for no_indep_rdwr hint as well */
|
||||
ADIOI_Info_get(users_info, "romio_cb_pfr", MPI_MAX_INFO_VAL, value,
|
||||
&flag);
|
||||
if (flag) {
|
||||
if (!strcmp(value, "enable") || !strcmp(value, "ENABLE")) {
|
||||
ADIOI_Info_set(info, "romio_cb_pfr", value);
|
||||
fd->hints->cb_pfr = ADIOI_HINT_ENABLE;
|
||||
}
|
||||
else if (!strcmp(value, "disable") || !strcmp(value, "DISABLE")) {
|
||||
ADIOI_Info_set(info, "romio_cb_pfr", value);
|
||||
fd->hints->cb_pfr = ADIOI_HINT_DISABLE;
|
||||
}
|
||||
else if (!strcmp(value, "automatic") || !strcmp(value, "AUTOMATIC"))
|
||||
{
|
||||
ADIOI_Info_set(info, "romio_cb_pfr", value);
|
||||
fd->hints->cb_pfr = ADIOI_HINT_AUTO;
|
||||
}
|
||||
|
||||
tmp_val = fd->hints->cb_pfr;
|
||||
|
||||
MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
|
||||
/* --BEGIN ERROR HANDLING-- */
|
||||
if (tmp_val != fd->hints->cb_pfr) {
|
||||
MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
|
||||
"romio_cb_pfr",
|
||||
error_code);
|
||||
return;
|
||||
}
|
||||
/* --END ERROR HANDLING-- */
|
||||
}
|
||||
|
||||
/* file realm assignment types ADIOI_FR_AAR(0),
|
||||
ADIOI_FR_FSZ(-1), ADIOI_FR_USR_REALMS(-2), all others specify
|
||||
a regular fr size in bytes. probably not the best way... */
|
||||
ADIOI_Info_get(users_info, "romio_cb_fr_type", MPI_MAX_INFO_VAL,
|
||||
value, &flag);
|
||||
if (flag && ((intval=atoi(value)) >= -2)) {
|
||||
tmp_val = intval;
|
||||
|
||||
MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
|
||||
/* --BEGIN ERROR HANDLING-- */
|
||||
if (tmp_val != intval) {
|
||||
MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
|
||||
"romio_cb_fr_type",
|
||||
error_code);
|
||||
return;
|
||||
}
|
||||
/* --END ERROR HANDLING-- */
|
||||
|
||||
ADIOI_Info_set(info, "romio_cb_fr_type", value);
|
||||
fd->hints->cb_fr_type = intval;
|
||||
|
||||
}
|
||||
#endif
|
||||
/* new hint for specifying no indep. read/write will be performed */
|
||||
MPI_Info_get(users_info, "romio_no_indep_rw", MPI_MAX_INFO_VAL, value, &flag);
|
||||
ADIOI_Info_get(users_info, "romio_no_indep_rw", MPI_MAX_INFO_VAL, value,
|
||||
&flag);
|
||||
if (flag) {
|
||||
if (!strcmp(value, "true") || !strcmp(value, "TRUE")) {
|
||||
/* if 'no_indep_rw' set, also hint that we will do
|
||||
* collective buffering: if we aren't doing independent io,
|
||||
* then we have to do collective */
|
||||
MPI_Info_set(info, "romio_no_indep_rw", value);
|
||||
MPI_Info_set(info, "romio_cb_write", "enable");
|
||||
MPI_Info_set(info, "romio_cb_read", "enable");
|
||||
ADIOI_Info_set(info, "romio_no_indep_rw", value);
|
||||
ADIOI_Info_set(info, "romio_cb_write", "enable");
|
||||
ADIOI_Info_set(info, "romio_cb_read", "enable");
|
||||
fd->hints->no_indep_rw = 1;
|
||||
fd->hints->cb_read = 1;
|
||||
fd->hints->cb_write = 1;
|
||||
tmp_val = 1;
|
||||
}
|
||||
else if (!strcmp(value, "false") || !strcmp(value, "FALSE")) {
|
||||
MPI_Info_set(info, "romio_no_indep_rw", value);
|
||||
ADIOI_Info_set(info, "romio_no_indep_rw", value);
|
||||
fd->hints->no_indep_rw = 0;
|
||||
tmp_val = 0;
|
||||
}
|
||||
@ -246,64 +410,80 @@ void ADIOI_BGL_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
|
||||
/* new hints for enabling/disabling data sieving on
|
||||
* reads/writes
|
||||
*/
|
||||
MPI_Info_get(users_info, "romio_ds_read", MPI_MAX_INFO_VAL, value,
|
||||
ADIOI_Info_get(users_info, "romio_ds_read", MPI_MAX_INFO_VAL, value,
|
||||
&flag);
|
||||
if (flag) {
|
||||
if (!strcmp(value, "enable") || !strcmp(value, "ENABLE")) {
|
||||
MPI_Info_set(info, "romio_ds_read", value);
|
||||
ADIOI_Info_set(info, "romio_ds_read", value);
|
||||
fd->hints->ds_read = ADIOI_HINT_ENABLE;
|
||||
}
|
||||
else if (!strcmp(value, "disable") || !strcmp(value, "DISABLE")) {
|
||||
MPI_Info_set(info, "romio_ds_read", value);
|
||||
ADIOI_Info_set(info, "romio_ds_read", value);
|
||||
fd->hints->ds_read = ADIOI_HINT_DISABLE;
|
||||
}
|
||||
else if (!strcmp(value, "automatic") || !strcmp(value, "AUTOMATIC"))
|
||||
{
|
||||
MPI_Info_set(info, "romio_ds_read", value);
|
||||
ADIOI_Info_set(info, "romio_ds_read", value);
|
||||
fd->hints->ds_read = ADIOI_HINT_AUTO;
|
||||
}
|
||||
/* otherwise ignore */
|
||||
}
|
||||
MPI_Info_get(users_info, "romio_ds_write", MPI_MAX_INFO_VAL, value,
|
||||
ADIOI_Info_get(users_info, "romio_ds_write", MPI_MAX_INFO_VAL, value,
|
||||
&flag);
|
||||
if (flag) {
|
||||
if (!strcmp(value, "enable") || !strcmp(value, "ENABLE")) {
|
||||
MPI_Info_set(info, "romio_ds_write", value);
|
||||
ADIOI_Info_set(info, "romio_ds_write", value);
|
||||
fd->hints->ds_write = ADIOI_HINT_ENABLE;
|
||||
}
|
||||
else if (!strcmp(value, "disable") || !strcmp(value, "DISABLE")) {
|
||||
MPI_Info_set(info, "romio_ds_write", value);
|
||||
ADIOI_Info_set(info, "romio_ds_write", value);
|
||||
fd->hints->ds_write = ADIOI_HINT_DISABLE;
|
||||
}
|
||||
else if (!strcmp(value, "automatic") || !strcmp(value, "AUTOMATIC"))
|
||||
{
|
||||
MPI_Info_set(info, "romio_ds_write", value);
|
||||
ADIOI_Info_set(info, "romio_ds_write", value);
|
||||
fd->hints->ds_write = ADIOI_HINT_AUTO;
|
||||
}
|
||||
/* otherwise ignore */
|
||||
}
|
||||
|
||||
MPI_Info_get(users_info, "ind_wr_buffer_size", MPI_MAX_INFO_VAL,
|
||||
ADIOI_Info_get(users_info, "ind_wr_buffer_size", MPI_MAX_INFO_VAL,
|
||||
value, &flag);
|
||||
if (flag && ((intval = atoi(value)) > 0)) {
|
||||
MPI_Info_set(info, "ind_wr_buffer_size", value);
|
||||
ADIOI_Info_set(info, "ind_wr_buffer_size", value);
|
||||
fd->hints->ind_wr_buffer_size = intval;
|
||||
}
|
||||
|
||||
MPI_Info_get(users_info, "ind_rd_buffer_size", MPI_MAX_INFO_VAL,
|
||||
ADIOI_Info_get(users_info, "ind_rd_buffer_size", MPI_MAX_INFO_VAL,
|
||||
value, &flag);
|
||||
if (flag && ((intval = atoi(value)) > 0)) {
|
||||
MPI_Info_set(info, "ind_rd_buffer_size", value);
|
||||
ADIOI_Info_set(info, "ind_rd_buffer_size", value);
|
||||
fd->hints->ind_rd_buffer_size = intval;
|
||||
}
|
||||
|
||||
memset( value, 0, MPI_MAX_INFO_VAL+1 );
|
||||
MPI_Info_get(users_info, ADIOI_BGL_NAGG_IN_PSET_HINT_NAME, MPI_MAX_INFO_VAL,
|
||||
ADIOI_Info_get(users_info, "romio_min_fdomain_size", MPI_MAX_INFO_VAL,
|
||||
value, &flag);
|
||||
if ( flag && ((intval = atoi(value)) > 0) ) {
|
||||
ADIOI_Info_set(info, "romio_min_fdomain_size", value);
|
||||
fd->hints->min_fdomain_size = intval;
|
||||
}
|
||||
/* Now we use striping unit in common code so we should
|
||||
process hints for it. */
|
||||
ADIOI_Info_get(users_info, "striping_unit", MPI_MAX_INFO_VAL,
|
||||
value, &flag);
|
||||
if ( flag && ((intval = atoi(value)) > 0) ) {
|
||||
ADIOI_Info_set(info, "striping_unit", value);
|
||||
fd->hints->striping_unit = intval;
|
||||
}
|
||||
|
||||
memset( value, 0, MPI_MAX_INFO_VAL+1 );
|
||||
ADIOI_Info_get(users_info, ADIOI_BGL_NAGG_IN_PSET_HINT_NAME, MPI_MAX_INFO_VAL,
|
||||
value, &flag);
|
||||
if (flag && ((intval = atoi(value)) > 0)) {
|
||||
|
||||
did_anything = 1;
|
||||
MPI_Info_set(info, ADIOI_BGL_NAGG_IN_PSET_HINT_NAME, value);
|
||||
ADIOI_Info_set(info, ADIOI_BGL_NAGG_IN_PSET_HINT_NAME, value);
|
||||
fd->hints->cb_nodes = intval;
|
||||
}
|
||||
}
|
||||
@ -312,24 +492,30 @@ void ADIOI_BGL_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
|
||||
if (did_anything) {
|
||||
ADIOI_BGL_gen_agg_ranklist(fd, fd->hints->cb_nodes);
|
||||
}
|
||||
|
||||
/* deferred_open won't be set by callers, but if the user doesn't
|
||||
* explicitly disable collecitve buffering (two-phase) and does hint that
|
||||
* io w/o independent io is going on, we'll set this internal hint as a
|
||||
* convenience */
|
||||
if ( ( (fd->hints->cb_read != ADIOI_HINT_DISABLE)
|
||||
&& (fd->hints->cb_write != ADIOI_HINT_DISABLE)
|
||||
&& fd->hints->no_indep_rw ) )
|
||||
{
|
||||
fd->hints->deferred_open = 1;
|
||||
} else {
|
||||
/* setting romio_no_indep_rw enable and romio_cb_{read,write}
|
||||
* disable at the same time doesn't make sense. honor
|
||||
* romio_cb_{read,write} and force the no_indep_rw hint to
|
||||
* 'disable' */
|
||||
MPI_Info_set(info, "romio_no_indep_rw", "false");
|
||||
/* ignore defered open hints and do not enable it for bluegene: need all
|
||||
* processors in the open path so we can stat-and-broadcast the blocksize
|
||||
*/
|
||||
ADIOI_Info_set(info, "romio_no_indep_rw", "false");
|
||||
fd->hints->no_indep_rw = 0;
|
||||
fd->hints->deferred_open = 0;
|
||||
|
||||
/* BobC commented this out, but since hint processing runs on both bgl and
|
||||
* bglockless, we need to keep DS writes enabled on gpfs and disabled on
|
||||
* PVFS */
|
||||
if (ADIO_Feature(fd, ADIO_DATA_SIEVING_WRITES) == 0) {
|
||||
/* disable data sieving for fs that do not
|
||||
support file locking */
|
||||
ADIOI_Info_get(info, "ind_wr_buffer_size", MPI_MAX_INFO_VAL,
|
||||
value, &flag);
|
||||
if (flag) {
|
||||
/* get rid of this value if it is set */
|
||||
ADIOI_Info_delete(info, "ind_wr_buffer_size");
|
||||
}
|
||||
/* note: leave ind_wr_buffer_size alone; used for other cases
|
||||
* as well. -- Rob Ross, 04/22/2003
|
||||
*/
|
||||
ADIOI_Info_set(info, "romio_ds_write", "disable");
|
||||
fd->hints->ds_write = ADIOI_HINT_DISABLE;
|
||||
}
|
||||
|
||||
ADIOI_Free(value);
|
||||
|
@ -15,6 +15,181 @@
|
||||
#include "ad_bgl.h"
|
||||
#include "ad_bgl_aggrs.h"
|
||||
|
||||
#include <sys/statfs.h>
|
||||
#include <sys/vfs.h>
|
||||
|
||||
/* COPIED FROM ad_fstype.c since it is static in that file
|
||||
|
||||
ADIO_FileSysType_parentdir - determines a string pathname for the
|
||||
parent directory of a given filename.
|
||||
|
||||
Input Parameters:
|
||||
. filename - pointer to file name character array
|
||||
|
||||
Output Parameters:
|
||||
. dirnamep - pointer to location in which to store a pointer to a string
|
||||
|
||||
Note that the caller should free the memory located at the pointer returned
|
||||
after the string is no longer needed.
|
||||
*/
|
||||
|
||||
#ifndef PATH_MAX
#define PATH_MAX 65535
#endif

/* In a strict ANSI environment, S_ISLNK may not be defined.  Provide a
   fallback here.  We assume that S_ISLNK, when the system supplies it, is
   *always* defined as a macro.  If that is not universally true, then add
   a test to the romio configure that tries to link a program that
   references S_ISLNK. */
#if !defined(S_ISLNK)
#  if defined(S_IFLNK) && defined(S_IFMT)
/* The file type stored in st_mode is an enumerated code, not a set of
   independent flag bits, and type codes may share bits (on Linux S_IFLNK
   is 0120000 and S_IFREG is 0100000).  A plain "(mode) & S_IFLNK" would
   therefore also be true for regular files, so isolate the type field
   with S_IFMT and compare it for equality. */
#    define S_ISLNK(mode) (((mode) & S_IFMT) == S_IFLNK)
#  else
/* no way to check if it is a link, so say false */
#    define S_ISLNK(mode) 0
#  endif
#endif /* !(S_ISLNK) */
|
||||
|
||||
/* ADIO_FileSysType_parentdir
|
||||
*
|
||||
* Returns pointer to string in dirnamep; that string is allocated with
|
||||
* strdup and must be free()'d.
|
||||
*/
|
||||
static void ADIO_FileSysType_parentdir(char *filename, char **dirnamep)
|
||||
{
|
||||
int err;
|
||||
char *dir = NULL, *slash;
|
||||
struct stat statbuf;
|
||||
|
||||
err = lstat(filename, &statbuf);
|
||||
|
||||
if (err || (!S_ISLNK(statbuf.st_mode))) {
|
||||
/* no such file, or file is not a link; these are the "normal"
|
||||
* cases where we can just return the parent directory.
|
||||
*/
|
||||
dir = ADIOI_Strdup(filename);
|
||||
}
|
||||
else {
|
||||
/* filename is a symlink. we've presumably already tried
|
||||
* to stat it and found it to be missing (dangling link),
|
||||
* but this code doesn't care if the target is really there
|
||||
* or not.
|
||||
*/
|
||||
int namelen;
|
||||
char *linkbuf;
|
||||
|
||||
linkbuf = ADIOI_Malloc(PATH_MAX+1);
|
||||
namelen = readlink(filename, linkbuf, PATH_MAX+1);
|
||||
if (namelen == -1) {
|
||||
/* something strange has happened between the time that
|
||||
* we determined that this was a link and the time that
|
||||
* we attempted to read it; punt and use the old name.
|
||||
*/
|
||||
dir = ADIOI_Strdup(filename);
|
||||
}
|
||||
else {
|
||||
/* successfully read the link */
|
||||
linkbuf[namelen] = '\0'; /* readlink doesn't null terminate */
|
||||
dir = ADIOI_Strdup(linkbuf);
|
||||
ADIOI_Free(linkbuf);
|
||||
}
|
||||
}
|
||||
|
||||
slash = strrchr(dir, '/');
|
||||
if (!slash) ADIOI_Strncpy(dir, ".", 2);
|
||||
else {
|
||||
if (slash == dir) *(dir + 1) = '\0';
|
||||
else *slash = '\0';
|
||||
}
|
||||
|
||||
*dirnamep = dir;
|
||||
return;
|
||||
}
|
||||
|
||||
/* scaleable_stat
 *
 * Determines the block size and file system type for fd->filename without
 * having every process hit the file system: only rank 0 of fd->comm calls
 * stat64/statfs, and the two resulting values are broadcast to all ranks.
 *
 * Results are stored in the ADIOI_BGL_fs structure that fd->fs_ptr points
 * to:
 *   blksize    - st_blksize reported by stat64, or the value that was
 *                already stored there if stat64 fails on rank 0
 *   fsync_aggr - set to ADIOI_BGL_FSYNC_AGGREGATION_ENABLED when the type
 *                is GPFS or PVFS2 (rank 0 additionally gets
 *                ADIOI_BGL_FSYNC_AGGREGATOR); left untouched otherwise
 *
 * This is collective over fd->comm (it calls MPI_Bcast), and fd->fs_ptr
 * must already point at an ADIOI_BGL_fs whose blksize holds a usable
 * default.
 */
static void scaleable_stat(ADIO_File fd)
{
    struct stat64 bgl_stat;
    struct statfs bgl_statfs;
    int rank, rc;
    char * dir;
    long buf[2];
    MPI_Comm_rank(fd->comm, &rank);

    if (rank == 0) {
        /* Start from the block size already recorded for this file so a
         * failed stat64 broadcasts that value instead of an indeterminate
         * one. */
        buf[0] = ((ADIOI_BGL_fs*)fd->fs_ptr)->blksize;

        /* Get the (real) underlying file system block size */
        rc = stat64(fd->filename, &bgl_stat);
        if (rc >= 0)
        {
            buf[0] = bgl_stat.st_blksize;
            DBGV_FPRINTF(stderr,"Successful stat '%s'.  Blocksize=%ld\n",
                         fd->filename,(long)bgl_stat.st_blksize);
        }
        else
        {
            DBGV_FPRINTF(stderr,"Stat '%s' failed with rc=%d, errno=%d\n",
                         fd->filename,rc,errno);
        }

        /* Get the (real) underlying file system type so we can
         * plan our fsync scaling strategy */
        rc = statfs(fd->filename,&bgl_statfs);
        if (rc >= 0)
        {
            DBGV_FPRINTF(stderr,"Successful statfs '%s'.  Magic number=%#X\n",
                         fd->filename,(unsigned)bgl_statfs.f_type);
            buf[1] = bgl_statfs.f_type;
        }
        else
        {
            DBGV_FPRINTF(stderr,"Statfs '%s' failed with rc=%d, errno=%d\n",
                         fd->filename,rc,errno);

            /* The file itself could not be queried; fall back to the
             * directory that contains it. */
            ADIO_FileSysType_parentdir(fd->filename, &dir);
            rc = statfs(dir,&bgl_statfs);
            if (rc >= 0)
            {
                DBGV_FPRINTF(stderr,"Successful statfs '%s'.  Magic number=%#X\n",dir,(unsigned)bgl_statfs.f_type);
                buf[1] = bgl_statfs.f_type;
            }
            else
            {
                /* Hmm.  Guess we'll assume the worst-case, that it's not GPFS
                 * or PVFS2 below */
                buf[1] = -1; /* bogus magic number */
                DBGV_FPRINTF(stderr,"Statfs '%s' failed with rc=%d, errno=%d\n",dir,rc,errno);
            }

            /* dir was allocated with ADIOI_Strdup, so release it with the
             * matching ADIOI_Free rather than plain free() */
            ADIOI_Free(dir);
        }
    }

    /* now we can broadcast the stat/statfs data to everyone else */
    MPI_Bcast(buf, 2, MPI_LONG, 0, fd->comm);
    bgl_stat.st_blksize = buf[0];
    bgl_statfs.f_type = buf[1];

    /* data from stat64 */
    /* store the blksize in the file system specific storage */
    ((ADIOI_BGL_fs*)fd->fs_ptr)->blksize = bgl_stat.st_blksize;

    /* data from statfs */
    if ((bgl_statfs.f_type == GPFS_SUPER_MAGIC) ||
        (bgl_statfs.f_type == PVFS2_SUPER_MAGIC))
    {
        ((ADIOI_BGL_fs*)fd->fs_ptr)->fsync_aggr =
            ADIOI_BGL_FSYNC_AGGREGATION_ENABLED;

        /* Only one rank is an "fsync aggregator" because only one
         * fsync is needed.  On every other rank aggregation is enabled
         * but the rank is not an aggregator. */
        if (rank == 0)
        {
            ((ADIOI_BGL_fs*)fd->fs_ptr)->fsync_aggr |=
                ADIOI_BGL_FSYNC_AGGREGATOR;
            DBG_FPRINTF(stderr,"fsync aggregator %d\n",rank);
        }
    }
    /* Other filesystems default to no fsync aggregation */
}
|
||||
|
||||
|
||||
void ADIOI_BGL_Open(ADIO_File fd, int *error_code)
|
||||
{
|
||||
int perm, old_mask, amode;
|
||||
@ -41,8 +216,14 @@ void ADIOI_BGL_Open(ADIO_File fd, int *error_code)
|
||||
amode = amode | O_RDWR;
|
||||
if (fd->access_mode & ADIO_EXCL)
|
||||
amode = amode | O_EXCL;
|
||||
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event(ADIOI_MPE_open_a, 0, NULL);
|
||||
#endif
|
||||
fd->fd_sys = open(fd->filename, amode, perm);
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event(ADIOI_MPE_open_b, 0, NULL);
|
||||
#endif
|
||||
DBG_FPRINTF(stderr,"open('%s',%#X,%#X) rc=%d, errno=%d\n",fd->filename,amode,perm,fd->fd_sys,errno);
|
||||
fd->fd_direct = -1;
|
||||
|
||||
if ((fd->fd_sys != -1) && (fd->access_mode & ADIO_APPEND))
|
||||
@ -51,17 +232,28 @@ void ADIOI_BGL_Open(ADIO_File fd, int *error_code)
|
||||
if(fd->fd_sys != -1)
|
||||
{
|
||||
struct stat64 bgl_stat;
|
||||
int rc = stat64(fd->filename,&bgl_stat);
|
||||
if (rc >= 0)
|
||||
{
|
||||
/* store the blksize in the file system specific storage */
|
||||
struct statfs bgl_statfs;
|
||||
char* dir;
|
||||
int rc;
|
||||
|
||||
/* Initialize the ad_bgl file system specific information */
|
||||
AD_BGL_assert(fd->fs_ptr == NULL);
|
||||
fd->fs_ptr = (ADIOI_BGL_fs*) ADIOI_Malloc(sizeof(ADIOI_BGL_fs));
|
||||
((ADIOI_BGL_fs*)fd->fs_ptr)->blksize = bgl_stat.st_blksize;
|
||||
/* FPRINTF(stderr,"%s(%d):Successful stat '%s'. Blocksize=%ld\n",myname,__LINE__,fd->filename,bgl_stat.st_blksize);*/
|
||||
}
|
||||
/* else
|
||||
FPRINTF(stderr,"%s(%d):Stat '%s' failed with rc=%d, errno=%d\n",myname,__LINE__,fd->filename,rc,errno);*/
|
||||
|
||||
((ADIOI_BGL_fs*)fd->fs_ptr)->blksize = 1048576; /* default to 1M */
|
||||
|
||||
/* default is no fsync aggregation */
|
||||
((ADIOI_BGL_fs*)fd->fs_ptr)->fsync_aggr =
|
||||
ADIOI_BGL_FSYNC_AGGREGATION_DISABLED;
|
||||
|
||||
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event(ADIOI_MPE_stat_a, 0, NULL);
|
||||
#endif
|
||||
scaleable_stat(fd);
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event(ADIOI_MPE_stat_b, 0, NULL);
|
||||
#endif
|
||||
}
|
||||
|
||||
if (fd->fd_sys == -1) {
|
||||
@ -112,3 +304,6 @@ void ADIOI_BGL_Open(ADIO_File fd, int *error_code)
|
||||
}
|
||||
else *error_code = MPI_SUCCESS;
|
||||
}
|
||||
/*
|
||||
*vim: ts=8 sts=4 sw=4 noexpandtab
|
||||
*/
|
||||
|
@ -8,6 +8,7 @@
|
||||
|
||||
/* -*- Mode: C; c-basic-offset:4 ; -*- */
|
||||
/*
|
||||
*
|
||||
* Copyright (C) 1997 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
@ -22,18 +23,25 @@
|
||||
#include "mpe.h"
|
||||
#endif
|
||||
|
||||
#ifdef USE_DBG_LOGGING
|
||||
#define RDCOLL_DEBUG 1
|
||||
#endif
|
||||
#ifdef AGGREGATION_PROFILE
|
||||
#include "mpe.h"
|
||||
#endif
|
||||
|
||||
/* prototypes of functions used for collective reads only. */
|
||||
static void ADIOI_Read_and_exch(ADIO_File fd, void *buf, MPI_Datatype
|
||||
datatype, int nprocs,
|
||||
int myrank, ADIOI_Access
|
||||
*others_req, ADIO_Offset *offset_list,
|
||||
int *len_list, int contig_access_count,
|
||||
ADIO_Offset *len_list, int contig_access_count,
|
||||
ADIO_Offset
|
||||
min_st_offset, ADIO_Offset fd_size,
|
||||
ADIO_Offset *fd_start, ADIO_Offset *fd_end,
|
||||
int *buf_idx, int *error_code);
|
||||
static void ADIOI_R_Exchange_data(ADIO_File fd, void *buf, ADIOI_Flatlist_node
|
||||
*flat_buf, ADIO_Offset *offset_list, int
|
||||
*flat_buf, ADIO_Offset *offset_list, ADIO_Offset
|
||||
*len_list, int *send_size, int *recv_size,
|
||||
int *count, int *start_pos,
|
||||
int *partial_send,
|
||||
@ -47,7 +55,7 @@ static void ADIOI_R_Exchange_data(ADIO_File fd, void *buf, ADIOI_Flatlist_node
|
||||
int iter,
|
||||
MPI_Aint buftype_extent, int *buf_idx);
|
||||
static void ADIOI_R_Exchange_data_alltoallv(ADIO_File fd, void *buf, ADIOI_Flatlist_node
|
||||
*flat_buf, ADIO_Offset *offset_list, int
|
||||
*flat_buf, ADIO_Offset *offset_list, ADIO_Offset
|
||||
*len_list, int *send_size, int *recv_size,
|
||||
int *count, int *start_pos,
|
||||
int *partial_send,
|
||||
@ -62,8 +70,8 @@ static void ADIOI_R_Exchange_data_alltoallv(ADIO_File fd, void *buf, ADIOI_Flatl
|
||||
MPI_Aint buftype_extent, int *buf_idx);
|
||||
static void ADIOI_Fill_user_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node
|
||||
*flat_buf, char **recv_buf, ADIO_Offset
|
||||
*offset_list, int *len_list,
|
||||
int *recv_size,
|
||||
*offset_list, ADIO_Offset *len_list,
|
||||
unsigned *recv_size,
|
||||
MPI_Request *requests, MPI_Status *statuses,
|
||||
int *recd_from_proc, int nprocs,
|
||||
int contig_access_count,
|
||||
@ -74,7 +82,7 @@ static void ADIOI_Fill_user_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node
|
||||
|
||||
extern void ADIOI_Calc_my_off_len(ADIO_File fd, int bufcount, MPI_Datatype
|
||||
datatype, int file_ptr_type, ADIO_Offset
|
||||
offset, ADIO_Offset **offset_list_ptr, int
|
||||
offset, ADIO_Offset **offset_list_ptr, ADIO_Offset
|
||||
**len_list_ptr, ADIO_Offset *start_offset_ptr,
|
||||
ADIO_Offset *end_offset_ptr, int
|
||||
*contig_access_count_ptr);
|
||||
@ -99,25 +107,15 @@ void ADIOI_BGL_ReadStridedColl(ADIO_File fd, void *buf, int count,
|
||||
whose request lies in this process's file domain. */
|
||||
|
||||
int i, filetype_is_contig, nprocs, nprocs_for_coll, myrank;
|
||||
int contig_access_count, interleave_count = 0, buftype_is_contig;
|
||||
int contig_access_count=0, interleave_count = 0, buftype_is_contig;
|
||||
int *count_my_req_per_proc, count_my_req_procs, count_others_req_procs;
|
||||
ADIO_Offset start_offset, end_offset, orig_fp, fd_size, min_st_offset, off;
|
||||
ADIO_Offset *offset_list = NULL, *st_offsets = NULL, *fd_start = NULL,
|
||||
*fd_end = NULL, *end_offsets = NULL;
|
||||
ADIO_Offset *bgl_offsets0 = NULL, *bgl_offsets = NULL;
|
||||
int ii;
|
||||
int *len_list = NULL, *buf_idx = NULL;
|
||||
|
||||
double io_time = 0., all_time, max_all_time;
|
||||
double tstep1, max_tstep1;
|
||||
double tstep1_1, max_tstep1_1;
|
||||
double tstep1_2, max_tstep1_2;
|
||||
double tstep1_3, max_tstep1_3;
|
||||
double tstep2, max_tstep2;
|
||||
double tstep3, max_tstep3;
|
||||
double tstep4, max_tstep4;
|
||||
double sum_sz;
|
||||
|
||||
ADIO_Offset *len_list = NULL;
|
||||
int *buf_idx = NULL;
|
||||
#if BGL_PROFILE
|
||||
BGLMPIO_T_CIO_RESET( 0, r )
|
||||
#endif
|
||||
@ -126,6 +124,14 @@ void ADIOI_BGL_ReadStridedColl(ADIO_File fd, void *buf, int count,
|
||||
int bufsize, size;
|
||||
#endif
|
||||
|
||||
#if 0
|
||||
/* From common code - not implemented for bgl. */
|
||||
if (fd->hints->cb_pfr != ADIOI_HINT_DISABLE) {
|
||||
ADIOI_IOStridedColl (fd, buf, count, ADIOI_READ, datatype,
|
||||
file_ptr_type, offset, status, error_code);
|
||||
return;
|
||||
} */
|
||||
#endif
|
||||
#ifdef PROFILE
|
||||
MPE_Log_event(13, 0, "start computation");
|
||||
#endif
|
||||
@ -157,14 +163,16 @@ void ADIOI_BGL_ReadStridedColl(ADIO_File fd, void *buf, int count,
|
||||
BGLMPIO_T_CIO_SET_GET( 0, r, 1, 1, 1, BGLMPIO_CIO_GATHER, BGLMPIO_CIO_LCOMP )
|
||||
#endif
|
||||
|
||||
/* for (i=0; i<contig_access_count; i++) {
|
||||
FPRINTF(stderr, "rank %d off %ld len %d\n", myrank, offset_list[i],
|
||||
len_list[i]);
|
||||
}*/
|
||||
#ifdef RDCOLL_DEBUG
|
||||
for (i=0; i<contig_access_count; i++) {
|
||||
DBG_FPRINTF(stderr, "rank %d off %lld len %lld\n",
|
||||
myrank, offset_list[i], len_list[i]);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* each process communicates its start and end offsets to other
|
||||
processes. The result is an array each of start and end offsets stored
|
||||
in order of process rank. */
|
||||
processes. The result is an array each of start and end offsets
|
||||
stored in order of process rank. */
|
||||
|
||||
st_offsets = (ADIO_Offset *) ADIOI_Malloc(nprocs*sizeof(ADIO_Offset));
|
||||
end_offsets = (ADIO_Offset *) ADIOI_Malloc(nprocs*sizeof(ADIO_Offset));
|
||||
@ -200,7 +208,9 @@ void ADIOI_BGL_ReadStridedColl(ADIO_File fd, void *buf, int count,
|
||||
|
||||
/* are the accesses of different processes interleaved? */
|
||||
for (i=1; i<nprocs; i++)
|
||||
if (st_offsets[i] < end_offsets[i-1]) interleave_count++;
|
||||
if ((st_offsets[i] < end_offsets[i-1]) &&
|
||||
(st_offsets[i] <= end_offsets[i]))
|
||||
interleave_count++;
|
||||
/* This is a rudimentary check for interleaving, but should suffice
|
||||
for the moment. */
|
||||
}
|
||||
@ -223,7 +233,7 @@ void ADIOI_BGL_ReadStridedColl(ADIO_File fd, void *buf, int count,
|
||||
|
||||
if (buftype_is_contig && filetype_is_contig) {
|
||||
if (file_ptr_type == ADIO_EXPLICIT_OFFSET) {
|
||||
off = fd->disp + (fd->etype_size) * offset;
|
||||
off = fd->disp + (ADIO_Offset)(fd->etype_size) * offset;
|
||||
ADIO_ReadContig(fd, buf, count, datatype, ADIO_EXPLICIT_OFFSET,
|
||||
off, status, error_code);
|
||||
}
|
||||
@ -263,7 +273,9 @@ void ADIOI_BGL_ReadStridedColl(ADIO_File fd, void *buf, int count,
|
||||
else
|
||||
ADIOI_Calc_file_domains(st_offsets, end_offsets, nprocs,
|
||||
nprocs_for_coll, &min_st_offset,
|
||||
&fd_start, &fd_end, &fd_size);
|
||||
&fd_start, &fd_end,
|
||||
fd->hints->min_fdomain_size, &fd_size,
|
||||
fd->hints->striping_unit);
|
||||
|
||||
#if BGL_PROFILE
|
||||
BGLMPIO_T_CIO_SET_GET( 0, r, 0, 1, 1, BGLMPIO_CIO_MYREQ, BGLMPIO_CIO_FD_PART )
|
||||
@ -381,205 +393,11 @@ void ADIOI_BGL_ReadStridedColl(ADIO_File fd, void *buf, int count,
|
||||
fd->fp_sys_posn = -1; /* set it to null. */
|
||||
}
|
||||
|
||||
#if 0
|
||||
void ADIOI_Calc_my_off_len(ADIO_File fd, int bufcount, MPI_Datatype
|
||||
datatype, int file_ptr_type, ADIO_Offset
|
||||
offset, ADIO_Offset **offset_list_ptr, int
|
||||
**len_list_ptr, ADIO_Offset *start_offset_ptr,
|
||||
ADIO_Offset *end_offset_ptr, int
|
||||
*contig_access_count_ptr)
|
||||
{
|
||||
int filetype_size, buftype_size, etype_size;
|
||||
int i, j, k, frd_size=0, old_frd_size=0, st_index=0;
|
||||
int n_filetypes, etype_in_filetype;
|
||||
ADIO_Offset abs_off_in_filetype=0;
|
||||
int bufsize, sum, n_etypes_in_filetype, size_in_filetype;
|
||||
int contig_access_count, *len_list, flag, filetype_is_contig;
|
||||
MPI_Aint filetype_extent, filetype_lb;
|
||||
ADIOI_Flatlist_node *flat_file;
|
||||
ADIO_Offset *offset_list, off, end_offset=0, disp;
|
||||
|
||||
/* For this process's request, calculate the list of offsets and
|
||||
lengths in the file and determine the start and end offsets. */
|
||||
|
||||
ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);
|
||||
|
||||
MPI_Type_size(fd->filetype, &filetype_size);
|
||||
MPI_Type_extent(fd->filetype, &filetype_extent);
|
||||
MPI_Type_lb(fd->filetype, &filetype_lb);
|
||||
MPI_Type_size(datatype, &buftype_size);
|
||||
etype_size = fd->etype_size;
|
||||
|
||||
if ( ! filetype_size ) {
|
||||
*contig_access_count_ptr = 0;
|
||||
*offset_list_ptr = (ADIO_Offset *) ADIOI_Malloc(2*sizeof(ADIO_Offset));
|
||||
*len_list_ptr = (int *) ADIOI_Malloc(2*sizeof(int));
|
||||
/* 2 is for consistency. everywhere I malloc one more than needed */
|
||||
|
||||
offset_list = *offset_list_ptr;
|
||||
len_list = *len_list_ptr;
|
||||
offset_list[0] = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind :
|
||||
fd->disp + etype_size * offset;
|
||||
len_list[0] = 0;
|
||||
*start_offset_ptr = offset_list[0];
|
||||
*end_offset_ptr = offset_list[0] + len_list[0] - 1;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
if (filetype_is_contig) {
|
||||
*contig_access_count_ptr = 1;
|
||||
*offset_list_ptr = (ADIO_Offset *) ADIOI_Malloc(2*sizeof(ADIO_Offset));
|
||||
*len_list_ptr = (int *) ADIOI_Malloc(2*sizeof(int));
|
||||
/* 2 is for consistency. everywhere I malloc one more than needed */
|
||||
|
||||
offset_list = *offset_list_ptr;
|
||||
len_list = *len_list_ptr;
|
||||
offset_list[0] = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind :
|
||||
fd->disp + etype_size * offset;
|
||||
len_list[0] = bufcount * buftype_size;
|
||||
*start_offset_ptr = offset_list[0];
|
||||
*end_offset_ptr = offset_list[0] + len_list[0] - 1;
|
||||
|
||||
/* update file pointer */
|
||||
if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind = *end_offset_ptr + 1;
|
||||
}
|
||||
|
||||
else {
|
||||
|
||||
/* First calculate what size of offset_list and len_list to allocate */
|
||||
|
||||
/* filetype already flattened in ADIO_Open or ADIO_Fcntl */
|
||||
flat_file = ADIOI_Flatlist;
|
||||
while (flat_file->type != fd->filetype) flat_file = flat_file->next;
|
||||
disp = fd->disp;
|
||||
|
||||
if (file_ptr_type == ADIO_INDIVIDUAL) {
|
||||
offset = fd->fp_ind; /* in bytes */
|
||||
n_filetypes = -1;
|
||||
flag = 0;
|
||||
while (!flag) {
|
||||
n_filetypes++;
|
||||
for (i=0; i<flat_file->count; i++) {
|
||||
if (disp + flat_file->indices[i] +
|
||||
(ADIO_Offset) n_filetypes*filetype_extent +
|
||||
flat_file->blocklens[i] >= offset)
|
||||
{
|
||||
st_index = i;
|
||||
frd_size = (int) (disp + flat_file->indices[i] +
|
||||
(ADIO_Offset) n_filetypes*filetype_extent
|
||||
+ flat_file->blocklens[i] - offset);
|
||||
flag = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
n_etypes_in_filetype = filetype_size/etype_size;
|
||||
n_filetypes = (int) (offset / n_etypes_in_filetype);
|
||||
etype_in_filetype = (int) (offset % n_etypes_in_filetype);
|
||||
size_in_filetype = etype_in_filetype * etype_size;
|
||||
|
||||
sum = 0;
|
||||
for (i=0; i<flat_file->count; i++) {
|
||||
sum += flat_file->blocklens[i];
|
||||
if (sum > size_in_filetype) {
|
||||
st_index = i;
|
||||
frd_size = sum - size_in_filetype;
|
||||
abs_off_in_filetype = flat_file->indices[i] +
|
||||
size_in_filetype - (sum - flat_file->blocklens[i]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* abs. offset in bytes in the file */
|
||||
offset = disp + (ADIO_Offset) n_filetypes*filetype_extent +
|
||||
abs_off_in_filetype;
|
||||
}
|
||||
|
||||
/* calculate how much space to allocate for offset_list, len_list */
|
||||
|
||||
old_frd_size = frd_size;
|
||||
contig_access_count = i = 0;
|
||||
j = st_index;
|
||||
bufsize = buftype_size * bufcount;
|
||||
frd_size = ADIOI_MIN(frd_size, bufsize);
|
||||
while (i < bufsize) {
|
||||
if (frd_size) contig_access_count++;
|
||||
i += frd_size;
|
||||
j = (j + 1) % flat_file->count;
|
||||
frd_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i);
|
||||
}
|
||||
|
||||
/* allocate space for offset_list and len_list */
|
||||
|
||||
*offset_list_ptr = (ADIO_Offset *)
|
||||
ADIOI_Malloc((contig_access_count+1)*sizeof(ADIO_Offset));
|
||||
*len_list_ptr = (int *) ADIOI_Malloc((contig_access_count+1)*sizeof(int));
|
||||
/* +1 to avoid a 0-size malloc */
|
||||
|
||||
offset_list = *offset_list_ptr;
|
||||
len_list = *len_list_ptr;
|
||||
|
||||
/* find start offset, end offset, and fill in offset_list and len_list */
|
||||
|
||||
*start_offset_ptr = offset; /* calculated above */
|
||||
|
||||
i = k = 0;
|
||||
j = st_index;
|
||||
off = offset;
|
||||
frd_size = ADIOI_MIN(old_frd_size, bufsize);
|
||||
while (i < bufsize) {
|
||||
if (frd_size) {
|
||||
offset_list[k] = off;
|
||||
len_list[k] = frd_size;
|
||||
k++;
|
||||
}
|
||||
i += frd_size;
|
||||
end_offset = off + frd_size - 1;
|
||||
|
||||
/* Note: end_offset points to the last byte-offset that will be accessed.
|
||||
e.g., if start_offset=0 and 100 bytes to be read, end_offset=99*/
|
||||
|
||||
if (off + frd_size < disp + flat_file->indices[j] +
|
||||
flat_file->blocklens[j] +
|
||||
(ADIO_Offset) n_filetypes*filetype_extent)
|
||||
{
|
||||
off += frd_size;
|
||||
/* did not reach end of contiguous block in filetype.
|
||||
* no more I/O needed. off is incremented by frd_size.
|
||||
*/
|
||||
}
|
||||
else {
|
||||
if (j < (flat_file->count - 1)) j++;
|
||||
else {
|
||||
/* hit end of flattened filetype;
|
||||
* start at beginning again
|
||||
*/
|
||||
j = 0;
|
||||
n_filetypes++;
|
||||
}
|
||||
off = disp + flat_file->indices[j] +
|
||||
(ADIO_Offset) n_filetypes*filetype_extent;
|
||||
frd_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i);
|
||||
}
|
||||
}
|
||||
|
||||
/* update file pointer */
|
||||
if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind = off;
|
||||
|
||||
*contig_access_count_ptr = contig_access_count;
|
||||
*end_offset_ptr = end_offset;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
static void ADIOI_Read_and_exch(ADIO_File fd, void *buf, MPI_Datatype
|
||||
datatype, int nprocs,
|
||||
int myrank, ADIOI_Access
|
||||
*others_req, ADIO_Offset *offset_list,
|
||||
int *len_list, int contig_access_count, ADIO_Offset
|
||||
ADIO_Offset *len_list, int contig_access_count, ADIO_Offset
|
||||
min_st_offset, ADIO_Offset fd_size,
|
||||
ADIO_Offset *fd_start, ADIO_Offset *fd_end,
|
||||
int *buf_idx, int *error_code)
|
||||
@ -594,19 +412,21 @@ static void ADIOI_Read_and_exch(ADIO_File fd, void *buf, MPI_Datatype
|
||||
array from a file, where each local array is 8Mbytes, requiring
|
||||
at least another 8Mbytes of temp space is unacceptable. */
|
||||
|
||||
int i, j, m, size, ntimes, max_ntimes, buftype_is_contig;
|
||||
int i, j, m, ntimes, max_ntimes, buftype_is_contig;
|
||||
ADIO_Offset st_loc=-1, end_loc=-1, off, done, real_off, req_off;
|
||||
char *read_buf = NULL, *tmp_buf;
|
||||
int *curr_offlen_ptr, *count, *send_size, *recv_size;
|
||||
int *partial_send, *recd_from_proc, *start_pos, for_next_iter;
|
||||
int real_size, req_len, flag, for_curr_iter, rank;
|
||||
int *partial_send, *recd_from_proc, *start_pos;
|
||||
/* Not convinced end_loc-st_loc couldn't be > int, so make these offsets*/
|
||||
ADIO_Offset real_size, size, for_curr_iter, for_next_iter;
|
||||
int req_len, flag, rank;
|
||||
MPI_Status status;
|
||||
ADIOI_Flatlist_node *flat_buf=NULL;
|
||||
MPI_Aint buftype_extent;
|
||||
int coll_bufsize;
|
||||
|
||||
#ifdef RDCOLL_DEBUG
|
||||
int iii;
|
||||
|
||||
#endif
|
||||
*error_code = MPI_SUCCESS; /* changed below if error */
|
||||
/* only I/O errors are currently reported */
|
||||
|
||||
@ -738,7 +558,7 @@ static void ADIOI_Read_and_exch(ADIO_File fd, void *buf, MPI_Datatype
|
||||
#ifdef PROFILE
|
||||
MPE_Log_event(13, 0, "start computation");
|
||||
#endif
|
||||
size = (int) (ADIOI_MIN(coll_bufsize, end_loc-st_loc+1-done));
|
||||
size = ADIOI_MIN((unsigned)coll_bufsize, end_loc-st_loc+1-done);
|
||||
real_off = off - for_curr_iter;
|
||||
real_size = size + for_curr_iter;
|
||||
|
||||
@ -746,7 +566,9 @@ static void ADIOI_Read_and_exch(ADIO_File fd, void *buf, MPI_Datatype
|
||||
for_next_iter = 0;
|
||||
|
||||
for (i=0; i<nprocs; i++) {
|
||||
/* FPRINTF(stderr, "rank %d, i %d, others_count %d\n", rank, i, others_req[i].count); */
|
||||
#ifdef RDCOLL_DEBUG
|
||||
DBG_FPRINTF(stderr, "rank %d, i %d, others_count %d\n", rank, i, others_req[i].count);
|
||||
#endif
|
||||
if (others_req[i].count) {
|
||||
start_pos[i] = curr_offlen_ptr[i];
|
||||
for (j=curr_offlen_ptr[i]; j<others_req[i].count;
|
||||
@ -769,22 +591,22 @@ static void ADIOI_Read_and_exch(ADIO_File fd, void *buf, MPI_Datatype
|
||||
}
|
||||
if (req_off < real_off + real_size) {
|
||||
count[i]++;
|
||||
ADIOI_Assert((((ADIO_Offset)(MPIR_Upint)read_buf)+req_off-real_off) == (ADIO_Offset)(MPIR_Upint)(read_buf+req_off-real_off));
|
||||
MPI_Address(read_buf+req_off-real_off,
|
||||
&(others_req[i].mem_ptrs[j]));
|
||||
send_size[i] += (int)(ADIOI_MIN(real_off + (ADIO_Offset)real_size -
|
||||
req_off, req_len));
|
||||
ADIOI_Assert((real_off + real_size - req_off) == (int)(real_off + real_size - req_off));
|
||||
send_size[i] += (int)(ADIOI_MIN(real_off + real_size - req_off,
|
||||
(ADIO_Offset)(unsigned)req_len));
|
||||
|
||||
if (real_off+real_size-req_off < req_len) {
|
||||
partial_send[i] = (int) (real_off+real_size-
|
||||
req_off);
|
||||
if (real_off+real_size-req_off < (ADIO_Offset)(unsigned)req_len) {
|
||||
partial_send[i] = (int) (real_off + real_size - req_off);
|
||||
if ((j+1 < others_req[i].count) &&
|
||||
(others_req[i].offsets[j+1] <
|
||||
real_off+real_size)) {
|
||||
/* this is the case illustrated in the
|
||||
figure above. */
|
||||
for_next_iter = (int) (ADIOI_MAX(for_next_iter,
|
||||
real_off + real_size -
|
||||
others_req[i].offsets[j+1]));
|
||||
for_next_iter = ADIOI_MAX(for_next_iter,
|
||||
real_off + real_size - others_req[i].offsets[j+1]);
|
||||
/* max because it must cover requests
|
||||
from different processes */
|
||||
}
|
||||
@ -805,13 +627,14 @@ static void ADIOI_Read_and_exch(ADIO_File fd, void *buf, MPI_Datatype
|
||||
MPE_Log_event(14, 0, "end computation");
|
||||
#endif
|
||||
if (flag) {
|
||||
ADIO_ReadContig(fd, read_buf+for_curr_iter, size, MPI_BYTE,
|
||||
ADIOI_Assert(size == (int)size);
|
||||
ADIO_ReadContig(fd, read_buf+for_curr_iter, (int)size, MPI_BYTE,
|
||||
ADIO_EXPLICIT_OFFSET, off, &status, error_code);
|
||||
/*
|
||||
printf( "\tread_coll: 700, data read [%3d] = ", size );
|
||||
for (iii=0; iii<size; iii++) { printf( "%3d,", *((unsigned char *)read_buf + for_curr_iter + iii) ); }
|
||||
printf( "\n" );
|
||||
*/
|
||||
#ifdef RDCOLL_DEBUG
|
||||
DBG_FPRINTF(stderr, "\tread_coll: 700, data read [%lld] = ", size );
|
||||
for (iii=0; iii<size && iii<80; iii++) { DBGV_FPRINTF(stderr, "%3d,", *((unsigned char *)read_buf + for_curr_iter + iii) ); }
|
||||
DBG_FPRINTF(stderr, "\n" );
|
||||
#endif
|
||||
|
||||
if (*error_code != MPI_SUCCESS) return;
|
||||
}
|
||||
@ -849,6 +672,8 @@ static void ADIOI_Read_and_exch(ADIO_File fd, void *buf, MPI_Datatype
|
||||
|
||||
if (for_next_iter) {
|
||||
tmp_buf = (char *) ADIOI_Malloc(for_next_iter);
|
||||
ADIOI_Assert((((ADIO_Offset)(MPIR_Upint)read_buf)+real_size-for_next_iter) == (ADIO_Offset)(MPIR_Upint)(read_buf+real_size-for_next_iter));
|
||||
ADIOI_Assert((for_next_iter+coll_bufsize) == (size_t)(for_next_iter+coll_bufsize));
|
||||
memcpy(tmp_buf, read_buf+real_size-for_next_iter, for_next_iter);
|
||||
ADIOI_Free(read_buf);
|
||||
read_buf = (char *) ADIOI_Malloc(for_next_iter+coll_bufsize);
|
||||
@ -902,7 +727,7 @@ static void ADIOI_Read_and_exch(ADIO_File fd, void *buf, MPI_Datatype
|
||||
}
|
||||
|
||||
static void ADIOI_R_Exchange_data(ADIO_File fd, void *buf, ADIOI_Flatlist_node
|
||||
*flat_buf, ADIO_Offset *offset_list, int
|
||||
*flat_buf, ADIO_Offset *offset_list, ADIO_Offset
|
||||
*len_list, int *send_size, int *recv_size,
|
||||
int *count, int *start_pos, int *partial_send,
|
||||
int *recd_from_proc, int nprocs,
|
||||
@ -937,6 +762,10 @@ static void ADIOI_R_Exchange_data(ADIO_File fd, void *buf, ADIOI_Flatlist_node
|
||||
/* post recvs. if buftype_is_contig, data can be directly recd. into
|
||||
user buf at location given by buf_idx. else use recv_buf. */
|
||||
|
||||
#ifdef AGGREGATION_PROFILE
|
||||
MPE_Log_event (5032, 0, NULL);
|
||||
#endif
|
||||
|
||||
if (buftype_is_contig) {
|
||||
j = 0;
|
||||
for (i=0; i < nprocs; i++)
|
||||
@ -960,8 +789,10 @@ static void ADIOI_R_Exchange_data(ADIO_File fd, void *buf, ADIOI_Flatlist_node
|
||||
MPI_Irecv(recv_buf[i], recv_size[i], MPI_BYTE, i,
|
||||
myrank+i+100*iter, fd->comm, requests+j);
|
||||
j++;
|
||||
/* FPRINTF(stderr, "node %d, recv_size %d, tag %d \n",
|
||||
myrank, recv_size[i], myrank+i+100*iter); */
|
||||
#ifdef RDCOLL_DEBUG
|
||||
DBG_FPRINTF(stderr, "node %d, recv_size %d, tag %d \n",
|
||||
myrank, recv_size[i], myrank+i+100*iter);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
@ -1006,7 +837,7 @@ static void ADIOI_R_Exchange_data(ADIO_File fd, void *buf, ADIOI_Flatlist_node
|
||||
/* if noncontiguous, do the copies from the recv buffers */
|
||||
if (!buftype_is_contig)
|
||||
ADIOI_Fill_user_buffer(fd, buf, flat_buf, recv_buf,
|
||||
offset_list, len_list, recv_size,
|
||||
offset_list, len_list, (unsigned*)recv_size,
|
||||
requests, statuses, recd_from_proc,
|
||||
nprocs, contig_access_count,
|
||||
min_st_offset, fd_size, fd_start, fd_end,
|
||||
@ -1024,9 +855,11 @@ static void ADIOI_R_Exchange_data(ADIO_File fd, void *buf, ADIOI_Flatlist_node
|
||||
if (recv_size[i]) ADIOI_Free(recv_buf[i]);
|
||||
ADIOI_Free(recv_buf);
|
||||
}
|
||||
#ifdef AGGREGATION_PROFILE
|
||||
MPE_Log_event (5033, 0, NULL);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
#define ADIOI_BUF_INCR \
|
||||
{ \
|
||||
while (buf_incr) { \
|
||||
@ -1040,7 +873,7 @@ static void ADIOI_R_Exchange_data(ADIO_File fd, void *buf, ADIOI_Flatlist_node
|
||||
n_buftypes++; \
|
||||
} \
|
||||
user_buf_idx = flat_buf->indices[flat_buf_idx] + \
|
||||
n_buftypes*buftype_extent; \
|
||||
(ADIO_Offset)n_buftypes*(ADIO_Offset)buftype_extent; \
|
||||
flat_buf_sz = flat_buf->blocklens[flat_buf_idx]; \
|
||||
} \
|
||||
buf_incr -= size_in_buf; \
|
||||
@ -1052,9 +885,11 @@ static void ADIOI_R_Exchange_data(ADIO_File fd, void *buf, ADIOI_Flatlist_node
|
||||
{ \
|
||||
while (size) { \
|
||||
size_in_buf = ADIOI_MIN(size, flat_buf_sz); \
|
||||
ADIOI_Assert((((ADIO_Offset)(MPIR_Upint)buf) + user_buf_idx) == (ADIO_Offset)(MPIR_Upint)(buf + user_buf_idx)); \
|
||||
ADIOI_Assert(size_in_buf == (size_t)size_in_buf); \
|
||||
memcpy(((char *) buf) + user_buf_idx, \
|
||||
&(recv_buf[p][recv_buf_idx[p]]), size_in_buf); \
|
||||
recv_buf_idx[p] += size_in_buf; \
|
||||
recv_buf_idx[p] += size_in_buf; /* already tested (size_t)size_in_buf*/ \
|
||||
user_buf_idx += size_in_buf; \
|
||||
flat_buf_sz -= size_in_buf; \
|
||||
if (!flat_buf_sz) { \
|
||||
@ -1064,7 +899,7 @@ static void ADIOI_R_Exchange_data(ADIO_File fd, void *buf, ADIOI_Flatlist_node
|
||||
n_buftypes++; \
|
||||
} \
|
||||
user_buf_idx = flat_buf->indices[flat_buf_idx] + \
|
||||
n_buftypes*buftype_extent; \
|
||||
(ADIO_Offset)n_buftypes*(ADIO_Offset)buftype_extent; \
|
||||
flat_buf_sz = flat_buf->blocklens[flat_buf_idx]; \
|
||||
} \
|
||||
size -= size_in_buf; \
|
||||
@ -1073,11 +908,10 @@ static void ADIOI_R_Exchange_data(ADIO_File fd, void *buf, ADIOI_Flatlist_node
|
||||
ADIOI_BUF_INCR \
|
||||
}
|
||||
|
||||
|
||||
static void ADIOI_Fill_user_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node
|
||||
*flat_buf, char **recv_buf, ADIO_Offset
|
||||
*offset_list, int *len_list,
|
||||
int *recv_size,
|
||||
*offset_list, ADIO_Offset *len_list,
|
||||
unsigned *recv_size,
|
||||
MPI_Request *requests, MPI_Status *statuses,
|
||||
int *recd_from_proc, int nprocs,
|
||||
int contig_access_count,
|
||||
@ -1086,13 +920,18 @@ static void ADIOI_Fill_user_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node
|
||||
ADIO_Offset *fd_end,
|
||||
MPI_Aint buftype_extent)
|
||||
{
|
||||
|
||||
/* this function is only called if buftype is not contig */
|
||||
|
||||
int i, p, flat_buf_idx, size, buf_incr;
|
||||
int flat_buf_sz, size_in_buf, n_buftypes;
|
||||
int i, p, flat_buf_idx;
|
||||
ADIO_Offset flat_buf_sz, size_in_buf, buf_incr, size;
|
||||
int n_buftypes;
|
||||
ADIO_Offset off, len, rem_len, user_buf_idx;
|
||||
/* Not sure unsigned is necessary, but it makes the math safer */
|
||||
unsigned *curr_from_proc, *done_from_proc, *recv_buf_idx;
|
||||
|
||||
int *curr_from_proc, *done_from_proc, *recv_buf_idx;
|
||||
ADIOI_UNREFERENCED_ARG(requests);
|
||||
ADIOI_UNREFERENCED_ARG(statuses);
|
||||
|
||||
/* curr_from_proc[p] = amount of data recd from proc. p that has already
|
||||
been accounted for so far
|
||||
@ -1100,9 +939,9 @@ static void ADIOI_Fill_user_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node
|
||||
filled into user buffer in previous iterations
|
||||
user_buf_idx = current location in user buffer
|
||||
recv_buf_idx[p] = current location in recv_buf of proc. p */
|
||||
curr_from_proc = (int *) ADIOI_Malloc(nprocs * sizeof(int));
|
||||
done_from_proc = (int *) ADIOI_Malloc(nprocs * sizeof(int));
|
||||
recv_buf_idx = (int *) ADIOI_Malloc(nprocs * sizeof(int));
|
||||
curr_from_proc = (unsigned *) ADIOI_Malloc(nprocs * sizeof(unsigned));
|
||||
done_from_proc = (unsigned *) ADIOI_Malloc(nprocs * sizeof(unsigned));
|
||||
recv_buf_idx = (unsigned *) ADIOI_Malloc(nprocs * sizeof(unsigned));
|
||||
|
||||
for (i=0; i < nprocs; i++) {
|
||||
recv_buf_idx[i] = curr_from_proc[i] = 0;
|
||||
@ -1120,7 +959,7 @@ static void ADIOI_Fill_user_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node
|
||||
|
||||
for (i=0; i<contig_access_count; i++) {
|
||||
off = offset_list[i];
|
||||
rem_len = (ADIO_Offset) len_list[i];
|
||||
rem_len = len_list[i];
|
||||
|
||||
/* this request may span the file domains of more than one process */
|
||||
while (rem_len > 0) {
|
||||
@ -1140,29 +979,32 @@ static void ADIOI_Fill_user_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node
|
||||
if (recv_buf_idx[p] < recv_size[p]) {
|
||||
if (curr_from_proc[p]+len > done_from_proc[p]) {
|
||||
if (done_from_proc[p] > curr_from_proc[p]) {
|
||||
size = (int)ADIOI_MIN(curr_from_proc[p] + len -
|
||||
size = ADIOI_MIN(curr_from_proc[p] + len -
|
||||
done_from_proc[p], recv_size[p]-recv_buf_idx[p]);
|
||||
buf_incr = done_from_proc[p] - curr_from_proc[p];
|
||||
ADIOI_BUF_INCR
|
||||
buf_incr = (int)(curr_from_proc[p]+len-done_from_proc[p]);
|
||||
buf_incr = curr_from_proc[p]+len-done_from_proc[p];
|
||||
ADIOI_Assert((done_from_proc[p] + size) == (unsigned)((ADIO_Offset)done_from_proc[p] + size));
|
||||
curr_from_proc[p] = done_from_proc[p] + size;
|
||||
ADIOI_BUF_COPY
|
||||
}
|
||||
else {
|
||||
size = (int)ADIOI_MIN(len,recv_size[p]-recv_buf_idx[p]);
|
||||
buf_incr = (int)len;
|
||||
curr_from_proc[p] += size;
|
||||
size = ADIOI_MIN(len,recv_size[p]-recv_buf_idx[p]);
|
||||
buf_incr = len;
|
||||
ADIOI_Assert((curr_from_proc[p] + size) == (unsigned)((ADIO_Offset)curr_from_proc[p] + size));
|
||||
curr_from_proc[p] += (unsigned) size;
|
||||
ADIOI_BUF_COPY
|
||||
}
|
||||
}
|
||||
else {
|
||||
curr_from_proc[p] += (int)len;
|
||||
buf_incr = (int)len;
|
||||
ADIOI_Assert((curr_from_proc[p] + len) == (unsigned)((ADIO_Offset)curr_from_proc[p] + len));
|
||||
curr_from_proc[p] += (unsigned) len;
|
||||
buf_incr = len;
|
||||
ADIOI_BUF_INCR
|
||||
}
|
||||
}
|
||||
else {
|
||||
buf_incr = (int)len;
|
||||
buf_incr = len;
|
||||
ADIOI_BUF_INCR
|
||||
}
|
||||
off += len;
|
||||
@ -1179,7 +1021,7 @@ static void ADIOI_Fill_user_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node
|
||||
|
||||
static void ADIOI_R_Exchange_data_alltoallv(
|
||||
ADIO_File fd, void *buf, ADIOI_Flatlist_node
|
||||
*flat_buf, ADIO_Offset *offset_list, int
|
||||
*flat_buf, ADIO_Offset *offset_list, ADIO_Offset
|
||||
*len_list, int *send_size, int *recv_size,
|
||||
int *count, int *start_pos, int *partial_send,
|
||||
int *recd_from_proc, int nprocs,
|
||||
@ -1192,9 +1034,8 @@ static void ADIOI_R_Exchange_data_alltoallv(
|
||||
{
|
||||
int i, j, k=0, tmp=0, nprocs_recv, nprocs_send;
|
||||
char **recv_buf = NULL;
|
||||
MPI_Request *requests;
|
||||
MPI_Datatype send_type;
|
||||
MPI_Status *statuses;
|
||||
MPI_Request *requests=NULL;
|
||||
MPI_Status *statuses=NULL;
|
||||
int rtail, stail;
|
||||
char *sbuf_ptr, *from_ptr;
|
||||
int len;
|
||||
@ -1238,7 +1079,8 @@ static void ADIOI_R_Exchange_data_alltoallv(
|
||||
}
|
||||
sbuf_ptr = all_send_buf + sdispls[i];
|
||||
for (j=0; j<count[i]; j++) {
|
||||
from_ptr = (char *)( others_req[i].mem_ptrs[ start_pos[i]+j ] );
|
||||
ADIOI_ENSURE_AINT_FITS_IN_PTR( others_req[i].mem_ptrs[ start_pos[i]+j ]);
|
||||
from_ptr = (char *) ADIOI_AINT_CAST_TO_VOID_PTR ( others_req[i].mem_ptrs[ start_pos[i]+j ] );
|
||||
len = others_req[i].lens[ start_pos[i]+j ] ;
|
||||
memcpy( sbuf_ptr, from_ptr, len );
|
||||
sbuf_ptr += len;
|
||||
@ -1247,26 +1089,19 @@ static void ADIOI_R_Exchange_data_alltoallv(
|
||||
}
|
||||
}
|
||||
|
||||
#if 0
|
||||
printf( "\tsend_size = " );
|
||||
for (i=0; i<nprocs; i++) { printf( "%2d,", send_size[i] ); }
|
||||
printf( "\n" );
|
||||
printf( "\trecv_size = " );
|
||||
for (i=0; i<nprocs; i++) { printf( "%2d,", recv_size[i] ); }
|
||||
printf( "\n" );
|
||||
printf( "\tsdispls = " );
|
||||
for (i=0; i<nprocs; i++) { printf( "%2d,", sdispls [i] ); }
|
||||
printf( "\n" );
|
||||
printf( "\trdispls = " );
|
||||
for (i=0; i<nprocs; i++) { printf( "%2d,", rdispls [i] ); }
|
||||
printf( "\n" );
|
||||
printf( "\ttails = %4d, %4d\n", stail, rtail );
|
||||
#endif
|
||||
#if 0
|
||||
#if RDCOLL_DEBUG
|
||||
DBG_FPRINTF(stderr, "\tsend_size = [%d]%2d,",0,send_size[0]);
|
||||
for (i=1; i<nprocs; i++) if(send_size[i-1]!=send_size[i]){ DBG_FPRINTF(stderr, "\t\t[%d]%2d,", i,send_size[i] ); }
|
||||
DBG_FPRINTF(stderr, "\trecv_size = [%d]%2d,",0,recv_size[0]);
|
||||
for (i=1; i<nprocs; i++) if(recv_size[i-1]!=recv_size[i]){ DBG_FPRINTF(stderr, "\t\t[%d]%2d,", i,recv_size[i] ); }
|
||||
DBG_FPRINTF(stderr, "\tsdispls = [%d]%2d,",0,sdispls[0]);
|
||||
for (i=1; i<nprocs; i++) if(sdispls[i-1]!=sdispls[i]){ DBG_FPRINTF(stderr, "\t\t[%d]%2d,", i,sdispls [i] ); }
|
||||
DBG_FPRINTF(stderr, "\trdispls = [%d]%2d,",0,rdispls[0]);
|
||||
for (i=1; i<nprocs; i++) if(rdispls[i-1]!=rdispls[i]){ DBG_FPRINTF(stderr, "\t\t[%d]%2d,", i,rdispls [i] ); }
|
||||
DBG_FPRINTF(stderr, "\ttails = %4d, %4d\n", stail, rtail );
|
||||
if (nprocs_send) {
|
||||
printf( "\tall_send_buf = " );
|
||||
for (i=0; i<nprocs; i++) { printf( "%2d,", all_send_buf [i*131072] ); }
|
||||
printf( "\n" );
|
||||
DBG_FPRINTF(stderr, "\tall_send_buf = [%d]%2d,",0,all_send_buf[0]);
|
||||
for (i=1; i<nprocs; i++) if(all_send_buf[(i-1)*131072]!=all_send_buf[i*131072]){ DBG_FPRINTF(stderr, "\t\t[%d]%2d,", i, all_send_buf [i*131072] ); }
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -1277,16 +1112,16 @@ static void ADIOI_R_Exchange_data_alltoallv(
|
||||
fd->comm );
|
||||
|
||||
#if 0
|
||||
printf( "\tall_recv_buf = " );
|
||||
for (i=131072; i<131073; i++) { printf( "%2d,", all_recv_buf [i] ); }
|
||||
printf( "\n" );
|
||||
DBG_FPRINTF(stderr, "\tall_recv_buf = " );
|
||||
for (i=131072; i<131073; i++) { DBG_FPRINTF(stderr, "%2d,", all_recv_buf [i] ); }
|
||||
DBG_FPRINTF(stderr, "\n" );
|
||||
#endif
|
||||
|
||||
/* unpack at the receiver side */
|
||||
if (nprocs_recv) {
|
||||
if (!buftype_is_contig)
|
||||
ADIOI_Fill_user_buffer(fd, buf, flat_buf, recv_buf,
|
||||
offset_list, len_list, recv_size,
|
||||
offset_list, len_list, (unsigned*)recv_size,
|
||||
requests, statuses, /* never used inside */
|
||||
recd_from_proc,
|
||||
nprocs, contig_access_count,
|
||||
|
@ -21,9 +21,9 @@ void ADIOI_BGL_ReadContig(ADIO_File fd, void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Status *status, int *error_code)
|
||||
{
|
||||
int err=-1, datatype_size, len;
|
||||
int err=-1, datatype_size;
|
||||
ADIO_Offset len;
|
||||
static char myname[] = "ADIOI_BGL_READCONTIG";
|
||||
|
||||
#if BGL_PROFILE
|
||||
/* timing */
|
||||
double io_time, io_time2;
|
||||
@ -35,7 +35,8 @@ void ADIOI_BGL_ReadContig(ADIO_File fd, void *buf, int count,
|
||||
#endif
|
||||
|
||||
MPI_Type_size(datatype, &datatype_size);
|
||||
len = datatype_size * count;
|
||||
len = (ADIO_Offset)datatype_size * (ADIO_Offset)count;
|
||||
ADIOI_Assert(len == (unsigned int) len); /* read takes an unsigned int parm */
|
||||
|
||||
#if BGL_PROFILE
|
||||
|
||||
@ -48,7 +49,7 @@ void ADIOI_BGL_ReadContig(ADIO_File fd, void *buf, int count,
|
||||
ADIOI_WRITE_LOCK(fd, offset, SEEK_SET, len);
|
||||
else ADIOI_READ_LOCK(fd, offset, SEEK_SET, len);
|
||||
if (bglmpio_timing2) io_time2 = MPI_Wtime();
|
||||
err = read(fd->fd_sys, buf, len);
|
||||
err = read(fd->fd_sys, buf, (unsigned int)len);
|
||||
if (bglmpio_timing2) bglmpio_prof_cr[ BGLMPIO_CIO_T_POSI_RW ] += (MPI_Wtime() - io_time2);
|
||||
ADIOI_UNLOCK(fd, offset, SEEK_SET, len);
|
||||
fd->fp_sys_posn = offset + err;
|
||||
@ -64,7 +65,7 @@ void ADIOI_BGL_ReadContig(ADIO_File fd, void *buf, int count,
|
||||
ADIOI_WRITE_LOCK(fd, offset, SEEK_SET, len);
|
||||
else ADIOI_READ_LOCK(fd, offset, SEEK_SET, len);
|
||||
if (bglmpio_timing2) io_time2 = MPI_Wtime();
|
||||
err = read(fd->fd_sys, buf, len);
|
||||
err = read(fd->fd_sys, buf, (unsigned int)len);
|
||||
if (bglmpio_timing2) bglmpio_prof_cr[ BGLMPIO_CIO_T_POSI_RW ] += (MPI_Wtime() - io_time2);
|
||||
ADIOI_UNLOCK(fd, offset, SEEK_SET, len);
|
||||
fd->fp_ind += err;
|
||||
@ -79,7 +80,7 @@ void ADIOI_BGL_ReadContig(ADIO_File fd, void *buf, int count,
|
||||
if (fd->atomicity)
|
||||
ADIOI_WRITE_LOCK(fd, offset, SEEK_SET, len);
|
||||
else ADIOI_READ_LOCK(fd, offset, SEEK_SET, len);
|
||||
err = read(fd->fd_sys, buf, len);
|
||||
err = read(fd->fd_sys, buf, (unsigned int)len);
|
||||
ADIOI_UNLOCK(fd, offset, SEEK_SET, len);
|
||||
fd->fp_sys_posn = offset + err;
|
||||
/* individual file pointer not updated */
|
||||
@ -91,7 +92,7 @@ void ADIOI_BGL_ReadContig(ADIO_File fd, void *buf, int count,
|
||||
if (fd->atomicity)
|
||||
ADIOI_WRITE_LOCK(fd, offset, SEEK_SET, len);
|
||||
else ADIOI_READ_LOCK(fd, offset, SEEK_SET, len);
|
||||
err = read(fd->fd_sys, buf, len);
|
||||
err = read(fd->fd_sys, buf, (unsigned int)len);
|
||||
ADIOI_UNLOCK(fd, offset, SEEK_SET, len);
|
||||
fd->fp_ind += err;
|
||||
fd->fp_sys_posn = fd->fp_ind;
|
||||
@ -120,12 +121,11 @@ void ADIOI_BGL_ReadContig(ADIO_File fd, void *buf, int count,
|
||||
}
|
||||
|
||||
|
||||
|
||||
#define ADIOI_BUFFERED_READ \
|
||||
{ \
|
||||
if (req_off >= readbuf_off + readbuf_len) { \
|
||||
readbuf_off = req_off; \
|
||||
readbuf_len = (int) (ADIOI_MIN(max_bufsize, end_offset-readbuf_off+1));\
|
||||
readbuf_len = (unsigned) (ADIOI_MIN(max_bufsize, end_offset-readbuf_off+1));\
|
||||
lseek(fd->fd_sys, readbuf_off, SEEK_SET);\
|
||||
if (!(fd->atomicity)) ADIOI_READ_LOCK(fd, readbuf_off, SEEK_SET, readbuf_len);\
|
||||
err = read(fd->fd_sys, readbuf, readbuf_len);\
|
||||
@ -133,6 +133,7 @@ void ADIOI_BGL_ReadContig(ADIO_File fd, void *buf, int count,
|
||||
if (err == -1) err_flag = 1; \
|
||||
} \
|
||||
while (req_len > readbuf_off + readbuf_len - req_off) { \
|
||||
ADIOI_Assert((readbuf_off + readbuf_len - req_off) == (int) (readbuf_off + readbuf_len - req_off));\
|
||||
partial_read = (int) (readbuf_off + readbuf_len - req_off); \
|
||||
tmp_buf = (char *) ADIOI_Malloc(partial_read); \
|
||||
memcpy(tmp_buf, readbuf+readbuf_len-partial_read, partial_read); \
|
||||
@ -141,7 +142,7 @@ void ADIOI_BGL_ReadContig(ADIO_File fd, void *buf, int count,
|
||||
memcpy(readbuf, tmp_buf, partial_read); \
|
||||
ADIOI_Free(tmp_buf); \
|
||||
readbuf_off += readbuf_len-partial_read; \
|
||||
readbuf_len = (int) (partial_read + ADIOI_MIN(max_bufsize, \
|
||||
readbuf_len = (unsigned) (partial_read + ADIOI_MIN(max_bufsize, \
|
||||
end_offset-readbuf_off+1)); \
|
||||
lseek(fd->fd_sys, readbuf_off+partial_read, SEEK_SET);\
|
||||
if (!(fd->atomicity)) ADIOI_READ_LOCK(fd, readbuf_off+partial_read, SEEK_SET, readbuf_len-partial_read);\
|
||||
@ -149,6 +150,7 @@ void ADIOI_BGL_ReadContig(ADIO_File fd, void *buf, int count,
|
||||
if (!(fd->atomicity)) ADIOI_UNLOCK(fd, readbuf_off+partial_read, SEEK_SET, readbuf_len-partial_read);\
|
||||
if (err == -1) err_flag = 1; \
|
||||
} \
|
||||
ADIOI_Assert(req_len == (size_t)req_len); \
|
||||
memcpy((char *)buf + userbuf_off, readbuf+req_off-readbuf_off, req_len); \
|
||||
}
|
||||
|
||||
@ -160,20 +162,23 @@ void ADIOI_BGL_ReadStrided(ADIO_File fd, void *buf, int count,
|
||||
{
|
||||
/* offset is in units of etype relative to the filetype. */
|
||||
|
||||
|
||||
ADIOI_Flatlist_node *flat_buf, *flat_file;
|
||||
int i, j, k, err=-1, brd_size, frd_size=0, st_index=0;
|
||||
int bufsize, num, size, sum, n_etypes_in_filetype, size_in_filetype;
|
||||
int n_filetypes, etype_in_filetype;
|
||||
ADIO_Offset i_offset, new_brd_size, brd_size, size;
|
||||
int i, j, k, err=-1, st_index=0;
|
||||
ADIO_Offset frd_size=0, new_frd_size, st_frd_size;
|
||||
unsigned num, bufsize;
|
||||
int n_etypes_in_filetype;
|
||||
ADIO_Offset n_filetypes, etype_in_filetype, st_n_filetypes, size_in_filetype;
|
||||
ADIO_Offset abs_off_in_filetype=0;
|
||||
int filetype_size, etype_size, buftype_size, req_len, partial_read;
|
||||
int filetype_size, etype_size, buftype_size, partial_read;
|
||||
MPI_Aint filetype_extent, buftype_extent;
|
||||
int buf_count, buftype_is_contig, filetype_is_contig;
|
||||
ADIO_Offset userbuf_off;
|
||||
ADIO_Offset userbuf_off, req_len, sum;
|
||||
ADIO_Offset off, req_off, disp, end_offset=0, readbuf_off, start_off;
|
||||
char *readbuf, *tmp_buf, *value;
|
||||
int flag, st_frd_size, st_n_filetypes, readbuf_len;
|
||||
int new_brd_size, new_frd_size, err_flag=0, info_flag, max_bufsize;
|
||||
|
||||
int err_flag=0, info_flag;
|
||||
unsigned max_bufsize, readbuf_len;
|
||||
static char myname[] = "ADIOI_BGL_READSTRIDED";
|
||||
|
||||
if (fd->hints->ds_read == ADIOI_HINT_DISABLE) {
|
||||
@ -207,12 +212,13 @@ void ADIOI_BGL_ReadStrided(ADIO_File fd, void *buf, int count,
|
||||
MPI_Type_extent(datatype, &buftype_extent);
|
||||
etype_size = fd->etype_size;
|
||||
|
||||
ADIOI_Assert((buftype_size * count) == ((ADIO_Offset)(unsigned)buftype_size * (ADIO_Offset)count));
|
||||
bufsize = buftype_size * count;
|
||||
|
||||
/* get max_bufsize from the info object. */
|
||||
|
||||
value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
|
||||
MPI_Info_get(fd->info, "ind_rd_buffer_size", MPI_MAX_INFO_VAL, value,
|
||||
ADIOI_Info_get(fd->info, "ind_rd_buffer_size", MPI_MAX_INFO_VAL, value,
|
||||
&info_flag);
|
||||
max_bufsize = atoi(value);
|
||||
ADIOI_Free(value);
|
||||
@ -226,13 +232,13 @@ void ADIOI_BGL_ReadStrided(ADIO_File fd, void *buf, int count,
|
||||
while (flat_buf->type != datatype) flat_buf = flat_buf->next;
|
||||
|
||||
off = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind :
|
||||
fd->disp + etype_size * offset;
|
||||
fd->disp + (ADIO_Offset)etype_size * offset;
|
||||
|
||||
start_off = off;
|
||||
end_offset = off + bufsize - 1;
|
||||
readbuf_off = off;
|
||||
readbuf = (char *) ADIOI_Malloc(max_bufsize);
|
||||
readbuf_len = (int) (ADIOI_MIN(max_bufsize, end_offset-readbuf_off+1));
|
||||
readbuf_len = (unsigned) (ADIOI_MIN(max_bufsize, end_offset-readbuf_off+1));
|
||||
|
||||
/* if atomicity is true, lock (exclusive) the region to be accessed */
|
||||
if (fd->atomicity)
|
||||
@ -245,13 +251,16 @@ void ADIOI_BGL_ReadStrided(ADIO_File fd, void *buf, int count,
|
||||
if (err == -1) err_flag = 1;
|
||||
|
||||
for (j=0; j<count; j++)
|
||||
{
|
||||
int i;
|
||||
for (i=0; i<flat_buf->count; i++) {
|
||||
userbuf_off = j*buftype_extent + flat_buf->indices[i];
|
||||
userbuf_off = (ADIO_Offset)j*(ADIO_Offset)buftype_extent + flat_buf->indices[i];
|
||||
req_off = off;
|
||||
req_len = flat_buf->blocklens[i];
|
||||
ADIOI_BUFFERED_READ
|
||||
off += flat_buf->blocklens[i];
|
||||
}
|
||||
}
|
||||
|
||||
if (fd->atomicity)
|
||||
ADIOI_UNLOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);
|
||||
@ -277,29 +286,36 @@ void ADIOI_BGL_ReadStrided(ADIO_File fd, void *buf, int count,
|
||||
disp = fd->disp;
|
||||
|
||||
if (file_ptr_type == ADIO_INDIVIDUAL) {
|
||||
offset = fd->fp_ind; /* in bytes */
|
||||
n_filetypes = -1;
|
||||
flag = 0;
|
||||
while (!flag) {
|
||||
n_filetypes++;
|
||||
/* Wei-keng reworked type processing to be a bit more efficient */
|
||||
offset = fd->fp_ind - disp;
|
||||
n_filetypes = (offset - flat_file->indices[0]) / filetype_extent;
|
||||
offset -= (ADIO_Offset)n_filetypes * filetype_extent;
|
||||
/* now offset is local to this extent */
|
||||
|
||||
/* find the block where offset is located, skip blocklens[i]==0 */
|
||||
for (i=0; i<flat_file->count; i++) {
|
||||
if (disp + flat_file->indices[i] +
|
||||
(ADIO_Offset) n_filetypes*filetype_extent + flat_file->blocklens[i]
|
||||
>= offset) {
|
||||
st_index = i;
|
||||
frd_size = (int) (disp + flat_file->indices[i] +
|
||||
(ADIO_Offset) n_filetypes*filetype_extent
|
||||
+ flat_file->blocklens[i] - offset);
|
||||
flag = 1;
|
||||
ADIO_Offset dist;
|
||||
if (flat_file->blocklens[i] == 0) continue;
|
||||
dist = flat_file->indices[i] + flat_file->blocklens[i] - offset;
|
||||
/* frd_size is from offset to the end of block i */
|
||||
if (dist == 0) {
|
||||
i++;
|
||||
offset = flat_file->indices[i];
|
||||
frd_size = flat_file->blocklens[i];
|
||||
break;
|
||||
}
|
||||
if (dist > 0) {
|
||||
frd_size = dist;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
st_index = i; /* starting index in flat_file->indices[] */
|
||||
offset += disp + (ADIO_Offset)n_filetypes*filetype_extent;
|
||||
}
|
||||
else {
|
||||
n_etypes_in_filetype = filetype_size/etype_size;
|
||||
n_filetypes = (int) (offset / n_etypes_in_filetype);
|
||||
etype_in_filetype = (int) (offset % n_etypes_in_filetype);
|
||||
n_filetypes = offset / n_etypes_in_filetype;
|
||||
etype_in_filetype = offset % n_etypes_in_filetype;
|
||||
size_in_filetype = etype_in_filetype * etype_size;
|
||||
|
||||
sum = 0;
|
||||
@ -315,32 +331,63 @@ void ADIOI_BGL_ReadStrided(ADIO_File fd, void *buf, int count,
|
||||
}
|
||||
|
||||
/* abs. offset in bytes in the file */
|
||||
offset = disp + (ADIO_Offset) n_filetypes*filetype_extent + abs_off_in_filetype;
|
||||
offset = disp + (ADIO_Offset) n_filetypes*filetype_extent +
|
||||
abs_off_in_filetype;
|
||||
}
|
||||
|
||||
start_off = offset;
|
||||
|
||||
/* Wei-keng Liao: read request is within a single flat_file contig
|
||||
* block e.g. with subarray types that actually describe the whole
|
||||
* array */
|
||||
if (buftype_is_contig && bufsize <= frd_size) {
|
||||
ADIO_ReadContig(fd, buf, bufsize, MPI_BYTE, ADIO_EXPLICIT_OFFSET,
|
||||
offset, status, error_code);
|
||||
|
||||
if (file_ptr_type == ADIO_INDIVIDUAL) {
|
||||
/* update MPI-IO file pointer to point to the first byte that
|
||||
* can be accessed in the fileview. */
|
||||
fd->fp_ind = offset + bufsize;
|
||||
if (bufsize == frd_size) {
|
||||
do {
|
||||
st_index++;
|
||||
if (st_index == flat_file->count) {
|
||||
st_index = 0;
|
||||
n_filetypes++;
|
||||
}
|
||||
} while (flat_file->blocklens[st_index] == 0);
|
||||
fd->fp_ind = disp + flat_file->indices[st_index]
|
||||
+ n_filetypes*filetype_extent;
|
||||
}
|
||||
}
|
||||
fd->fp_sys_posn = -1; /* set it to null. */
|
||||
#ifdef HAVE_STATUS_SET_BYTES
|
||||
MPIR_Status_set_bytes(status, datatype, bufsize);
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
||||
/* Calculate end_offset, the last byte-offset that will be accessed.
|
||||
e.g., if start_offset=0 and 100 bytes to be read, end_offset=99*/
|
||||
|
||||
st_frd_size = frd_size;
|
||||
st_n_filetypes = n_filetypes;
|
||||
i = 0;
|
||||
i_offset = 0;
|
||||
j = st_index;
|
||||
off = offset;
|
||||
frd_size = ADIOI_MIN(st_frd_size, bufsize);
|
||||
while (i < bufsize) {
|
||||
i += frd_size;
|
||||
while (i_offset < bufsize) {
|
||||
i_offset += frd_size;
|
||||
end_offset = off + frd_size - 1;
|
||||
|
||||
if (j < (flat_file->count - 1)) j++;
|
||||
else {
|
||||
j = 0;
|
||||
n_filetypes++;
|
||||
j = (j+1) % flat_file->count;
|
||||
n_filetypes += (j == 0) ? 1 : 0;
|
||||
while (flat_file->blocklens[j]==0) {
|
||||
j = (j+1) % flat_file->count;
|
||||
n_filetypes += (j == 0) ? 1 : 0;
|
||||
}
|
||||
|
||||
off = disp + flat_file->indices[j] + (ADIO_Offset) n_filetypes*filetype_extent;
|
||||
frd_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i);
|
||||
off = disp + flat_file->indices[j] + n_filetypes*(ADIO_Offset)filetype_extent;
|
||||
frd_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i_offset);
|
||||
}
|
||||
|
||||
/* if atomicity is true, lock (exclusive) the region to be accessed */
|
||||
@ -350,7 +397,7 @@ void ADIOI_BGL_ReadStrided(ADIO_File fd, void *buf, int count,
|
||||
/* initial read into readbuf */
|
||||
readbuf_off = offset;
|
||||
readbuf = (char *) ADIOI_Malloc(max_bufsize);
|
||||
readbuf_len = (int) (ADIOI_MIN(max_bufsize, end_offset-readbuf_off+1));
|
||||
readbuf_len = (unsigned) (ADIOI_MIN(max_bufsize, end_offset-readbuf_off+1));
|
||||
|
||||
lseek(fd->fd_sys, offset, SEEK_SET);
|
||||
if (!(fd->atomicity)) ADIOI_READ_LOCK(fd, offset, SEEK_SET, readbuf_len);
|
||||
@ -364,12 +411,12 @@ void ADIOI_BGL_ReadStrided(ADIO_File fd, void *buf, int count,
|
||||
/* contiguous in memory, noncontiguous in file. should be the most
|
||||
common case. */
|
||||
|
||||
i = 0;
|
||||
i_offset = 0;
|
||||
j = st_index;
|
||||
off = offset;
|
||||
n_filetypes = st_n_filetypes;
|
||||
frd_size = ADIOI_MIN(st_frd_size, bufsize);
|
||||
while (i < bufsize) {
|
||||
while (i_offset < bufsize) {
|
||||
if (frd_size) {
|
||||
/* TYPE_UB and TYPE_LB can result in
|
||||
frd_size = 0. save system call in such cases */
|
||||
@ -378,25 +425,26 @@ void ADIOI_BGL_ReadStrided(ADIO_File fd, void *buf, int count,
|
||||
|
||||
req_off = off;
|
||||
req_len = frd_size;
|
||||
userbuf_off = i;
|
||||
userbuf_off = i_offset;
|
||||
ADIOI_BUFFERED_READ
|
||||
}
|
||||
i += frd_size;
|
||||
i_offset += frd_size;
|
||||
|
||||
if (off + frd_size < disp + flat_file->indices[j] +
|
||||
flat_file->blocklens[j] + (ADIO_Offset) n_filetypes*filetype_extent)
|
||||
flat_file->blocklens[j] + n_filetypes*(ADIO_Offset)filetype_extent)
|
||||
off += frd_size;
|
||||
/* did not reach end of contiguous block in filetype.
|
||||
no more I/O needed. off is incremented by frd_size. */
|
||||
else {
|
||||
if (j < (flat_file->count - 1)) j++;
|
||||
else {
|
||||
j = 0;
|
||||
n_filetypes++;
|
||||
j = (j+1) % flat_file->count;
|
||||
n_filetypes += (j == 0) ? 1 : 0;
|
||||
while (flat_file->blocklens[j]==0) {
|
||||
j = (j+1) % flat_file->count;
|
||||
n_filetypes += (j == 0) ? 1 : 0;
|
||||
}
|
||||
off = disp + flat_file->indices[j] +
|
||||
(ADIO_Offset) n_filetypes*filetype_extent;
|
||||
frd_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i);
|
||||
n_filetypes*(ADIO_Offset)filetype_extent;
|
||||
frd_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i_offset);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -408,7 +456,7 @@ void ADIOI_BGL_ReadStrided(ADIO_File fd, void *buf, int count,
|
||||
while (flat_buf->type != datatype) flat_buf = flat_buf->next;
|
||||
|
||||
k = num = buf_count = 0;
|
||||
i = (int) (flat_buf->indices[0]);
|
||||
i_offset = flat_buf->indices[0];
|
||||
j = st_index;
|
||||
off = offset;
|
||||
n_filetypes = st_n_filetypes;
|
||||
@ -423,7 +471,7 @@ void ADIOI_BGL_ReadStrided(ADIO_File fd, void *buf, int count,
|
||||
|
||||
req_off = off;
|
||||
req_len = size;
|
||||
userbuf_off = i;
|
||||
userbuf_off = i_offset;
|
||||
ADIOI_BUFFERED_READ
|
||||
}
|
||||
|
||||
@ -432,18 +480,19 @@ void ADIOI_BGL_ReadStrided(ADIO_File fd, void *buf, int count,
|
||||
|
||||
if (size == frd_size) {
|
||||
/* reached end of contiguous block in file */
|
||||
if (j < (flat_file->count - 1)) j++;
|
||||
else {
|
||||
j = 0;
|
||||
n_filetypes++;
|
||||
j = (j+1) % flat_file->count;
|
||||
n_filetypes += (j == 0) ? 1 : 0;
|
||||
while (flat_file->blocklens[j]==0) {
|
||||
j = (j+1) % flat_file->count;
|
||||
n_filetypes += (j == 0) ? 1 : 0;
|
||||
}
|
||||
|
||||
off = disp + flat_file->indices[j] +
|
||||
(ADIO_Offset) n_filetypes*filetype_extent;
|
||||
n_filetypes*(ADIO_Offset)filetype_extent;
|
||||
|
||||
new_frd_size = flat_file->blocklens[j];
|
||||
if (size != brd_size) {
|
||||
i += size;
|
||||
i_offset += size;
|
||||
new_brd_size -= size;
|
||||
}
|
||||
}
|
||||
@ -453,7 +502,7 @@ void ADIOI_BGL_ReadStrided(ADIO_File fd, void *buf, int count,
|
||||
|
||||
k = (k + 1)%flat_buf->count;
|
||||
buf_count++;
|
||||
i = (int) (buftype_extent*(buf_count/flat_buf->count) +
|
||||
i_offset = ((ADIO_Offset)buftype_extent*(ADIO_Offset)(buf_count/flat_buf->count) +
|
||||
flat_buf->indices[k]);
|
||||
new_brd_size = flat_buf->blocklens[k];
|
||||
if (size != frd_size) {
|
||||
@ -461,6 +510,7 @@ void ADIOI_BGL_ReadStrided(ADIO_File fd, void *buf, int count,
|
||||
new_frd_size -= size;
|
||||
}
|
||||
}
|
||||
ADIOI_Assert(((ADIO_Offset)num + size) == (unsigned)(num + size));
|
||||
num += size;
|
||||
frd_size = new_frd_size;
|
||||
brd_size = new_brd_size;
|
||||
|
@ -3,7 +3,13 @@
|
||||
/* ---------------------------------------------------------------- */
|
||||
/**
|
||||
* \file ad_bgl_tuning.c
|
||||
* \brief ???
|
||||
* \brief defines ad_bgl performance tuning
|
||||
*/
|
||||
|
||||
/* -*- Mode: C; c-basic-offset:4 ; -*- */
|
||||
/*
|
||||
* Copyright (C) 2008 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
/*---------------------------------------------------------------------
|
||||
@ -26,6 +32,40 @@ double bglmpio_prof_cw [BGLMPIO_CIO_LAST];
|
||||
double bglmpio_prof_cr [BGLMPIO_CIO_LAST];
|
||||
|
||||
/* set internal variables for tuning environment variables */
|
||||
/** \page env_vars Environment Variables
|
||||
* - BGLMPIO_COMM - Define how data is exchanged on collective
|
||||
* reads and writes. Possible values:
|
||||
* - 0 - Use MPI_Alltoallv.
|
||||
* - 1 - Use MPI_Isend/MPI_Irecv.
|
||||
* - Default is 0.
|
||||
*
|
||||
* - BGLMPIO_TIMING - collect timing breakdown for MPI I/O collective calls.
|
||||
* Must also compile the library with BGL_PROFILE defined. Possible values:
|
||||
* - 0 - Do not collect/report timing.
|
||||
* - 1 - Collect/report timing.
|
||||
* - Default is 0.
|
||||
*
|
||||
* - BGLMPIO_TIMING2 - collect additional averages for MPI I/O collective calls.
|
||||
* Must also compile the library with BGL_PROFILE defined. Possible values:
|
||||
* - 0 - Do not collect/report averages.
|
||||
* - 1 - Collect/report averages.
|
||||
* - Default is 0.
|
||||
*
|
||||
* - BGLMPIO_TUNEGATHER - Tune how starting and ending offsets are communicated
|
||||
* for aggregator collective i/o. Possible values:
|
||||
* - 0 - Use two MPI_Allgather calls to collect starting and ending offsets.
|
||||
* - 1 - Use MPI_Allreduce(MPI_MAX) to collect starting and ending offsets.
|
||||
* - Default is 1.
|
||||
*
|
||||
* - BGLMPIO_TUNEBLOCKING - Tune how aggregate file domains are
|
||||
* calculated (block size). Possible values:
|
||||
* - 0 - Evenly calculate file domains across aggregators. Also use
|
||||
* MPI_Isend/MPI_Irecv to exchange domain information.
|
||||
* - 1 - Align file domains with the underlying file system's block size. Also use
|
||||
* MPI_Alltoallv to exchange domain information.
|
||||
* - Default is 1.
|
||||
*
|
||||
*/
|
||||
void ad_bgl_get_env_vars() {
|
||||
char *x;
|
||||
|
||||
|
@ -18,6 +18,9 @@
|
||||
#include "ad_bgl_pset.h"
|
||||
#include "ad_bgl_aggrs.h"
|
||||
|
||||
#ifdef AGGREGATION_PROFILE
|
||||
#include "mpe.h"
|
||||
#endif
|
||||
#ifdef PROFILE
|
||||
#include "mpe.h"
|
||||
#endif
|
||||
@ -26,13 +29,13 @@
|
||||
static void ADIOI_Exch_and_write(ADIO_File fd, void *buf, MPI_Datatype
|
||||
datatype, int nprocs, int myrank, ADIOI_Access
|
||||
*others_req, ADIO_Offset *offset_list,
|
||||
int *len_list, int contig_access_count, ADIO_Offset
|
||||
ADIO_Offset *len_list, int contig_access_count, ADIO_Offset
|
||||
min_st_offset, ADIO_Offset fd_size,
|
||||
ADIO_Offset *fd_start, ADIO_Offset *fd_end,
|
||||
int *buf_idx, int *error_code);
|
||||
static void ADIOI_W_Exchange_data(ADIO_File fd, void *buf, char *write_buf,
|
||||
ADIOI_Flatlist_node *flat_buf, ADIO_Offset
|
||||
*offset_list, int *len_list, int *send_size,
|
||||
*offset_list, ADIO_Offset *len_list, int *send_size,
|
||||
int *recv_size, ADIO_Offset off, int size,
|
||||
int *count, int *start_pos, int *partial_recv,
|
||||
int *sent_to_proc, int nprocs,
|
||||
@ -49,7 +52,7 @@ static void ADIOI_W_Exchange_data_alltoallv(
|
||||
char *write_buf, /* 1 */
|
||||
ADIOI_Flatlist_node *flat_buf,
|
||||
ADIO_Offset *offset_list,
|
||||
int *len_list, int *send_size, int *recv_size,
|
||||
ADIO_Offset *len_list, int *send_size, int *recv_size,
|
||||
ADIO_Offset off, int size, /* 2 */
|
||||
int *count, int *start_pos, int *partial_recv,
|
||||
int *sent_to_proc, int nprocs, int myrank,
|
||||
@ -65,7 +68,7 @@ static void ADIOI_W_Exchange_data_alltoallv(
|
||||
int *error_code);
|
||||
static void ADIOI_Fill_send_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node
|
||||
*flat_buf, char **send_buf, ADIO_Offset
|
||||
*offset_list, int *len_list, int *send_size,
|
||||
*offset_list, ADIO_Offset *len_list, int *send_size,
|
||||
MPI_Request *requests, int *sent_to_proc,
|
||||
int nprocs, int myrank,
|
||||
int contig_access_count, ADIO_Offset
|
||||
@ -76,7 +79,7 @@ static void ADIOI_Fill_send_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node
|
||||
MPI_Aint buftype_extent);
|
||||
static void ADIOI_Fill_send_buffer_nosend(ADIO_File fd, void *buf, ADIOI_Flatlist_node
|
||||
*flat_buf, char **send_buf, ADIO_Offset
|
||||
*offset_list, int *len_list, int *send_size,
|
||||
*offset_list, ADIO_Offset *len_list, int *send_size,
|
||||
MPI_Request *requests, int *sent_to_proc,
|
||||
int nprocs, int myrank,
|
||||
int contig_access_count, ADIO_Offset
|
||||
@ -118,26 +121,27 @@ void ADIOI_BGL_WriteStridedColl(ADIO_File fd, void *buf, int count,
|
||||
ADIO_Offset *bgl_offsets0 = NULL, *bgl_offsets = NULL;
|
||||
int ii;
|
||||
|
||||
int *buf_idx = NULL, *len_list = NULL;
|
||||
|
||||
double io_time = 0, all_time, max_all_time;
|
||||
double tstep1, max_tstep1;
|
||||
double tstep1_1, max_tstep1_1;
|
||||
double tstep1_2, max_tstep1_2;
|
||||
double tstep1_3, max_tstep1_3;
|
||||
double tstep2, max_tstep2;
|
||||
double tstep3, max_tstep3;
|
||||
double tstep4, max_tstep4;
|
||||
double sum_sz;
|
||||
|
||||
int *buf_idx = NULL;
|
||||
ADIO_Offset *len_list = NULL;
|
||||
#if BGL_PROFILE
|
||||
BGLMPIO_T_CIO_RESET( 0, w )
|
||||
#endif
|
||||
|
||||
#if 0
|
||||
/* From common code - not implemented for bgl.*/
|
||||
int old_error, tmp_error;
|
||||
#endif
|
||||
#ifdef PROFILE
|
||||
MPE_Log_event(13, 0, "start computation");
|
||||
#endif
|
||||
|
||||
#if 0
|
||||
/* From common code - not implemented for bgl. */
|
||||
if (fd->hints->cb_pfr != ADIOI_HINT_DISABLE) {
|
||||
ADIOI_IOStridedColl (fd, buf, count, ADIOI_WRITE, datatype,
|
||||
file_ptr_type, offset, status, error_code);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
MPI_Comm_size(fd->comm, &nprocs);
|
||||
MPI_Comm_rank(fd->comm, &myrank);
|
||||
|
||||
@ -207,7 +211,8 @@ void ADIOI_BGL_WriteStridedColl(ADIO_File fd, void *buf, int count,
|
||||
/* are the accesses of different processes interleaved? */
|
||||
for (i=1; i<nprocs; i++)
|
||||
if ((st_offsets[i] < end_offsets[i-1]) &&
|
||||
(st_offsets[i] <= end_offsets[i])) interleave_count++;
|
||||
(st_offsets[i] <= end_offsets[i]))
|
||||
interleave_count++;
|
||||
/* This is a rudimentary check for interleaving, but should suffice
|
||||
for the moment. */
|
||||
}
|
||||
@ -231,7 +236,7 @@ void ADIOI_BGL_WriteStridedColl(ADIO_File fd, void *buf, int count,
|
||||
if (buftype_is_contig && filetype_is_contig) {
|
||||
|
||||
if (file_ptr_type == ADIO_EXPLICIT_OFFSET) {
|
||||
off = fd->disp + (fd->etype_size) * offset;
|
||||
off = fd->disp + (ADIO_Offset)(fd->etype_size) * offset;
|
||||
ADIO_WriteContig(fd, buf, count, datatype,
|
||||
ADIO_EXPLICIT_OFFSET,
|
||||
off, status, error_code);
|
||||
@ -260,7 +265,9 @@ void ADIOI_BGL_WriteStridedColl(ADIO_File fd, void *buf, int count,
|
||||
else
|
||||
ADIOI_Calc_file_domains(st_offsets, end_offsets, nprocs,
|
||||
nprocs_for_coll, &min_st_offset,
|
||||
&fd_start, &fd_end, &fd_size);
|
||||
&fd_start, &fd_end,
|
||||
fd->hints->min_fdomain_size, &fd_size,
|
||||
fd->hints->striping_unit);
|
||||
|
||||
#if BGL_PROFILE
|
||||
BGLMPIO_T_CIO_SET_GET( 0, w, 0, 1, 1, BGLMPIO_CIO_MYREQ, BGLMPIO_CIO_FD_PART )
|
||||
@ -329,9 +336,50 @@ void ADIOI_BGL_WriteStridedColl(ADIO_File fd, void *buf, int count,
|
||||
|
||||
BGLMPIO_T_CIO_REPORT( 0, w, fd, myrank )
|
||||
#endif
|
||||
#if 0
|
||||
/* From common code - not implemented for bgl.
|
||||
*
|
||||
* If this collective write is followed by an independent write,
|
||||
* it's possible to have those subsequent writes on other processes
|
||||
* race ahead and sneak in before the read-modify-write completes.
|
||||
* We carry out a collective communication at the end here so no one
|
||||
* can start independent i/o before collective I/O completes.
|
||||
*
|
||||
* need to do some gymnastics with the error codes so that if something
|
||||
* went wrong, all processes report error, but if a process has a more
|
||||
* specific error code, we can still have that process report the
|
||||
* additional information */
|
||||
|
||||
old_error = *error_code;
|
||||
if (*error_code != MPI_SUCCESS) *error_code = MPI_ERR_IO;
|
||||
|
||||
/* optimization: if only one process performing i/o, we can perform
|
||||
* a less-expensive Bcast */
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_postwrite_a, 0, NULL );
|
||||
#endif
|
||||
if (fd->hints->cb_nodes == 1)
|
||||
MPI_Bcast(error_code, 1, MPI_INT,
|
||||
fd->hints->ranklist[0], fd->comm);
|
||||
else {
|
||||
tmp_error = *error_code;
|
||||
MPI_Allreduce(&tmp_error, error_code, 1, MPI_INT,
|
||||
MPI_MAX, fd->comm);
|
||||
}
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_postwrite_b, 0, NULL );
|
||||
#endif
|
||||
#ifdef AGGREGATION_PROFILE
|
||||
MPE_Log_event (5012, 0, NULL);
|
||||
#endif
|
||||
|
||||
if ( (old_error != MPI_SUCCESS) && (old_error != MPI_ERR_IO) )
|
||||
*error_code = old_error;
|
||||
|
||||
|
||||
#endif
|
||||
/* free all memory allocated for collective I/O */
|
||||
if (!buftype_is_contig) ADIOI_Delete_flattened(datatype);
|
||||
|
||||
for (i=0; i<nprocs; i++) {
|
||||
if (others_req[i].count) {
|
||||
@ -363,6 +411,9 @@ void ADIOI_BGL_WriteStridedColl(ADIO_File fd, void *buf, int count,
|
||||
#endif
|
||||
|
||||
fd->fp_sys_posn = -1; /* set it to null. */
|
||||
#ifdef AGGREGATION_PROFILE
|
||||
MPE_Log_event (5013, 0, NULL);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
@ -371,12 +422,12 @@ void ADIOI_BGL_WriteStridedColl(ADIO_File fd, void *buf, int count,
|
||||
* code is created and returned in error_code.
|
||||
*/
|
||||
static void ADIOI_Exch_and_write(ADIO_File fd, void *buf, MPI_Datatype
|
||||
datatype, int nprocs, int myrank,
|
||||
datatype, int nprocs,
|
||||
int myrank,
|
||||
ADIOI_Access
|
||||
*others_req, ADIO_Offset *offset_list,
|
||||
int *len_list, int contig_access_count,
|
||||
ADIO_Offset
|
||||
min_st_offset, ADIO_Offset fd_size,
|
||||
ADIO_Offset *len_list, int contig_access_count,
|
||||
ADIO_Offset min_st_offset, ADIO_Offset fd_size,
|
||||
ADIO_Offset *fd_start, ADIO_Offset *fd_end,
|
||||
int *buf_idx, int *error_code)
|
||||
{
|
||||
@ -389,7 +440,9 @@ static void ADIOI_Exch_and_write(ADIO_File fd, void *buf, MPI_Datatype
|
||||
array to a file, where each local array is 8Mbytes, requiring
|
||||
at least another 8Mbytes of temp space is unacceptable. */
|
||||
|
||||
int hole, i, j, m, size=0, ntimes, max_ntimes, buftype_is_contig;
|
||||
/* Not convinced end_loc-st_loc couldn't be > int, so make these offsets*/
|
||||
ADIO_Offset size=0;
|
||||
int hole, i, j, m, ntimes, max_ntimes, buftype_is_contig;
|
||||
ADIO_Offset st_loc=-1, end_loc=-1, off, done, req_off;
|
||||
char *write_buf=NULL;
|
||||
int *curr_offlen_ptr, *count, *send_size, req_len, *recv_size;
|
||||
@ -410,7 +463,7 @@ static void ADIOI_Exch_and_write(ADIO_File fd, void *buf, MPI_Datatype
|
||||
That gives the no. of communication phases as well. */
|
||||
|
||||
value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
|
||||
MPI_Info_get(fd->info, "cb_buffer_size", MPI_MAX_INFO_VAL, value,
|
||||
ADIOI_Info_get(fd->info, "cb_buffer_size", MPI_MAX_INFO_VAL, value,
|
||||
&info_flag);
|
||||
coll_bufsize = atoi(value);
|
||||
ADIOI_Free(value);
|
||||
@ -526,7 +579,7 @@ static void ADIOI_Exch_and_write(ADIO_File fd, void *buf, MPI_Datatype
|
||||
#endif
|
||||
for (i=0; i < nprocs; i++) count[i] = recv_size[i] = 0;
|
||||
|
||||
size = (int) (ADIOI_MIN(coll_bufsize, end_loc-st_loc+1-done));
|
||||
size = ADIOI_MIN((unsigned)coll_bufsize, end_loc-st_loc+1-done);
|
||||
|
||||
for (i=0; i < nprocs; i++) {
|
||||
if (others_req[i].count) {
|
||||
@ -550,12 +603,14 @@ static void ADIOI_Exch_and_write(ADIO_File fd, void *buf, MPI_Datatype
|
||||
}
|
||||
if (req_off < off + size) {
|
||||
count[i]++;
|
||||
ADIOI_Assert((((ADIO_Offset)(MPIR_Upint)write_buf)+req_off-off) == (ADIO_Offset)(MPIR_Upint)(write_buf+req_off-off));
|
||||
MPI_Address(write_buf+req_off-off,
|
||||
&(others_req[i].mem_ptrs[j]));
|
||||
recv_size[i] += (int)(ADIOI_MIN(off + (ADIO_Offset)size -
|
||||
req_off, req_len));
|
||||
ADIOI_Assert((off + size - req_off) == (int)(off + size - req_off));
|
||||
recv_size[i] += (int)(ADIOI_MIN(off + size - req_off,
|
||||
(unsigned)req_len));
|
||||
|
||||
if (off+size-req_off < req_len)
|
||||
if (off+size-req_off < (unsigned)req_len)
|
||||
{
|
||||
partial_recv[i] = (int) (off + size - req_off);
|
||||
|
||||
@ -618,7 +673,8 @@ static void ADIOI_Exch_and_write(ADIO_File fd, void *buf, MPI_Datatype
|
||||
if (count[i]) flag = 1;
|
||||
|
||||
if (flag) {
|
||||
ADIO_WriteContig(fd, write_buf, size, MPI_BYTE, ADIO_EXPLICIT_OFFSET,
|
||||
ADIOI_Assert(size == (int)size);
|
||||
ADIO_WriteContig(fd, write_buf, (int)size, MPI_BYTE, ADIO_EXPLICIT_OFFSET,
|
||||
off, &status, error_code);
|
||||
if (*error_code != MPI_SUCCESS) return;
|
||||
}
|
||||
@ -678,7 +734,7 @@ static void ADIOI_Exch_and_write(ADIO_File fd, void *buf, MPI_Datatype
|
||||
*/
|
||||
static void ADIOI_W_Exchange_data(ADIO_File fd, void *buf, char *write_buf,
|
||||
ADIOI_Flatlist_node *flat_buf, ADIO_Offset
|
||||
*offset_list, int *len_list, int *send_size,
|
||||
*offset_list, ADIO_Offset *len_list, int *send_size,
|
||||
int *recv_size, ADIO_Offset off, int size,
|
||||
int *count, int *start_pos,
|
||||
int *partial_recv,
|
||||
@ -758,20 +814,27 @@ static void ADIOI_W_Exchange_data(ADIO_File fd, void *buf, char *write_buf,
|
||||
}
|
||||
ADIOI_Free(tmp_len);
|
||||
|
||||
/* check if there are any holes */
|
||||
/* check if there are any holes. If yes, must do read-modify-write.
|
||||
* holes can be in three places. 'middle' is what you'd expect: the
|
||||
* processes are operating on noncontiguous data. But holes can also show
|
||||
* up at the beginning or end of the file domain (see John Bent ROMIO REQ
|
||||
* #835). Missing these holes would result in us writing more data than
|
||||
* received by everyone else. */
|
||||
*hole = 0;
|
||||
/* See if there are holes before the first request or after the last request*/
|
||||
if((srt_off[0] > off) ||
|
||||
((srt_off[sum-1] + srt_len[sum-1]) < (off + size)))
|
||||
{
|
||||
if (off != srt_off[0]) /* hole at the front */
|
||||
*hole = 1;
|
||||
else { /* coalesce the sorted offset-length pairs */
|
||||
for (i=1; i<sum; i++) {
|
||||
if (srt_off[i] <= srt_off[0] + srt_len[0]) {
|
||||
int new_len = srt_off[i] + srt_len[i] - srt_off[0];
|
||||
if (new_len > srt_len[0]) srt_len[0] = new_len;
|
||||
}
|
||||
else /* See if there are holes between the requests, if there are more than one */
|
||||
for (i=0; i<sum-1; i++)
|
||||
if (srt_off[i]+srt_len[i] < srt_off[i+1]) {
|
||||
*hole = 1;
|
||||
else
|
||||
break;
|
||||
}
|
||||
if (i < sum || size != srt_len[0]) /* hole in middle or end */
|
||||
*hole = 1;
|
||||
}
|
||||
|
||||
ADIOI_Free(srt_off);
|
||||
ADIOI_Free(srt_len);
|
||||
@ -821,6 +884,9 @@ static void ADIOI_W_Exchange_data(ADIO_File fd, void *buf, char *write_buf,
|
||||
/* post sends. if buftype_is_contig, data can be directly sent from
|
||||
user buf at location given by buf_idx. else use send_buf. */
|
||||
|
||||
#ifdef AGGREGATION_PROFILE
|
||||
MPE_Log_event (5032, 0, NULL);
|
||||
#endif
|
||||
if (buftype_is_contig) {
|
||||
j = 0;
|
||||
for (i=0; i < nprocs; i++)
|
||||
@ -895,6 +961,9 @@ static void ADIOI_W_Exchange_data(ADIO_File fd, void *buf, char *write_buf,
|
||||
MPI_Waitall(nprocs_send+nprocs_recv, requests, statuses);
|
||||
#endif
|
||||
|
||||
#ifdef AGGREGATION_PROFILE
|
||||
MPE_Log_event (5033, 0, NULL);
|
||||
#endif
|
||||
ADIOI_Free(statuses);
|
||||
ADIOI_Free(requests);
|
||||
if (!buftype_is_contig && nprocs_send) {
|
||||
@ -918,7 +987,7 @@ static void ADIOI_W_Exchange_data(ADIO_File fd, void *buf, char *write_buf,
|
||||
n_buftypes++; \
|
||||
} \
|
||||
user_buf_idx = flat_buf->indices[flat_buf_idx] + \
|
||||
n_buftypes*buftype_extent; \
|
||||
(ADIO_Offset)n_buftypes*(ADIO_Offset)buftype_extent; \
|
||||
flat_buf_sz = flat_buf->blocklens[flat_buf_idx]; \
|
||||
} \
|
||||
buf_incr -= size_in_buf; \
|
||||
@ -930,6 +999,8 @@ static void ADIOI_W_Exchange_data(ADIO_File fd, void *buf, char *write_buf,
|
||||
{ \
|
||||
while (size) { \
|
||||
size_in_buf = ADIOI_MIN(size, flat_buf_sz); \
|
||||
ADIOI_Assert((((ADIO_Offset)(MPIR_Upint)buf) + user_buf_idx) == (ADIO_Offset)(MPIR_Upint)((MPIR_Upint)buf + user_buf_idx)); \
|
||||
ADIOI_Assert(size_in_buf == (size_t)size_in_buf); \
|
||||
memcpy(&(send_buf[p][send_buf_idx[p]]), \
|
||||
((char *) buf) + user_buf_idx, size_in_buf); \
|
||||
send_buf_idx[p] += size_in_buf; \
|
||||
@ -942,7 +1013,7 @@ static void ADIOI_W_Exchange_data(ADIO_File fd, void *buf, char *write_buf,
|
||||
n_buftypes++; \
|
||||
} \
|
||||
user_buf_idx = flat_buf->indices[flat_buf_idx] + \
|
||||
n_buftypes*buftype_extent; \
|
||||
(ADIO_Offset)n_buftypes*(ADIO_Offset)buftype_extent; \
|
||||
flat_buf_sz = flat_buf->blocklens[flat_buf_idx]; \
|
||||
} \
|
||||
size -= size_in_buf; \
|
||||
@ -951,11 +1022,9 @@ static void ADIOI_W_Exchange_data(ADIO_File fd, void *buf, char *write_buf,
|
||||
ADIOI_BUF_INCR \
|
||||
}
|
||||
|
||||
|
||||
|
||||
static void ADIOI_Fill_send_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node
|
||||
*flat_buf, char **send_buf, ADIO_Offset
|
||||
*offset_list, int *len_list, int *send_size,
|
||||
*offset_list, ADIO_Offset *len_list, int *send_size,
|
||||
MPI_Request *requests, int *sent_to_proc,
|
||||
int nprocs, int myrank,
|
||||
int contig_access_count,
|
||||
@ -967,8 +1036,9 @@ static void ADIOI_Fill_send_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node
|
||||
{
|
||||
/* this function is only called if buftype is not contig */
|
||||
|
||||
int i, p, flat_buf_idx, size;
|
||||
int flat_buf_sz, buf_incr, size_in_buf, jj, n_buftypes;
|
||||
int i, p, flat_buf_idx;
|
||||
ADIO_Offset flat_buf_sz, size_in_buf, buf_incr, size;
|
||||
int jj, n_buftypes;
|
||||
ADIO_Offset off, len, rem_len, user_buf_idx;
|
||||
|
||||
/* curr_to_proc[p] = amount of data sent to proc. p that has already
|
||||
@ -995,7 +1065,7 @@ static void ADIOI_Fill_send_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node
|
||||
|
||||
for (i=0; i<contig_access_count; i++) {
|
||||
off = offset_list[i];
|
||||
rem_len = (ADIO_Offset) len_list[i];
|
||||
rem_len = len_list[i];
|
||||
|
||||
/*this request may span the file domains of more than one process*/
|
||||
while (rem_len != 0) {
|
||||
@ -1015,17 +1085,20 @@ static void ADIOI_Fill_send_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node
|
||||
if (send_buf_idx[p] < send_size[p]) {
|
||||
if (curr_to_proc[p]+len > done_to_proc[p]) {
|
||||
if (done_to_proc[p] > curr_to_proc[p]) {
|
||||
size = (int)ADIOI_MIN(curr_to_proc[p] + len -
|
||||
size = ADIOI_MIN(curr_to_proc[p] + len -
|
||||
done_to_proc[p], send_size[p]-send_buf_idx[p]);
|
||||
buf_incr = done_to_proc[p] - curr_to_proc[p];
|
||||
ADIOI_BUF_INCR
|
||||
buf_incr = (int)(curr_to_proc[p] + len - done_to_proc[p]);
|
||||
ADIOI_Assert((curr_to_proc[p] + len - done_to_proc[p]) == (unsigned)(curr_to_proc[p] + len - done_to_proc[p]));
|
||||
buf_incr = curr_to_proc[p] + len - done_to_proc[p];
|
||||
ADIOI_Assert((done_to_proc[p] + size) == (unsigned)(done_to_proc[p] + size));
|
||||
curr_to_proc[p] = done_to_proc[p] + size;
|
||||
ADIOI_BUF_COPY
|
||||
}
|
||||
else {
|
||||
size = (int)ADIOI_MIN(len,send_size[p]-send_buf_idx[p]);
|
||||
buf_incr = (int)len;
|
||||
size = ADIOI_MIN(len,send_size[p]-send_buf_idx[p]);
|
||||
buf_incr = len;
|
||||
ADIOI_Assert((curr_to_proc[p] + size) == (unsigned)((ADIO_Offset)curr_to_proc[p] + size));
|
||||
curr_to_proc[p] += size;
|
||||
ADIOI_BUF_COPY
|
||||
}
|
||||
@ -1036,13 +1109,14 @@ static void ADIOI_Fill_send_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node
|
||||
}
|
||||
}
|
||||
else {
|
||||
curr_to_proc[p] += (int)len;
|
||||
buf_incr = (int)len;
|
||||
ADIOI_Assert((curr_to_proc[p] + len) == (unsigned)((ADIO_Offset)curr_to_proc[p] + len));
|
||||
curr_to_proc[p] += len;
|
||||
buf_incr = len;
|
||||
ADIOI_BUF_INCR
|
||||
}
|
||||
}
|
||||
else {
|
||||
buf_incr = (int)len;
|
||||
buf_incr = len;
|
||||
ADIOI_BUF_INCR
|
||||
}
|
||||
off += len;
|
||||
@ -1181,7 +1255,7 @@ static void ADIOI_W_Exchange_data_alltoallv(
|
||||
char *write_buf, /* 1 */
|
||||
ADIOI_Flatlist_node *flat_buf,
|
||||
ADIO_Offset *offset_list,
|
||||
int *len_list, int *send_size, int *recv_size,
|
||||
ADIO_Offset *len_list, int *send_size, int *recv_size,
|
||||
ADIO_Offset off, int size, /* 2 */
|
||||
int *count, int *start_pos, int *partial_recv,
|
||||
int *sent_to_proc, int nprocs, int myrank,
|
||||
@ -1196,11 +1270,10 @@ static void ADIOI_W_Exchange_data_alltoallv(
|
||||
int iter, MPI_Aint buftype_extent, int *buf_idx,
|
||||
int *error_code)
|
||||
{
|
||||
int i, j, k=0, tmp=0, nprocs_recv, nprocs_send, erri, *tmp_len, err;
|
||||
int i, j, k=0, nprocs_recv, nprocs_send, *tmp_len, err;
|
||||
char **send_buf = NULL;
|
||||
MPI_Request *requests, *send_req;
|
||||
MPI_Datatype recv_type;
|
||||
MPI_Status *statuses, status;
|
||||
MPI_Request *send_req=NULL;
|
||||
MPI_Status status;
|
||||
int rtail, stail;
|
||||
char *sbuf_ptr, *to_ptr;
|
||||
int len;
|
||||
@ -1324,7 +1397,8 @@ static void ADIOI_W_Exchange_data_alltoallv(
|
||||
|
||||
sbuf_ptr = all_recv_buf + rdispls[i];
|
||||
for (j=0; j<count[i]; j++) {
|
||||
to_ptr = (char *)( others_req[i].mem_ptrs[ start_pos[i]+j ] );
|
||||
ADIOI_ENSURE_AINT_FITS_IN_PTR(others_req[i].mem_ptrs[ start_pos[i]+j ]);
|
||||
to_ptr = (char *) ADIOI_AINT_CAST_TO_VOID_PTR ( others_req[i].mem_ptrs[ start_pos[i]+j ] );
|
||||
len = others_req[i].lens[ start_pos[i]+j ] ;
|
||||
memcpy( to_ptr, sbuf_ptr, len );
|
||||
sbuf_ptr += len;
|
||||
@ -1349,7 +1423,7 @@ static void ADIOI_W_Exchange_data_alltoallv(
|
||||
|
||||
static void ADIOI_Fill_send_buffer_nosend(ADIO_File fd, void *buf, ADIOI_Flatlist_node
|
||||
*flat_buf, char **send_buf, ADIO_Offset
|
||||
*offset_list, int *len_list, int *send_size,
|
||||
*offset_list, ADIO_Offset *len_list, int *send_size,
|
||||
MPI_Request *requests, int *sent_to_proc,
|
||||
int nprocs, int myrank,
|
||||
int contig_access_count,
|
||||
@ -1361,8 +1435,9 @@ static void ADIOI_Fill_send_buffer_nosend(ADIO_File fd, void *buf, ADIOI_Flatlis
|
||||
{
|
||||
/* this function is only called if buftype is not contig */
|
||||
|
||||
int i, p, flat_buf_idx, size;
|
||||
int flat_buf_sz, buf_incr, size_in_buf, jj, n_buftypes;
|
||||
int i, p, flat_buf_idx;
|
||||
ADIO_Offset flat_buf_sz, size_in_buf, buf_incr, size;
|
||||
int jj, n_buftypes;
|
||||
ADIO_Offset off, len, rem_len, user_buf_idx;
|
||||
|
||||
/* curr_to_proc[p] = amount of data sent to proc. p that has already
|
||||
@ -1389,7 +1464,7 @@ static void ADIOI_Fill_send_buffer_nosend(ADIO_File fd, void *buf, ADIOI_Flatlis
|
||||
|
||||
for (i=0; i<contig_access_count; i++) {
|
||||
off = offset_list[i];
|
||||
rem_len = (ADIO_Offset) len_list[i];
|
||||
rem_len = len_list[i];
|
||||
|
||||
/*this request may span the file domains of more than one process*/
|
||||
while (rem_len != 0) {
|
||||
@ -1409,17 +1484,20 @@ static void ADIOI_Fill_send_buffer_nosend(ADIO_File fd, void *buf, ADIOI_Flatlis
|
||||
if (send_buf_idx[p] < send_size[p]) {
|
||||
if (curr_to_proc[p]+len > done_to_proc[p]) {
|
||||
if (done_to_proc[p] > curr_to_proc[p]) {
|
||||
size = (int)ADIOI_MIN(curr_to_proc[p] + len -
|
||||
size = ADIOI_MIN(curr_to_proc[p] + len -
|
||||
done_to_proc[p], send_size[p]-send_buf_idx[p]);
|
||||
buf_incr = done_to_proc[p] - curr_to_proc[p];
|
||||
ADIOI_BUF_INCR
|
||||
buf_incr = (int)(curr_to_proc[p] + len - done_to_proc[p]);
|
||||
ADIOI_Assert((curr_to_proc[p] + len - done_to_proc[p]) == (unsigned)(curr_to_proc[p] + len - done_to_proc[p]));
|
||||
buf_incr = curr_to_proc[p] + len - done_to_proc[p];
|
||||
ADIOI_Assert((done_to_proc[p] + size) == (unsigned)(done_to_proc[p] + size));
|
||||
curr_to_proc[p] = done_to_proc[p] + size;
|
||||
ADIOI_BUF_COPY
|
||||
}
|
||||
else {
|
||||
size = (int)ADIOI_MIN(len,send_size[p]-send_buf_idx[p]);
|
||||
buf_incr = (int)len;
|
||||
size = ADIOI_MIN(len,send_size[p]-send_buf_idx[p]);
|
||||
buf_incr = len;
|
||||
ADIOI_Assert((curr_to_proc[p] + size) == (unsigned)((ADIO_Offset)curr_to_proc[p] + size));
|
||||
curr_to_proc[p] += size;
|
||||
ADIOI_BUF_COPY
|
||||
}
|
||||
@ -1433,13 +1511,14 @@ static void ADIOI_Fill_send_buffer_nosend(ADIO_File fd, void *buf, ADIOI_Flatlis
|
||||
*/
|
||||
}
|
||||
else {
|
||||
ADIOI_Assert((curr_to_proc[p] + len) == (unsigned)((ADIO_Offset)curr_to_proc[p] + len));
|
||||
curr_to_proc[p] += (int)len;
|
||||
buf_incr = (int)len;
|
||||
buf_incr = len;
|
||||
ADIOI_BUF_INCR
|
||||
}
|
||||
}
|
||||
else {
|
||||
buf_incr = (int)len;
|
||||
buf_incr = len;
|
||||
ADIOI_BUF_INCR
|
||||
}
|
||||
off += len;
|
||||
|
@ -17,13 +17,20 @@
|
||||
|
||||
#include "ad_bgl_tuning.h"
|
||||
|
||||
#ifdef AGGREGATION_PROFILE
|
||||
#include "mpe.h"
|
||||
#endif
|
||||
|
||||
void ADIOI_BGL_WriteContig(ADIO_File fd, void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Status *status, int *error_code)
|
||||
{
|
||||
int err=-1, datatype_size, len;
|
||||
int err=-1, datatype_size;
|
||||
ADIO_Offset len;
|
||||
static char myname[] = "ADIOI_BGL_WRITECONTIG";
|
||||
|
||||
#ifdef AGGREGATION_PROFILE
|
||||
MPE_Log_event (5036, 0, NULL);
|
||||
#endif
|
||||
#if BGL_PROFILE
|
||||
/* timing */
|
||||
double io_time, io_time2;
|
||||
@ -35,7 +42,8 @@ void ADIOI_BGL_WriteContig(ADIO_File fd, void *buf, int count,
|
||||
#endif
|
||||
|
||||
MPI_Type_size(datatype, &datatype_size);
|
||||
len = datatype_size * count;
|
||||
len = (ADIO_Offset)datatype_size * (ADIO_Offset)count;
|
||||
ADIOI_Assert(len == (unsigned int) len); /* write takes an unsigned int parm */
|
||||
|
||||
#if BGL_PROFILE
|
||||
|
||||
@ -46,7 +54,7 @@ void ADIOI_BGL_WriteContig(ADIO_File fd, void *buf, int count,
|
||||
if (bglmpio_timing2) bglmpio_prof_cw[ BGLMPIO_CIO_T_SEEK ] += (MPI_Wtime() - io_time2);
|
||||
ADIOI_WRITE_LOCK(fd, offset, SEEK_SET, len);
|
||||
if (bglmpio_timing2) io_time2 = MPI_Wtime();
|
||||
err = write(fd->fd_sys, buf, len);
|
||||
err = write(fd->fd_sys, buf, (unsigned int)len);
|
||||
if (bglmpio_timing2) bglmpio_prof_cw[ BGLMPIO_CIO_T_POSI_RW ] += (MPI_Wtime() - io_time2);
|
||||
ADIOI_UNLOCK(fd, offset, SEEK_SET, len);
|
||||
fd->fp_sys_posn = offset + err;
|
||||
@ -60,7 +68,7 @@ void ADIOI_BGL_WriteContig(ADIO_File fd, void *buf, int count,
|
||||
if (bglmpio_timing2) bglmpio_prof_cw[ BGLMPIO_CIO_T_SEEK ] += (MPI_Wtime() - io_time2);
|
||||
ADIOI_WRITE_LOCK(fd, offset, SEEK_SET, len);
|
||||
if (bglmpio_timing2) io_time2 = MPI_Wtime();
|
||||
err = write(fd->fd_sys, buf, len);
|
||||
err = write(fd->fd_sys, buf, (unsigned int)len);
|
||||
if (bglmpio_timing2) bglmpio_prof_cw[ BGLMPIO_CIO_T_POSI_RW ] += (MPI_Wtime() - io_time2);
|
||||
ADIOI_UNLOCK(fd, offset, SEEK_SET, len);
|
||||
fd->fp_ind += err;
|
||||
@ -73,7 +81,7 @@ void ADIOI_BGL_WriteContig(ADIO_File fd, void *buf, int count,
|
||||
if (fd->fp_sys_posn != offset)
|
||||
lseek(fd->fd_sys, offset, SEEK_SET);
|
||||
ADIOI_WRITE_LOCK(fd, offset, SEEK_SET, len);
|
||||
err = write(fd->fd_sys, buf, len);
|
||||
err = write(fd->fd_sys, buf, (unsigned int)len);
|
||||
ADIOI_UNLOCK(fd, offset, SEEK_SET, len);
|
||||
fd->fp_sys_posn = offset + err;
|
||||
/* individual file pointer not updated */
|
||||
@ -83,7 +91,7 @@ void ADIOI_BGL_WriteContig(ADIO_File fd, void *buf, int count,
|
||||
if (fd->fp_sys_posn != fd->fp_ind)
|
||||
lseek(fd->fd_sys, fd->fp_ind, SEEK_SET);
|
||||
ADIOI_WRITE_LOCK(fd, offset, SEEK_SET, len);
|
||||
err = write(fd->fd_sys, buf, len);
|
||||
err = write(fd->fd_sys, buf, (unsigned int)len);
|
||||
ADIOI_UNLOCK(fd, offset, SEEK_SET, len);
|
||||
fd->fp_ind += err;
|
||||
fd->fp_sys_posn = fd->fp_ind;
|
||||
@ -110,11 +118,12 @@ void ADIOI_BGL_WriteContig(ADIO_File fd, void *buf, int count,
|
||||
#endif
|
||||
|
||||
*error_code = MPI_SUCCESS;
|
||||
#ifdef AGGREGATION_PROFILE
|
||||
MPE_Log_event (5037, 0, NULL);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
#define ADIOI_BUFFERED_WRITE \
|
||||
{ \
|
||||
if (req_off >= writebuf_off + writebuf_len) { \
|
||||
@ -123,7 +132,7 @@ void ADIOI_BGL_WriteContig(ADIO_File fd, void *buf, int count,
|
||||
if (!(fd->atomicity)) ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
|
||||
if (err == -1) err_flag = 1; \
|
||||
writebuf_off = req_off; \
|
||||
writebuf_len = (int) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\
|
||||
writebuf_len = (unsigned) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\
|
||||
if (!(fd->atomicity)) ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
|
||||
lseek(fd->fd_sys, writebuf_off, SEEK_SET); \
|
||||
err = read(fd->fd_sys, writebuf, writebuf_len); \
|
||||
@ -135,7 +144,8 @@ void ADIOI_BGL_WriteContig(ADIO_File fd, void *buf, int count,
|
||||
return; \
|
||||
} \
|
||||
} \
|
||||
write_sz = (int) (ADIOI_MIN(req_len, writebuf_off + writebuf_len - req_off)); \
|
||||
write_sz = (unsigned) (ADIOI_MIN(req_len, writebuf_off + writebuf_len - req_off)); \
|
||||
ADIOI_Assert((ADIO_Offset)write_sz == ADIOI_MIN(req_len, writebuf_off + writebuf_len - req_off));\
|
||||
memcpy(writebuf+req_off-writebuf_off, (char *)buf +userbuf_off, write_sz);\
|
||||
while (write_sz != req_len) { \
|
||||
lseek(fd->fd_sys, writebuf_off, SEEK_SET); \
|
||||
@ -145,7 +155,7 @@ void ADIOI_BGL_WriteContig(ADIO_File fd, void *buf, int count,
|
||||
req_len -= write_sz; \
|
||||
userbuf_off += write_sz; \
|
||||
writebuf_off += writebuf_len; \
|
||||
writebuf_len = (int) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\
|
||||
writebuf_len = (unsigned) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\
|
||||
if (!(fd->atomicity)) ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
|
||||
lseek(fd->fd_sys, writebuf_off, SEEK_SET); \
|
||||
err = read(fd->fd_sys, writebuf, writebuf_len); \
|
||||
@ -173,9 +183,10 @@ void ADIOI_BGL_WriteContig(ADIO_File fd, void *buf, int count,
|
||||
if (!(fd->atomicity)) ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
|
||||
if (err == -1) err_flag = 1; \
|
||||
writebuf_off = req_off; \
|
||||
writebuf_len = (int) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\
|
||||
writebuf_len = (unsigned) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\
|
||||
} \
|
||||
write_sz = (int) (ADIOI_MIN(req_len, writebuf_off + writebuf_len - req_off)); \
|
||||
write_sz = (unsigned) (ADIOI_MIN(req_len, writebuf_off + writebuf_len - req_off)); \
|
||||
ADIOI_Assert((ADIO_Offset)write_sz == ADIOI_MIN(req_len, writebuf_off + writebuf_len - req_off));\
|
||||
memcpy(writebuf+req_off-writebuf_off, (char *)buf +userbuf_off, write_sz);\
|
||||
while (write_sz != req_len) { \
|
||||
lseek(fd->fd_sys, writebuf_off, SEEK_SET); \
|
||||
@ -186,7 +197,7 @@ void ADIOI_BGL_WriteContig(ADIO_File fd, void *buf, int count,
|
||||
req_len -= write_sz; \
|
||||
userbuf_off += write_sz; \
|
||||
writebuf_off += writebuf_len; \
|
||||
writebuf_len = (int) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\
|
||||
writebuf_len = (unsigned) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\
|
||||
write_sz = ADIOI_MIN(req_len, writebuf_len); \
|
||||
memcpy(writebuf, (char *)buf + userbuf_off, write_sz);\
|
||||
} \
|
||||
@ -201,19 +212,23 @@ void ADIOI_BGL_WriteStrided(ADIO_File fd, void *buf, int count,
|
||||
{
|
||||
/* offset is in units of etype relative to the filetype. */
|
||||
|
||||
|
||||
|
||||
ADIOI_Flatlist_node *flat_buf, *flat_file;
|
||||
int i, j, k, err=-1, bwr_size, fwr_size=0, st_index=0;
|
||||
int bufsize, num, size, sum, n_etypes_in_filetype, size_in_filetype;
|
||||
int n_filetypes, etype_in_filetype;
|
||||
ADIO_Offset i_offset, sum, size_in_filetype;
|
||||
int i, j, k, err=-1, st_index=0;
|
||||
int n_etypes_in_filetype;
|
||||
ADIO_Offset num, size, n_filetypes, etype_in_filetype, st_n_filetypes;
|
||||
ADIO_Offset abs_off_in_filetype=0;
|
||||
int filetype_size, etype_size, buftype_size, req_len;
|
||||
int filetype_size, etype_size, buftype_size;
|
||||
MPI_Aint filetype_extent, buftype_extent;
|
||||
int buf_count, buftype_is_contig, filetype_is_contig;
|
||||
ADIO_Offset userbuf_off;
|
||||
ADIO_Offset off, req_off, disp, end_offset=0, writebuf_off, start_off;
|
||||
char *writebuf, *value;
|
||||
int flag, st_fwr_size, st_n_filetypes, writebuf_len, write_sz;
|
||||
int new_bwr_size, new_fwr_size, err_flag=0, info_flag, max_bufsize;
|
||||
unsigned bufsize, writebuf_len, max_bufsize, write_sz;
|
||||
int err_flag=0, info_flag;
|
||||
ADIO_Offset new_bwr_size, new_fwr_size, st_fwr_size, fwr_size=0, bwr_size, req_len;
|
||||
static char myname[] = "ADIOI_BGL_WRITESTRIDED";
|
||||
|
||||
if (fd->hints->ds_write == ADIOI_HINT_DISABLE) {
|
||||
@ -247,12 +262,13 @@ void ADIOI_BGL_WriteStrided(ADIO_File fd, void *buf, int count,
|
||||
MPI_Type_extent(datatype, &buftype_extent);
|
||||
etype_size = fd->etype_size;
|
||||
|
||||
ADIOI_Assert((buftype_size * count) == ((ADIO_Offset)(unsigned)buftype_size * (ADIO_Offset)count));
|
||||
bufsize = buftype_size * count;
|
||||
|
||||
/* get max_bufsize from the info object. */
|
||||
|
||||
value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
|
||||
MPI_Info_get(fd->info, "ind_wr_buffer_size", MPI_MAX_INFO_VAL, value,
|
||||
ADIOI_Info_get(fd->info, "ind_wr_buffer_size", MPI_MAX_INFO_VAL, value,
|
||||
&info_flag);
|
||||
max_bufsize = atoi(value);
|
||||
ADIOI_Free(value);
|
||||
@ -272,20 +288,23 @@ void ADIOI_BGL_WriteStrided(ADIO_File fd, void *buf, int count,
|
||||
end_offset = off + bufsize - 1;
|
||||
writebuf_off = off;
|
||||
writebuf = (char *) ADIOI_Malloc(max_bufsize);
|
||||
writebuf_len = (int) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));
|
||||
writebuf_len = (unsigned) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));
|
||||
|
||||
/* if atomicity is true, lock the region to be accessed */
|
||||
if (fd->atomicity)
|
||||
ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);
|
||||
|
||||
for (j=0; j<count; j++)
|
||||
{
|
||||
int i;
|
||||
for (i=0; i<flat_buf->count; i++) {
|
||||
userbuf_off = j*buftype_extent + flat_buf->indices[i];
|
||||
userbuf_off = (ADIO_Offset)j*(ADIO_Offset)buftype_extent + flat_buf->indices[i];
|
||||
req_off = off;
|
||||
req_len = flat_buf->blocklens[i];
|
||||
ADIOI_BUFFERED_WRITE_WITHOUT_READ
|
||||
off += flat_buf->blocklens[i];
|
||||
}
|
||||
}
|
||||
|
||||
/* write the buffer out finally */
|
||||
lseek(fd->fd_sys, writebuf_off, SEEK_SET);
|
||||
@ -317,29 +336,37 @@ void ADIOI_BGL_WriteStrided(ADIO_File fd, void *buf, int count,
|
||||
disp = fd->disp;
|
||||
|
||||
if (file_ptr_type == ADIO_INDIVIDUAL) {
|
||||
offset = fd->fp_ind; /* in bytes */
|
||||
n_filetypes = -1;
|
||||
flag = 0;
|
||||
while (!flag) {
|
||||
n_filetypes++;
|
||||
/* Wei-keng reworked type processing to be a bit more efficient */
|
||||
offset = fd->fp_ind - disp;
|
||||
n_filetypes = (offset - flat_file->indices[0]) / filetype_extent;
|
||||
offset -= (ADIO_Offset)n_filetypes * filetype_extent;
|
||||
/* now offset is local to this extent */
|
||||
|
||||
/* find the block where offset is located, skip blocklens[i]==0 */
|
||||
for (i=0; i<flat_file->count; i++) {
|
||||
if (disp + flat_file->indices[i] +
|
||||
(ADIO_Offset) n_filetypes*filetype_extent + flat_file->blocklens[i]
|
||||
>= offset) {
|
||||
st_index = i;
|
||||
fwr_size = (int) (disp + flat_file->indices[i] +
|
||||
(ADIO_Offset) n_filetypes*filetype_extent
|
||||
+ flat_file->blocklens[i] - offset);
|
||||
flag = 1;
|
||||
ADIO_Offset dist;
|
||||
if (flat_file->blocklens[i] == 0) continue;
|
||||
dist = flat_file->indices[i] + flat_file->blocklens[i] - offset;
|
||||
/* fwr_size is from offset to the end of block i */
|
||||
if (dist == 0) {
|
||||
i++;
|
||||
offset = flat_file->indices[i];
|
||||
fwr_size = flat_file->blocklens[i];
|
||||
break;
|
||||
}
|
||||
if (dist > 0) {
|
||||
fwr_size = dist;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
st_index = i; /* starting index in flat_file->indices[] */
|
||||
offset += disp + (ADIO_Offset)n_filetypes*filetype_extent;
|
||||
}
|
||||
else {
|
||||
int i;
|
||||
n_etypes_in_filetype = filetype_size/etype_size;
|
||||
n_filetypes = (int) (offset / n_etypes_in_filetype);
|
||||
etype_in_filetype = (int) (offset % n_etypes_in_filetype);
|
||||
n_filetypes = offset / n_etypes_in_filetype;
|
||||
etype_in_filetype = offset % n_etypes_in_filetype;
|
||||
size_in_filetype = etype_in_filetype * etype_size;
|
||||
|
||||
sum = 0;
|
||||
@ -355,32 +382,64 @@ void ADIOI_BGL_WriteStrided(ADIO_File fd, void *buf, int count,
|
||||
}
|
||||
|
||||
/* abs. offset in bytes in the file */
|
||||
offset = disp + (ADIO_Offset) n_filetypes*filetype_extent + abs_off_in_filetype;
|
||||
offset = disp + (ADIO_Offset) n_filetypes*filetype_extent +
|
||||
abs_off_in_filetype;
|
||||
}
|
||||
|
||||
start_off = offset;
|
||||
/* Wei-keng Liao:write request is within single flat_file contig block*/
|
||||
/* this could happen, for example, with subarray types that are
|
||||
* actually fairly contiguous */
|
||||
if (buftype_is_contig && bufsize <= fwr_size) {
|
||||
ADIO_WriteContig(fd, buf, bufsize, MPI_BYTE, ADIO_EXPLICIT_OFFSET,
|
||||
offset, status, error_code);
|
||||
|
||||
if (file_ptr_type == ADIO_INDIVIDUAL) {
|
||||
/* update MPI-IO file pointer to point to the first byte
|
||||
* that can be accessed in the fileview. */
|
||||
fd->fp_ind = offset + bufsize;
|
||||
if (bufsize == fwr_size) {
|
||||
do {
|
||||
st_index++;
|
||||
if (st_index == flat_file->count) {
|
||||
st_index = 0;
|
||||
n_filetypes++;
|
||||
}
|
||||
} while (flat_file->blocklens[st_index] == 0);
|
||||
fd->fp_ind = disp + flat_file->indices[st_index]
|
||||
+ (ADIO_Offset)n_filetypes*filetype_extent;
|
||||
}
|
||||
}
|
||||
fd->fp_sys_posn = -1; /* set it to null. */
|
||||
#ifdef HAVE_STATUS_SET_BYTES
|
||||
MPIR_Status_set_bytes(status, datatype, bufsize);
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
||||
/* Calculate end_offset, the last byte-offset that will be accessed.
|
||||
e.g., if start_offset=0 and 100 bytes to be write, end_offset=99*/
|
||||
|
||||
st_fwr_size = fwr_size;
|
||||
st_n_filetypes = n_filetypes;
|
||||
i = 0;
|
||||
i_offset = 0;
|
||||
j = st_index;
|
||||
off = offset;
|
||||
fwr_size = ADIOI_MIN(st_fwr_size, bufsize);
|
||||
while (i < bufsize) {
|
||||
i += fwr_size;
|
||||
while (i_offset < bufsize) {
|
||||
i_offset += fwr_size;
|
||||
end_offset = off + fwr_size - 1;
|
||||
|
||||
if (j < (flat_file->count - 1)) j++;
|
||||
else {
|
||||
j = 0;
|
||||
n_filetypes++;
|
||||
j = (j+1) % flat_file->count;
|
||||
n_filetypes += (j == 0) ? 1 : 0;
|
||||
while (flat_file->blocklens[j]==0) {
|
||||
j = (j+1) % flat_file->count;
|
||||
n_filetypes += (j == 0) ? 1 : 0;
|
||||
}
|
||||
|
||||
off = disp + flat_file->indices[j] + (ADIO_Offset) n_filetypes*filetype_extent;
|
||||
fwr_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i);
|
||||
off = disp + flat_file->indices[j] +
|
||||
n_filetypes*(ADIO_Offset)filetype_extent;
|
||||
fwr_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i_offset);
|
||||
}
|
||||
|
||||
/* if atomicity is true, lock the region to be accessed */
|
||||
@ -390,7 +449,7 @@ void ADIOI_BGL_WriteStrided(ADIO_File fd, void *buf, int count,
|
||||
/* initial read for the read-modify-write */
|
||||
writebuf_off = offset;
|
||||
writebuf = (char *) ADIOI_Malloc(max_bufsize);
|
||||
writebuf_len = (int)(ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));
|
||||
writebuf_len = (unsigned)(ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));
|
||||
if (!(fd->atomicity)) ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len);
|
||||
lseek(fd->fd_sys, writebuf_off, SEEK_SET);
|
||||
err = read(fd->fd_sys, writebuf, writebuf_len);
|
||||
@ -408,39 +467,41 @@ void ADIOI_BGL_WriteStrided(ADIO_File fd, void *buf, int count,
|
||||
/* contiguous in memory, noncontiguous in file. should be the most
|
||||
common case. */
|
||||
|
||||
i = 0;
|
||||
i_offset = 0;
|
||||
j = st_index;
|
||||
off = offset;
|
||||
n_filetypes = st_n_filetypes;
|
||||
fwr_size = ADIOI_MIN(st_fwr_size, bufsize);
|
||||
while (i < bufsize) {
|
||||
while (i_offset < bufsize) {
|
||||
if (fwr_size) {
|
||||
/* TYPE_UB and TYPE_LB can result in
|
||||
fwr_size = 0. save system call in such cases */
|
||||
/* lseek(fd->fd_sys, off, SEEK_SET);
|
||||
err = write(fd->fd_sys, ((char *) buf) + i, fwr_size);*/
|
||||
err = write(fd->fd_sys, ((char *) buf) + i_offset, fwr_size);*/
|
||||
|
||||
req_off = off;
|
||||
req_len = fwr_size;
|
||||
userbuf_off = i;
|
||||
userbuf_off = i_offset;
|
||||
ADIOI_BUFFERED_WRITE
|
||||
}
|
||||
i += fwr_size;
|
||||
i_offset += fwr_size;
|
||||
|
||||
if (off + fwr_size < disp + flat_file->indices[j] +
|
||||
flat_file->blocklens[j] + (ADIO_Offset) n_filetypes*filetype_extent)
|
||||
flat_file->blocklens[j] + n_filetypes*(ADIO_Offset)filetype_extent)
|
||||
off += fwr_size;
|
||||
/* did not reach end of contiguous block in filetype.
|
||||
no more I/O needed. off is incremented by fwr_size. */
|
||||
else {
|
||||
if (j < (flat_file->count - 1)) j++;
|
||||
else {
|
||||
j = 0;
|
||||
n_filetypes++;
|
||||
j = (j+1) % flat_file->count;
|
||||
n_filetypes += (j == 0) ? 1 : 0;
|
||||
while (flat_file->blocklens[j]==0) {
|
||||
j = (j+1) % flat_file->count;
|
||||
n_filetypes += (j == 0) ? 1 : 0;
|
||||
}
|
||||
off = disp + flat_file->indices[j] +
|
||||
(ADIO_Offset) n_filetypes*filetype_extent;
|
||||
fwr_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i);
|
||||
n_filetypes*(ADIO_Offset)filetype_extent;
|
||||
fwr_size = ADIOI_MIN(flat_file->blocklens[j],
|
||||
bufsize-i_offset);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -452,7 +513,7 @@ void ADIOI_BGL_WriteStrided(ADIO_File fd, void *buf, int count,
|
||||
while (flat_buf->type != datatype) flat_buf = flat_buf->next;
|
||||
|
||||
k = num = buf_count = 0;
|
||||
i = (int) (flat_buf->indices[0]);
|
||||
i_offset = flat_buf->indices[0];
|
||||
j = st_index;
|
||||
off = offset;
|
||||
n_filetypes = st_n_filetypes;
|
||||
@ -463,11 +524,11 @@ void ADIOI_BGL_WriteStrided(ADIO_File fd, void *buf, int count,
|
||||
size = ADIOI_MIN(fwr_size, bwr_size);
|
||||
if (size) {
|
||||
/* lseek(fd->fd_sys, off, SEEK_SET);
|
||||
err = write(fd->fd_sys, ((char *) buf) + i, size); */
|
||||
err = write(fd->fd_sys, ((char *) buf) + i_offset, size); */
|
||||
|
||||
req_off = off;
|
||||
req_len = size;
|
||||
userbuf_off = i;
|
||||
userbuf_off = i_offset;
|
||||
ADIOI_BUFFERED_WRITE
|
||||
}
|
||||
|
||||
@ -476,18 +537,19 @@ void ADIOI_BGL_WriteStrided(ADIO_File fd, void *buf, int count,
|
||||
|
||||
if (size == fwr_size) {
|
||||
/* reached end of contiguous block in file */
|
||||
if (j < (flat_file->count - 1)) j++;
|
||||
else {
|
||||
j = 0;
|
||||
n_filetypes++;
|
||||
j = (j+1) % flat_file->count;
|
||||
n_filetypes += (j == 0) ? 1 : 0;
|
||||
while (flat_file->blocklens[j]==0) {
|
||||
j = (j+1) % flat_file->count;
|
||||
n_filetypes += (j == 0) ? 1 : 0;
|
||||
}
|
||||
|
||||
off = disp + flat_file->indices[j] +
|
||||
(ADIO_Offset) n_filetypes*filetype_extent;
|
||||
n_filetypes*(ADIO_Offset)filetype_extent;
|
||||
|
||||
new_fwr_size = flat_file->blocklens[j];
|
||||
if (size != bwr_size) {
|
||||
i += size;
|
||||
i_offset += size;
|
||||
new_bwr_size -= size;
|
||||
}
|
||||
}
|
||||
@ -497,8 +559,8 @@ void ADIOI_BGL_WriteStrided(ADIO_File fd, void *buf, int count,
|
||||
|
||||
k = (k + 1)%flat_buf->count;
|
||||
buf_count++;
|
||||
i = (int) (buftype_extent*(buf_count/flat_buf->count) +
|
||||
flat_buf->indices[k]);
|
||||
i_offset = (ADIO_Offset)buftype_extent*(ADIO_Offset)(buf_count/flat_buf->count) +
|
||||
flat_buf->indices[k];
|
||||
new_bwr_size = flat_buf->blocklens[k];
|
||||
if (size != fwr_size) {
|
||||
off += size;
|
||||
|
@ -1,7 +0,0 @@
|
||||
<dir>
|
||||
<file name="ad_bglockless.c" info="1205188711"/>
|
||||
</dir>
|
||||
<data>
|
||||
<fileinfo name="ad_bglockless.c">
|
||||
</fileinfo>
|
||||
</data>
|
@ -21,4 +21,6 @@ include $(top_srcdir)/Makefile.options
|
||||
|
||||
noinst_LTLIBRARIES = libadio_bglockless.la
|
||||
libadio_bglockless_la_SOURCES = \
|
||||
ad_bglockless.c
|
||||
ad_bglockless.c \
|
||||
ad_bglockless.h \
|
||||
ad_bglockless_features.c
|
||||
|
@ -6,12 +6,14 @@
|
||||
*/
|
||||
|
||||
#include "../ad_bgl/ad_bgl.h"
|
||||
#include "ad_bglockless.h"
|
||||
|
||||
/* adioi.h has the ADIOI_Fns_struct define */
|
||||
#include "adioi.h"
|
||||
|
||||
struct ADIOI_Fns_struct ADIO_BGLOCKLESS_operations = {
|
||||
ADIOI_BGL_Open, /* Open */
|
||||
ADIOI_GEN_OpenColl, /* Collective open */
|
||||
ADIOI_GEN_ReadContig, /* ReadContig */
|
||||
ADIOI_GEN_WriteContig, /* WriteContig */
|
||||
ADIOI_BGL_ReadStridedColl, /* ReadStridedColl */
|
||||
@ -35,7 +37,8 @@ struct ADIOI_Fns_struct ADIO_BGLOCKLESS_operations = {
|
||||
ADIOI_GEN_IOComplete, /* WriteComplete */
|
||||
ADIOI_GEN_IreadStrided, /* IreadStrided */
|
||||
ADIOI_GEN_IwriteStrided, /* IwriteStrided */
|
||||
ADIOI_GEN_Flush, /* Flush */
|
||||
ADIOI_BGL_Flush, /* Flush */
|
||||
ADIOI_GEN_Resize, /* Resize */
|
||||
ADIOI_GEN_Delete, /* Delete */
|
||||
ADIOI_BGLOCKLESS_Feature /* Features */
|
||||
};
|
||||
|
14
ompi/mca/io/romio/romio/adio/ad_bglockless/ad_bglockless.h
Обычный файл
14
ompi/mca/io/romio/romio/adio/ad_bglockless/ad_bglockless.h
Обычный файл
@ -0,0 +1,14 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; -*- */
|
||||
/*
|
||||
*
|
||||
* Copyright (C) 2008 Uchicago Argonne LLC
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
#ifndef AD_BGLOCKLESS_INCLUDE
|
||||
#define AD_BGLOCKLESS_INCLUDE /* guard macro must match the #ifndef above; the old name collided with the PVFS2 header guard */
|
||||
|
||||
int ADIOI_BGLOCKLESS_Feature(ADIO_File fd, int flag);
|
||||
|
||||
#endif
|
||||
|
@ -0,0 +1,15 @@
|
||||
#include "adio.h"
|
||||
|
||||
int ADIOI_BGLOCKLESS_Feature(ADIO_File fd, int flag)
|
||||
{
|
||||
switch(flag) {
|
||||
case ADIO_SCALABLE_OPEN:
|
||||
return 1;
|
||||
case ADIO_SHARED_FP:
|
||||
case ADIO_LOCKS:
|
||||
case ADIO_SEQUENTIAL:
|
||||
case ADIO_DATA_SIEVING_WRITES:
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
@ -25,6 +25,7 @@ libadio_gridftp_la_SOURCES = \
|
||||
ad_gridftp_close.c \
|
||||
ad_gridftp_delete.c \
|
||||
ad_gridftp_fcntl.c \
|
||||
ad_gridftp_features.c \
|
||||
ad_gridftp_flush.c \
|
||||
ad_gridftp_hints.c \
|
||||
ad_gridftp_open.c \
|
||||
|
@ -33,4 +33,5 @@ struct ADIOI_Fns_struct ADIO_GRIDFTP_operations = {
|
||||
ADIOI_GRIDFTP_Flush, /* Flush */
|
||||
ADIOI_GRIDFTP_Resize, /* Resize */
|
||||
ADIOI_GRIDFTP_Delete, /* Delete */
|
||||
ADIOI_GRIDFTP_Feature, /* Features */
|
||||
};
|
||||
|
12
ompi/mca/io/romio/romio/adio/ad_gridftp/ad_gridftp_features.c
Обычный файл
12
ompi/mca/io/romio/romio/adio/ad_gridftp/ad_gridftp_features.c
Обычный файл
@ -0,0 +1,12 @@
|
||||
int ADIOI_GRIDFTP_Feature (ADIO_File fd, int flag)
|
||||
{
|
||||
switch(flag) {
|
||||
case ADIO_SCALABLE_OPEN:
|
||||
case ADIO_SHARED_FP:
|
||||
case ADIO_LOCKS:
|
||||
case ADIO_SEQUENTIAL:
|
||||
case ADIO_DATA_SIEVING_WRITES:
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
@ -56,8 +56,8 @@ void ADIOI_GRIDFTP_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
|
||||
MPI_Info_get_valuelen(users_info,key,&valuelen,&flag);
|
||||
if (flag)
|
||||
{
|
||||
MPI_Info_get(users_info,key,valuelen,value,&flag);
|
||||
if (flag) MPI_Info_set(fd->info,key,value);
|
||||
ADIOI_Info_get(users_info,key,valuelen,value,&flag);
|
||||
if (flag) ADIOI_Info_set(fd->info,key,value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -136,7 +136,7 @@ void ADIOI_GRIDFTP_Open(ADIO_File fd, int *error_code)
|
||||
oattr[] (eg. parallelism, striping, etc.) goes here */
|
||||
if ( fd->info!=MPI_INFO_NULL )
|
||||
{
|
||||
MPI_Info_get(fd->info,"ftp_control_mode",MPI_MAX_INFO_VAL,hintval,&keyfound);
|
||||
ADIOI_Info_get(fd->info,"ftp_control_mode",MPI_MAX_INFO_VAL,hintval,&keyfound);
|
||||
if ( keyfound )
|
||||
{
|
||||
if ( ( !strcmp(hintval,"extended") || !strcmp(hintval,"extended_block") ) &&
|
||||
@ -153,7 +153,7 @@ void ADIOI_GRIDFTP_Open(ADIO_File fd, int *error_code)
|
||||
globus_err_handler("globus_ftp_client_operationattr_set_mode",myname,result);
|
||||
}
|
||||
|
||||
MPI_Info_get(fd->info,"parallelism",MPI_MAX_INFO_VAL,hintval,&keyfound);
|
||||
ADIOI_Info_get(fd->info,"parallelism",MPI_MAX_INFO_VAL,hintval,&keyfound);
|
||||
if ( keyfound )
|
||||
{
|
||||
int nftpthreads;
|
||||
@ -170,14 +170,14 @@ void ADIOI_GRIDFTP_Open(ADIO_File fd, int *error_code)
|
||||
}
|
||||
}
|
||||
|
||||
MPI_Info_get(fd->info,"striped_ftp",MPI_MAX_INFO_VAL,hintval,&keyfound);
|
||||
ADIOI_Info_get(fd->info,"striped_ftp",MPI_MAX_INFO_VAL,hintval,&keyfound);
|
||||
if ( keyfound )
|
||||
{
|
||||
/* if set to "true" or "enable", set up round-robin block layout */
|
||||
if ( !strncmp("true",hintval,4) || !strncmp("TRUE",hintval,4) ||
|
||||
!strncmp("enable",hintval,4) || !strncmp("ENABLE",hintval,4) )
|
||||
{
|
||||
MPI_Info_get(fd->info,"striping_factor",MPI_MAX_INFO_VAL,hintval,&keyfound);
|
||||
ADIOI_Info_get(fd->info,"striping_factor",MPI_MAX_INFO_VAL,hintval,&keyfound);
|
||||
if ( keyfound )
|
||||
{
|
||||
int striping_factor;
|
||||
@ -197,7 +197,7 @@ void ADIOI_GRIDFTP_Open(ADIO_File fd, int *error_code)
|
||||
}
|
||||
}
|
||||
|
||||
MPI_Info_get(fd->info,"tcp_buffer",MPI_MAX_INFO_VAL,hintval,&keyfound);
|
||||
ADIOI_Info_get(fd->info,"tcp_buffer",MPI_MAX_INFO_VAL,hintval,&keyfound);
|
||||
if ( keyfound )
|
||||
{
|
||||
/* set tcp buffer size */
|
||||
@ -214,7 +214,7 @@ void ADIOI_GRIDFTP_Open(ADIO_File fd, int *error_code)
|
||||
}
|
||||
}
|
||||
|
||||
MPI_Info_get(fd->info,"transfer_type",MPI_MAX_INFO_VAL,hintval,&keyfound);
|
||||
ADIOI_Info_get(fd->info,"transfer_type",MPI_MAX_INFO_VAL,hintval,&keyfound);
|
||||
if ( keyfound )
|
||||
{
|
||||
globus_ftp_control_type_t filetype;
|
||||
@ -340,84 +340,4 @@ void ADIOI_GRIDFTP_Open(ADIO_File fd, int *error_code)
|
||||
}
|
||||
}
|
||||
num_gridftp_handles++;
|
||||
|
||||
#if 0
|
||||
/* Debugging info for testing PASV mode behind firewalls */
|
||||
if ( myrank==0 )
|
||||
{
|
||||
globus_bool_t striped;
|
||||
globus_ftp_control_mode_t mode;
|
||||
globus_ftp_control_type_t filetype;
|
||||
globus_ftp_control_parallelism_t parallelism;
|
||||
|
||||
FPRINTF(stderr,"--gridftp details for %s--\n",
|
||||
fd->filename);
|
||||
|
||||
/*
|
||||
FPRINTF(stderr,"Connection caching: ");
|
||||
globus_ftp_client_handleattr_get_cache_all(&hattr,&cached);
|
||||
if ( cached==GLOBUS_TRUE )
|
||||
FPRINTF(stderr,"Y\n");
|
||||
else
|
||||
FPRINTF(stderr,"N\n");
|
||||
*/
|
||||
|
||||
FPRINTF(stderr,"Control mode: ");
|
||||
globus_ftp_client_operationattr_get_mode(&(oattr[fd->fd_sys]),&mode);
|
||||
if ( mode==GLOBUS_FTP_CONTROL_MODE_BLOCK )
|
||||
FPRINTF(stderr,"block\n");
|
||||
else if ( mode==GLOBUS_FTP_CONTROL_MODE_COMPRESSED )
|
||||
FPRINTF(stderr,"compressed\n");
|
||||
else if ( mode==GLOBUS_FTP_CONTROL_MODE_EXTENDED_BLOCK )
|
||||
FPRINTF(stderr,"extended block\n");
|
||||
else if ( mode==GLOBUS_FTP_CONTROL_MODE_STREAM )
|
||||
FPRINTF(stderr,"stream\n");
|
||||
else
|
||||
FPRINTF(stderr,"unknown\n");
|
||||
|
||||
FPRINTF(stderr,"File type: ");
|
||||
globus_ftp_client_operationattr_get_type(&(oattr[fd->fd_sys]),&filetype);
|
||||
if ( filetype==GLOBUS_FTP_CONTROL_TYPE_ASCII )
|
||||
FPRINTF(stderr,"ASCII\n");
|
||||
else if ( filetype==GLOBUS_FTP_CONTROL_TYPE_IMAGE )
|
||||
FPRINTF(stderr,"binary\n");
|
||||
else if ( filetype==GLOBUS_FTP_CONTROL_TYPE_EBCDIC )
|
||||
FPRINTF(stderr,"EBCDIC\n");
|
||||
else
|
||||
FPRINTF(stderr,"unknown\n");
|
||||
|
||||
FPRINTF(stderr,"Parallelism: ");
|
||||
globus_ftp_client_operationattr_get_parallelism(&(oattr[fd->fd_sys]),¶llelism);
|
||||
if ( parallelism.mode==GLOBUS_FTP_CONTROL_PARALLELISM_NONE )
|
||||
FPRINTF(stderr,"none\n");
|
||||
else if ( parallelism.mode==GLOBUS_FTP_CONTROL_PARALLELISM_FIXED )
|
||||
FPRINTF(stderr,"fixed with %d streams\n",parallelism.fixed.size);
|
||||
else
|
||||
FPRINTF(stderr,"unknown\n");
|
||||
|
||||
FPRINTF(stderr,"Striping: ");
|
||||
globus_ftp_client_operationattr_get_striped(&(oattr[fd->fd_sys]),&striped);
|
||||
if ( striped==GLOBUS_TRUE )
|
||||
{
|
||||
globus_ftp_control_layout_t layout;
|
||||
|
||||
FPRINTF(stderr,"Y\nLayout: ");
|
||||
globus_ftp_client_operationattr_get_layout(&(oattr[fd->fd_sys]),
|
||||
&layout);
|
||||
if ( layout.mode==GLOBUS_FTP_CONTROL_STRIPING_NONE )
|
||||
FPRINTF(stderr,"none\n");
|
||||
else if ( layout.mode==GLOBUS_FTP_CONTROL_STRIPING_PARTITIONED )
|
||||
FPRINTF(stderr,"partitioned, size=%d\n",layout.partitioned.size);
|
||||
else if ( layout.mode==GLOBUS_FTP_CONTROL_STRIPING_BLOCKED_ROUND_ROBIN )
|
||||
FPRINTF(stderr,"round-robin, block size=%d\n",layout.round_robin.block_size);
|
||||
else
|
||||
FPRINTF(stderr,"unknown\n");
|
||||
}
|
||||
else
|
||||
FPRINTF(stderr,"N\n");
|
||||
|
||||
fflush(stderr);
|
||||
}
|
||||
#endif
|
||||
|
||||
}
|
||||
|
@ -50,10 +50,6 @@ static void readcontig_data_cb(void *myargs, globus_ftp_client_handle_t *handle,
|
||||
readcontig_data_cb: buffer 0x404c0008 length 65536 offset 32112640 eof 0
|
||||
readcontig_data_cb: buffer 0x404d0008 length 65536 offset 32178176 eof 0
|
||||
*/
|
||||
#if 0
|
||||
FPRINTF(stderr, "%s: buffer %p length %d offset %Ld eof %d\n",
|
||||
__func__, buffer, length, offset, eof);
|
||||
#endif
|
||||
if ( !eof )
|
||||
globus_ftp_client_register_read(handle,
|
||||
buffer+length,
|
||||
|
@ -364,10 +364,6 @@ void ADIOI_GRIDFTP_WriteDiscontig(ADIO_File fd, void *buf, int count,
|
||||
{
|
||||
fd->fp_ind += extent;
|
||||
fd->fp_sys_posn = fd->fp_ind;
|
||||
#if 0
|
||||
FPRINTF(stdout, "[%d/%d] new file position is %Ld\n", myrank,
|
||||
nprocs, (long long) fd->fp_ind);
|
||||
#endif
|
||||
}
|
||||
else {
|
||||
fd->fp_sys_posn = offset + extent;
|
||||
|
@ -8,6 +8,9 @@
|
||||
#include "ad_hfs.h"
|
||||
#include "adio_extern.h"
|
||||
|
||||
#ifndef HAVE_LSEEK64
|
||||
#define lseek64 lseek
|
||||
#endif
|
||||
void ADIOI_HFS_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int *error_code)
|
||||
{
|
||||
int i, ntimes, err;
|
||||
|
@ -7,6 +7,10 @@
|
||||
|
||||
#include "ad_hfs.h"
|
||||
|
||||
#ifndef HAVE_LSEEK64
|
||||
#define lseek64 lseek
|
||||
#endif
|
||||
|
||||
void ADIOI_HFS_Open(ADIO_File fd, int *error_code)
|
||||
{
|
||||
int perm, old_mask, amode;
|
||||
|
@ -7,6 +7,10 @@
|
||||
|
||||
#include "ad_hfs.h"
|
||||
|
||||
#ifndef HAVE_LSEEK64
|
||||
#define lseek64 lseek
|
||||
#endif
|
||||
|
||||
void ADIOI_HFS_ReadContig(ADIO_File fd, void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Status *status, int *error_code)
|
||||
|
@ -7,6 +7,10 @@
|
||||
|
||||
#include "ad_hfs.h"
|
||||
|
||||
#ifndef HAVE_LSEEK64
|
||||
#define lseek64 lseek
|
||||
#endif
|
||||
|
||||
void ADIOI_HFS_WriteContig(ADIO_File fd, void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Status *status, int *error_code)
|
||||
|
@ -1,22 +0,0 @@
|
||||
<dir>
|
||||
<file name="ad_lustre_fcntl.c" info="1204573775"/>
|
||||
<file name="ad_lustre_hints.c" info="1204573775"/>
|
||||
<file name="ad_lustre_open.c" info="1204573775"/>
|
||||
<file name="ad_lustre_rwcontig.c" info="1204573775"/>
|
||||
<file name="ad_lustre.h" info="1204573775"/>
|
||||
<file name="ad_lustre.c" info="1204573775"/>
|
||||
</dir>
|
||||
<data>
|
||||
<fileinfo name="ad_lustre_fcntl.c">
|
||||
</fileinfo>
|
||||
<fileinfo name="ad_lustre_hints.c">
|
||||
</fileinfo>
|
||||
<fileinfo name="ad_lustre_open.c">
|
||||
</fileinfo>
|
||||
<fileinfo name="ad_lustre_rwcontig.c">
|
||||
</fileinfo>
|
||||
<fileinfo name="ad_lustre.h">
|
||||
</fileinfo>
|
||||
<fileinfo name="ad_lustre.c">
|
||||
</fileinfo>
|
||||
</data>
|
@ -24,8 +24,11 @@ EXTRA_DIST = README
|
||||
noinst_LTLIBRARIES = libadio_lustre.la
|
||||
libadio_lustre_la_SOURCES = \
|
||||
ad_lustre.c \
|
||||
ad_lustre_aggregate.c \
|
||||
ad_lustre_fcntl.c \
|
||||
ad_lustre.h \
|
||||
ad_lustre_hints.c \
|
||||
ad_lustre_open.c \
|
||||
ad_lustre_rwcontig.c
|
||||
ad_lustre_wrcoll.c \
|
||||
ad_lustre_rwcontig.c \
|
||||
ad_lustre_wrstr.c
|
||||
|
@ -4,6 +4,21 @@ Upcoming soon:
|
||||
Further out:
|
||||
o To post the code for ParColl (Partitioned collective IO)
|
||||
|
||||
-----------------------------------------------------
|
||||
V05:
|
||||
-----------------------------------------------------
|
||||
Improved data redistribution
|
||||
o Improve I/O pattern identification. Besides checking interleaving,
|
||||
if request I/O size is small, collective I/O will be performed.
|
||||
The hint bigsize can be used to define the req size value.
|
||||
o Provide hint CO for load balancing to control the number of
|
||||
IO clients for each OST
|
||||
o Produce stripe-contiguous I/O pattern that Lustre prefers
|
||||
o Control read-modify-write in data sieving in collective IO
|
||||
by hint ds_in_coll.
|
||||
o Reduce extent lock conflicts by making each OST accessed by one or
|
||||
more constant clients.
|
||||
|
||||
-----------------------------------------------------
|
||||
V04:
|
||||
-----------------------------------------------------
|
||||
|
@ -4,21 +4,24 @@
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*
|
||||
* Copyright (C) 2007 Oak Ridge National Laboratory
|
||||
*
|
||||
* Copyright (C) 2008 Sun Microsystems, Lustre group
|
||||
*/
|
||||
|
||||
#include "ad_lustre.h"
|
||||
|
||||
struct ADIOI_Fns_struct ADIO_LUSTRE_operations = {
|
||||
ADIOI_LUSTRE_Open, /* Open */
|
||||
ADIOI_GEN_OpenColl, /* OpenColl */
|
||||
ADIOI_LUSTRE_ReadContig, /* ReadContig */
|
||||
ADIOI_LUSTRE_WriteContig, /* WriteContig */
|
||||
ADIOI_GEN_ReadStridedColl, /* ReadStridedColl */
|
||||
ADIOI_GEN_WriteStridedColl, /* WriteStridedColl */
|
||||
ADIOI_LUSTRE_WriteStridedColl, /* WriteStridedColl */
|
||||
ADIOI_GEN_SeekIndividual, /* SeekIndividual */
|
||||
ADIOI_GEN_Fcntl, /* Fcntl */
|
||||
ADIOI_LUSTRE_SetInfo, /* SetInfo */
|
||||
ADIOI_GEN_ReadStrided, /* ReadStrided */
|
||||
ADIOI_GEN_WriteStrided, /* WriteStrided */
|
||||
ADIOI_LUSTRE_WriteStrided, /* WriteStrided */
|
||||
ADIOI_GEN_Close, /* Close */
|
||||
#if defined(ROMIO_HAVE_WORKING_AIO) && !defined(CRAY_XT_LUSTRE)
|
||||
ADIOI_GEN_IreadContig, /* IreadContig */
|
||||
@ -36,4 +39,5 @@ struct ADIOI_Fns_struct ADIO_LUSTRE_operations = {
|
||||
ADIOI_GEN_Flush, /* Flush */
|
||||
ADIOI_GEN_Resize, /* Resize */
|
||||
ADIOI_GEN_Delete, /* Delete */
|
||||
ADIOI_GEN_Feature, /* Features */
|
||||
};
|
||||
|
@ -4,6 +4,8 @@
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*
|
||||
* Copyright (C) 2007 Oak Ridge National Laboratory
|
||||
*
|
||||
* Copyright (C) 2008 Sun Microsystems, Lustre group
|
||||
*/
|
||||
|
||||
#ifndef AD_UNIX_INCLUDE
|
||||
@ -17,6 +19,7 @@
|
||||
|
||||
#ifdef __linux__
|
||||
# include <sys/ioctl.h> /* necessary for: */
|
||||
# include <time.h>
|
||||
# define __USE_GNU /* O_DIRECT and */
|
||||
# include <fcntl.h> /* IO operations */
|
||||
# undef __USE_GNU
|
||||
@ -24,7 +27,7 @@
|
||||
|
||||
/*#include <fcntl.h>*/
|
||||
#include <sys/ioctl.h>
|
||||
#include "lustre/lustre_user.h"
|
||||
#include <lustre/lustre_user.h>
|
||||
#include "adio.h"
|
||||
/*#include "adioi.h"*/
|
||||
|
||||
@ -43,22 +46,46 @@ void ADIOI_LUSTRE_Open(ADIO_File fd, int *error_code);
|
||||
void ADIOI_LUSTRE_Close(ADIO_File fd, int *error_code);
|
||||
void ADIOI_LUSTRE_ReadContig(ADIO_File fd, void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Status *status, int
|
||||
*error_code);
|
||||
ADIO_Offset offset, ADIO_Status *status,
|
||||
int *error_code);
|
||||
void ADIOI_LUSTRE_WriteContig(ADIO_File fd, void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Status *status, int
|
||||
*error_code);
|
||||
ADIO_Offset offset, ADIO_Status *status,
|
||||
int *error_code);
|
||||
void ADIOI_LUSTRE_WriteStrided(ADIO_File fd, void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Status *status,
|
||||
int *error_code);
|
||||
void ADIOI_LUSTRE_WriteStridedColl(ADIO_File fd, void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Status *status, int
|
||||
*error_code);
|
||||
ADIO_Offset offset, ADIO_Status *status,
|
||||
int *error_code);
|
||||
void ADIOI_LUSTRE_ReadStridedColl(ADIO_File fd, void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Status *status, int
|
||||
*error_code);
|
||||
ADIO_Offset offset, ADIO_Status *status,
|
||||
int *error_code);
|
||||
void ADIOI_LUSTRE_ReadStrided(ADIO_File fd, void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Status *status,
|
||||
int *error_code);
|
||||
void ADIOI_LUSTRE_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct,
|
||||
int *error_code);
|
||||
void ADIOI_LUSTRE_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code);
|
||||
|
||||
/* the lustre utilities: */
|
||||
int ADIOI_LUSTRE_Docollect(ADIO_File fd, int contig_access_count,
|
||||
ADIO_Offset *len_list, int nprocs);
|
||||
|
||||
void ADIOI_LUSTRE_Get_striping_info(ADIO_File fd, int **striping_info_ptr,
|
||||
int mode);
|
||||
void ADIOI_LUSTRE_Calc_my_req(ADIO_File fd, ADIO_Offset *offset_list,
|
||||
ADIO_Offset *len_list, int contig_access_count,
|
||||
int *striping_info, int nprocs,
|
||||
int *count_my_req_procs_ptr,
|
||||
int **count_my_req_per_proc_ptr,
|
||||
ADIOI_Access **my_req_ptr,
|
||||
int ***buf_idx_ptr);
|
||||
|
||||
int ADIOI_LUSTRE_Calc_aggregator(ADIO_File fd, ADIO_Offset off,
|
||||
ADIO_Offset *len, int *striping_info);
|
||||
#endif /* End of AD_UNIX_INCLUDE */
|
||||
|
322
ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_aggregate.c
Обычный файл
322
ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_aggregate.c
Обычный файл
@ -0,0 +1,322 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; -*- */
|
||||
/*
|
||||
* Copyright (C) 1997 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*
|
||||
* Copyright (C) 2007 Oak Ridge National Laboratory
|
||||
*
|
||||
* Copyright (C) 2008 Sun Microsystems, Lustre group
|
||||
*/
|
||||
|
||||
#include "ad_lustre.h"
|
||||
#include "adio_extern.h"
|
||||
|
||||
#undef AGG_DEBUG
|
||||
|
||||
void ADIOI_LUSTRE_Get_striping_info(ADIO_File fd, int **striping_info_ptr,
|
||||
int mode)
|
||||
{
|
||||
int *striping_info = NULL;
|
||||
/* get striping information:
|
||||
* striping_info[0]: stripe_size
|
||||
* striping_info[1]: stripe_count
|
||||
* striping_info[2]: avail_cb_nodes
|
||||
*/
|
||||
int stripe_size, stripe_count, CO = 1;
|
||||
int avail_cb_nodes, divisor, nprocs_for_coll = fd->hints->cb_nodes;
|
||||
|
||||
/* Get hints value */
|
||||
/* stripe size */
|
||||
stripe_size = fd->hints->striping_unit;
|
||||
/* stripe count */
|
||||
/* stripe_size and stripe_count have been validated in ADIOI_LUSTRE_Open() */
|
||||
stripe_count = fd->hints->striping_factor;
|
||||
|
||||
/* Calculate the available number of I/O clients */
|
||||
if (!mode) {
|
||||
/* for collective read,
|
||||
* if "CO" clients access the same OST simultaneously,
|
||||
* the OST disk seek time would be much. So, to avoid this,
|
||||
* it might be better if 1 client only accesses 1 OST.
|
||||
* So, we set CO = 1 to meet the above requirement.
|
||||
*/
|
||||
CO = 1;
|
||||
/* XXX: maybe there is a better way for collective read */
|
||||
} else {
|
||||
/* CO also has been validated in ADIOI_LUSTRE_Open(), >0 */
|
||||
CO = fd->hints->fs_hints.lustre.co_ratio;
|
||||
}
|
||||
/* Calculate how many IO clients we need */
|
||||
/* Algorithm courtesy Pascal Deveze (pascal.deveze@bull.net) */
|
||||
/* To avoid extent lock conflicts,
|
||||
* avail_cb_nodes should either
|
||||
* - be a multiple of stripe_count,
|
||||
* - or divide stripe_count exactly
|
||||
* so that each OST is accessed by a maximum of CO constant clients. */
|
||||
if (nprocs_for_coll >= stripe_count)
|
||||
/* avail_cb_nodes should be a multiple of stripe_count and the number
|
||||
* of procs per OST should be limited to the minimum between
|
||||
* nprocs_for_coll/stripe_count and CO
|
||||
*
|
||||
* e.g. if stripe_count=20, nprocs_for_coll=42 and CO=3 then
|
||||
* avail_cb_nodes should be equal to 40 */
|
||||
avail_cb_nodes =
|
||||
stripe_count * ADIOI_MIN(nprocs_for_coll/stripe_count, CO);
|
||||
else {
|
||||
/* nprocs_for_coll is less than stripe_count */
|
||||
/* avail_cb_nodes should divide stripe_count */
|
||||
/* e.g. if stripe_count=60 and nprocs_for_coll=8 then
|
||||
* avail_cb_nodes should be equal to 6 */
|
||||
/* This could be done with :
|
||||
while (stripe_count % avail_cb_nodes != 0) avail_cb_nodes--;
|
||||
but this can be optimized for large values of nprocs_for_coll and
|
||||
stripe_count */
|
||||
divisor = 2;
|
||||
avail_cb_nodes = 1;
|
||||
/* try to divide */
|
||||
while (stripe_count >= divisor*divisor) {
|
||||
if ((stripe_count % divisor) == 0) {
|
||||
if (stripe_count/divisor <= nprocs_for_coll) {
|
||||
/* The value is found ! */
|
||||
avail_cb_nodes = stripe_count/divisor;
|
||||
break;
|
||||
}
|
||||
/* if divisor is less than nprocs_for_coll, divisor is a
|
||||
* solution, but it is not sure that it is the best one */
|
||||
else if (divisor <= nprocs_for_coll)
|
||||
avail_cb_nodes = divisor;
|
||||
}
|
||||
divisor++;
|
||||
}
|
||||
}
|
||||
|
||||
*striping_info_ptr = (int *) ADIOI_Malloc(3 * sizeof(int));
|
||||
striping_info = *striping_info_ptr;
|
||||
striping_info[0] = stripe_size;
|
||||
striping_info[1] = stripe_count;
|
||||
striping_info[2] = avail_cb_nodes;
|
||||
}
|
||||
|
||||
int ADIOI_LUSTRE_Calc_aggregator(ADIO_File fd, ADIO_Offset off,
|
||||
ADIO_Offset *len, int *striping_info)
|
||||
{
|
||||
int rank_index, rank;
|
||||
ADIO_Offset avail_bytes;
|
||||
int stripe_size = striping_info[0];
|
||||
int avail_cb_nodes = striping_info[2];
|
||||
|
||||
/* Produce the stripe-contiguous pattern for Lustre */
|
||||
rank_index = (int)((off / stripe_size) % avail_cb_nodes);
|
||||
|
||||
/* we index into fd_end with rank_index, and fd_end was allocated to be no
|
||||
* bigger than fd->hints->cb_nodes. If we ever violate that, we're
|
||||
* overrunning arrays. Obviously, we should never ever hit this abort
|
||||
*/
|
||||
if (rank_index >= fd->hints->cb_nodes)
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
|
||||
avail_bytes = (off / (ADIO_Offset)stripe_size + 1) *
|
||||
(ADIO_Offset)stripe_size - off;
|
||||
if (avail_bytes < *len) {
|
||||
/* this proc only has part of the requested contig. region */
|
||||
*len = avail_bytes;
|
||||
}
|
||||
/* map our index to a rank */
|
||||
/* NOTE: FOR NOW WE DON'T HAVE A MAPPING...JUST DO 0..NPROCS_FOR_COLL */
|
||||
rank = fd->hints->ranklist[rank_index];
|
||||
|
||||
return rank;
|
||||
}
|
||||
|
||||
/* ADIOI_LUSTRE_Calc_my_req() - calculate what portions of the access requests
|
||||
* of this process are located in the file domains of various processes
|
||||
* (including this one)
|
||||
*/
|
||||
|
||||
|
||||
void ADIOI_LUSTRE_Calc_my_req(ADIO_File fd, ADIO_Offset *offset_list,
|
||||
ADIO_Offset *len_list, int contig_access_count,
|
||||
int *striping_info, int nprocs,
|
||||
int *count_my_req_procs_ptr,
|
||||
int **count_my_req_per_proc_ptr,
|
||||
ADIOI_Access **my_req_ptr,
|
||||
int ***buf_idx_ptr)
|
||||
{
|
||||
/* Nothing different from ADIOI_Calc_my_req(), except calling
|
||||
* ADIOI_Lustre_Calc_aggregator() instead of the old one */
|
||||
int *count_my_req_per_proc, count_my_req_procs, **buf_idx;
|
||||
int i, l, proc;
|
||||
ADIO_Offset avail_len, rem_len, curr_idx, off;
|
||||
ADIOI_Access *my_req;
|
||||
|
||||
*count_my_req_per_proc_ptr = (int *) ADIOI_Calloc(nprocs, sizeof(int));
|
||||
count_my_req_per_proc = *count_my_req_per_proc_ptr;
|
||||
/* count_my_req_per_proc[i] gives the no. of contig. requests of this
|
||||
* process in process i's file domain. calloc initializes to zero.
|
||||
* I'm allocating memory of size nprocs, so that I can do an
|
||||
* MPI_Alltoall later on.
|
||||
*/
|
||||
|
||||
buf_idx = (int **) ADIOI_Malloc(nprocs * sizeof(int*));
|
||||
|
||||
/* one pass just to calculate how much space to allocate for my_req;
|
||||
* contig_access_count was calculated way back in ADIOI_Calc_my_off_len()
|
||||
*/
|
||||
for (i = 0; i < contig_access_count; i++) {
|
||||
/* short circuit offset/len processing if len == 0
|
||||
* (zero-byte read/write
|
||||
*/
|
||||
if (len_list[i] == 0)
|
||||
continue;
|
||||
off = offset_list[i];
|
||||
avail_len = len_list[i];
|
||||
/* note: we set avail_len to be the total size of the access.
|
||||
* then ADIOI_LUSTRE_Calc_aggregator() will modify the value to return
|
||||
* the amount that was available.
|
||||
*/
|
||||
proc = ADIOI_LUSTRE_Calc_aggregator(fd, off, &avail_len, striping_info);
|
||||
count_my_req_per_proc[proc]++;
|
||||
|
||||
/* figure out how many data is remaining in the access
|
||||
* we'll take care of this data (if there is any)
|
||||
* in the while loop below.
|
||||
*/
|
||||
rem_len = len_list[i] - avail_len;
|
||||
|
||||
while (rem_len != 0) {
|
||||
off += avail_len; /* point to first remaining byte */
|
||||
avail_len = rem_len; /* save remaining size, pass to calc */
|
||||
proc = ADIOI_LUSTRE_Calc_aggregator(fd, off, &avail_len, striping_info);
|
||||
count_my_req_per_proc[proc]++;
|
||||
rem_len -= avail_len; /* reduce remaining length by amount from fd */
|
||||
}
|
||||
}
|
||||
|
||||
/* buf_idx is relevant only if buftype_is_contig.
|
||||
* buf_idx[i] gives the index into user_buf where data received
|
||||
* from proc 'i' should be placed. This allows receives to be done
|
||||
* without extra buffer. This can't be done if buftype is not contig.
|
||||
*/
|
||||
|
||||
/* initialize buf_idx vectors */
|
||||
for (i = 0; i < nprocs; i++) {
|
||||
/* add one to count_my_req_per_proc[i] to avoid zero size malloc */
|
||||
buf_idx[i] = (int *) ADIOI_Malloc((count_my_req_per_proc[i] + 1)
|
||||
* sizeof(int));
|
||||
}
|
||||
|
||||
/* now allocate space for my_req, offset, and len */
|
||||
*my_req_ptr = (ADIOI_Access *) ADIOI_Malloc(nprocs * sizeof(ADIOI_Access));
|
||||
my_req = *my_req_ptr;
|
||||
|
||||
count_my_req_procs = 0;
|
||||
for (i = 0; i < nprocs; i++) {
|
||||
if (count_my_req_per_proc[i]) {
|
||||
my_req[i].offsets = (ADIO_Offset *)
|
||||
ADIOI_Malloc(count_my_req_per_proc[i] *
|
||||
sizeof(ADIO_Offset));
|
||||
my_req[i].lens = (int *) ADIOI_Malloc(count_my_req_per_proc[i] *
|
||||
sizeof(int));
|
||||
count_my_req_procs++;
|
||||
}
|
||||
my_req[i].count = 0; /* will be incremented where needed later */
|
||||
}
|
||||
|
||||
/* now fill in my_req */
|
||||
curr_idx = 0;
|
||||
for (i = 0; i < contig_access_count; i++) {
|
||||
/* short circuit offset/len processing if len == 0
|
||||
* (zero-byte read/write */
|
||||
if (len_list[i] == 0)
|
||||
continue;
|
||||
off = offset_list[i];
|
||||
avail_len = len_list[i];
|
||||
proc = ADIOI_LUSTRE_Calc_aggregator(fd, off, &avail_len, striping_info);
|
||||
|
||||
l = my_req[proc].count;
|
||||
|
||||
ADIOI_Assert(curr_idx == (int) curr_idx);
|
||||
ADIOI_Assert(l < count_my_req_per_proc[proc]);
|
||||
buf_idx[proc][l] = (int) curr_idx;
|
||||
curr_idx += avail_len;
|
||||
|
||||
rem_len = len_list[i] - avail_len;
|
||||
|
||||
/* store the proc, offset, and len information in an array
|
||||
* of structures, my_req. Each structure contains the
|
||||
* offsets and lengths located in that process's FD,
|
||||
* and the associated count.
|
||||
*/
|
||||
my_req[proc].offsets[l] = off;
|
||||
ADIOI_Assert(avail_len == (int) avail_len);
|
||||
my_req[proc].lens[l] = (int) avail_len;
|
||||
my_req[proc].count++;
|
||||
|
||||
while (rem_len != 0) {
|
||||
off += avail_len;
|
||||
avail_len = rem_len;
|
||||
proc = ADIOI_LUSTRE_Calc_aggregator(fd, off, &avail_len,
|
||||
striping_info);
|
||||
|
||||
l = my_req[proc].count;
|
||||
ADIOI_Assert(curr_idx == (int) curr_idx);
|
||||
ADIOI_Assert(l < count_my_req_per_proc[proc]);
|
||||
buf_idx[proc][l] = (int) curr_idx;
|
||||
|
||||
curr_idx += avail_len;
|
||||
rem_len -= avail_len;
|
||||
|
||||
my_req[proc].offsets[l] = off;
|
||||
ADIOI_Assert(avail_len == (int) avail_len);
|
||||
my_req[proc].lens[l] = (int) avail_len;
|
||||
my_req[proc].count++;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef AGG_DEBUG
|
||||
for (i = 0; i < nprocs; i++) {
|
||||
if (count_my_req_per_proc[i] > 0) {
|
||||
FPRINTF(stdout, "data needed from %d (count = %d):\n",
|
||||
i, my_req[i].count);
|
||||
for (l = 0; l < my_req[i].count; l++) {
|
||||
FPRINTF(stdout, " off[%d] = %lld, len[%d] = %d\n",
|
||||
l, my_req[i].offsets[l], l, my_req[i].lens[l]);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
*count_my_req_procs_ptr = count_my_req_procs;
|
||||
*buf_idx_ptr = buf_idx;
|
||||
}
|
||||
|
||||
int ADIOI_LUSTRE_Docollect(ADIO_File fd, int contig_access_count,
|
||||
ADIO_Offset *len_list, int nprocs)
|
||||
{
|
||||
/* If the processes are non-interleaved, we will check the req_size.
|
||||
* if (avg_req_size > big_req_size) {
|
||||
* docollect = 0;
|
||||
* }
|
||||
*/
|
||||
|
||||
int i, docollect = 1, big_req_size = 0;
|
||||
ADIO_Offset req_size = 0, total_req_size;
|
||||
int avg_req_size, total_access_count;
|
||||
|
||||
/* calculate total_req_size and total_access_count */
|
||||
for (i = 0; i < contig_access_count; i++)
|
||||
req_size += len_list[i];
|
||||
MPI_Allreduce(&req_size, &total_req_size, 1, MPI_LONG_LONG_INT, MPI_SUM,
|
||||
fd->comm);
|
||||
MPI_Allreduce(&contig_access_count, &total_access_count, 1, MPI_INT, MPI_SUM,
|
||||
fd->comm);
|
||||
/* estimate average req_size */
|
||||
avg_req_size = (int)(total_req_size / total_access_count);
|
||||
/* get hint of big_req_size */
|
||||
big_req_size = fd->hints->fs_hints.lustre.coll_threshold;
|
||||
/* Don't perform collective I/O if there are big requests */
|
||||
if ((big_req_size > 0) && (avg_req_size > big_req_size))
|
||||
docollect = 0;
|
||||
|
||||
return docollect;
|
||||
}
|
@ -25,7 +25,7 @@ void ADIOI_LUSTRE_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int
|
||||
if (fd->fp_sys_posn != -1)
|
||||
lseek(fd->fd_sys, fd->fp_sys_posn, SEEK_SET);
|
||||
if (fcntl_struct->fsize == -1) {
|
||||
*error_code = MPIR_Err_create_code(MPI_SUCCESS,
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
|
||||
MPIR_ERR_RECOVERABLE, myname, __LINE__,
|
||||
MPI_ERR_IO, "**io", "**io %s", strerror(errno));
|
||||
}
|
||||
@ -56,7 +56,7 @@ void ADIOI_LUSTRE_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int
|
||||
ADIO_ReadContig(fd, buf, len, MPI_BYTE, ADIO_EXPLICIT_OFFSET, done,
|
||||
&status, error_code);
|
||||
if (*error_code != MPI_SUCCESS) {
|
||||
*error_code = MPIR_Err_create_code(MPI_SUCCESS,
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
|
||||
MPIR_ERR_RECOVERABLE, myname, __LINE__,
|
||||
MPI_ERR_IO, "**io", "**io %s", strerror(errno));
|
||||
return;
|
||||
|
@ -4,6 +4,8 @@
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*
|
||||
* Copyright (C) 2007 Oak Ridge National Laboratory
|
||||
*
|
||||
* Copyright (C) 2008 Sun Microsystems, Lustre group
|
||||
*/
|
||||
|
||||
#include "ad_lustre.h"
|
||||
@ -11,10 +13,12 @@
|
||||
|
||||
void ADIOI_LUSTRE_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
|
||||
{
|
||||
char *value, *value_in_fd;
|
||||
int flag, tmp_val[3], str_factor=-1, str_unit=0, start_iodev=-1;
|
||||
char *value;
|
||||
int flag, stripe_val[3], str_factor = -1, str_unit=0, start_iodev=-1;
|
||||
struct lov_user_md lum = { 0 };
|
||||
int err, myrank, fd_sys, perm, amode, old_mask;
|
||||
int int_val, tmp_val;
|
||||
static char myname[] = "ADIOI_LUSTRE_SETINFO";
|
||||
|
||||
value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
|
||||
if ( (fd->info) == MPI_INFO_NULL) {
|
||||
@ -22,54 +26,63 @@ void ADIOI_LUSTRE_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
|
||||
if necessary. */
|
||||
MPI_Info_create(&(fd->info));
|
||||
|
||||
MPI_Info_set(fd->info, "direct_read", "false");
|
||||
MPI_Info_set(fd->info, "direct_write", "false");
|
||||
ADIOI_Info_set(fd->info, "direct_read", "false");
|
||||
ADIOI_Info_set(fd->info, "direct_write", "false");
|
||||
fd->direct_read = fd->direct_write = 0;
|
||||
/* initialize lustre hints */
|
||||
ADIOI_Info_set(fd->info, "romio_lustre_co_ratio", "1");
|
||||
fd->hints->fs_hints.lustre.co_ratio = 1;
|
||||
ADIOI_Info_set(fd->info, "romio_lustre_coll_threshold", "0");
|
||||
fd->hints->fs_hints.lustre.coll_threshold = 0;
|
||||
ADIOI_Info_set(fd->info, "romio_lustre_ds_in_coll", "enable");
|
||||
fd->hints->fs_hints.lustre.ds_in_coll = ADIOI_HINT_ENABLE;
|
||||
|
||||
/* has user specified striping or server buffering parameters
|
||||
and do they have the same value on all processes? */
|
||||
if (users_info != MPI_INFO_NULL) {
|
||||
MPI_Info_get(users_info, "striping_unit", MPI_MAX_INFO_VAL,
|
||||
/* striping information */
|
||||
ADIOI_Info_get(users_info, "striping_unit", MPI_MAX_INFO_VAL,
|
||||
value, &flag);
|
||||
if (flag)
|
||||
str_unit=atoi(value);
|
||||
|
||||
MPI_Info_get(users_info, "striping_factor", MPI_MAX_INFO_VAL,
|
||||
ADIOI_Info_get(users_info, "striping_factor", MPI_MAX_INFO_VAL,
|
||||
value, &flag);
|
||||
if (flag)
|
||||
str_factor=atoi(value);
|
||||
|
||||
MPI_Info_get(users_info, "start_iodevice", MPI_MAX_INFO_VAL,
|
||||
value, &flag);
|
||||
ADIOI_Info_get(users_info, "romio_lustre_start_iodevice",
|
||||
MPI_MAX_INFO_VAL, value, &flag);
|
||||
if (flag)
|
||||
start_iodev=atoi(value);
|
||||
|
||||
MPI_Info_get(users_info, "direct_read", MPI_MAX_INFO_VAL,
|
||||
/* direct read and write */
|
||||
ADIOI_Info_get(users_info, "direct_read", MPI_MAX_INFO_VAL,
|
||||
value, &flag);
|
||||
if (flag && (!strcmp(value, "true") || !strcmp(value, "TRUE"))) {
|
||||
MPI_Info_set(fd->info, "direct_read", "true");
|
||||
ADIOI_Info_set(fd->info, "direct_read", "true");
|
||||
fd->direct_read = 1;
|
||||
}
|
||||
|
||||
MPI_Info_get(users_info, "direct_write", MPI_MAX_INFO_VAL,
|
||||
ADIOI_Info_get(users_info, "direct_write", MPI_MAX_INFO_VAL,
|
||||
value, &flag);
|
||||
if (flag && (!strcmp(value, "true") || !strcmp(value, "TRUE"))) {
|
||||
MPI_Info_set(fd->info, "direct_write", "true");
|
||||
ADIOI_Info_set(fd->info, "direct_write", "true");
|
||||
fd->direct_write = 1;
|
||||
}
|
||||
}
|
||||
|
||||
/* set striping information with ioctl */
|
||||
MPI_Comm_rank(fd->comm, &myrank);
|
||||
if (myrank == 0) {
|
||||
tmp_val[0] = str_factor;
|
||||
tmp_val[1] = str_unit;
|
||||
tmp_val[2] = start_iodev;
|
||||
stripe_val[0] = str_factor;
|
||||
stripe_val[1] = str_unit;
|
||||
stripe_val[2] = start_iodev;
|
||||
}
|
||||
MPI_Bcast(tmp_val, 3, MPI_INT, 0, fd->comm);
|
||||
MPI_Bcast(stripe_val, 3, MPI_INT, 0, fd->comm);
|
||||
|
||||
if (tmp_val[0] != str_factor
|
||||
|| tmp_val[1] != str_unit
|
||||
|| tmp_val[2] != start_iodev) {
|
||||
if (stripe_val[0] != str_factor
|
||||
|| stripe_val[1] != str_unit
|
||||
|| stripe_val[2] != start_iodev) {
|
||||
FPRINTF(stderr, "ADIOI_LUSTRE_SetInfo: All keys"
|
||||
"-striping_factor:striping_unit:start_iodevice "
|
||||
"need to be identical across all processes\n");
|
||||
@ -119,17 +132,65 @@ void ADIOI_LUSTRE_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
|
||||
}
|
||||
} /* End of striping parameters validation */
|
||||
}
|
||||
|
||||
MPI_Barrier(fd->comm);
|
||||
/* set the values for collective I/O and data sieving parameters */
|
||||
ADIOI_GEN_SetInfo(fd, users_info, error_code);
|
||||
} else {
|
||||
/* The file has been opened previously and fd->fd_sys is a valid
|
||||
file descriptor. cannot set striping parameters now. */
|
||||
|
||||
/* set the values for collective I/O and data sieving parameters */
|
||||
ADIOI_GEN_SetInfo(fd, users_info, error_code);
|
||||
}
|
||||
/* get other hint */
|
||||
if (users_info != MPI_INFO_NULL) {
|
||||
/* CO: IO Clients/OST,
|
||||
* to keep the load balancing between clients and OSTs */
|
||||
ADIOI_Info_get(users_info, "romio_lustre_co_ratio", MPI_MAX_INFO_VAL, value,
|
||||
&flag);
|
||||
if (flag && (int_val = atoi(value)) > 0) {
|
||||
tmp_val = int_val;
|
||||
MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
|
||||
if (tmp_val != int_val) {
|
||||
MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
|
||||
"romio_lustre_co_ratio",
|
||||
error_code);
|
||||
ADIOI_Free(value);
|
||||
return;
|
||||
}
|
||||
ADIOI_Info_set(fd->info, "romio_lustre_co_ratio", value);
|
||||
fd->hints->fs_hints.lustre.co_ratio = atoi(value);
|
||||
}
|
||||
/* coll_threshold:
|
||||
* if the req size is bigger than this, collective IO may not be performed.
|
||||
*/
|
||||
ADIOI_Info_get(users_info, "romio_lustre_coll_threshold", MPI_MAX_INFO_VAL, value,
|
||||
&flag);
|
||||
if (flag && (int_val = atoi(value)) > 0) {
|
||||
tmp_val = int_val;
|
||||
MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
|
||||
if (tmp_val != int_val) {
|
||||
MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
|
||||
"romio_lustre_coll_threshold",
|
||||
error_code);
|
||||
ADIOI_Free(value);
|
||||
return;
|
||||
}
|
||||
ADIOI_Info_set(fd->info, "romio_lustre_coll_threshold", value);
|
||||
fd->hints->fs_hints.lustre.coll_threshold = atoi(value);
|
||||
}
|
||||
/* ds_in_coll: disable data sieving in collective IO */
|
||||
ADIOI_Info_get(users_info, "romio_lustre_ds_in_coll", MPI_MAX_INFO_VAL,
|
||||
value, &flag);
|
||||
if (flag && (!strcmp(value, "disable") ||
|
||||
!strcmp(value, "DISABLE"))) {
|
||||
tmp_val = int_val = 2;
|
||||
MPI_Bcast(&tmp_val, 2, MPI_INT, 0, fd->comm);
|
||||
if (tmp_val != int_val) {
|
||||
MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
|
||||
"romio_lustre_ds_in_coll",
|
||||
error_code);
|
||||
ADIOI_Free(value);
|
||||
return;
|
||||
}
|
||||
ADIOI_Info_set(fd->info, "romio_lustre_ds_in_coll", "disable");
|
||||
fd->hints->fs_hints.lustre.ds_in_coll = ADIOI_HINT_DISABLE;
|
||||
}
|
||||
}
|
||||
/* set the values for collective I/O and data sieving parameters */
|
||||
ADIOI_GEN_SetInfo(fd, users_info, error_code);
|
||||
|
||||
if (ADIOI_Direct_read) fd->direct_read = 1;
|
||||
if (ADIOI_Direct_write) fd->direct_write = 1;
|
||||
|
@ -4,14 +4,22 @@
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*
|
||||
* Copyright (C) 2007 Oak Ridge National Laboratory
|
||||
*
|
||||
* Copyright (C) 2008 Sun Microsystems, Lustre group
|
||||
*/
|
||||
|
||||
#include "ad_lustre.h"
|
||||
|
||||
/* what is the basis for this define?
|
||||
* what happens if there are more than 1k UUIDs? */
|
||||
|
||||
#define MAX_LOV_UUID_COUNT 1000
|
||||
|
||||
void ADIOI_LUSTRE_Open(ADIO_File fd, int *error_code)
|
||||
{
|
||||
int perm, old_mask, amode, amode_direct;
|
||||
struct lov_user_md lum = { 0 };
|
||||
int lumlen;
|
||||
struct lov_user_md *lum = NULL;
|
||||
char *value;
|
||||
|
||||
#if defined(MPICH2) || !defined(PRINT_ERR_MSG)
|
||||
@ -44,23 +52,37 @@ void ADIOI_LUSTRE_Open(ADIO_File fd, int *error_code)
|
||||
if (fd->fd_sys != -1) {
|
||||
int err;
|
||||
|
||||
/* get file striping information and set it in info */
|
||||
/* odd malloc here because lov_user_md contains some fixed data and
|
||||
* then a list of 'lmm_objects' representing stripe */
|
||||
lumlen = sizeof(struct lov_user_md) +
|
||||
MAX_LOV_UUID_COUNT * sizeof(struct lov_user_ost_data);
|
||||
/* furthermore, Pascal Deveze reports that, even though we pass a
|
||||
* "GETSTRIPE" (read) flag to the ioctl, if some of the values of this
|
||||
* struct are uninitialized, the call can give an error. calloc in case
|
||||
* there are other members that must be initialized and in case
|
||||
* lov_user_md struct changes in future */
|
||||
lum = (struct lov_user_md *)ADIOI_Calloc(1,lumlen);
|
||||
lum->lmm_magic = LOV_USER_MAGIC;
|
||||
err = ioctl(fd->fd_sys, LL_IOC_LOV_GETSTRIPE, (void *)lum);
|
||||
if (!err) {
|
||||
value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
|
||||
|
||||
/* get file striping information and set it in info */
|
||||
lum.lmm_magic = LOV_USER_MAGIC;
|
||||
err = ioctl(fd->fd_sys, LL_IOC_LOV_GETSTRIPE, (void *) &lum);
|
||||
fd->hints->striping_unit = lum->lmm_stripe_size;
|
||||
sprintf(value, "%d", lum->lmm_stripe_size);
|
||||
ADIOI_Info_set(fd->info, "striping_unit", value);
|
||||
|
||||
if (!err) {
|
||||
sprintf(value, "%d", lum.lmm_stripe_size);
|
||||
MPI_Info_set(fd->info, "striping_unit", value);
|
||||
fd->hints->striping_factor = lum->lmm_stripe_count;
|
||||
sprintf(value, "%d", lum->lmm_stripe_count);
|
||||
ADIOI_Info_set(fd->info, "striping_factor", value);
|
||||
|
||||
sprintf(value, "%d", lum.lmm_stripe_count);
|
||||
MPI_Info_set(fd->info, "striping_factor", value);
|
||||
fd->hints->fs_hints.lustre.start_iodevice = lum->lmm_stripe_offset;
|
||||
sprintf(value, "%d", lum->lmm_stripe_offset);
|
||||
ADIOI_Info_set(fd->info, "romio_lustre_start_iodevice", value);
|
||||
|
||||
sprintf(value, "%d", lum.lmm_stripe_offset);
|
||||
MPI_Info_set(fd->info, "start_iodevice", value);
|
||||
}
|
||||
ADIOI_Free(value);
|
||||
}
|
||||
ADIOI_Free(lum);
|
||||
|
||||
if (fd->access_mode & ADIO_APPEND)
|
||||
fd->fp_ind = fd->fp_sys_posn = lseek(fd->fd_sys, 0, SEEK_END);
|
||||
|
@ -4,6 +4,8 @@
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*
|
||||
* Copyright (C) 2007 Oak Ridge National Laboratory
|
||||
*
|
||||
* Copyright (C) 2008 Sun Microsystems, Lustre group
|
||||
*/
|
||||
|
||||
#define _XOPEN_SOURCE 600
|
||||
@ -18,7 +20,7 @@ static void ADIOI_LUSTRE_Aligned_Mem_File_Write(ADIO_File fd, void *buf, int len
|
||||
static void ADIOI_LUSTRE_Aligned_Mem_File_Write(ADIO_File fd, void *buf, int len,
|
||||
ADIO_Offset offset, int *err)
|
||||
{
|
||||
int ntimes, rem, newrem, i, size, nbytes;
|
||||
int rem, size, nbytes;
|
||||
if (!(len % fd->d_miniosz) && (len >= fd->d_miniosz)) {
|
||||
*err = pwrite(fd->fd_direct, buf, len, offset);
|
||||
} else if (len < fd->d_miniosz) {
|
||||
@ -37,7 +39,7 @@ static void ADIOI_LUSTRE_Aligned_Mem_File_Read(ADIO_File fd, void *buf, int len,
|
||||
static void ADIOI_LUSTRE_Aligned_Mem_File_Read(ADIO_File fd, void *buf, int len,
|
||||
ADIO_Offset offset, int *err)
|
||||
{
|
||||
int ntimes, rem, newrem, i, size, nbytes;
|
||||
int rem, size, nbytes;
|
||||
if (!(len % fd->d_miniosz) && (len >= fd->d_miniosz))
|
||||
*err = pread(fd->fd_direct, buf, len, offset);
|
||||
else if (len < fd->d_miniosz)
|
||||
@ -59,7 +61,6 @@ static int ADIOI_LUSTRE_Directio(ADIO_File fd, void *buf, int len,
|
||||
{
|
||||
int err=-1, diff, size=len, nbytes = 0;
|
||||
void *newbuf;
|
||||
static char myname[] = "ADIOI_LUSTRE_Directio";
|
||||
|
||||
if (offset % fd->d_miniosz) {
|
||||
diff = fd->d_miniosz - (offset % fd->d_miniosz);
|
||||
@ -87,7 +88,7 @@ static int ADIOI_LUSTRE_Directio(ADIO_File fd, void *buf, int len,
|
||||
memcpy(newbuf, buf, size);
|
||||
ADIOI_LUSTRE_Aligned_Mem_File_Write(fd, newbuf, size, offset, &err);
|
||||
nbytes += err;
|
||||
free(newbuf);
|
||||
ADIOI_Free(newbuf);
|
||||
}
|
||||
else nbytes += pwrite(fd->fd_sys, buf, size, offset);
|
||||
}
|
||||
@ -102,7 +103,7 @@ static int ADIOI_LUSTRE_Directio(ADIO_File fd, void *buf, int len,
|
||||
ADIOI_LUSTRE_Aligned_Mem_File_Read(fd, newbuf, size, offset, &err);
|
||||
if (err > 0) memcpy(buf, newbuf, err);
|
||||
nbytes += err;
|
||||
free(newbuf);
|
||||
ADIOI_Free(newbuf);
|
||||
}
|
||||
else nbytes += pread(fd->fd_sys, buf, size, offset);
|
||||
}
|
||||
@ -136,10 +137,23 @@ static void ADIOI_LUSTRE_IOContig(ADIO_File fd, void *buf, int count,
|
||||
if (err == -1) goto ioerr;
|
||||
}
|
||||
|
||||
if (io_mode)
|
||||
if (io_mode) {
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event(ADIOI_MPE_write_a, 0, NULL);
|
||||
#endif
|
||||
err = write(fd->fd_sys, buf, len);
|
||||
else
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event(ADIOI_MPE_write_b, 0, NULL);
|
||||
#endif
|
||||
} else {
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event(ADIOI_MPE_read_a, 0, NULL);
|
||||
#endif
|
||||
err = read(fd->fd_sys, buf, len);
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event(ADIOI_MPE_read_b, 0, NULL);
|
||||
#endif
|
||||
}
|
||||
} else {
|
||||
err = ADIOI_LUSTRE_Directio(fd, buf, len, offset, io_mode);
|
||||
}
|
||||
|
954
ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_wrcoll.c
Обычный файл
954
ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_wrcoll.c
Обычный файл
@ -0,0 +1,954 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; -*- */
|
||||
/*
|
||||
* Copyright (C) 1997 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*
|
||||
* Copyright (C) 2007 Oak Ridge National Laboratory
|
||||
*
|
||||
* Copyright (C) 2008 Sun Microsystems, Lustre group
|
||||
*/
|
||||
|
||||
#include "ad_lustre.h"
|
||||
#include "adio_extern.h"
|
||||
|
||||
/* prototypes of functions used for collective writes only. */
|
||||
static void ADIOI_LUSTRE_Exch_and_write(ADIO_File fd, void *buf,
|
||||
MPI_Datatype datatype, int nprocs,
|
||||
int myrank,
|
||||
ADIOI_Access *others_req,
|
||||
ADIOI_Access *my_req,
|
||||
ADIO_Offset *offset_list,
|
||||
ADIO_Offset *len_list,
|
||||
int contig_access_count,
|
||||
int *striping_info,
|
||||
int **buf_idx, int *error_code);
|
||||
static void ADIOI_LUSTRE_Fill_send_buffer(ADIO_File fd, void *buf,
|
||||
ADIOI_Flatlist_node *flat_buf,
|
||||
char **send_buf,
|
||||
ADIO_Offset *offset_list,
|
||||
ADIO_Offset *len_list, int *send_size,
|
||||
MPI_Request *requests,
|
||||
int *sent_to_proc, int nprocs,
|
||||
int myrank, int contig_access_count,
|
||||
int *striping_info,
|
||||
int *send_buf_idx,
|
||||
int *curr_to_proc,
|
||||
int *done_to_proc, int iter,
|
||||
MPI_Aint buftype_extent);
|
||||
static void ADIOI_LUSTRE_W_Exchange_data(ADIO_File fd, void *buf,
|
||||
char *write_buf,
|
||||
ADIOI_Flatlist_node *flat_buf,
|
||||
ADIO_Offset *offset_list,
|
||||
ADIO_Offset *len_list, int *send_size,
|
||||
int *recv_size, ADIO_Offset off,
|
||||
int size, int *count,
|
||||
int *start_pos, int *partial_recv,
|
||||
int *sent_to_proc, int nprocs,
|
||||
int myrank, int buftype_is_contig,
|
||||
int contig_access_count,
|
||||
int *striping_info,
|
||||
ADIOI_Access *others_req,
|
||||
int *send_buf_idx,
|
||||
int *curr_to_proc,
|
||||
int *done_to_proc, int *hole,
|
||||
int iter, MPI_Aint buftype_extent,
|
||||
int *buf_idx, int *error_code);
|
||||
void ADIOI_Heap_merge(ADIOI_Access *others_req, int *count,
|
||||
ADIO_Offset *srt_off, int *srt_len, int *start_pos,
|
||||
int nprocs, int nprocs_recv, int total_elements);
|
||||
|
||||
void ADIOI_LUSTRE_WriteStridedColl(ADIO_File fd, void *buf, int count,
|
||||
MPI_Datatype datatype,
|
||||
int file_ptr_type, ADIO_Offset offset,
|
||||
ADIO_Status *status, int *error_code)
|
||||
{
|
||||
/* Uses a generalized version of the extended two-phase method described
|
||||
* in "An Extended Two-Phase Method for Accessing Sections of
|
||||
* Out-of-Core Arrays", Rajeev Thakur and Alok Choudhary,
|
||||
* Scientific Programming, (5)4:301--317, Winter 1996.
|
||||
* http://www.mcs.anl.gov/home/thakur/ext2ph.ps
|
||||
*/
|
||||
|
||||
ADIOI_Access *my_req;
|
||||
/* array of nprocs access structures, one for each other process has
|
||||
this process's request */
|
||||
|
||||
ADIOI_Access *others_req;
|
||||
/* array of nprocs access structures, one for each other process
|
||||
whose request is written by this process. */
|
||||
|
||||
int i, filetype_is_contig, nprocs, myrank, do_collect = 0;
|
||||
int contig_access_count = 0, buftype_is_contig, interleave_count = 0;
|
||||
int *count_my_req_per_proc, count_my_req_procs, count_others_req_procs;
|
||||
ADIO_Offset orig_fp, start_offset, end_offset, off;
|
||||
ADIO_Offset *offset_list = NULL, *st_offsets = NULL, *end_offsets = NULL;
|
||||
ADIO_Offset *len_list = NULL;
|
||||
int **buf_idx = NULL, *striping_info = NULL;
|
||||
int old_error, tmp_error;
|
||||
|
||||
MPI_Comm_size(fd->comm, &nprocs);
|
||||
MPI_Comm_rank(fd->comm, &myrank);
|
||||
|
||||
orig_fp = fd->fp_ind;
|
||||
|
||||
/* IO patten identification if cb_write isn't disabled */
|
||||
if (fd->hints->cb_write != ADIOI_HINT_DISABLE) {
|
||||
/* For this process's request, calculate the list of offsets and
|
||||
lengths in the file and determine the start and end offsets. */
|
||||
|
||||
/* Note: end_offset points to the last byte-offset that will be accessed.
|
||||
* e.g., if start_offset=0 and 100 bytes to be read, end_offset=99
|
||||
*/
|
||||
|
||||
ADIOI_Calc_my_off_len(fd, count, datatype, file_ptr_type, offset,
|
||||
&offset_list, &len_list, &start_offset,
|
||||
&end_offset, &contig_access_count);
|
||||
|
||||
/* each process communicates its start and end offsets to other
|
||||
* processes. The result is an array each of start and end offsets
|
||||
* stored in order of process rank.
|
||||
*/
|
||||
st_offsets = (ADIO_Offset *) ADIOI_Malloc(nprocs * sizeof(ADIO_Offset));
|
||||
end_offsets = (ADIO_Offset *) ADIOI_Malloc(nprocs * sizeof(ADIO_Offset));
|
||||
MPI_Allgather(&start_offset, 1, ADIO_OFFSET, st_offsets, 1,
|
||||
ADIO_OFFSET, fd->comm);
|
||||
MPI_Allgather(&end_offset, 1, ADIO_OFFSET, end_offsets, 1,
|
||||
ADIO_OFFSET, fd->comm);
|
||||
/* are the accesses of different processes interleaved? */
|
||||
for (i = 1; i < nprocs; i++)
|
||||
if ((st_offsets[i] < end_offsets[i-1]) &&
|
||||
(st_offsets[i] <= end_offsets[i]))
|
||||
interleave_count++;
|
||||
/* This is a rudimentary check for interleaving, but should suffice
|
||||
for the moment. */
|
||||
|
||||
/* Two typical access patterns can benefit from collective write.
|
||||
* 1) the processes are interleaved, and
|
||||
* 2) the req size is small.
|
||||
*/
|
||||
if (interleave_count > 0) {
|
||||
do_collect = 1;
|
||||
} else {
|
||||
do_collect = ADIOI_LUSTRE_Docollect(fd, contig_access_count,
|
||||
len_list, nprocs);
|
||||
}
|
||||
}
|
||||
ADIOI_Datatype_iscontig(datatype, &buftype_is_contig);
|
||||
|
||||
/* Decide if collective I/O should be done */
|
||||
if ((!do_collect && fd->hints->cb_write == ADIOI_HINT_AUTO) ||
|
||||
fd->hints->cb_write == ADIOI_HINT_DISABLE) {
|
||||
|
||||
/* use independent accesses */
|
||||
if (fd->hints->cb_write != ADIOI_HINT_DISABLE) {
|
||||
ADIOI_Free(offset_list);
|
||||
ADIOI_Free(len_list);
|
||||
ADIOI_Free(st_offsets);
|
||||
ADIOI_Free(end_offsets);
|
||||
}
|
||||
|
||||
fd->fp_ind = orig_fp;
|
||||
ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);
|
||||
if (buftype_is_contig && filetype_is_contig) {
|
||||
if (file_ptr_type == ADIO_EXPLICIT_OFFSET) {
|
||||
off = fd->disp + (ADIO_Offset)(fd->etype_size) * offset;
|
||||
ADIO_WriteContig(fd, buf, count, datatype,
|
||||
ADIO_EXPLICIT_OFFSET,
|
||||
off, status, error_code);
|
||||
} else
|
||||
ADIO_WriteContig(fd, buf, count, datatype, ADIO_INDIVIDUAL,
|
||||
0, status, error_code);
|
||||
} else {
|
||||
ADIO_WriteStrided(fd, buf, count, datatype, file_ptr_type,
|
||||
offset, status, error_code);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
/* Get Lustre hints information */
|
||||
ADIOI_LUSTRE_Get_striping_info(fd, &striping_info, 1);
|
||||
|
||||
/* calculate what portions of the access requests of this process are
|
||||
* located in which process
|
||||
*/
|
||||
ADIOI_LUSTRE_Calc_my_req(fd, offset_list, len_list, contig_access_count,
|
||||
striping_info, nprocs, &count_my_req_procs,
|
||||
&count_my_req_per_proc, &my_req,
|
||||
&buf_idx);
|
||||
|
||||
/* based on everyone's my_req, calculate what requests of other processes
|
||||
* will be accessed by this process.
|
||||
* count_others_req_procs = number of processes whose requests (including
|
||||
* this process itself) will be accessed by this process
|
||||
* count_others_req_per_proc[i] indicates how many separate contiguous
|
||||
* requests of proc. i will be accessed by this process.
|
||||
*/
|
||||
|
||||
ADIOI_Calc_others_req(fd, count_my_req_procs, count_my_req_per_proc,
|
||||
my_req, nprocs, myrank, &count_others_req_procs,
|
||||
&others_req);
|
||||
ADIOI_Free(count_my_req_per_proc);
|
||||
|
||||
/* exchange data and write in sizes of no more than stripe_size. */
|
||||
ADIOI_LUSTRE_Exch_and_write(fd, buf, datatype, nprocs, myrank,
|
||||
others_req, my_req, offset_list, len_list,
|
||||
contig_access_count, striping_info,
|
||||
buf_idx, error_code);
|
||||
|
||||
/* If this collective write is followed by an independent write,
|
||||
* it's possible to have those subsequent writes on other processes
|
||||
* race ahead and sneak in before the read-modify-write completes.
|
||||
* We carry out a collective communication at the end here so no one
|
||||
* can start independent i/o before collective I/O completes.
|
||||
*
|
||||
* need to do some gymnastics with the error codes so that if something
|
||||
* went wrong, all processes report error, but if a process has a more
|
||||
* specific error code, we can still have that process report the
|
||||
* additional information */
|
||||
|
||||
old_error = *error_code;
|
||||
if (*error_code != MPI_SUCCESS)
|
||||
*error_code = MPI_ERR_IO;
|
||||
|
||||
/* optimization: if only one process performing i/o, we can perform
|
||||
* a less-expensive Bcast */
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event(ADIOI_MPE_postwrite_a, 0, NULL);
|
||||
#endif
|
||||
if (fd->hints->cb_nodes == 1)
|
||||
MPI_Bcast(error_code, 1, MPI_INT,
|
||||
fd->hints->ranklist[0], fd->comm);
|
||||
else {
|
||||
tmp_error = *error_code;
|
||||
MPI_Allreduce(&tmp_error, error_code, 1, MPI_INT,
|
||||
MPI_MAX, fd->comm);
|
||||
}
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event(ADIOI_MPE_postwrite_b, 0, NULL);
|
||||
#endif
|
||||
|
||||
if ((old_error != MPI_SUCCESS) && (old_error != MPI_ERR_IO))
|
||||
*error_code = old_error;
|
||||
|
||||
|
||||
if (!buftype_is_contig)
|
||||
ADIOI_Delete_flattened(datatype);
|
||||
|
||||
/* free all memory allocated for collective I/O */
|
||||
/* free others_req */
|
||||
for (i = 0; i < nprocs; i++) {
|
||||
if (others_req[i].count) {
|
||||
ADIOI_Free(others_req[i].offsets);
|
||||
ADIOI_Free(others_req[i].lens);
|
||||
ADIOI_Free(others_req[i].mem_ptrs);
|
||||
}
|
||||
}
|
||||
ADIOI_Free(others_req);
|
||||
/* free my_req here */
|
||||
for (i = 0; i < nprocs; i++) {
|
||||
if (my_req[i].count) {
|
||||
ADIOI_Free(my_req[i].offsets);
|
||||
ADIOI_Free(my_req[i].lens);
|
||||
}
|
||||
}
|
||||
ADIOI_Free(my_req);
|
||||
for (i = 0; i < nprocs; i++) {
|
||||
ADIOI_Free(buf_idx[i]);
|
||||
}
|
||||
ADIOI_Free(buf_idx);
|
||||
ADIOI_Free(offset_list);
|
||||
ADIOI_Free(len_list);
|
||||
ADIOI_Free(st_offsets);
|
||||
ADIOI_Free(end_offsets);
|
||||
ADIOI_Free(striping_info);
|
||||
|
||||
#ifdef HAVE_STATUS_SET_BYTES
|
||||
if (status) {
|
||||
int bufsize, size;
|
||||
/* Don't set status if it isn't needed */
|
||||
MPI_Type_size(datatype, &size);
|
||||
bufsize = size * count;
|
||||
MPIR_Status_set_bytes(status, datatype, bufsize);
|
||||
}
|
||||
/* This is a temporary way of filling in status. The right way is to
|
||||
* keep track of how much data was actually written during collective I/O.
|
||||
*/
|
||||
#endif
|
||||
|
||||
fd->fp_sys_posn = -1; /* set it to null. */
|
||||
}
|
||||
|
||||
/* If successful, error_code is set to MPI_SUCCESS. Otherwise an error
|
||||
* code is created and returned in error_code.
|
||||
*/
|
||||
static void ADIOI_LUSTRE_Exch_and_write(ADIO_File fd, void *buf,
|
||||
MPI_Datatype datatype, int nprocs,
|
||||
int myrank, ADIOI_Access *others_req,
|
||||
ADIOI_Access *my_req,
|
||||
ADIO_Offset *offset_list,
|
||||
ADIO_Offset *len_list,
|
||||
int contig_access_count,
|
||||
int *striping_info, int **buf_idx,
|
||||
int *error_code)
|
||||
{
|
||||
/* Send data to appropriate processes and write in sizes of no more
|
||||
* than lustre stripe_size.
|
||||
* The idea is to reduce the amount of extra memory required for
|
||||
* collective I/O. If all data were written all at once, which is much
|
||||
* easier, it would require temp space more than the size of user_buf,
|
||||
* which is often unacceptable. For example, to write a distributed
|
||||
* array to a file, where each local array is 8Mbytes, requiring
|
||||
* at least another 8Mbytes of temp space is unacceptable.
|
||||
*/
|
||||
|
||||
int hole, i, j, m, flag, ntimes = 1 , max_ntimes, buftype_is_contig;
|
||||
ADIO_Offset st_loc = -1, end_loc = -1, min_st_loc, max_end_loc;
|
||||
ADIO_Offset off, req_off, send_off, iter_st_off, *off_list;
|
||||
ADIO_Offset max_size, step_size = 0;
|
||||
int real_size, req_len, send_len;
|
||||
int *recv_curr_offlen_ptr, *recv_count, *recv_size;
|
||||
int *send_curr_offlen_ptr, *send_size;
|
||||
int *partial_recv, *sent_to_proc, *recv_start_pos;
|
||||
int *send_buf_idx, *curr_to_proc, *done_to_proc;
|
||||
int *this_buf_idx;
|
||||
char *write_buf = NULL;
|
||||
MPI_Status status;
|
||||
ADIOI_Flatlist_node *flat_buf = NULL;
|
||||
MPI_Aint buftype_extent;
|
||||
int stripe_size = striping_info[0], avail_cb_nodes = striping_info[2];
|
||||
int data_sieving = 0;
|
||||
|
||||
*error_code = MPI_SUCCESS; /* changed below if error */
|
||||
/* only I/O errors are currently reported */
|
||||
|
||||
/* calculate the number of writes of stripe size to be done.
|
||||
* That gives the no. of communication phases as well.
|
||||
* Note:
|
||||
* Because we redistribute data in stripe-contiguous pattern for Lustre,
|
||||
* each process has the same no. of communication phases.
|
||||
*/
|
||||
|
||||
for (i = 0; i < nprocs; i++) {
|
||||
if (others_req[i].count) {
|
||||
st_loc = others_req[i].offsets[0];
|
||||
end_loc = others_req[i].offsets[0];
|
||||
break;
|
||||
}
|
||||
}
|
||||
for (i = 0; i < nprocs; i++) {
|
||||
for (j = 0; j < others_req[i].count; j++) {
|
||||
st_loc = ADIOI_MIN(st_loc, others_req[i].offsets[j]);
|
||||
end_loc = ADIOI_MAX(end_loc, (others_req[i].offsets[j] +
|
||||
others_req[i].lens[j] - 1));
|
||||
}
|
||||
}
|
||||
/* this process does no writing. */
|
||||
if ((st_loc == -1) && (end_loc == -1))
|
||||
ntimes = 0;
|
||||
MPI_Allreduce(&end_loc, &max_end_loc, 1, MPI_LONG_LONG_INT, MPI_MAX, fd->comm);
|
||||
/* avoid min_st_loc be -1 */
|
||||
if (st_loc == -1)
|
||||
st_loc = max_end_loc;
|
||||
MPI_Allreduce(&st_loc, &min_st_loc, 1, MPI_LONG_LONG_INT, MPI_MIN, fd->comm);
|
||||
/* align downward */
|
||||
min_st_loc -= min_st_loc % (ADIO_Offset)stripe_size;
|
||||
|
||||
/* Each time, only avail_cb_nodes number of IO clients perform IO,
|
||||
* so, step_size=avail_cb_nodes*stripe_size IO will be performed at most,
|
||||
* and ntimes=whole_file_portion/step_size
|
||||
*/
|
||||
step_size = (ADIO_Offset) avail_cb_nodes * stripe_size;
|
||||
max_ntimes = (max_end_loc - min_st_loc + 1) / step_size
|
||||
+ (((max_end_loc - min_st_loc + 1) % step_size) ? 1 : 0);
|
||||
/* max_ntimes = (int)((max_end_loc - min_st_loc) / step_size + 1); */
|
||||
if (ntimes)
|
||||
write_buf = (char *) ADIOI_Malloc(stripe_size);
|
||||
|
||||
/* calculate the start offset for each iteration */
|
||||
off_list = (ADIO_Offset *) ADIOI_Malloc(max_ntimes * sizeof(ADIO_Offset));
|
||||
for (m = 0; m < max_ntimes; m ++)
|
||||
off_list[m] = max_end_loc;
|
||||
for (i = 0; i < nprocs; i++) {
|
||||
for (j = 0; j < others_req[i].count; j ++) {
|
||||
req_off = others_req[i].offsets[j];
|
||||
m = (int)((req_off - min_st_loc) / step_size);
|
||||
off_list[m] = ADIOI_MIN(off_list[m], req_off);
|
||||
}
|
||||
}
|
||||
|
||||
recv_curr_offlen_ptr = (int *) ADIOI_Calloc(nprocs, sizeof(int));
|
||||
send_curr_offlen_ptr = (int *) ADIOI_Calloc(nprocs, sizeof(int));
|
||||
/* their use is explained below. calloc initializes to 0. */
|
||||
|
||||
recv_count = (int *) ADIOI_Malloc(nprocs * sizeof(int));
|
||||
/* to store count of how many off-len pairs per proc are satisfied
|
||||
in an iteration. */
|
||||
|
||||
send_size = (int *) ADIOI_Malloc(nprocs * sizeof(int));
|
||||
/* total size of data to be sent to each proc. in an iteration.
|
||||
Of size nprocs so that I can use MPI_Alltoall later. */
|
||||
|
||||
recv_size = (int *) ADIOI_Malloc(nprocs * sizeof(int));
|
||||
/* total size of data to be recd. from each proc. in an iteration. */
|
||||
|
||||
sent_to_proc = (int *) ADIOI_Calloc(nprocs, sizeof(int));
|
||||
/* amount of data sent to each proc so far. Used in
|
||||
ADIOI_Fill_send_buffer. initialized to 0 here. */
|
||||
|
||||
send_buf_idx = (int *) ADIOI_Malloc(nprocs * sizeof(int));
|
||||
curr_to_proc = (int *) ADIOI_Malloc(nprocs * sizeof(int));
|
||||
done_to_proc = (int *) ADIOI_Malloc(nprocs * sizeof(int));
|
||||
/* Above three are used in ADIOI_Fill_send_buffer */
|
||||
|
||||
this_buf_idx = (int *) ADIOI_Malloc(nprocs * sizeof(int));
|
||||
|
||||
recv_start_pos = (int *) ADIOI_Malloc(nprocs * sizeof(int));
|
||||
/* used to store the starting value of recv_curr_offlen_ptr[i] in
|
||||
this iteration */
|
||||
|
||||
ADIOI_Datatype_iscontig(datatype, &buftype_is_contig);
|
||||
if (!buftype_is_contig) {
|
||||
ADIOI_Flatten_datatype(datatype);
|
||||
flat_buf = ADIOI_Flatlist;
|
||||
while (flat_buf->type != datatype)
|
||||
flat_buf = flat_buf->next;
|
||||
}
|
||||
MPI_Type_extent(datatype, &buftype_extent);
|
||||
/* I need to check if there are any outstanding nonblocking writes to
|
||||
* the file, which could potentially interfere with the writes taking
|
||||
* place in this collective write call. Since this is not likely to be
|
||||
* common, let me do the simplest thing possible here: Each process
|
||||
* completes all pending nonblocking operations before completing.
|
||||
*/
|
||||
/*ADIOI_Complete_async(error_code);
|
||||
if (*error_code != MPI_SUCCESS) return;
|
||||
MPI_Barrier(fd->comm);
|
||||
*/
|
||||
|
||||
iter_st_off = min_st_loc;
|
||||
|
||||
/* Although we have recognized the data according to OST index,
|
||||
* a read-modify-write will be done if there is a hole between the data.
|
||||
* For example: if blocksize=60, xfersize=30 and stripe_size=100,
|
||||
* then rank0 will collect data [0, 30] and [60, 90] then write. There
|
||||
* is a hole in [30, 60], which will cause a read-modify-write in [0, 90].
|
||||
*
|
||||
* To reduce its impact on the performance, we can disable data sieving
|
||||
* by hint "ds_in_coll".
|
||||
*/
|
||||
/* check the hint for data sieving */
|
||||
data_sieving = fd->hints->fs_hints.lustre.ds_in_coll;
|
||||
|
||||
for (m = 0; m < max_ntimes; m++) {
|
||||
/* go through all others_req and my_req to check which will be received
|
||||
* and sent in this iteration.
|
||||
*/
|
||||
|
||||
/* Note that MPI guarantees that displacements in filetypes are in
|
||||
monotonically nondecreasing order and that, for writes, the
|
||||
filetypes cannot specify overlapping regions in the file. This
|
||||
simplifies implementation a bit compared to reads. */
|
||||
|
||||
/*
|
||||
off = start offset in the file for the data to be written in
|
||||
this iteration
|
||||
iter_st_off = start offset of this iteration
|
||||
real_size = size of data written (bytes) corresponding to off
|
||||
max_size = possible maximum size of data written in this iteration
|
||||
req_off = offset in the file for a particular contiguous request minus
|
||||
what was satisfied in previous iteration
|
||||
send_off = offset the request needed by other processes in this iteration
|
||||
req_len = size corresponding to req_off
|
||||
send_len = size corresponding to send_off
|
||||
*/
|
||||
|
||||
/* first calculate what should be communicated */
|
||||
for (i = 0; i < nprocs; i++)
|
||||
recv_count[i] = recv_size[i] = send_size[i] = 0;
|
||||
|
||||
off = off_list[m];
|
||||
max_size = ADIOI_MIN(step_size, max_end_loc - iter_st_off + 1);
|
||||
real_size = (int) ADIOI_MIN((off / stripe_size + 1) * stripe_size -
|
||||
off,
|
||||
end_loc - off + 1);
|
||||
|
||||
for (i = 0; i < nprocs; i++) {
|
||||
if (my_req[i].count) {
|
||||
this_buf_idx[i] = buf_idx[i][send_curr_offlen_ptr[i]];
|
||||
for (j = send_curr_offlen_ptr[i]; j < my_req[i].count; j++) {
|
||||
send_off = my_req[i].offsets[j];
|
||||
send_len = my_req[i].lens[j];
|
||||
if (send_off < iter_st_off + max_size) {
|
||||
send_size[i] += send_len;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
send_curr_offlen_ptr[i] = j;
|
||||
}
|
||||
if (others_req[i].count) {
|
||||
recv_start_pos[i] = recv_curr_offlen_ptr[i];
|
||||
for (j = recv_curr_offlen_ptr[i]; j < others_req[i].count; j++) {
|
||||
req_off = others_req[i].offsets[j];
|
||||
req_len = others_req[i].lens[j];
|
||||
if (req_off < iter_st_off + max_size) {
|
||||
recv_count[i]++;
|
||||
ADIOI_Assert((((ADIO_Offset)(MPIR_Upint)write_buf)+req_off-off) == (ADIO_Offset)(MPIR_Upint)(write_buf+req_off-off));
|
||||
MPI_Address(write_buf + req_off - off,
|
||||
&(others_req[i].mem_ptrs[j]));
|
||||
recv_size[i] += req_len;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
recv_curr_offlen_ptr[i] = j;
|
||||
}
|
||||
}
|
||||
/* use variable "hole" to pass data_sieving flag into W_Exchange_data */
|
||||
hole = data_sieving;
|
||||
ADIOI_LUSTRE_W_Exchange_data(fd, buf, write_buf, flat_buf, offset_list,
|
||||
len_list, send_size, recv_size, off, real_size,
|
||||
recv_count, recv_start_pos, partial_recv,
|
||||
sent_to_proc, nprocs, myrank,
|
||||
buftype_is_contig, contig_access_count,
|
||||
striping_info, others_req, send_buf_idx,
|
||||
curr_to_proc, done_to_proc, &hole, m,
|
||||
buftype_extent, this_buf_idx, error_code);
|
||||
if (*error_code != MPI_SUCCESS)
|
||||
goto over;
|
||||
|
||||
flag = 0;
|
||||
for (i = 0; i < nprocs; i++)
|
||||
if (recv_count[i]) {
|
||||
flag = 1;
|
||||
break;
|
||||
}
|
||||
if (flag) {
|
||||
/* check whether to do data sieving */
|
||||
if(data_sieving == ADIOI_HINT_ENABLE) {
|
||||
ADIO_WriteContig(fd, write_buf, real_size, MPI_BYTE,
|
||||
ADIO_EXPLICIT_OFFSET, off, &status,
|
||||
error_code);
|
||||
} else {
|
||||
/* if there is no hole, write data in one time;
|
||||
* otherwise, write data in several times */
|
||||
if (!hole) {
|
||||
ADIO_WriteContig(fd, write_buf, real_size, MPI_BYTE,
|
||||
ADIO_EXPLICIT_OFFSET, off, &status,
|
||||
error_code);
|
||||
} else {
|
||||
for (i = 0; i < nprocs; i++) {
|
||||
if (others_req[i].count) {
|
||||
for (j = 0; j < others_req[i].count; j++) {
|
||||
if (others_req[i].offsets[j] < off + real_size &&
|
||||
others_req[i].offsets[j] >= off) {
|
||||
ADIO_WriteContig(fd,
|
||||
write_buf + others_req[i].offsets[j] - off,
|
||||
others_req[i].lens[j],
|
||||
MPI_BYTE, ADIO_EXPLICIT_OFFSET,
|
||||
others_req[i].offsets[j], &status,
|
||||
error_code);
|
||||
if (*error_code != MPI_SUCCESS)
|
||||
goto over;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (*error_code != MPI_SUCCESS)
|
||||
goto over;
|
||||
}
|
||||
iter_st_off += max_size;
|
||||
}
|
||||
over:
|
||||
if (ntimes)
|
||||
ADIOI_Free(write_buf);
|
||||
ADIOI_Free(recv_curr_offlen_ptr);
|
||||
ADIOI_Free(send_curr_offlen_ptr);
|
||||
ADIOI_Free(recv_count);
|
||||
ADIOI_Free(send_size);
|
||||
ADIOI_Free(recv_size);
|
||||
ADIOI_Free(sent_to_proc);
|
||||
ADIOI_Free(recv_start_pos);
|
||||
ADIOI_Free(send_buf_idx);
|
||||
ADIOI_Free(curr_to_proc);
|
||||
ADIOI_Free(done_to_proc);
|
||||
ADIOI_Free(this_buf_idx);
|
||||
ADIOI_Free(off_list);
|
||||
}
|
||||
|
||||
/* Sets error_code to MPI_SUCCESS if successful, or creates an error code
|
||||
* in the case of error.
|
||||
*/
|
||||
static void ADIOI_LUSTRE_W_Exchange_data(ADIO_File fd, void *buf,
|
||||
char *write_buf,
|
||||
ADIOI_Flatlist_node *flat_buf,
|
||||
ADIO_Offset *offset_list,
|
||||
ADIO_Offset *len_list, int *send_size,
|
||||
int *recv_size, ADIO_Offset off,
|
||||
int size, int *count,
|
||||
int *start_pos, int *partial_recv,
|
||||
int *sent_to_proc, int nprocs,
|
||||
int myrank, int buftype_is_contig,
|
||||
int contig_access_count,
|
||||
int *striping_info,
|
||||
ADIOI_Access *others_req,
|
||||
int *send_buf_idx,
|
||||
int *curr_to_proc, int *done_to_proc,
|
||||
int *hole, int iter,
|
||||
MPI_Aint buftype_extent,
|
||||
int *buf_idx, int *error_code)
|
||||
{
|
||||
int i, j, nprocs_recv, nprocs_send, err;
|
||||
char **send_buf = NULL;
|
||||
MPI_Request *requests, *send_req;
|
||||
MPI_Datatype *recv_types;
|
||||
MPI_Status *statuses, status;
|
||||
int *srt_len, sum, sum_recv;
|
||||
ADIO_Offset *srt_off;
|
||||
int data_sieving = *hole;
|
||||
static char myname[] = "ADIOI_W_EXCHANGE_DATA";
|
||||
|
||||
/* create derived datatypes for recv */
|
||||
nprocs_recv = 0;
|
||||
for (i = 0; i < nprocs; i++)
|
||||
if (recv_size[i])
|
||||
nprocs_recv++;
|
||||
|
||||
recv_types = (MPI_Datatype *) ADIOI_Malloc((nprocs_recv + 1) *
|
||||
sizeof(MPI_Datatype));
|
||||
/* +1 to avoid a 0-size malloc */
|
||||
|
||||
j = 0;
|
||||
for (i = 0; i < nprocs; i++) {
|
||||
if (recv_size[i]) {
|
||||
MPI_Type_hindexed(count[i],
|
||||
&(others_req[i].lens[start_pos[i]]),
|
||||
&(others_req[i].mem_ptrs[start_pos[i]]),
|
||||
MPI_BYTE, recv_types + j);
|
||||
/* absolute displacements; use MPI_BOTTOM in recv */
|
||||
MPI_Type_commit(recv_types + j);
|
||||
j++;
|
||||
}
|
||||
}
|
||||
|
||||
/* To avoid a read-modify-write,
|
||||
* check if there are holes in the data to be written.
|
||||
* For this, merge the (sorted) offset lists others_req using a heap-merge.
|
||||
*/
|
||||
|
||||
sum = 0;
|
||||
for (i = 0; i < nprocs; i++)
|
||||
sum += count[i];
|
||||
srt_off = (ADIO_Offset *) ADIOI_Malloc((sum + 1) * sizeof(ADIO_Offset));
|
||||
srt_len = (int *) ADIOI_Malloc((sum + 1) * sizeof(int));
|
||||
/* +1 to avoid a 0-size malloc */
|
||||
|
||||
ADIOI_Heap_merge(others_req, count, srt_off, srt_len, start_pos,
|
||||
nprocs, nprocs_recv, sum);
|
||||
|
||||
/* check if there are any holes */
|
||||
*hole = 0;
|
||||
for (i = 0; i < sum - 1; i++) {
|
||||
if (srt_off[i] + srt_len[i] < srt_off[i + 1]) {
|
||||
*hole = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
/* In some cases (see John Bent ROMIO REQ # 835), an odd interaction
|
||||
* between aggregation, nominally contiguous regions, and cb_buffer_size
|
||||
* should be handled with a read-modify-write (otherwise we will write out
|
||||
* more data than we receive from everyone else (inclusive), so override
|
||||
* hole detection
|
||||
*/
|
||||
if (*hole == 0) {
|
||||
sum_recv = 0;
|
||||
for (i = 0; i < nprocs; i++)
|
||||
sum_recv += recv_size[i];
|
||||
if (size > sum_recv)
|
||||
*hole = 1;
|
||||
}
|
||||
/* check the hint for data sieving */
|
||||
if (data_sieving == ADIOI_HINT_ENABLE && nprocs_recv && *hole) {
|
||||
ADIO_ReadContig(fd, write_buf, size, MPI_BYTE,
|
||||
ADIO_EXPLICIT_OFFSET, off, &status, &err);
|
||||
// --BEGIN ERROR HANDLING--
|
||||
if (err != MPI_SUCCESS) {
|
||||
*error_code = MPIO_Err_create_code(err,
|
||||
MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__,
|
||||
MPI_ERR_IO,
|
||||
"**ioRMWrdwr", 0);
|
||||
ADIOI_Free(recv_types);
|
||||
ADIOI_Free(srt_off);
|
||||
ADIOI_Free(srt_len);
|
||||
return;
|
||||
}
|
||||
// --END ERROR HANDLING--
|
||||
}
|
||||
ADIOI_Free(srt_off);
|
||||
ADIOI_Free(srt_len);
|
||||
|
||||
nprocs_send = 0;
|
||||
for (i = 0; i < nprocs; i++)
|
||||
if (send_size[i])
|
||||
nprocs_send++;
|
||||
|
||||
if (fd->atomicity) {
|
||||
/* bug fix from Wei-keng Liao and Kenin Coloma */
|
||||
requests = (MPI_Request *) ADIOI_Malloc((nprocs_send + 1) *
|
||||
sizeof(MPI_Request));
|
||||
send_req = requests;
|
||||
} else {
|
||||
requests = (MPI_Request *) ADIOI_Malloc((nprocs_send + nprocs_recv + 1)*
|
||||
sizeof(MPI_Request));
|
||||
/* +1 to avoid a 0-size malloc */
|
||||
|
||||
/* post receives */
|
||||
j = 0;
|
||||
for (i = 0; i < nprocs; i++) {
|
||||
if (recv_size[i]) {
|
||||
MPI_Irecv(MPI_BOTTOM, 1, recv_types[j], i,
|
||||
myrank + i + 100 * iter, fd->comm, requests + j);
|
||||
j++;
|
||||
}
|
||||
}
|
||||
send_req = requests + nprocs_recv;
|
||||
}
|
||||
|
||||
/* post sends.
|
||||
* if buftype_is_contig, data can be directly sent from
|
||||
* user buf at location given by buf_idx. else use send_buf.
|
||||
*/
|
||||
if (buftype_is_contig) {
|
||||
j = 0;
|
||||
for (i = 0; i < nprocs; i++)
|
||||
if (send_size[i]) {
|
||||
ADIOI_Assert(buf_idx[i] != -1);
|
||||
MPI_Isend(((char *) buf) + buf_idx[i], send_size[i],
|
||||
MPI_BYTE, i, myrank + i + 100 * iter, fd->comm,
|
||||
send_req + j);
|
||||
j++;
|
||||
}
|
||||
} else
|
||||
if (nprocs_send) {
|
||||
/* buftype is not contig */
|
||||
send_buf = (char **) ADIOI_Malloc(nprocs * sizeof(char *));
|
||||
for (i = 0; i < nprocs; i++)
|
||||
if (send_size[i])
|
||||
send_buf[i] = (char *) ADIOI_Malloc(send_size[i]);
|
||||
|
||||
ADIOI_LUSTRE_Fill_send_buffer(fd, buf, flat_buf, send_buf, offset_list,
|
||||
len_list, send_size, send_req,
|
||||
sent_to_proc, nprocs, myrank,
|
||||
contig_access_count, striping_info,
|
||||
send_buf_idx, curr_to_proc, done_to_proc,
|
||||
iter, buftype_extent);
|
||||
/* the send is done in ADIOI_Fill_send_buffer */
|
||||
}
|
||||
|
||||
/* bug fix from Wei-keng Liao and Kenin Coloma */
|
||||
if (fd->atomicity) {
|
||||
j = 0;
|
||||
for (i = 0; i < nprocs; i++) {
|
||||
MPI_Status wkl_status;
|
||||
if (recv_size[i]) {
|
||||
MPI_Recv(MPI_BOTTOM, 1, recv_types[j], i,
|
||||
myrank + i + 100 * iter, fd->comm, &wkl_status);
|
||||
j++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < nprocs_recv; i++)
|
||||
MPI_Type_free(recv_types + i);
|
||||
ADIOI_Free(recv_types);
|
||||
|
||||
/* bug fix from Wei-keng Liao and Kenin Coloma */
|
||||
/* +1 to avoid a 0-size malloc */
|
||||
if (fd->atomicity) {
|
||||
statuses = (MPI_Status *) ADIOI_Malloc((nprocs_send + 1) *
|
||||
sizeof(MPI_Status));
|
||||
} else {
|
||||
statuses = (MPI_Status *) ADIOI_Malloc((nprocs_send + nprocs_recv + 1) *
|
||||
sizeof(MPI_Status));
|
||||
}
|
||||
|
||||
#ifdef NEEDS_MPI_TEST
|
||||
i = 0;
|
||||
if (fd->atomicity) {
|
||||
/* bug fix from Wei-keng Liao and Kenin Coloma */
|
||||
while (!i)
|
||||
MPI_Testall(nprocs_send, send_req, &i, statuses);
|
||||
} else {
|
||||
while (!i)
|
||||
MPI_Testall(nprocs_send + nprocs_recv, requests, &i, statuses);
|
||||
}
|
||||
#else
|
||||
/* bug fix from Wei-keng Liao and Kenin Coloma */
|
||||
if (fd->atomicity)
|
||||
MPI_Waitall(nprocs_send, send_req, statuses);
|
||||
else
|
||||
MPI_Waitall(nprocs_send + nprocs_recv, requests, statuses);
|
||||
#endif
|
||||
ADIOI_Free(statuses);
|
||||
ADIOI_Free(requests);
|
||||
if (!buftype_is_contig && nprocs_send) {
|
||||
for (i = 0; i < nprocs; i++)
|
||||
if (send_size[i])
|
||||
ADIOI_Free(send_buf[i]);
|
||||
ADIOI_Free(send_buf);
|
||||
}
|
||||
}
|
||||
|
||||
/* ADIOI_BUF_INCR - advance the position in the flattened user buffer by
 * buf_incr bytes without copying anything.
 *
 * Operates on the expansion site's variables: buf_incr (consumed down to 0),
 * size_in_buf (scratch), user_buf_idx, flat_buf_sz, flat_buf_idx, n_buftypes,
 * flat_buf and buftype_extent.  Expands to a brace block, so it is used
 * without a trailing semicolon.
 */
#define ADIOI_BUF_INCR \
{ \
    while (buf_incr != 0) { \
        /* step by what is left of the current contiguous piece at most */ \
        size_in_buf = ADIOI_MIN(buf_incr, flat_buf_sz); \
        user_buf_idx += size_in_buf; \
        flat_buf_sz -= size_in_buf; \
        if (flat_buf_sz == 0) { \
            /* piece exhausted: go to the next one, wrapping into the \
             * next instance of the buffer datatype after the last */ \
            if (flat_buf_idx < (flat_buf->count - 1)) { \
                flat_buf_idx++; \
            } else { \
                flat_buf_idx = 0; \
                n_buftypes++; \
            } \
            user_buf_idx = flat_buf->indices[flat_buf_idx] + \
                (ADIO_Offset)n_buftypes*(ADIO_Offset)buftype_extent; \
            flat_buf_sz = flat_buf->blocklens[flat_buf_idx]; \
        } \
        buf_incr -= size_in_buf; \
    } \
}
|
||||
|
||||
|
||||
/* ADIOI_BUF_COPY - copy `size` bytes from the flattened user buffer into
 * send_buf[p] at send_buf_idx[p], then skip whatever remains of buf_incr
 * with ADIOI_BUF_INCR.
 *
 * Operates on the expansion site's variables: size and buf_incr (both
 * consumed), size_in_buf (scratch), buf, p, send_buf, send_buf_idx,
 * user_buf_idx, flat_buf_sz, flat_buf_idx, n_buftypes, flat_buf and
 * buftype_extent.  Expands to a brace block, so it is used without a
 * trailing semicolon.
 */
#define ADIOI_BUF_COPY \
{ \
    while (size != 0) { \
        /* copy what is left of the current contiguous piece at most */ \
        size_in_buf = ADIOI_MIN(size, flat_buf_sz); \
        ADIOI_Assert((((ADIO_Offset)(MPIR_Upint)buf) + user_buf_idx) == (ADIO_Offset)(MPIR_Upint)((MPIR_Upint)buf + user_buf_idx)); \
        ADIOI_Assert(size_in_buf == (size_t)size_in_buf); \
        memcpy(&(send_buf[p][send_buf_idx[p]]), \
               ((char *) buf) + user_buf_idx, size_in_buf); \
        send_buf_idx[p] += size_in_buf; \
        user_buf_idx += size_in_buf; \
        flat_buf_sz -= size_in_buf; \
        if (flat_buf_sz == 0) { \
            /* piece exhausted: go to the next one, wrapping into the \
             * next instance of the buffer datatype after the last */ \
            if (flat_buf_idx < (flat_buf->count - 1)) { \
                flat_buf_idx++; \
            } else { \
                flat_buf_idx = 0; \
                n_buftypes++; \
            } \
            user_buf_idx = flat_buf->indices[flat_buf_idx] + \
                (ADIO_Offset)n_buftypes*(ADIO_Offset)buftype_extent; \
            flat_buf_sz = flat_buf->blocklens[flat_buf_idx]; \
        } \
        size -= size_in_buf; \
        buf_incr -= size_in_buf; \
    } \
    ADIOI_BUF_INCR \
}
|
||||
|
||||
static void ADIOI_LUSTRE_Fill_send_buffer(ADIO_File fd, void *buf,
|
||||
ADIOI_Flatlist_node *flat_buf,
|
||||
char **send_buf,
|
||||
ADIO_Offset *offset_list,
|
||||
ADIO_Offset *len_list, int *send_size,
|
||||
MPI_Request *requests,
|
||||
int *sent_to_proc, int nprocs,
|
||||
int myrank,
|
||||
int contig_access_count,
|
||||
int *striping_info,
|
||||
int *send_buf_idx,
|
||||
int *curr_to_proc,
|
||||
int *done_to_proc, int iter,
|
||||
MPI_Aint buftype_extent)
|
||||
{
|
||||
/* this function is only called if buftype is not contig */
|
||||
int i, p, flat_buf_idx, size;
|
||||
int flat_buf_sz, buf_incr, size_in_buf, jj, n_buftypes;
|
||||
ADIO_Offset off, len, rem_len, user_buf_idx;
|
||||
|
||||
/* curr_to_proc[p] = amount of data sent to proc. p that has already
|
||||
* been accounted for so far
|
||||
* done_to_proc[p] = amount of data already sent to proc. p in
|
||||
* previous iterations
|
||||
* user_buf_idx = current location in user buffer
|
||||
* send_buf_idx[p] = current location in send_buf of proc. p
|
||||
*/
|
||||
|
||||
for (i = 0; i < nprocs; i++) {
|
||||
send_buf_idx[i] = curr_to_proc[i] = 0;
|
||||
done_to_proc[i] = sent_to_proc[i];
|
||||
}
|
||||
jj = 0;
|
||||
|
||||
user_buf_idx = flat_buf->indices[0];
|
||||
flat_buf_idx = 0;
|
||||
n_buftypes = 0;
|
||||
flat_buf_sz = flat_buf->blocklens[0];
|
||||
|
||||
/* flat_buf_idx = current index into flattened buftype
|
||||
* flat_buf_sz = size of current contiguous component in flattened buf
|
||||
*/
|
||||
for (i = 0; i < contig_access_count; i++) {
|
||||
off = offset_list[i];
|
||||
rem_len = (ADIO_Offset) len_list[i];
|
||||
|
||||
/*this request may span to more than one process */
|
||||
while (rem_len != 0) {
|
||||
len = rem_len;
|
||||
/* NOTE: len value is modified by ADIOI_Calc_aggregator() to be no
|
||||
* longer than the single region that processor "p" is responsible
|
||||
* for.
|
||||
*/
|
||||
p = ADIOI_LUSTRE_Calc_aggregator(fd, off, &len, striping_info);
|
||||
|
||||
if (send_buf_idx[p] < send_size[p]) {
|
||||
if (curr_to_proc[p] + len > done_to_proc[p]) {
|
||||
if (done_to_proc[p] > curr_to_proc[p]) {
|
||||
size = (int) ADIOI_MIN(curr_to_proc[p] + len -
|
||||
done_to_proc[p],
|
||||
send_size[p] -
|
||||
send_buf_idx[p]);
|
||||
buf_incr = done_to_proc[p] - curr_to_proc[p];
|
||||
ADIOI_BUF_INCR
|
||||
ADIOI_Assert((curr_to_proc[p] + len - done_to_proc[p]) == (unsigned)(curr_to_proc[p] + len - done_to_proc[p]));
|
||||
buf_incr = (int) (curr_to_proc[p] + len -
|
||||
done_to_proc[p]);
|
||||
ADIOI_Assert((done_to_proc[p] + size) == (unsigned)(done_to_proc[p] + size));
|
||||
curr_to_proc[p] = done_to_proc[p] + size;
|
||||
ADIOI_BUF_COPY
|
||||
} else {
|
||||
size = (int) ADIOI_MIN(len, send_size[p] -
|
||||
send_buf_idx[p]);
|
||||
buf_incr = (int) len;
|
||||
ADIOI_Assert((curr_to_proc[p] + size) == (unsigned)((ADIO_Offset)curr_to_proc[p] + size));
|
||||
curr_to_proc[p] += size;
|
||||
ADIOI_BUF_COPY
|
||||
}
|
||||
if (send_buf_idx[p] == send_size[p]) {
|
||||
MPI_Isend(send_buf[p], send_size[p], MPI_BYTE, p,
|
||||
myrank + p + 100 * iter, fd->comm,
|
||||
requests + jj);
|
||||
jj++;
|
||||
}
|
||||
} else {
|
||||
ADIOI_Assert((curr_to_proc[p] + len) == (unsigned)((ADIO_Offset)curr_to_proc[p] + len));
|
||||
curr_to_proc[p] += (int) len;
|
||||
buf_incr = (int) len;
|
||||
ADIOI_BUF_INCR
|
||||
}
|
||||
} else {
|
||||
buf_incr = (int) len;
|
||||
ADIOI_BUF_INCR
|
||||
}
|
||||
off += len;
|
||||
rem_len -= len;
|
||||
}
|
||||
}
|
||||
for (i = 0; i < nprocs; i++)
|
||||
if (send_size[i])
|
||||
sent_to_proc[i] = curr_to_proc[i];
|
||||
}
|
530
ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_wrstr.c
Обычный файл
530
ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_wrstr.c
Обычный файл
@ -0,0 +1,530 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; -*- */
|
||||
/*
|
||||
* Copyright (C) 1997 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*
|
||||
* Copyright (C) 2007 Oak Ridge National Laboratory
|
||||
*
|
||||
* Copyright (C) 2008 Sun Microsystems, Lustre group
|
||||
*/
|
||||
|
||||
#include "ad_lustre.h"
|
||||
#include "adio_extern.h"
|
||||
|
||||
/* ADIOI_BUFFERED_WRITE - stage req_len bytes from buf + userbuf_off at file
 * offset req_off through writebuf with read-modify-write.
 *
 * When req_off lies beyond the staged window, the window is written out,
 * then moved to req_off (cut at the next stripe_size boundary or at
 * end_offset) and refilled by reading the file.  The window is write-locked
 * while staged and unlocked after it is written, unless fd->atomicity is
 * set.  On any I/O error the macro sets *error_code, frees writebuf and
 * returns from the enclosing function.
 *
 * Uses the expansion site's fd, buf, writebuf, writebuf_off, writebuf_len,
 * write_sz, req_off, req_len, userbuf_off, end_offset, stripe_size, status1,
 * myname and error_code; req_len and userbuf_off are modified.
 */
#define ADIOI_BUFFERED_WRITE \
{ \
    if (req_off >= writebuf_off + writebuf_len) { \
        /* flush the current window, if it holds anything */ \
        if (writebuf_len) { \
            ADIO_WriteContig(fd, writebuf, writebuf_len, MPI_BYTE, \
                             ADIO_EXPLICIT_OFFSET, writebuf_off, \
                             &status1, error_code); \
            if (!(fd->atomicity)) \
                ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
            if (*error_code != MPI_SUCCESS) { \
                *error_code = MPIO_Err_create_code(*error_code, \
                                                   MPIR_ERR_RECOVERABLE, \
                                                   myname, \
                                                   __LINE__, MPI_ERR_IO, \
                                                   "**iowswc", 0); \
                ADIOI_Free(writebuf); \
                return; \
            } \
        } \
        writebuf_off = req_off; \
        /* stripe_size alignment */ \
        writebuf_len = (unsigned) ADIOI_MIN(end_offset - writebuf_off + 1, \
                                            (writebuf_off / stripe_size + 1) * \
                                            stripe_size - writebuf_off); \
        if (!(fd->atomicity)) \
            ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
        /* preload the new window so untouched bytes are preserved */ \
        ADIO_ReadContig(fd, writebuf, writebuf_len, MPI_BYTE, \
                        ADIO_EXPLICIT_OFFSET, \
                        writebuf_off, &status1, error_code); \
        if (*error_code != MPI_SUCCESS) { \
            *error_code = MPIO_Err_create_code(*error_code, \
                                               MPIR_ERR_RECOVERABLE, \
                                               myname, \
                                               __LINE__, MPI_ERR_IO, \
                                               "**iowsrc", 0); \
            ADIOI_Free(writebuf); \
            return; \
        } \
    } \
    /* copy as much of the request as fits in the current window */ \
    write_sz = (unsigned) (ADIOI_MIN(req_len, \
                                     writebuf_off + writebuf_len - req_off)); \
    ADIOI_Assert((ADIO_Offset)write_sz == \
                 ADIOI_MIN(req_len, writebuf_off + writebuf_len - req_off)); \
    memcpy(writebuf + req_off - writebuf_off, (char *)buf +userbuf_off, write_sz); \
    /* the request continues past the window: flush, advance, repeat */ \
    while (write_sz != req_len) { \
        ADIO_WriteContig(fd, writebuf, writebuf_len, MPI_BYTE, \
                         ADIO_EXPLICIT_OFFSET, writebuf_off, &status1, error_code); \
        if (!(fd->atomicity)) \
            ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
        if (*error_code != MPI_SUCCESS) { \
            *error_code = MPIO_Err_create_code(*error_code, \
                                               MPIR_ERR_RECOVERABLE, myname, \
                                               __LINE__, MPI_ERR_IO, \
                                               "**iowswc", 0); \
            ADIOI_Free(writebuf); \
            return; \
        } \
        req_len -= write_sz; \
        userbuf_off += write_sz; \
        writebuf_off += writebuf_len; \
        /* stripe_size alignment */ \
        writebuf_len = (unsigned) ADIOI_MIN(end_offset - writebuf_off + 1, \
                                            (writebuf_off / stripe_size + 1) * \
                                            stripe_size - writebuf_off); \
        if (!(fd->atomicity)) \
            ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
        ADIO_ReadContig(fd, writebuf, writebuf_len, MPI_BYTE, \
                        ADIO_EXPLICIT_OFFSET, \
                        writebuf_off, &status1, error_code); \
        if (*error_code != MPI_SUCCESS) { \
            *error_code = MPIO_Err_create_code(*error_code, \
                                               MPIR_ERR_RECOVERABLE, myname, \
                                               __LINE__, MPI_ERR_IO, \
                                               "**iowsrc", 0); \
            ADIOI_Free(writebuf); \
            return; \
        } \
        write_sz = ADIOI_MIN(req_len, writebuf_len); \
        memcpy(writebuf, (char *)buf + userbuf_off, write_sz); \
    } \
}
|
||||
|
||||
|
||||
/* ADIOI_BUFFERED_WRITE_WITHOUT_READ - used when the filetype is contiguous
 * and the buftype is not.  Stages req_len bytes from buf + userbuf_off at
 * file offset req_off through writebuf; unlike ADIOI_BUFFERED_WRITE it does
 * no read-modify-write and takes no locks.
 *
 * When req_off lies beyond the staged window, the window is written out and
 * moved to req_off, cut at the next stripe_size boundary or at end_offset.
 * On a write error the macro sets *error_code, frees writebuf and returns
 * from the enclosing function.
 *
 * Uses the expansion site's fd, buf, writebuf, writebuf_off, writebuf_len,
 * write_sz, req_off, req_len, userbuf_off, end_offset, stripe_size, status1,
 * myname and error_code; req_len and userbuf_off are modified.
 */
#define ADIOI_BUFFERED_WRITE_WITHOUT_READ \
{ \
    if (req_off >= writebuf_off + writebuf_len) { \
        /* flush the current window and start a new one at req_off */ \
        ADIO_WriteContig(fd, writebuf, writebuf_len, MPI_BYTE, \
                         ADIO_EXPLICIT_OFFSET, writebuf_off, &status1, \
                         error_code); \
        if (*error_code != MPI_SUCCESS) { \
            *error_code = MPIO_Err_create_code(*error_code, \
                                               MPIR_ERR_RECOVERABLE, \
                                               myname, \
                                               __LINE__, MPI_ERR_IO, \
                                               "**iowswc", 0); \
            ADIOI_Free(writebuf); \
            return; \
        } \
        writebuf_off = req_off; \
        /* stripe_size alignment */ \
        writebuf_len = (unsigned) ADIOI_MIN(end_offset - writebuf_off + 1, \
                                            (writebuf_off / stripe_size + 1) * \
                                            stripe_size - writebuf_off); \
    } \
    /* copy as much of the request as fits in the current window */ \
    write_sz = (unsigned) ADIOI_MIN(req_len, writebuf_off + writebuf_len - req_off); \
    ADIOI_Assert((ADIO_Offset)write_sz == ADIOI_MIN(req_len, writebuf_off + writebuf_len - req_off)); \
    memcpy(writebuf + req_off - writebuf_off, \
           (char *)buf + userbuf_off, write_sz); \
    /* the request continues past the window: flush, advance, repeat */ \
    while (write_sz != req_len) { \
        ADIO_WriteContig(fd, writebuf, writebuf_len, MPI_BYTE, \
                         ADIO_EXPLICIT_OFFSET, writebuf_off, &status1, error_code); \
        if (*error_code != MPI_SUCCESS) { \
            *error_code = MPIO_Err_create_code(*error_code, \
                                               MPIR_ERR_RECOVERABLE, myname, \
                                               __LINE__, MPI_ERR_IO, \
                                               "**iowswc", 0); \
            ADIOI_Free(writebuf); \
            return; \
        } \
        req_len -= write_sz; \
        userbuf_off += write_sz; \
        writebuf_off += writebuf_len; \
        /* stripe_size alignment */ \
        writebuf_len = (unsigned) ADIOI_MIN(end_offset - writebuf_off + 1, \
                                            (writebuf_off / stripe_size + 1) * \
                                            stripe_size - writebuf_off); \
        write_sz = ADIOI_MIN(req_len, writebuf_len); \
        memcpy(writebuf, (char *)buf + userbuf_off, write_sz); \
    } \
}
|
||||
|
||||
void ADIOI_LUSTRE_WriteStrided(ADIO_File fd, void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Status * status,
|
||||
int *error_code)
|
||||
{
|
||||
/* offset is in units of etype relative to the filetype. */
|
||||
ADIOI_Flatlist_node *flat_buf, *flat_file;
|
||||
ADIO_Offset i_offset, sum, size_in_filetype;
|
||||
int i, j, k, st_index=0;
|
||||
int n_etypes_in_filetype;
|
||||
ADIO_Offset num, size, n_filetypes, etype_in_filetype, st_n_filetypes;
|
||||
ADIO_Offset abs_off_in_filetype=0;
|
||||
int filetype_size, etype_size, buftype_size;
|
||||
MPI_Aint filetype_extent, buftype_extent;
|
||||
int buf_count, buftype_is_contig, filetype_is_contig;
|
||||
ADIO_Offset userbuf_off;
|
||||
ADIO_Offset off, req_off, disp, end_offset=0, writebuf_off, start_off;
|
||||
char *writebuf;
|
||||
unsigned bufsize, writebuf_len, write_sz;
|
||||
ADIO_Status status1;
|
||||
ADIO_Offset new_bwr_size, new_fwr_size, st_fwr_size, fwr_size=0, bwr_size, req_len;
|
||||
int stripe_size;
|
||||
static char myname[] = "ADIOI_LUSTRE_WriteStrided";
|
||||
|
||||
if (fd->hints->ds_write == ADIOI_HINT_DISABLE) {
|
||||
/* if user has disabled data sieving on writes, use naive
|
||||
* approach instead.
|
||||
*/
|
||||
ADIOI_GEN_WriteStrided_naive(fd,
|
||||
buf,
|
||||
count,
|
||||
datatype,
|
||||
file_ptr_type,
|
||||
offset, status, error_code);
|
||||
return;
|
||||
}
|
||||
|
||||
*error_code = MPI_SUCCESS; /* changed below if error */
|
||||
|
||||
ADIOI_Datatype_iscontig(datatype, &buftype_is_contig);
|
||||
ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);
|
||||
|
||||
MPI_Type_size(fd->filetype, &filetype_size);
|
||||
if (!filetype_size) {
|
||||
*error_code = MPI_SUCCESS;
|
||||
return;
|
||||
}
|
||||
|
||||
MPI_Type_extent(fd->filetype, &filetype_extent);
|
||||
MPI_Type_size(datatype, &buftype_size);
|
||||
MPI_Type_extent(datatype, &buftype_extent);
|
||||
etype_size = fd->etype_size;
|
||||
|
||||
ADIOI_Assert((buftype_size * count) == ((ADIO_Offset)(unsigned)buftype_size * (ADIO_Offset)count));
|
||||
bufsize = buftype_size * count;
|
||||
|
||||
/* get striping info */
|
||||
stripe_size = fd->hints->striping_unit;
|
||||
|
||||
/* Different buftype to different filetype */
|
||||
if (!buftype_is_contig && filetype_is_contig) {
|
||||
/* noncontiguous in memory, contiguous in file. */
|
||||
ADIOI_Flatten_datatype(datatype);
|
||||
flat_buf = ADIOI_Flatlist;
|
||||
while (flat_buf->type != datatype)
|
||||
flat_buf = flat_buf->next;
|
||||
|
||||
off = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind :
|
||||
fd->disp + (ADIO_Offset)etype_size * offset;
|
||||
|
||||
start_off = off;
|
||||
end_offset = start_off + bufsize - 1;
|
||||
/* write stripe size buffer each time */
|
||||
writebuf = (char *) ADIOI_Malloc(ADIOI_MIN(bufsize, stripe_size));
|
||||
writebuf_off = 0;
|
||||
writebuf_len = 0;
|
||||
|
||||
/* if atomicity is true, lock the region to be accessed */
|
||||
if (fd->atomicity)
|
||||
ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, bufsize);
|
||||
|
||||
for (j = 0; j < count; j++) {
|
||||
for (i = 0; i < flat_buf->count; i++) {
|
||||
userbuf_off = (ADIO_Offset)j * (ADIO_Offset)buftype_extent +
|
||||
flat_buf->indices[i];
|
||||
req_off = off;
|
||||
req_len = flat_buf->blocklens[i];
|
||||
ADIOI_BUFFERED_WRITE_WITHOUT_READ
|
||||
off += flat_buf->blocklens[i];
|
||||
}
|
||||
}
|
||||
|
||||
/* write the buffer out finally */
|
||||
ADIO_WriteContig(fd, writebuf, writebuf_len, MPI_BYTE,
|
||||
ADIO_EXPLICIT_OFFSET, writebuf_off, &status1,
|
||||
error_code);
|
||||
|
||||
if (fd->atomicity)
|
||||
ADIOI_UNLOCK(fd, start_off, SEEK_SET, bufsize);
|
||||
if (*error_code != MPI_SUCCESS) {
|
||||
ADIOI_Free(writebuf);
|
||||
return;
|
||||
}
|
||||
ADIOI_Free(writebuf);
|
||||
if (file_ptr_type == ADIO_INDIVIDUAL)
|
||||
fd->fp_ind = off;
|
||||
} else {
|
||||
/* noncontiguous in file */
|
||||
/* filetype already flattened in ADIO_Open */
|
||||
flat_file = ADIOI_Flatlist;
|
||||
while (flat_file->type != fd->filetype)
|
||||
flat_file = flat_file->next;
|
||||
disp = fd->disp;
|
||||
|
||||
if (file_ptr_type == ADIO_INDIVIDUAL) {
|
||||
/* Wei-keng reworked type processing to be a bit more efficient */
|
||||
offset = fd->fp_ind - disp;
|
||||
n_filetypes = (offset - flat_file->indices[0]) / filetype_extent;
|
||||
offset -= (ADIO_Offset)n_filetypes * filetype_extent;
|
||||
/* now offset is local to this extent */
|
||||
|
||||
/* find the block where offset is located, skip blocklens[i]==0 */
|
||||
for (i=0; i<flat_file->count; i++) {
|
||||
ADIO_Offset dist;
|
||||
if (flat_file->blocklens[i] == 0) continue;
|
||||
dist = flat_file->indices[i] + flat_file->blocklens[i] - offset;
|
||||
/* fwr_size is from offset to the end of block i */
|
||||
if (dist == 0) {
|
||||
i++;
|
||||
offset = flat_file->indices[i];
|
||||
fwr_size = flat_file->blocklens[i];
|
||||
break;
|
||||
}
|
||||
if (dist > 0) {
|
||||
fwr_size = dist;
|
||||
break;
|
||||
}
|
||||
}
|
||||
st_index = i; /* starting index in flat_file->indices[] */
|
||||
offset += disp + (ADIO_Offset)n_filetypes*filetype_extent;
|
||||
}
|
||||
else {
|
||||
n_etypes_in_filetype = filetype_size/etype_size;
|
||||
n_filetypes = offset / n_etypes_in_filetype;
|
||||
etype_in_filetype = offset % n_etypes_in_filetype;
|
||||
size_in_filetype = etype_in_filetype * etype_size;
|
||||
|
||||
sum = 0;
|
||||
for (i = 0; i < flat_file->count; i++) {
|
||||
sum += flat_file->blocklens[i];
|
||||
if (sum > size_in_filetype) {
|
||||
st_index = i;
|
||||
fwr_size = sum - size_in_filetype;
|
||||
abs_off_in_filetype = flat_file->indices[i] +
|
||||
size_in_filetype - (sum - flat_file->blocklens[i]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* abs. offset in bytes in the file */
|
||||
offset = disp + (ADIO_Offset) n_filetypes *filetype_extent +
|
||||
abs_off_in_filetype;
|
||||
}
|
||||
|
||||
start_off = offset;
|
||||
|
||||
/* Wei-keng Liao:write request is within single flat_file
|
||||
* contig block*/
|
||||
/* this could happen, for example, with subarray types that are
|
||||
* actually fairly contiguous */
|
||||
if (buftype_is_contig && bufsize <= fwr_size) {
|
||||
req_off = start_off;
|
||||
req_len = bufsize;
|
||||
end_offset = start_off + bufsize - 1;
|
||||
writebuf = (char *) ADIOI_Malloc(ADIOI_MIN(bufsize, stripe_size));
|
||||
memset(writebuf, -1, ADIOI_MIN(bufsize, stripe_size));
|
||||
writebuf_off = 0;
|
||||
writebuf_len = 0;
|
||||
userbuf_off = 0;
|
||||
ADIOI_BUFFERED_WRITE_WITHOUT_READ
|
||||
/* write the buffer out finally */
|
||||
ADIO_WriteContig(fd, writebuf, writebuf_len, MPI_BYTE,
|
||||
ADIO_EXPLICIT_OFFSET, writebuf_off, &status1,
|
||||
error_code);
|
||||
|
||||
if (file_ptr_type == ADIO_INDIVIDUAL) {
|
||||
/* update MPI-IO file pointer to point to the first byte
|
||||
* that can be accessed in the fileview. */
|
||||
fd->fp_ind = offset + bufsize;
|
||||
if (bufsize == fwr_size) {
|
||||
do {
|
||||
st_index++;
|
||||
if (st_index == flat_file->count) {
|
||||
st_index = 0;
|
||||
n_filetypes++;
|
||||
}
|
||||
} while (flat_file->blocklens[st_index] == 0);
|
||||
fd->fp_ind = disp + flat_file->indices[st_index]
|
||||
+ (ADIO_Offset)n_filetypes*filetype_extent;
|
||||
}
|
||||
}
|
||||
fd->fp_sys_posn = -1; /* set it to null. */
|
||||
#ifdef HAVE_STATUS_SET_BYTES
|
||||
MPIR_Status_set_bytes(status, datatype, bufsize);
|
||||
#endif
|
||||
ADIOI_Free(writebuf);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Calculate end_offset, the last byte-offset that will be accessed.
|
||||
e.g., if start_offset=0 and 100 bytes to be write, end_offset=99*/
|
||||
|
||||
st_fwr_size = fwr_size;
|
||||
st_n_filetypes = n_filetypes;
|
||||
i_offset = 0;
|
||||
j = st_index;
|
||||
off = offset;
|
||||
fwr_size = ADIOI_MIN(st_fwr_size, bufsize);
|
||||
while (i_offset < bufsize) {
|
||||
i_offset += fwr_size;
|
||||
end_offset = off + fwr_size - 1;
|
||||
|
||||
j = (j+1) % flat_file->count;
|
||||
n_filetypes += (j == 0) ? 1 : 0;
|
||||
while (flat_file->blocklens[j]==0) {
|
||||
j = (j+1) % flat_file->count;
|
||||
n_filetypes += (j == 0) ? 1 : 0;
|
||||
}
|
||||
|
||||
off = disp + flat_file->indices[j] +
|
||||
n_filetypes*(ADIO_Offset)filetype_extent;
|
||||
fwr_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i_offset);
|
||||
}
|
||||
|
||||
/* if atomicity is true, lock the region to be accessed */
|
||||
if (fd->atomicity)
|
||||
ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);
|
||||
|
||||
writebuf_off = 0;
|
||||
writebuf_len = 0;
|
||||
writebuf = (char *) ADIOI_Malloc(stripe_size);
|
||||
memset(writebuf, -1, stripe_size);
|
||||
|
||||
if (buftype_is_contig && !filetype_is_contig) {
|
||||
|
||||
/* contiguous in memory, noncontiguous in file. should be the most
|
||||
common case. */
|
||||
|
||||
i_offset = 0;
|
||||
j = st_index;
|
||||
off = offset;
|
||||
n_filetypes = st_n_filetypes;
|
||||
fwr_size = ADIOI_MIN(st_fwr_size, bufsize);
|
||||
while (i_offset < bufsize) {
|
||||
if (fwr_size) {
|
||||
/* TYPE_UB and TYPE_LB can result in
|
||||
fwr_size = 0. save system call in such cases */
|
||||
/* lseek(fd->fd_sys, off, SEEK_SET);
|
||||
err = write(fd->fd_sys, ((char *) buf) + i_offset, fwr_size);*/
|
||||
|
||||
req_off = off;
|
||||
req_len = fwr_size;
|
||||
userbuf_off = i_offset;
|
||||
ADIOI_BUFFERED_WRITE
|
||||
}
|
||||
i_offset += fwr_size;
|
||||
|
||||
if (off + fwr_size < disp + flat_file->indices[j] +
|
||||
flat_file->blocklens[j] +
|
||||
n_filetypes*(ADIO_Offset)filetype_extent)
|
||||
off += fwr_size;
|
||||
/* did not reach end of contiguous block in filetype.
|
||||
no more I/O needed. off is incremented by fwr_size. */
|
||||
else {
|
||||
j = (j+1) % flat_file->count;
|
||||
n_filetypes += (j == 0) ? 1 : 0;
|
||||
while (flat_file->blocklens[j]==0) {
|
||||
j = (j+1) % flat_file->count;
|
||||
n_filetypes += (j == 0) ? 1 : 0;
|
||||
}
|
||||
off = disp + flat_file->indices[j] +
|
||||
n_filetypes*(ADIO_Offset)filetype_extent;
|
||||
fwr_size = ADIOI_MIN(flat_file->blocklens[j],
|
||||
bufsize-i_offset);
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* noncontiguous in memory as well as in file */
|
||||
|
||||
ADIOI_Flatten_datatype(datatype);
|
||||
flat_buf = ADIOI_Flatlist;
|
||||
while (flat_buf->type != datatype) flat_buf = flat_buf->next;
|
||||
|
||||
k = num = buf_count = 0;
|
||||
i_offset = flat_buf->indices[0];
|
||||
j = st_index;
|
||||
off = offset;
|
||||
n_filetypes = st_n_filetypes;
|
||||
fwr_size = st_fwr_size;
|
||||
bwr_size = flat_buf->blocklens[0];
|
||||
|
||||
while (num < bufsize) {
|
||||
size = ADIOI_MIN(fwr_size, bwr_size);
|
||||
if (size) {
|
||||
/* lseek(fd->fd_sys, off, SEEK_SET);
|
||||
err = write(fd->fd_sys, ((char *) buf) + i_offset, size); */
|
||||
|
||||
req_off = off;
|
||||
req_len = size;
|
||||
userbuf_off = i_offset;
|
||||
ADIOI_BUFFERED_WRITE
|
||||
}
|
||||
|
||||
new_fwr_size = fwr_size;
|
||||
new_bwr_size = bwr_size;
|
||||
|
||||
if (size == fwr_size) {
|
||||
/* reached end of contiguous block in file */
|
||||
j = (j+1) % flat_file->count;
|
||||
n_filetypes += (j == 0) ? 1 : 0;
|
||||
while (flat_file->blocklens[j]==0) {
|
||||
j = (j+1) % flat_file->count;
|
||||
n_filetypes += (j == 0) ? 1 : 0;
|
||||
}
|
||||
|
||||
off = disp + flat_file->indices[j] +
|
||||
n_filetypes*(ADIO_Offset)filetype_extent;
|
||||
|
||||
new_fwr_size = flat_file->blocklens[j];
|
||||
if (size != bwr_size) {
|
||||
i_offset += size;
|
||||
new_bwr_size -= size;
|
||||
}
|
||||
}
|
||||
|
||||
if (size == bwr_size) {
|
||||
/* reached end of contiguous block in memory */
|
||||
|
||||
k = (k + 1)%flat_buf->count;
|
||||
buf_count++;
|
||||
i_offset = (ADIO_Offset)buftype_extent *
|
||||
(ADIO_Offset)(buf_count/flat_buf->count) +
|
||||
flat_buf->indices[k];
|
||||
new_bwr_size = flat_buf->blocklens[k];
|
||||
if (size != fwr_size) {
|
||||
off += size;
|
||||
new_fwr_size -= size;
|
||||
}
|
||||
}
|
||||
num += size;
|
||||
fwr_size = new_fwr_size;
|
||||
bwr_size = new_bwr_size;
|
||||
}
|
||||
}
|
||||
|
||||
/* write the buffer out finally */
|
||||
if (writebuf_len) {
|
||||
ADIO_WriteContig(fd, writebuf, writebuf_len, MPI_BYTE,
|
||||
ADIO_EXPLICIT_OFFSET,
|
||||
writebuf_off, &status1, error_code);
|
||||
if (!(fd->atomicity))
|
||||
ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len);
|
||||
if (*error_code != MPI_SUCCESS) return;
|
||||
}
|
||||
if (fd->atomicity)
|
||||
ADIOI_UNLOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);
|
||||
|
||||
ADIOI_Free(writebuf);
|
||||
|
||||
if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind = off;
|
||||
}
|
||||
|
||||
fd->fp_sys_posn = -1; /* set it to null. */
|
||||
|
||||
#ifdef HAVE_STATUS_SET_BYTES
|
||||
MPIR_Status_set_bytes(status, datatype, bufsize);
|
||||
/* This is a temporary way of filling in status. The right way is to
|
||||
keep track of how much data was actually written by ADIOI_BUFFERED_WRITE. */
|
||||
#endif
|
||||
|
||||
if (!buftype_is_contig)
|
||||
ADIOI_Delete_flattened(datatype);
|
||||
}
|
@ -24,6 +24,7 @@ libadio_nfs_la_SOURCES = \
|
||||
ad_nfs.h \
|
||||
ad_nfs_done.c \
|
||||
ad_nfs_fcntl.c \
|
||||
ad_nfs_features.c \
|
||||
ad_nfs_getsh.c \
|
||||
ad_nfs_hints.c \
|
||||
ad_nfs_iread.c \
|
||||
|
@ -12,6 +12,7 @@
|
||||
|
||||
struct ADIOI_Fns_struct ADIO_NFS_operations = {
|
||||
ADIOI_NFS_Open, /* Open */
|
||||
ADIOI_FAILSAFE_OpenColl, /* OpenColl */
|
||||
ADIOI_NFS_ReadContig, /* ReadContig */
|
||||
ADIOI_NFS_WriteContig, /* WriteContig */
|
||||
ADIOI_GEN_ReadStridedColl, /* ReadStridedColl */
|
||||
@ -35,4 +36,5 @@ struct ADIOI_Fns_struct ADIO_NFS_operations = {
|
||||
ADIOI_GEN_Flush, /* Flush */
|
||||
ADIOI_NFS_Resize, /* Resize */
|
||||
ADIOI_GEN_Delete, /* Delete */
|
||||
ADIOI_NFS_Feature, /* Features */
|
||||
};
|
||||
|
@ -78,5 +78,6 @@ void ADIOI_NFS_Get_shared_fp(ADIO_File fd, int size, ADIO_Offset *shared_fp,
|
||||
int *error_code);
|
||||
void ADIOI_NFS_Set_shared_fp(ADIO_File fd, ADIO_Offset offset, int *error_code);
|
||||
void ADIOI_NFS_Resize(ADIO_File fd, ADIO_Offset size, int *error_code);
|
||||
int ADIOI_NFS_Feature(ADIO_File fd, int feature_flag);
|
||||
|
||||
#endif
|
||||
|
16
ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs_features.c
Обычный файл
16
ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs_features.c
Обычный файл
@ -0,0 +1,16 @@
|
||||
#include "adio.h"
|
||||
#include "ad_nfs.h"
|
||||
|
||||
/*
 * ADIOI_NFS_Feature - report whether the NFS driver supports an optional
 * ADIO feature.
 *
 * fd   - file handle; not consulted by this implementation.
 * flag - feature identifier (one of the ADIO_* feature constants).
 *
 * Returns 1 for shared file pointers, locks, sequential access and
 * data-sieving writes; returns 0 for scalable open and for any feature
 * not listed here.
 */
int ADIOI_NFS_Feature(ADIO_File fd, int flag)
{
    int is_supported;

    switch (flag) {
    case ADIO_SHARED_FP:
    case ADIO_LOCKS:
    case ADIO_SEQUENTIAL:
    case ADIO_DATA_SIEVING_WRITES:
        is_supported = 1;
        break;

    case ADIO_SCALABLE_OPEN:
    default:
        /* explicitly unsupported, or unknown to this driver */
        is_supported = 0;
        break;
    }

    return is_supported;
}
|
@ -59,6 +59,7 @@ int ADIOI_NFS_aio(ADIO_File fd, void *buf, int len, ADIO_Offset offset,
|
||||
|
||||
struct aiocb *aiocbp;
|
||||
ADIOI_AIO_Request *aio_req;
|
||||
MPI_Status status;
|
||||
|
||||
fd_sys = fd->fd_sys;
|
||||
|
||||
@ -108,7 +109,7 @@ int ADIOI_NFS_aio(ADIO_File fd, void *buf, int len, ADIO_Offset offset,
|
||||
/* exceeded the max. no. of outstanding requests.
|
||||
complete all previous async. requests and try again. */
|
||||
ADIO_WriteContig(fd, buf, len, MPI_BYTE, ADIO_EXPLICIT_OFFSET,
|
||||
offset, NULL, &error_code);
|
||||
offset, &status, &error_code);
|
||||
MPIO_Completed_request_create(&fd, len, &error_code, request);
|
||||
return 0;
|
||||
} else {
|
||||
|
@ -177,7 +177,7 @@ void ADIOI_NFS_ReadStrided(ADIO_File fd, void *buf, int count,
|
||||
ADIO_Offset userbuf_off;
|
||||
ADIO_Offset off, req_off, disp, end_offset=0, readbuf_off, start_off;
|
||||
char *readbuf, *tmp_buf, *value;
|
||||
int flag, st_frd_size, st_n_filetypes, readbuf_len;
|
||||
int st_frd_size, st_n_filetypes, readbuf_len;
|
||||
int new_brd_size, new_frd_size, err_flag=0, info_flag, max_bufsize;
|
||||
|
||||
static char myname[] = "ADIOI_NFS_READSTRIDED";
|
||||
@ -201,7 +201,7 @@ void ADIOI_NFS_ReadStrided(ADIO_File fd, void *buf, int count,
|
||||
/* get max_bufsize from the info object. */
|
||||
|
||||
value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
|
||||
MPI_Info_get(fd->info, "ind_rd_buffer_size", MPI_MAX_INFO_VAL, value,
|
||||
ADIOI_Info_get(fd->info, "ind_rd_buffer_size", MPI_MAX_INFO_VAL, value,
|
||||
&info_flag);
|
||||
max_bufsize = atoi(value);
|
||||
ADIOI_Free(value);
|
||||
@ -278,24 +278,31 @@ void ADIOI_NFS_ReadStrided(ADIO_File fd, void *buf, int count,
|
||||
disp = fd->disp;
|
||||
|
||||
if (file_ptr_type == ADIO_INDIVIDUAL) {
|
||||
offset = fd->fp_ind; /* in bytes */
|
||||
n_filetypes = -1;
|
||||
flag = 0;
|
||||
while (!flag) {
|
||||
n_filetypes++;
|
||||
/* Wei-keng reworked type processing to be a bit more efficient */
|
||||
offset = fd->fp_ind - disp;
|
||||
n_filetypes = (offset - flat_file->indices[0]) / filetype_extent;
|
||||
offset -= (ADIO_Offset)n_filetypes * filetype_extent;
|
||||
/* now offset is local to this extent */
|
||||
|
||||
/* find the block where offset is located, skip blocklens[i]==0 */
|
||||
for (i=0; i<flat_file->count; i++) {
|
||||
if (disp + flat_file->indices[i] +
|
||||
(ADIO_Offset) n_filetypes*filetype_extent + flat_file->blocklens[i]
|
||||
>= offset) {
|
||||
st_index = i;
|
||||
frd_size = (int) (disp + flat_file->indices[i] +
|
||||
(ADIO_Offset) n_filetypes*filetype_extent
|
||||
+ flat_file->blocklens[i] - offset);
|
||||
flag = 1;
|
||||
ADIO_Offset dist;
|
||||
if (flat_file->blocklens[i] == 0) continue;
|
||||
dist = flat_file->indices[i] + flat_file->blocklens[i] - offset;
|
||||
/* frd_size is from offset to the end of block i */
|
||||
if (dist == 0) {
|
||||
i++;
|
||||
offset = flat_file->indices[i];
|
||||
frd_size = flat_file->blocklens[i];
|
||||
break;
|
||||
}
|
||||
if (dist > 0 ) {
|
||||
frd_size = dist;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
st_index = i; /* starting index in flat_file->indices[] */
|
||||
offset += disp + (ADIO_Offset)n_filetypes*filetype_extent;
|
||||
}
|
||||
else {
|
||||
n_etypes_in_filetype = filetype_size/etype_size;
|
||||
@ -316,11 +323,42 @@ void ADIOI_NFS_ReadStrided(ADIO_File fd, void *buf, int count,
|
||||
}
|
||||
|
||||
/* abs. offset in bytes in the file */
|
||||
offset = disp + (ADIO_Offset) n_filetypes*filetype_extent + abs_off_in_filetype;
|
||||
offset = disp + (ADIO_Offset) n_filetypes*filetype_extent +
|
||||
abs_off_in_filetype;
|
||||
}
|
||||
|
||||
start_off = offset;
|
||||
|
||||
/* Wei-keng Liao: read request is within a single flat_file contig
|
||||
* block e.g. with subarray types that actually describe the whole
|
||||
* array */
|
||||
if (buftype_is_contig && bufsize <= frd_size) {
|
||||
ADIO_ReadContig(fd, buf, bufsize, MPI_BYTE, ADIO_EXPLICIT_OFFSET,
|
||||
offset, status, error_code);
|
||||
|
||||
if (file_ptr_type == ADIO_INDIVIDUAL) {
|
||||
/* update MPI-IO file pointer to point to the first byte that
|
||||
* can be accessed in the fileview. */
|
||||
fd->fp_ind = offset + bufsize;
|
||||
if (bufsize == frd_size) {
|
||||
do {
|
||||
st_index++;
|
||||
if (st_index == flat_file->count) {
|
||||
st_index = 0;
|
||||
n_filetypes++;
|
||||
}
|
||||
} while (flat_file->blocklens[st_index] == 0);
|
||||
fd->fp_ind = disp + flat_file->indices[st_index]
|
||||
+ n_filetypes*filetype_extent;
|
||||
}
|
||||
}
|
||||
fd->fp_sys_posn = -1; /* set it to null. */
|
||||
#ifdef HAVE_STATUS_SET_BYTES
|
||||
MPIR_Status_set_bytes(status, datatype, bufsize);
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
||||
/* Calculate end_offset, the last byte-offset that will be accessed.
|
||||
e.g., if start_offset=0 and 100 bytes to be read, end_offset=99*/
|
||||
|
||||
@ -333,11 +371,11 @@ void ADIOI_NFS_ReadStrided(ADIO_File fd, void *buf, int count,
|
||||
while (i < bufsize) {
|
||||
i += frd_size;
|
||||
end_offset = off + frd_size - 1;
|
||||
|
||||
if (j < (flat_file->count - 1)) j++;
|
||||
else {
|
||||
j = 0;
|
||||
n_filetypes++;
|
||||
j = (j+1) % flat_file->count;
|
||||
n_filetypes += (j == 0) ? 1 : 0;
|
||||
while (flat_file->blocklens[j]==0) {
|
||||
j = (j+1) % flat_file->count;
|
||||
n_filetypes += (j == 0) ? 1 : 0;
|
||||
}
|
||||
|
||||
off = disp + flat_file->indices[j] + (ADIO_Offset) n_filetypes*filetype_extent;
|
||||
@ -402,10 +440,11 @@ void ADIOI_NFS_ReadStrided(ADIO_File fd, void *buf, int count,
|
||||
/* did not reach end of contiguous block in filetype.
|
||||
no more I/O needed. off is incremented by frd_size. */
|
||||
else {
|
||||
if (j < (flat_file->count - 1)) j++;
|
||||
else {
|
||||
j = 0;
|
||||
n_filetypes++;
|
||||
j = (j+1) % flat_file->count;
|
||||
n_filetypes += (j == 0) ? 1 : 0;
|
||||
while (flat_file->blocklens[j]==0) {
|
||||
j = (j+1) % flat_file->count;
|
||||
n_filetypes += (j == 0) ? 1 : 0;
|
||||
}
|
||||
off = disp + flat_file->indices[j] +
|
||||
(ADIO_Offset) n_filetypes*filetype_extent;
|
||||
@ -445,12 +484,12 @@ void ADIOI_NFS_ReadStrided(ADIO_File fd, void *buf, int count,
|
||||
|
||||
if (size == frd_size) {
|
||||
/* reached end of contiguous block in file */
|
||||
if (j < (flat_file->count - 1)) j++;
|
||||
else {
|
||||
j = 0;
|
||||
n_filetypes++;
|
||||
j = (j+1) % flat_file->count;
|
||||
n_filetypes += (j == 0) ? 1 : 0;
|
||||
while (flat_file->blocklens[j]==0) {
|
||||
j = (j+1) % flat_file->count;
|
||||
n_filetypes += (j == 0) ? 1 : 0;
|
||||
}
|
||||
|
||||
off = disp + flat_file->indices[j] +
|
||||
(ADIO_Offset) n_filetypes*filetype_extent;
|
||||
|
||||
|
@ -10,120 +10,6 @@ void ADIOI_NFS_ReadComplete(ADIO_Request *request, ADIO_Status *status,
|
||||
int *error_code)
|
||||
{
|
||||
return;
|
||||
#if 0
|
||||
#ifdef ROMIO_HAVE_WORKING_AIO
|
||||
int err;
|
||||
static char myname[] = "ADIOI_NFS_READCOMPLETE";
|
||||
#ifdef ROMIO_HAVE_STRUCT_AIOCB_WITH_AIO_HANDLE
|
||||
struct aiocb *tmp1;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
if (*request == ADIO_REQUEST_NULL) {
|
||||
*error_code = MPI_SUCCESS;
|
||||
return;
|
||||
}
|
||||
|
||||
#ifdef ROMIO_HAVE_AIO_SUSPEND_TWO_ARGS
|
||||
/* old IBM */
|
||||
if ((*request)->queued) {
|
||||
do {
|
||||
#if !defined(_AIO_AIX_SOURCE) && !defined(_NO_PROTO)
|
||||
err = aio_suspend((*request)->handle,1,NULL);
|
||||
#else
|
||||
err = aio_suspend(1, (struct aiocb **) &((*request)->handle));
|
||||
#endif
|
||||
} while ((err == -1) && (errno == EINTR));
|
||||
|
||||
tmp1 = (struct aiocb *) (*request)->handle;
|
||||
if (err != -1) {
|
||||
err = aio_return(tmp1->aio_handle);
|
||||
(*request)->nbytes = err;
|
||||
errno = aio_error(tmp1->aio_handle);
|
||||
}
|
||||
else (*request)->nbytes = -1;
|
||||
|
||||
/* on DEC, it is required to call aio_return to dequeue the request.
|
||||
IBM man pages don't indicate what function to use for dequeue.
|
||||
I'm assuming it is aio_return! */
|
||||
|
||||
if (err == -1) {
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
|
||||
MPIR_ERR_RECOVERABLE, myname,
|
||||
__LINE__, MPI_ERR_IO, "**io",
|
||||
"**io %s", strerror(errno));
|
||||
}
|
||||
else *error_code = MPI_SUCCESS;
|
||||
}
|
||||
else *error_code = MPI_SUCCESS; /* if ( (*request)->queued ) */
|
||||
|
||||
#ifdef HAVE_STATUS_SET_BYTES
|
||||
if ((*request)->nbytes != -1)
|
||||
MPIR_Status_set_bytes(status, (*request)->datatype, (*request)->nbytes);
|
||||
#endif
|
||||
|
||||
#elif defined(ROMIO_HAVE_WORKING_AIO)
|
||||
/* all other aio types */
|
||||
if ((*request)->queued) {
|
||||
do {
|
||||
err = aio_suspend((const struct aiocb **) &((*request)->handle), 1, 0);
|
||||
} while ((err == -1) && (errno == EINTR));
|
||||
|
||||
if (err != -1) {
|
||||
err = aio_return((struct aiocb *) (*request)->handle);
|
||||
(*request)->nbytes = err;
|
||||
errno = aio_error((struct aiocb *) (*request)->handle);
|
||||
}
|
||||
else (*request)->nbytes = -1;
|
||||
|
||||
if (err == -1) {
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
|
||||
MPIR_ERR_RECOVERABLE, myname,
|
||||
__LINE__, MPI_ERR_IO, "**io",
|
||||
"**io %s", strerror(errno));
|
||||
}
|
||||
else *error_code = MPI_SUCCESS;
|
||||
}
|
||||
else *error_code = MPI_SUCCESS; /* if ((*request)->queued) ... */
|
||||
#ifdef HAVE_STATUS_SET_BYTES
|
||||
if ((*request)->nbytes != -1)
|
||||
MPIR_Status_set_bytes(status, (*request)->datatype, (*request)->nbytes);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef ROMIO_HAVE_WORKING_AIO
|
||||
if ((*request)->queued != -1) {
|
||||
|
||||
/* queued = -1 is an internal hack used when the request must
|
||||
be completed, but the request object should not be
|
||||
freed. This is used in ADIOI_Complete_async, because the user
|
||||
will call MPI_Wait later, which would require status to
|
||||
be filled. Ugly but works. queued = -1 should be used only
|
||||
in ADIOI_Complete_async.
|
||||
This should not affect the user in any way. */
|
||||
|
||||
/* if request is still queued in the system, it is also there
|
||||
on ADIOI_Async_list. Delete it from there. */
|
||||
if ((*request)->queued) ADIOI_Del_req_from_list(request);
|
||||
|
||||
(*request)->fd->async_count--;
|
||||
if ((*request)->handle) ADIOI_Free((*request)->handle);
|
||||
ADIOI_Free_request((ADIOI_Req_node *) (*request));
|
||||
*request = ADIO_REQUEST_NULL;
|
||||
}
|
||||
|
||||
#else
|
||||
/* no aio */
|
||||
|
||||
#ifdef HAVE_STATUS_SET_BYTES
|
||||
MPIR_Status_set_bytes(status, (*request)->datatype, (*request)->nbytes);
|
||||
#endif
|
||||
(*request)->fd->async_count--;
|
||||
ADIOI_Free_request((ADIOI_Req_node *) (*request));
|
||||
*request = ADIO_REQUEST_NULL;
|
||||
*error_code = MPI_SUCCESS;
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
@ -281,7 +281,7 @@ void ADIOI_NFS_WriteStrided(ADIO_File fd, void *buf, int count,
|
||||
ADIO_Offset userbuf_off;
|
||||
ADIO_Offset off, req_off, disp, end_offset=0, writebuf_off, start_off;
|
||||
char *writebuf, *value;
|
||||
int flag, st_fwr_size, st_n_filetypes, writebuf_len, write_sz;
|
||||
int st_fwr_size, st_n_filetypes, writebuf_len, write_sz;
|
||||
int new_bwr_size, new_fwr_size, err_flag=0, info_flag, max_bufsize;
|
||||
static char myname[] = "ADIOI_NFS_WRITESTRIDED";
|
||||
|
||||
@ -304,7 +304,7 @@ void ADIOI_NFS_WriteStrided(ADIO_File fd, void *buf, int count,
|
||||
/* get max_bufsize from the info object. */
|
||||
|
||||
value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
|
||||
MPI_Info_get(fd->info, "ind_wr_buffer_size", MPI_MAX_INFO_VAL, value,
|
||||
ADIOI_Info_get(fd->info, "ind_wr_buffer_size", MPI_MAX_INFO_VAL, value,
|
||||
&info_flag);
|
||||
max_bufsize = atoi(value);
|
||||
ADIOI_Free(value);
|
||||
@ -381,24 +381,31 @@ void ADIOI_NFS_WriteStrided(ADIO_File fd, void *buf, int count,
|
||||
disp = fd->disp;
|
||||
|
||||
if (file_ptr_type == ADIO_INDIVIDUAL) {
|
||||
offset = fd->fp_ind; /* in bytes */
|
||||
n_filetypes = -1;
|
||||
flag = 0;
|
||||
while (!flag) {
|
||||
n_filetypes++;
|
||||
/* Wei-keng reworked type processing to be a bit more efficient */
|
||||
offset = fd->fp_ind - disp;
|
||||
n_filetypes = (offset - flat_file->indices[0]) / filetype_extent;
|
||||
offset -= (ADIO_Offset)n_filetypes * filetype_extent;
|
||||
/* now offset is local to this extent */
|
||||
|
||||
/* find the block where offset is located, skip blocklens[i]==0 */
|
||||
for (i=0; i<flat_file->count; i++) {
|
||||
if (disp + flat_file->indices[i] +
|
||||
(ADIO_Offset) n_filetypes*filetype_extent + flat_file->blocklens[i]
|
||||
>= offset) {
|
||||
st_index = i;
|
||||
fwr_size = (int) (disp + flat_file->indices[i] +
|
||||
(ADIO_Offset) n_filetypes*filetype_extent
|
||||
+ flat_file->blocklens[i] - offset);
|
||||
flag = 1;
|
||||
ADIO_Offset dist;
|
||||
if (flat_file->blocklens[i] == 0) continue;
|
||||
dist = flat_file->indices[i] + flat_file->blocklens[i] - offset;
|
||||
/* fwr_size is from offset to the end of block i */
|
||||
if (dist == 0) {
|
||||
i++;
|
||||
offset = flat_file->indices[i];
|
||||
fwr_size = flat_file->blocklens[i];
|
||||
break;
|
||||
}
|
||||
if (dist > 0) {
|
||||
fwr_size = dist;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
st_index = i; /* starting index in flat_file->indices[] */
|
||||
offset += disp + (ADIO_Offset)n_filetypes*filetype_extent;
|
||||
}
|
||||
else {
|
||||
n_etypes_in_filetype = filetype_size/etype_size;
|
||||
@ -419,10 +426,40 @@ void ADIOI_NFS_WriteStrided(ADIO_File fd, void *buf, int count,
|
||||
}
|
||||
|
||||
/* abs. offset in bytes in the file */
|
||||
offset = disp + (ADIO_Offset) n_filetypes*filetype_extent + abs_off_in_filetype;
|
||||
offset = disp + (ADIO_Offset) n_filetypes*filetype_extent +
|
||||
abs_off_in_filetype;
|
||||
}
|
||||
|
||||
start_off = offset;
|
||||
/* Wei-keng Liao:write request is within single flat_file contig block*/
|
||||
/* this could happen, for example, with subarray types that are
|
||||
* actually fairly contiguous */
|
||||
if (buftype_is_contig && bufsize <= fwr_size) {
|
||||
ADIO_WriteContig(fd, buf, bufsize, MPI_BYTE, ADIO_EXPLICIT_OFFSET,
|
||||
offset, status, error_code);
|
||||
|
||||
if (file_ptr_type == ADIO_INDIVIDUAL) {
|
||||
/* update MPI-IO file pointer to point to the first byte
|
||||
* that can be accessed in the fileview. */
|
||||
fd->fp_ind = offset + bufsize;
|
||||
if (bufsize == fwr_size) {
|
||||
do {
|
||||
st_index++;
|
||||
if (st_index == flat_file->count) {
|
||||
st_index = 0;
|
||||
n_filetypes++;
|
||||
}
|
||||
} while (flat_file->blocklens[st_index] == 0);
|
||||
fd->fp_ind = disp + flat_file->indices[st_index]
|
||||
+ (ADIO_Offset)n_filetypes*filetype_extent;
|
||||
}
|
||||
}
|
||||
fd->fp_sys_posn = -1; /* set it to null. */
|
||||
#ifdef HAVE_STATUS_SET_BYTES
|
||||
MPIR_Status_set_bytes(status, datatype, bufsize);
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
||||
/* Calculate end_offset, the last byte-offset that will be accessed.
|
||||
e.g., if start_offset=0 and 100 bytes to be write, end_offset=99*/
|
||||
@ -436,14 +473,15 @@ void ADIOI_NFS_WriteStrided(ADIO_File fd, void *buf, int count,
|
||||
while (i < bufsize) {
|
||||
i += fwr_size;
|
||||
end_offset = off + fwr_size - 1;
|
||||
|
||||
if (j < (flat_file->count - 1)) j++;
|
||||
else {
|
||||
j = 0;
|
||||
n_filetypes++;
|
||||
j = (j+1) % flat_file->count;
|
||||
n_filetypes += (j == 0) ? 1 : 0;
|
||||
while (flat_file->blocklens[j]==0) {
|
||||
j = (j+1) % flat_file->count;
|
||||
n_filetypes += (j == 0) ? 1 : 0;
|
||||
}
|
||||
|
||||
off = disp + flat_file->indices[j] + (ADIO_Offset) n_filetypes*filetype_extent;
|
||||
off = disp + flat_file->indices[j] +
|
||||
(ADIO_Offset) n_filetypes*filetype_extent;
|
||||
fwr_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i);
|
||||
}
|
||||
|
||||
@ -509,10 +547,11 @@ void ADIOI_NFS_WriteStrided(ADIO_File fd, void *buf, int count,
|
||||
/* did not reach end of contiguous block in filetype.
|
||||
no more I/O needed. off is incremented by fwr_size. */
|
||||
else {
|
||||
if (j < (flat_file->count - 1)) j++;
|
||||
else {
|
||||
j = 0;
|
||||
n_filetypes++;
|
||||
j = (j+1) % flat_file->count;
|
||||
n_filetypes += (j == 0) ? 1 : 0;
|
||||
while (flat_file->blocklens[j]==0) {
|
||||
j = (j+1) % flat_file->count;
|
||||
n_filetypes += (j == 0) ? 1 : 0;
|
||||
}
|
||||
off = disp + flat_file->indices[j] +
|
||||
(ADIO_Offset) n_filetypes*filetype_extent;
|
||||
@ -552,10 +591,11 @@ void ADIOI_NFS_WriteStrided(ADIO_File fd, void *buf, int count,
|
||||
|
||||
if (size == fwr_size) {
|
||||
/* reached end of contiguous block in file */
|
||||
if (j < (flat_file->count - 1)) j++;
|
||||
else {
|
||||
j = 0;
|
||||
n_filetypes++;
|
||||
j = (j+1) % flat_file->count;
|
||||
n_filetypes += (j == 0) ? 1 : 0;
|
||||
while (flat_file->blocklens[j]==0) {
|
||||
j = (j+1) % flat_file->count;
|
||||
n_filetypes += (j == 0) ? 1 : 0;
|
||||
}
|
||||
|
||||
off = disp + flat_file->indices[j] +
|
||||
|
@ -12,6 +12,7 @@
|
||||
|
||||
struct ADIOI_Fns_struct ADIO_NTFS_operations = {
|
||||
ADIOI_NTFS_Open, /* Open */
|
||||
ADIOI_GEN_OpenColl, /* OpenColl */
|
||||
ADIOI_NTFS_ReadContig, /* ReadContig */
|
||||
ADIOI_NTFS_WriteContig, /* WriteContig */
|
||||
ADIOI_GEN_ReadStridedColl, /* ReadStridedColl */
|
||||
@ -33,4 +34,5 @@ struct ADIOI_Fns_struct ADIO_NTFS_operations = {
|
||||
ADIOI_NTFS_Flush, /* Flush */
|
||||
ADIOI_NTFS_Resize, /* Resize */
|
||||
ADIOI_GEN_Delete, /* Delete */
|
||||
ADIOI_GEN_Feature /* Features */
|
||||
};
|
||||
|
@ -60,7 +60,6 @@ int ADIOI_NTFS_aio_poll_fn(void *extra_state, MPI_Status *status)
|
||||
/* TODO: unsure how to handle this */
|
||||
}
|
||||
}else{
|
||||
MPIR_Nest_incr();
|
||||
mpi_errno = MPI_Grequest_complete(aio_req->req);
|
||||
if (mpi_errno != MPI_SUCCESS) {
|
||||
mpi_errno = MPIO_Err_create_code(MPI_SUCCESS,
|
||||
@ -69,7 +68,6 @@ int ADIOI_NTFS_aio_poll_fn(void *extra_state, MPI_Status *status)
|
||||
MPI_ERR_IO, "**mpi_grequest_complete",
|
||||
0);
|
||||
}
|
||||
MPIR_Nest_decr();
|
||||
}
|
||||
return mpi_errno;
|
||||
}
|
||||
@ -111,7 +109,6 @@ int ADIOI_NTFS_aio_wait_fn(int count, void **array_of_states,
|
||||
aio_reqlist[retObject]->lpOvl, &(aio_reqlist[retObject]->nbytes),
|
||||
FALSE)){
|
||||
/* XXX: mark completed requests as 'done'*/
|
||||
MPIR_Nest_incr();
|
||||
mpi_errno = MPI_Grequest_complete(aio_reqlist[retObject]->req);
|
||||
if (mpi_errno != MPI_SUCCESS) {
|
||||
mpi_errno = MPIO_Err_create_code(MPI_SUCCESS,
|
||||
@ -120,7 +117,6 @@ int ADIOI_NTFS_aio_wait_fn(int count, void **array_of_states,
|
||||
MPI_ERR_IO, "**mpi_grequest_complete",
|
||||
0);
|
||||
}
|
||||
MPIR_Nest_decr();
|
||||
}else{
|
||||
if(GetLastError() == ERROR_IO_INCOMPLETE){
|
||||
/* IO in progress */
|
||||
@ -146,7 +142,6 @@ int ADIOI_NTFS_aio_query_fn(void *extra_state, MPI_Status *status)
|
||||
|
||||
MPI_Status_set_elements(status, MPI_BYTE, aio_req->nbytes);
|
||||
|
||||
/* do i need to nest_incr/nest_decr here? */
|
||||
/* can never cancel so always true */
|
||||
MPI_Status_set_cancelled(status, 0);
|
||||
|
||||
|
@ -13,6 +13,7 @@
|
||||
|
||||
struct ADIOI_Fns_struct ADIO_PANFS_operations = {
|
||||
ADIOI_PANFS_Open, /* Open */
|
||||
ADIOI_GEN_OpenColl,
|
||||
ADIOI_PANFS_ReadContig, /* ReadContig */
|
||||
ADIOI_PANFS_WriteContig, /* WriteContig */
|
||||
ADIOI_GEN_ReadStridedColl, /* ReadStridedColl */
|
||||
@ -39,4 +40,5 @@ struct ADIOI_Fns_struct ADIO_PANFS_operations = {
|
||||
ADIOI_GEN_Flush, /* Flush */
|
||||
ADIOI_PANFS_Resize, /* Resize */
|
||||
ADIOI_GEN_Delete, /* Delete */
|
||||
ADIOI_GEN_Feature,
|
||||
};
|
||||
|
@ -36,7 +36,7 @@ void ADIOI_PANFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
|
||||
if (users_info != MPI_INFO_NULL) {
|
||||
value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
|
||||
|
||||
MPI_Info_get(users_info, "panfs_concurrent_write", MPI_MAX_INFO_VAL,
|
||||
ADIOI_Info_get(users_info, "panfs_concurrent_write", MPI_MAX_INFO_VAL,
|
||||
value, &flag);
|
||||
if (flag) {
|
||||
concurrent_write = strtoul(value,NULL,10);
|
||||
@ -46,10 +46,10 @@ void ADIOI_PANFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
|
||||
FPRINTF(stderr, "ADIOI_PANFS_SetInfo: the value for key \"panfs_concurrent_write\" must be the same on all processes\n");
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
MPI_Info_set(fd->info, "panfs_concurrent_write", value);
|
||||
ADIOI_Info_set(fd->info, "panfs_concurrent_write", value);
|
||||
}
|
||||
|
||||
MPI_Info_get(users_info, "panfs_layout_type", MPI_MAX_INFO_VAL,
|
||||
ADIOI_Info_get(users_info, "panfs_layout_type", MPI_MAX_INFO_VAL,
|
||||
value, &flag);
|
||||
if (flag) {
|
||||
layout_type = strtoul(value,NULL,10);
|
||||
@ -59,10 +59,10 @@ void ADIOI_PANFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
|
||||
FPRINTF(stderr, "ADIOI_PANFS_SetInfo: the value for key \"panfs_layout_type\" must be the same on all processes\n");
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
MPI_Info_set(fd->info, "panfs_layout_type", value);
|
||||
ADIOI_Info_set(fd->info, "panfs_layout_type", value);
|
||||
}
|
||||
|
||||
MPI_Info_get(users_info, "panfs_layout_stripe_unit", MPI_MAX_INFO_VAL,
|
||||
ADIOI_Info_get(users_info, "panfs_layout_stripe_unit", MPI_MAX_INFO_VAL,
|
||||
value, &flag);
|
||||
if (flag) {
|
||||
layout_stripe_unit = strtoul(value,NULL,10);
|
||||
@ -72,10 +72,10 @@ void ADIOI_PANFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
|
||||
FPRINTF(stderr, "ADIOI_PANFS_SetInfo: the value for key \"panfs_layout_stripe_unit\" must be the same on all processes\n");
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
MPI_Info_set(fd->info, "panfs_layout_stripe_unit", value);
|
||||
ADIOI_Info_set(fd->info, "panfs_layout_stripe_unit", value);
|
||||
}
|
||||
|
||||
MPI_Info_get(users_info, "panfs_layout_parity_stripe_width", MPI_MAX_INFO_VAL,
|
||||
ADIOI_Info_get(users_info, "panfs_layout_parity_stripe_width", MPI_MAX_INFO_VAL,
|
||||
value, &flag);
|
||||
if (flag && (layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID1_5_PARITY_STRIPE)) {
|
||||
layout_parity_stripe_width = strtoul(value,NULL,10);
|
||||
@ -85,10 +85,10 @@ void ADIOI_PANFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
|
||||
FPRINTF(stderr, "ADIOI_PANFS_SetInfo: the value for key \"panfs_layout_parity_stripe_width\" must be the same on all processes\n");
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
MPI_Info_set(fd->info, "panfs_layout_parity_stripe_width", value);
|
||||
ADIOI_Info_set(fd->info, "panfs_layout_parity_stripe_width", value);
|
||||
}
|
||||
|
||||
MPI_Info_get(users_info, "panfs_layout_parity_stripe_depth", MPI_MAX_INFO_VAL,
|
||||
ADIOI_Info_get(users_info, "panfs_layout_parity_stripe_depth", MPI_MAX_INFO_VAL,
|
||||
value, &flag);
|
||||
if (flag && (layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID1_5_PARITY_STRIPE)) {
|
||||
layout_parity_stripe_depth = strtoul(value,NULL,10);
|
||||
@ -98,10 +98,10 @@ void ADIOI_PANFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
|
||||
FPRINTF(stderr, "ADIOI_PANFS_SetInfo: the value for key \"panfs_layout_parity_stripe_depth\" must be the same on all processes\n");
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
MPI_Info_set(fd->info, "panfs_layout_parity_stripe_depth", value);
|
||||
ADIOI_Info_set(fd->info, "panfs_layout_parity_stripe_depth", value);
|
||||
}
|
||||
|
||||
MPI_Info_get(users_info, "panfs_layout_total_num_comps", MPI_MAX_INFO_VAL,
|
||||
ADIOI_Info_get(users_info, "panfs_layout_total_num_comps", MPI_MAX_INFO_VAL,
|
||||
value, &flag);
|
||||
if (flag) {
|
||||
layout_total_num_comps = strtoul(value,NULL,10);
|
||||
@ -111,10 +111,10 @@ void ADIOI_PANFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
|
||||
FPRINTF(stderr, "ADIOI_PANFS_SetInfo: the value for key \"panfs_layout_total_num_comps\" must be the same on all processes\n");
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
MPI_Info_set(fd->info, "panfs_layout_total_num_comps", value);
|
||||
ADIOI_Info_set(fd->info, "panfs_layout_total_num_comps", value);
|
||||
}
|
||||
|
||||
MPI_Info_get(users_info, "panfs_layout_visit_policy", MPI_MAX_INFO_VAL,
|
||||
ADIOI_Info_get(users_info, "panfs_layout_visit_policy", MPI_MAX_INFO_VAL,
|
||||
value, &flag);
|
||||
if (flag && (layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID1_5_PARITY_STRIPE || layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID10)) {
|
||||
layout_visit_policy = strtoul(value,NULL,10);
|
||||
@ -124,7 +124,7 @@ void ADIOI_PANFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
|
||||
FPRINTF(stderr, "ADIOI_PANFS_SetInfo: the value for key \"panfs_layout_visit_policy\" must be the same on all processes\n");
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
MPI_Info_set(fd->info, "panfs_layout_visit_policy", value);
|
||||
ADIOI_Info_set(fd->info, "panfs_layout_visit_policy", value);
|
||||
}
|
||||
|
||||
ADIOI_Free(value);
|
||||
|
@ -39,32 +39,32 @@ void ADIOI_PANFS_Open(ADIO_File fd, int *error_code)
|
||||
|
||||
*error_code = MPI_SUCCESS;
|
||||
value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
|
||||
MPI_Info_get(fd->info, "panfs_layout_type", MPI_MAX_INFO_VAL,
|
||||
ADIOI_Info_get(fd->info, "panfs_layout_type", MPI_MAX_INFO_VAL,
|
||||
value, &flag);
|
||||
if (flag) {
|
||||
layout_type = strtoul(value,NULL,10);
|
||||
}
|
||||
MPI_Info_get(fd->info, "panfs_layout_stripe_unit", MPI_MAX_INFO_VAL,
|
||||
ADIOI_Info_get(fd->info, "panfs_layout_stripe_unit", MPI_MAX_INFO_VAL,
|
||||
value, &flag);
|
||||
if (flag) {
|
||||
layout_stripe_unit = strtoul(value,NULL,10);
|
||||
}
|
||||
MPI_Info_get(fd->info, "panfs_layout_total_num_comps", MPI_MAX_INFO_VAL,
|
||||
ADIOI_Info_get(fd->info, "panfs_layout_total_num_comps", MPI_MAX_INFO_VAL,
|
||||
value, &flag);
|
||||
if (flag) {
|
||||
layout_total_num_comps = strtoul(value,NULL,10);
|
||||
}
|
||||
MPI_Info_get(fd->info, "panfs_layout_parity_stripe_width", MPI_MAX_INFO_VAL,
|
||||
ADIOI_Info_get(fd->info, "panfs_layout_parity_stripe_width", MPI_MAX_INFO_VAL,
|
||||
value, &flag);
|
||||
if (flag) {
|
||||
layout_parity_stripe_width = strtoul(value,NULL,10);
|
||||
}
|
||||
MPI_Info_get(fd->info, "panfs_layout_parity_stripe_depth", MPI_MAX_INFO_VAL,
|
||||
ADIOI_Info_get(fd->info, "panfs_layout_parity_stripe_depth", MPI_MAX_INFO_VAL,
|
||||
value, &flag);
|
||||
if (flag) {
|
||||
layout_parity_stripe_depth = strtoul(value,NULL,10);
|
||||
}
|
||||
MPI_Info_get(fd->info, "panfs_layout_visit_policy", MPI_MAX_INFO_VAL,
|
||||
ADIOI_Info_get(fd->info, "panfs_layout_visit_policy", MPI_MAX_INFO_VAL,
|
||||
value, &flag);
|
||||
if (flag) {
|
||||
layout_visit_policy = strtoul(value,NULL,10);
|
||||
@ -266,7 +266,7 @@ void ADIOI_PANFS_Open(ADIO_File fd, int *error_code)
|
||||
amode = amode | O_EXCL;
|
||||
|
||||
value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
|
||||
MPI_Info_get(fd->info, "panfs_concurrent_write", MPI_MAX_INFO_VAL,
|
||||
ADIOI_Info_get(fd->info, "panfs_concurrent_write", MPI_MAX_INFO_VAL,
|
||||
value, &flag);
|
||||
if (flag) {
|
||||
unsigned long int concurrent_write = strtoul(value,NULL,10);
|
||||
@ -291,41 +291,41 @@ void ADIOI_PANFS_Open(ADIO_File fd, int *error_code)
|
||||
if (rc < 0)
|
||||
{
|
||||
/* Error - set layout type to unknown */
|
||||
MPI_Info_set(fd->info, "panfs_layout_type", "PAN_FS_CLIENT_LAYOUT_TYPE__INVALID");
|
||||
ADIOI_Info_set(fd->info, "panfs_layout_type", "PAN_FS_CLIENT_LAYOUT_TYPE__INVALID");
|
||||
}
|
||||
else
|
||||
{
|
||||
ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.agg_type);
|
||||
MPI_Info_set(fd->info, "panfs_layout_type", temp_buffer);
|
||||
ADIOI_Info_set(fd->info, "panfs_layout_type", temp_buffer);
|
||||
if (file_query_args.layout.layout_is_valid == 1)
|
||||
{
|
||||
switch (file_query_args.layout.agg_type)
|
||||
{
|
||||
case PAN_FS_CLIENT_LAYOUT_TYPE__RAID0:
|
||||
ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid0.stripe_unit);
|
||||
MPI_Info_set(fd->info, "panfs_layout_stripe_unit", temp_buffer);
|
||||
ADIOI_Info_set(fd->info, "panfs_layout_stripe_unit", temp_buffer);
|
||||
ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid0.total_num_comps);
|
||||
MPI_Info_set(fd->info, "panfs_layout_total_num_comps", temp_buffer);
|
||||
ADIOI_Info_set(fd->info, "panfs_layout_total_num_comps", temp_buffer);
|
||||
break;
|
||||
case PAN_FS_CLIENT_LAYOUT_TYPE__RAID1_5_PARITY_STRIPE:
|
||||
ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid1_5_parity_stripe.stripe_unit);
|
||||
MPI_Info_set(fd->info, "panfs_layout_stripe_unit", temp_buffer);
|
||||
ADIOI_Info_set(fd->info, "panfs_layout_stripe_unit", temp_buffer);
|
||||
ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid1_5_parity_stripe.parity_stripe_width);
|
||||
MPI_Info_set(fd->info, "panfs_layout_parity_stripe_width", temp_buffer);
|
||||
ADIOI_Info_set(fd->info, "panfs_layout_parity_stripe_width", temp_buffer);
|
||||
ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid1_5_parity_stripe.parity_stripe_depth);
|
||||
MPI_Info_set(fd->info, "panfs_layout_parity_stripe_depth", temp_buffer);
|
||||
ADIOI_Info_set(fd->info, "panfs_layout_parity_stripe_depth", temp_buffer);
|
||||
ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid1_5_parity_stripe.total_num_comps);
|
||||
MPI_Info_set(fd->info, "panfs_layout_total_num_comps", temp_buffer);
|
||||
ADIOI_Info_set(fd->info, "panfs_layout_total_num_comps", temp_buffer);
|
||||
ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid1_5_parity_stripe.layout_visit_policy);
|
||||
MPI_Info_set(fd->info, "panfs_layout_visit_policy", temp_buffer);
|
||||
ADIOI_Info_set(fd->info, "panfs_layout_visit_policy", temp_buffer);
|
||||
break;
|
||||
case PAN_FS_CLIENT_LAYOUT_TYPE__RAID10:
|
||||
ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid10.stripe_unit);
|
||||
MPI_Info_set(fd->info, "panfs_layout_stripe_unit", temp_buffer);
|
||||
ADIOI_Info_set(fd->info, "panfs_layout_stripe_unit", temp_buffer);
|
||||
ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid10.total_num_comps);
|
||||
MPI_Info_set(fd->info, "panfs_layout_total_num_comps", temp_buffer);
|
||||
ADIOI_Info_set(fd->info, "panfs_layout_total_num_comps", temp_buffer);
|
||||
ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid10.layout_visit_policy);
|
||||
MPI_Info_set(fd->info, "panfs_layout_visit_policy", temp_buffer);
|
||||
ADIOI_Info_set(fd->info, "panfs_layout_visit_policy", temp_buffer);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -24,7 +24,7 @@ void ADIOI_PFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
|
||||
if (users_info != MPI_INFO_NULL) {
|
||||
value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
|
||||
|
||||
MPI_Info_get(users_info, "striping_factor", MPI_MAX_INFO_VAL,
|
||||
ADIOI_Info_get(users_info, "striping_factor", MPI_MAX_INFO_VAL,
|
||||
value, &flag);
|
||||
if (flag) {
|
||||
str_factor=atoi(value);
|
||||
@ -40,7 +40,7 @@ void ADIOI_PFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
|
||||
/* --END ERROR HANDLING-- */
|
||||
}
|
||||
|
||||
MPI_Info_get(users_info, "striping_unit", MPI_MAX_INFO_VAL,
|
||||
ADIOI_Info_get(users_info, "striping_unit", MPI_MAX_INFO_VAL,
|
||||
value, &flag);
|
||||
if (flag) {
|
||||
str_unit=atoi(value);
|
||||
@ -56,7 +56,7 @@ void ADIOI_PFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
|
||||
/* --END ERROR HANDLING-- */
|
||||
}
|
||||
|
||||
MPI_Info_get(users_info, "start_iodevice", MPI_MAX_INFO_VAL,
|
||||
ADIOI_Info_get(users_info, "start_iodevice", MPI_MAX_INFO_VAL,
|
||||
value, &flag);
|
||||
if (flag) {
|
||||
start_iodev=atoi(value);
|
||||
@ -119,15 +119,15 @@ void ADIOI_PFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
|
||||
If so, mark it as true in fd->info and turn it on in
|
||||
ADIOI_PFS_Open after the file is opened */
|
||||
|
||||
MPI_Info_get(users_info, "pfs_svr_buf", MPI_MAX_INFO_VAL,
|
||||
ADIOI_Info_get(users_info, "pfs_svr_buf", MPI_MAX_INFO_VAL,
|
||||
value, &flag);
|
||||
if (flag && (!strcmp(value, "true")))
|
||||
MPI_Info_set(fd->info, "pfs_svr_buf", "true");
|
||||
else MPI_Info_set(fd->info, "pfs_svr_buf", "false");
|
||||
ADIOI_Info_set(fd->info, "pfs_svr_buf", "true");
|
||||
else ADIOI_Info_set(fd->info, "pfs_svr_buf", "false");
|
||||
|
||||
ADIOI_Free(value);
|
||||
}
|
||||
else MPI_Info_set(fd->info, "pfs_svr_buf", "false");
|
||||
else ADIOI_Info_set(fd->info, "pfs_svr_buf", "false");
|
||||
|
||||
/* set the values for collective I/O and data sieving parameters */
|
||||
ADIOI_GEN_SetInfo(fd, users_info, error_code);
|
||||
@ -144,23 +144,23 @@ void ADIOI_PFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
|
||||
if (users_info != MPI_INFO_NULL) {
|
||||
value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
|
||||
|
||||
MPI_Info_get(users_info, "pfs_svr_buf", MPI_MAX_INFO_VAL,
|
||||
ADIOI_Info_get(users_info, "pfs_svr_buf", MPI_MAX_INFO_VAL,
|
||||
value, &flag);
|
||||
if (flag && (!strcmp(value, "true") || !strcmp(value, "false"))) {
|
||||
value_in_fd = (char *)
|
||||
ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
|
||||
MPI_Info_get(fd->info, "pfs_svr_buf", MPI_MAX_INFO_VAL,
|
||||
ADIOI_Info_get(fd->info, "pfs_svr_buf", MPI_MAX_INFO_VAL,
|
||||
value_in_fd, &flag);
|
||||
if (strcmp(value, value_in_fd)) {
|
||||
if (!strcmp(value, "true")) {
|
||||
err = fcntl(fd->fd_sys, F_PFS_SVR_BUF, TRUE);
|
||||
if (!err)
|
||||
MPI_Info_set(fd->info, "pfs_svr_buf", "true");
|
||||
ADIOI_Info_set(fd->info, "pfs_svr_buf", "true");
|
||||
}
|
||||
else {
|
||||
err = fcntl(fd->fd_sys, F_PFS_SVR_BUF, FALSE);
|
||||
if (!err)
|
||||
MPI_Info_set(fd->info, "pfs_svr_buf", "false");
|
||||
ADIOI_Info_set(fd->info, "pfs_svr_buf", "false");
|
||||
}
|
||||
}
|
||||
ADIOI_Free(value_in_fd);
|
||||
|
@ -49,11 +49,11 @@ void ADIOI_PFS_Open(ADIO_File fd, int *error_code)
|
||||
to ADIOI_PFS_SetInfo. Turn it on now, since we now have a
|
||||
valid file descriptor. */
|
||||
|
||||
MPI_Info_get(fd->info, "pfs_svr_buf", MPI_MAX_INFO_VAL,
|
||||
ADIOI_Info_get(fd->info, "pfs_svr_buf", MPI_MAX_INFO_VAL,
|
||||
value, &flag);
|
||||
if (flag && (!strcmp(value, "true"))) {
|
||||
err = fcntl(fd->fd_sys, F_PFS_SVR_BUF, TRUE);
|
||||
if (err) MPI_Info_set(fd->info, "pfs_svr_buf", "false");
|
||||
if (err) ADIOI_Info_set(fd->info, "pfs_svr_buf", "false");
|
||||
}
|
||||
|
||||
/* get file striping information and set it in info */
|
||||
@ -61,13 +61,13 @@ void ADIOI_PFS_Open(ADIO_File fd, int *error_code)
|
||||
|
||||
if (!err) {
|
||||
ADIOI_Snprintf(value, MPI_MAX_INFO_VAL+1, "%d", attr.s_sunitsize);
|
||||
MPI_Info_set(fd->info, "striping_unit", value);
|
||||
ADIOI_Info_set(fd->info, "striping_unit", value);
|
||||
|
||||
ADIOI_Snprintf(value, MPI_MAX_INFO_VAL+1, "%d", attr.s_sfactor);
|
||||
MPI_Info_set(fd->info, "striping_factor", value);
|
||||
ADIOI_Info_set(fd->info, "striping_factor", value);
|
||||
|
||||
ADIOI_Snprintf(value, MPI_MAX_INFO_VAL+1, "%d", attr.s_start_sdir);
|
||||
MPI_Info_set(fd->info, "start_iodevice", value);
|
||||
ADIOI_Info_set(fd->info, "start_iodevice", value);
|
||||
}
|
||||
ADIOI_Free(value);
|
||||
|
||||
|
@ -25,6 +25,7 @@ libadio_piofs_la_SOURCES = \
|
||||
ad_piofs.c \
|
||||
ad_piofs.h \
|
||||
ad_piofs_fcntl.c \
|
||||
ad_piofs_features.c \
|
||||
ad_piofs_hints.c \
|
||||
ad_piofs_open.c \
|
||||
ad_piofs_read.c \
|
||||
|
@ -33,4 +33,5 @@ struct ADIOI_Fns_struct ADIO_PIOFS_operations = {
|
||||
ADIOI_GEN_Flush, /* Flush */
|
||||
ADIOI_GEN_Resize, /* Resize */
|
||||
ADIOI_GEN_Delete, /* Delete */
|
||||
ADIOI_PIOFS_Feature,
|
||||
};
|
||||
|
@ -35,4 +35,6 @@ void ADIOI_PIOFS_WriteStrided(ADIO_File fd, void *buf, int count,
|
||||
*error_code);
|
||||
void ADIOI_PIOFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code);
|
||||
|
||||
void ADIOI_PIOFS_Feature(ADIO_File fd, int flag);
|
||||
|
||||
#endif
|
||||
|
13
ompi/mca/io/romio/romio/adio/ad_piofs/ad_piofs_features.c
Обычный файл
13
ompi/mca/io/romio/romio/adio/ad_piofs/ad_piofs_features.c
Обычный файл
@ -0,0 +1,13 @@
|
||||
int ADIOI_PIOFS_Features(int flag)
|
||||
{
|
||||
switch(flag) {
|
||||
case ADIO_LOCKS:
|
||||
case ADIO_SHARED_FP:
|
||||
case ADIO_ATOMIC_MODE:
|
||||
case ADIO_DATA_SIEVING_WRITES:
|
||||
case ADIO_SCALABLE_OPEN:
|
||||
default:
|
||||
return 0;
|
||||
break;
|
||||
}
|
||||
}
|
@ -25,7 +25,7 @@ void ADIOI_PIOFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
|
||||
if (users_info != MPI_INFO_NULL) {
|
||||
value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
|
||||
|
||||
MPI_Info_get(users_info, "striping_factor", MPI_MAX_INFO_VAL,
|
||||
ADIOI_Info_get(users_info, "striping_factor", MPI_MAX_INFO_VAL,
|
||||
value, &flag);
|
||||
if (flag) {
|
||||
str_factor=atoi(value);
|
||||
@ -37,7 +37,7 @@ void ADIOI_PIOFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
|
||||
}
|
||||
}
|
||||
|
||||
MPI_Info_get(users_info, "striping_unit", MPI_MAX_INFO_VAL,
|
||||
ADIOI_Info_get(users_info, "striping_unit", MPI_MAX_INFO_VAL,
|
||||
value, &flag);
|
||||
if (flag) {
|
||||
str_unit=atoi(value);
|
||||
@ -49,7 +49,7 @@ void ADIOI_PIOFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
|
||||
}
|
||||
}
|
||||
|
||||
MPI_Info_get(users_info, "start_iodevice", MPI_MAX_INFO_VAL,
|
||||
ADIOI_Info_get(users_info, "start_iodevice", MPI_MAX_INFO_VAL,
|
||||
value, &flag);
|
||||
if (flag) {
|
||||
start_iodev=atoi(value);
|
||||
|
@ -49,13 +49,13 @@ void ADIOI_PIOFS_Open(ADIO_File fd, int *error_code)
|
||||
|
||||
if (!err) {
|
||||
ADIOI_Snprintf(value, MPI_MAX_INFO_VAL+1, "%d", piofs_fstat.st_bsu);
|
||||
MPI_Info_set(fd->info, "striping_unit", value);
|
||||
ADIOI_Info_set(fd->info, "striping_unit", value);
|
||||
|
||||
ADIOI_Snprintf(value, MPI_MAX_INFO_VAL+1, "%d", piofs_fstat.st_cells);
|
||||
MPI_Info_set(fd->info, "striping_factor", value);
|
||||
ADIOI_Info_set(fd->info, "striping_factor", value);
|
||||
|
||||
ADIOI_Snprintf(value, MPI_MAX_INFO_VAL+1, "%d", piofs_fstat.st_base_node);
|
||||
MPI_Info_set(fd->info, "start_iodevice", value);
|
||||
ADIOI_Info_set(fd->info, "start_iodevice", value);
|
||||
}
|
||||
ADIOI_Free(value);
|
||||
|
||||
|
@ -33,4 +33,5 @@ struct ADIOI_Fns_struct ADIO_PVFS_operations = {
|
||||
ADIOI_PVFS_Flush, /* Flush */
|
||||
ADIOI_PVFS_Resize, /* Resize */
|
||||
ADIOI_PVFS_Delete, /* Delete */
|
||||
ADIOI_PVFS_Feature, /* Features */
|
||||
};
|
||||
|
@ -17,8 +17,8 @@ void ADIOI_PVFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
|
||||
/* This must be part of the open call. can set striping parameters
|
||||
if necessary. */
|
||||
MPI_Info_create(&(fd->info));
|
||||
MPI_Info_set(fd->info, "romio_pvfs_listio_read", "disable");
|
||||
MPI_Info_set(fd->info, "romio_pvfs_listio_write", "disable");
|
||||
ADIOI_Info_set(fd->info, "romio_pvfs_listio_read", "disable");
|
||||
ADIOI_Info_set(fd->info, "romio_pvfs_listio_write", "disable");
|
||||
fd->hints->fs_hints.pvfs.listio_read = ADIOI_HINT_DISABLE;
|
||||
fd->hints->fs_hints.pvfs.listio_write = ADIOI_HINT_DISABLE;
|
||||
|
||||
@ -27,7 +27,7 @@ void ADIOI_PVFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
|
||||
if (users_info != MPI_INFO_NULL) {
|
||||
value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
|
||||
|
||||
MPI_Info_get(users_info, "striping_factor", MPI_MAX_INFO_VAL,
|
||||
ADIOI_Info_get(users_info, "striping_factor", MPI_MAX_INFO_VAL,
|
||||
value, &flag);
|
||||
if (flag) {
|
||||
str_factor=atoi(value);
|
||||
@ -41,10 +41,10 @@ void ADIOI_PVFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
|
||||
return;
|
||||
/* --END ERROR HANDLING-- */
|
||||
}
|
||||
else MPI_Info_set(fd->info, "striping_factor", value);
|
||||
else ADIOI_Info_set(fd->info, "striping_factor", value);
|
||||
}
|
||||
|
||||
MPI_Info_get(users_info, "striping_unit", MPI_MAX_INFO_VAL,
|
||||
ADIOI_Info_get(users_info, "striping_unit", MPI_MAX_INFO_VAL,
|
||||
value, &flag);
|
||||
if (flag) {
|
||||
str_unit=atoi(value);
|
||||
@ -58,10 +58,10 @@ void ADIOI_PVFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
|
||||
return;
|
||||
/* --END ERROR HANDLING-- */
|
||||
}
|
||||
else MPI_Info_set(fd->info, "striping_unit", value);
|
||||
else ADIOI_Info_set(fd->info, "striping_unit", value);
|
||||
}
|
||||
|
||||
MPI_Info_get(users_info, "start_iodevice", MPI_MAX_INFO_VAL,
|
||||
ADIOI_Info_get(users_info, "start_iodevice", MPI_MAX_INFO_VAL,
|
||||
value, &flag);
|
||||
if (flag) {
|
||||
start_iodev=atoi(value);
|
||||
@ -75,25 +75,25 @@ void ADIOI_PVFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
|
||||
return;
|
||||
/* --END ERROR HANDLING-- */
|
||||
}
|
||||
else MPI_Info_set(fd->info, "start_iodevice", value);
|
||||
else ADIOI_Info_set(fd->info, "start_iodevice", value);
|
||||
}
|
||||
|
||||
MPI_Info_get(users_info, "romio_pvfs_listio_read",
|
||||
ADIOI_Info_get(users_info, "romio_pvfs_listio_read",
|
||||
MPI_MAX_INFO_VAL,
|
||||
value, &flag);
|
||||
if (flag) {
|
||||
if ( !strcmp(value, "enable") || !strcmp(value, "ENABLE"))
|
||||
{
|
||||
MPI_Info_set(fd->info, "romio_pvfs_listio_read", value);
|
||||
ADIOI_Info_set(fd->info, "romio_pvfs_listio_read", value);
|
||||
fd->hints->fs_hints.pvfs.listio_read = ADIOI_HINT_ENABLE;
|
||||
} else if ( !strcmp(value, "disable") || !strcmp(value, "DISABLE"))
|
||||
{
|
||||
MPI_Info_set(fd->info , "romio_pvfs_listio_read", value);
|
||||
ADIOI_Info_set(fd->info , "romio_pvfs_listio_read", value);
|
||||
fd->hints->fs_hints.pvfs.listio_read = ADIOI_HINT_DISABLE;
|
||||
}
|
||||
else if ( !strcmp(value, "automatic") || !strcmp(value, "AUTOMATIC"))
|
||||
{
|
||||
MPI_Info_set(fd->info, "romio_pvfs_listio_read", value);
|
||||
ADIOI_Info_set(fd->info, "romio_pvfs_listio_read", value);
|
||||
fd->hints->fs_hints.pvfs.listio_read = ADIOI_HINT_AUTO;
|
||||
}
|
||||
tmp_val = fd->hints->fs_hints.pvfs.listio_read;
|
||||
@ -107,21 +107,21 @@ void ADIOI_PVFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
|
||||
/* --END ERROR HANDLING-- */
|
||||
}
|
||||
}
|
||||
MPI_Info_get(users_info, "romio_pvfs_listio_write", MPI_MAX_INFO_VAL,
|
||||
ADIOI_Info_get(users_info, "romio_pvfs_listio_write", MPI_MAX_INFO_VAL,
|
||||
value, &flag);
|
||||
if (flag) {
|
||||
if ( !strcmp(value, "enable") || !strcmp(value, "ENABLE"))
|
||||
{
|
||||
MPI_Info_set(fd->info, "romio_pvfs_listio_write", value);
|
||||
ADIOI_Info_set(fd->info, "romio_pvfs_listio_write", value);
|
||||
fd->hints->fs_hints.pvfs.listio_write = ADIOI_HINT_ENABLE;
|
||||
} else if ( !strcmp(value, "disable") || !strcmp(value, "DISABLE"))
|
||||
{
|
||||
MPI_Info_set(fd->info, "romio_pvfs_listio_write", value);
|
||||
ADIOI_Info_set(fd->info, "romio_pvfs_listio_write", value);
|
||||
fd->hints->fs_hints.pvfs.listio_write = ADIOI_HINT_DISABLE;
|
||||
}
|
||||
else if ( !strcmp(value, "automatic") || !strcmp(value, "AUTOMATIC"))
|
||||
{
|
||||
MPI_Info_set(fd->info, "romio_pvfs_listio_write", value);
|
||||
ADIOI_Info_set(fd->info, "romio_pvfs_listio_write", value);
|
||||
fd->hints->fs_hints.pvfs.listio_write = ADIOI_HINT_AUTO;
|
||||
}
|
||||
tmp_val = fd->hints->fs_hints.pvfs.listio_write;
|
||||
|
@ -37,15 +37,15 @@ void ADIOI_PVFS_Open(ADIO_File fd, int *error_code)
|
||||
|
||||
value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
|
||||
|
||||
MPI_Info_get(fd->info, "striping_factor", MPI_MAX_INFO_VAL,
|
||||
ADIOI_Info_get(fd->info, "striping_factor", MPI_MAX_INFO_VAL,
|
||||
value, &flag);
|
||||
if (flag && (atoi(value) > 0)) pstat.pcount = atoi(value);
|
||||
|
||||
MPI_Info_get(fd->info, "striping_unit", MPI_MAX_INFO_VAL,
|
||||
ADIOI_Info_get(fd->info, "striping_unit", MPI_MAX_INFO_VAL,
|
||||
value, &flag);
|
||||
if (flag && (atoi(value) > 0)) pstat.ssize = atoi(value);
|
||||
|
||||
MPI_Info_get(fd->info, "start_iodevice", MPI_MAX_INFO_VAL,
|
||||
ADIOI_Info_get(fd->info, "start_iodevice", MPI_MAX_INFO_VAL,
|
||||
value, &flag);
|
||||
if (flag && (atoi(value) >= 0)) pstat.base = atoi(value);
|
||||
|
||||
@ -71,11 +71,11 @@ void ADIOI_PVFS_Open(ADIO_File fd, int *error_code)
|
||||
if (fd->fd_sys != -1) {
|
||||
pvfs_ioctl(fd->fd_sys, GETMETA, &pstat);
|
||||
ADIOI_Snprintf(value, MPI_MAX_INFO_VAL+1, "%d", pstat.pcount);
|
||||
MPI_Info_set(fd->info, "striping_factor", value);
|
||||
ADIOI_Info_set(fd->info, "striping_factor", value);
|
||||
ADIOI_Snprintf(value, MPI_MAX_INFO_VAL+1, "%d", pstat.ssize);
|
||||
MPI_Info_set(fd->info, "striping_unit", value);
|
||||
ADIOI_Info_set(fd->info, "striping_unit", value);
|
||||
ADIOI_Snprintf(value, MPI_MAX_INFO_VAL+1, "%d", pstat.base);
|
||||
MPI_Info_set(fd->info, "start_iodevice", value);
|
||||
ADIOI_Info_set(fd->info, "start_iodevice", value);
|
||||
}
|
||||
|
||||
ADIOI_Free(value);
|
||||
|
@ -43,6 +43,7 @@ void ADIOI_PVFS_ReadContig(ADIO_File fd, void *buf, int count,
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_read_b, 0, NULL );
|
||||
#endif
|
||||
if (err>0)
|
||||
fd->fp_sys_posn = offset + err;
|
||||
/* individual file pointer not updated */
|
||||
}
|
||||
@ -63,6 +64,7 @@ void ADIOI_PVFS_ReadContig(ADIO_File fd, void *buf, int count,
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_read_b, 0, NULL );
|
||||
#endif
|
||||
if (err > 0)
|
||||
fd->fp_ind += err;
|
||||
fd->fp_sys_posn = fd->fp_ind;
|
||||
}
|
||||
|
@ -43,6 +43,7 @@ void ADIOI_PVFS_WriteContig(ADIO_File fd, void *buf, int count,
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
|
||||
#endif
|
||||
if (err > 0)
|
||||
fd->fp_sys_posn = offset + err;
|
||||
/* individual file pointer not updated */
|
||||
}
|
||||
@ -63,6 +64,7 @@ void ADIOI_PVFS_WriteContig(ADIO_File fd, void *buf, int count,
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
|
||||
#endif
|
||||
if (err > 0)
|
||||
fd->fp_ind += err;
|
||||
fd->fp_sys_posn = fd->fp_ind;
|
||||
}
|
||||
|
@ -28,9 +28,15 @@ libadio_pvfs2_la_SOURCES = \
|
||||
ad_pvfs2_common.c \
|
||||
ad_pvfs2_delete.c \
|
||||
ad_pvfs2_fcntl.c \
|
||||
ad_pvfs2_features.c \
|
||||
ad_pvfs2_flush.c \
|
||||
ad_pvfs2_hints.c \
|
||||
ad_pvfs2_io.h \
|
||||
ad_pvfs2_io_dtype.c \
|
||||
ad_pvfs2_io_list.c \
|
||||
ad_pvfs2_open.c \
|
||||
ad_pvfs2_read.c \
|
||||
ad_pvfs2_read_list_classic.c \
|
||||
ad_pvfs2_resize.c \
|
||||
ad_pvfs2_write.c
|
||||
ad_pvfs2_write.c \
|
||||
ad_pvfs2_write_list_classic.c
|
||||
|
@ -12,6 +12,7 @@
|
||||
|
||||
struct ADIOI_Fns_struct ADIO_PVFS2_operations = {
|
||||
ADIOI_PVFS2_Open, /* Open */
|
||||
ADIOI_SCALEABLE_OpenColl, /* OpenColl */
|
||||
ADIOI_PVFS2_ReadContig, /* ReadContig */
|
||||
ADIOI_PVFS2_WriteContig, /* WriteContig */
|
||||
ADIOI_GEN_ReadStridedColl, /* ReadStridedColl */
|
||||
@ -22,13 +23,8 @@ struct ADIOI_Fns_struct ADIO_PVFS2_operations = {
|
||||
ADIOI_PVFS2_ReadStrided, /* ReadStrided */
|
||||
ADIOI_PVFS2_WriteStrided, /* WriteStrided */
|
||||
ADIOI_PVFS2_Close, /* Close */
|
||||
#ifdef ROMIO_HAVE_WORKING_AIO
|
||||
ADIOI_PVFS2_IReadContig, /* IreadContig */
|
||||
ADIOI_PVFS2_IWriteContig, /* IwriteContig */
|
||||
#else
|
||||
ADIOI_FAKE_IreadContig, /* IreadContig */
|
||||
ADIOI_FAKE_IwriteContig, /* IwriteContig */
|
||||
#endif
|
||||
ADIOI_FAKE_IODone, /* ReadDone */
|
||||
ADIOI_FAKE_IODone, /* WriteDone */
|
||||
ADIOI_FAKE_IOComplete, /* ReadComplete */
|
||||
@ -38,6 +34,7 @@ struct ADIOI_Fns_struct ADIO_PVFS2_operations = {
|
||||
ADIOI_PVFS2_Flush, /* Flush */
|
||||
ADIOI_PVFS2_Resize, /* Resize */
|
||||
ADIOI_PVFS2_Delete, /* Delete */
|
||||
ADIOI_PVFS2_Feature,
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -17,7 +17,6 @@
|
||||
#include "pvfs2-compat.h"
|
||||
#endif
|
||||
|
||||
|
||||
void ADIOI_PVFS2_Open(ADIO_File fd, int *error_code);
|
||||
void ADIOI_PVFS2_Close(ADIO_File fd, int *error_code);
|
||||
void ADIOI_PVFS2_ReadContig(ADIO_File fd, void *buf, int count,
|
||||
@ -42,6 +41,8 @@ void ADIOI_PVFS2_Flush(ADIO_File fd, int *error_code);
|
||||
void ADIOI_PVFS2_Delete(char *filename, int *error_code);
|
||||
void ADIOI_PVFS2_Resize(ADIO_File fd, ADIO_Offset size, int *error_code);
|
||||
void ADIOI_PVFS2_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code);
|
||||
int ADIOI_PVFS2_Feature(ADIO_File fd, int flag);
|
||||
|
||||
void ADIOI_PVFS2_IReadContig(ADIO_File fd, void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, MPI_Request *request,
|
||||
@ -54,4 +55,12 @@ void ADIOI_PVFS2_AIO_contig(ADIO_File fd, void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, MPI_Request *request,
|
||||
int flag, int *error_code);
|
||||
void ADIOI_PVFS2_OldWriteStrided(ADIO_File fd, void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Status *status, int
|
||||
*error_code);
|
||||
void ADIOI_PVFS2_OldReadStrided(ADIO_File fd, void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Status *status, int
|
||||
*error_code);
|
||||
#endif
|
||||
|
@ -17,7 +17,6 @@
|
||||
#define READ 0
|
||||
#define WRITE 1
|
||||
|
||||
#ifdef ROMIO_HAVE_WORKING_AIO
|
||||
static int ADIOI_PVFS2_greq_class = 0;
|
||||
int ADIOI_PVFS2_aio_free_fn(void *extra_state);
|
||||
int ADIOI_PVFS2_aio_poll_fn(void *extra_state, MPI_Status *status);
|
||||
@ -168,12 +167,10 @@ int ADIOI_PVFS2_aio_poll_fn(void *extra_state, MPI_Status *status)
|
||||
aio_req = (ADIOI_AIO_Request *)extra_state;
|
||||
|
||||
/* BUG: cannot PVFS_sys_testsome: does not work for a specific request */
|
||||
ret = PVFS_sys_wait(aio_req->op_id, __FUNCTION__, &error);
|
||||
ret = PVFS_sys_wait(aio_req->op_id, "ADIOI_PVFS2_aio_poll_fn", &error);
|
||||
if (ret == 0) {
|
||||
aio_req->nbytes = aio_req->resp_io.total_completed;
|
||||
MPIR_Nest_incr();
|
||||
MPI_Grequest_complete(aio_req->req);
|
||||
MPIR_Nest_decr();
|
||||
return MPI_SUCCESS;
|
||||
} else
|
||||
return MPI_UNDEFINED; /* TODO: what's this error? */
|
||||
@ -186,7 +183,7 @@ int ADIOI_PVFS2_aio_wait_fn(int count, void ** array_of_states,
|
||||
|
||||
ADIOI_AIO_Request **aio_reqlist;
|
||||
PVFS_sys_op_id *op_id_array;
|
||||
int i,j, greq_count;
|
||||
int i,j, greq_count, completed_count=0;
|
||||
int *error_array;
|
||||
|
||||
aio_reqlist = (ADIOI_AIO_Request **)array_of_states;
|
||||
@ -195,25 +192,27 @@ int ADIOI_PVFS2_aio_wait_fn(int count, void ** array_of_states,
|
||||
error_array = (int *)ADIOI_Calloc(count, sizeof(int));
|
||||
greq_count = count;
|
||||
|
||||
|
||||
/* PVFS-2.6: testsome actually tests all requests and fills in op_id_array
|
||||
* with the ones that have completed. count is an in/out parameter.
|
||||
* returns with the number of completed operations. what a mess! */
|
||||
while (completed_count < greq_count ) {
|
||||
count = greq_count;
|
||||
PVFS_sys_testsome(op_id_array, &count, NULL, error_array, INT_MAX);
|
||||
completed_count += count;
|
||||
for (i=0; i< count; i++) {
|
||||
for (j=0; j<greq_count; j++) {
|
||||
if (op_id_array[i] == aio_reqlist[j]->op_id) {
|
||||
aio_reqlist[j]->nbytes =
|
||||
aio_reqlist[j]->resp_io.total_completed;
|
||||
MPIR_Nest_incr();
|
||||
MPI_Grequest_complete(aio_reqlist[j]->req);
|
||||
MPIR_Nest_decr();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return MPI_SUCCESS; /* TODO: no idea how to deal with errors */
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/*
|
||||
* vim: ts=8 sts=4 sw=4 noexpandtab
|
||||
|
@ -42,6 +42,7 @@ int ADIOI_PVFS2_End_call(MPI_Comm comm, int keyval,
|
||||
{
|
||||
int error_code;
|
||||
ADIOI_PVFS2_End(&error_code);
|
||||
MPI_Keyval_free(&keyval);
|
||||
return error_code;
|
||||
}
|
||||
|
||||
@ -81,7 +82,7 @@ void ADIOI_PVFS2_Init(int *error_code )
|
||||
&ADIOI_PVFS2_Initialized, (void *)0);
|
||||
/* just like romio does, we make a dummy attribute so we
|
||||
* get cleaned up */
|
||||
MPI_Attr_put(MPI_COMM_WORLD, ADIOI_PVFS2_Initialized, (void *)0);
|
||||
MPI_Attr_put(MPI_COMM_SELF, ADIOI_PVFS2_Initialized, (void *)0);
|
||||
}
|
||||
|
||||
void ADIOI_PVFS2_makeattribs(PVFS_sys_attr * attribs)
|
||||
@ -107,9 +108,43 @@ void ADIOI_PVFS2_makecredentials(PVFS_credentials * credentials)
|
||||
|
||||
int ADIOI_PVFS2_error_convert(int pvfs_error)
|
||||
{
|
||||
switch(pvfs_error)
|
||||
{
|
||||
case PVFS_EPERM:
|
||||
case PVFS_EACCES:
|
||||
return MPI_ERR_ACCESS;
|
||||
case PVFS_ENOENT:
|
||||
case PVFS_ENXIO:
|
||||
case PVFS_ENODEV:
|
||||
return MPI_ERR_NO_SUCH_FILE;
|
||||
case PVFS_EIO:
|
||||
return MPI_ERR_IO;
|
||||
case PVFS_EEXIST:
|
||||
return MPI_ERR_FILE_EXISTS;
|
||||
case PVFS_ENOTDIR: /* ??? */
|
||||
case PVFS_EISDIR: /* ??? */
|
||||
case PVFS_ENAMETOOLONG:
|
||||
return MPI_ERR_BAD_FILE;
|
||||
case PVFS_EINVAL:
|
||||
return MPI_ERR_FILE;
|
||||
case PVFS_EFBIG: /* ??? */
|
||||
case PVFS_ENOSPC:
|
||||
return MPI_ERR_NO_SPACE;
|
||||
case PVFS_EROFS:
|
||||
return MPI_ERR_READ_ONLY;
|
||||
case PVFS_ENOSYS:
|
||||
return MPI_ERR_UNSUPPORTED_OPERATION;
|
||||
/* PVFS does not support quotas */
|
||||
case EDQUOT:
|
||||
return MPI_ERR_QUOTA;
|
||||
case PVFS_ENOMEM:
|
||||
return MPI_ERR_INTERN;
|
||||
default:
|
||||
return MPI_UNDEFINED;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
* vim: ts=8 sts=4 sw=4 noexpandtab
|
||||
*/
|
||||
|
16
ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_features.c
Обычный файл
16
ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_features.c
Обычный файл
@ -0,0 +1,16 @@
|
||||
#include "adio.h"
|
||||
#include "ad_pvfs2.h"
|
||||
|
||||
/*
 * ADIOI_PVFS2_Feature - report whether the PVFS2 driver supports the
 * optional ADIO capability identified by flag.
 *
 * fd:   file handle (not consulted by this implementation).
 * flag: one of the ADIO_* feature identifiers.
 *
 * Returns 1 when the feature is supported, 0 otherwise.
 */
int ADIOI_PVFS2_Feature(ADIO_File fd, int flag)
{
    /* Scalable open is the only capability advertised here.  Shared file
     * pointers, locks, sequential access, data-sieving writes and any
     * unrecognised flag are all reported as unsupported. */
    if (flag == ADIO_SCALABLE_OPEN)
        return 1;

    return 0;
}
|
@ -17,20 +17,37 @@ void ADIOI_PVFS2_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
|
||||
if ((fd->info) == MPI_INFO_NULL) {
|
||||
/* part of the open call */
|
||||
MPI_Info_create(&(fd->info));
|
||||
MPI_Info_set(fd->info, "romio_pvfs2_debugmask", "0");
|
||||
ADIOI_Info_set(fd->info, "romio_pvfs2_debugmask", "0");
|
||||
fd->hints->fs_hints.pvfs2.debugmask = 0;
|
||||
|
||||
MPI_Info_set(fd->info, "striping_factor", "0");
|
||||
ADIOI_Info_set(fd->info, "striping_factor", "0");
|
||||
fd->hints->striping_factor = 0;
|
||||
|
||||
MPI_Info_set(fd->info, "striping_unit", "0");
|
||||
ADIOI_Info_set(fd->info, "striping_unit", "0");
|
||||
fd->hints->striping_unit = 0;
|
||||
|
||||
/* disable the aggressive strided optimizations by default */
|
||||
ADIOI_Info_set(fd->info, "romio_pvfs2_posix_read", "disable");
|
||||
ADIOI_Info_set(fd->info, "romio_pvfs2_posix_write", "disable");
|
||||
fd->hints->fs_hints.pvfs2.posix_read = ADIOI_HINT_DISABLE;
|
||||
fd->hints->fs_hints.pvfs2.posix_write = ADIOI_HINT_DISABLE;
|
||||
|
||||
ADIOI_Info_set(fd->info, "romio_pvfs2_dtype_read", "disable");
|
||||
ADIOI_Info_set(fd->info, "romio_pvfs2_dtype_write", "disable");
|
||||
fd->hints->fs_hints.pvfs2.dtype_read = ADIOI_HINT_DISABLE;
|
||||
fd->hints->fs_hints.pvfs2.dtype_write = ADIOI_HINT_DISABLE;
|
||||
|
||||
ADIOI_Info_set(fd->info, "romio_pvfs2_listio_read", "disable");
|
||||
ADIOI_Info_set(fd->info, "romio_pvfs2_listio_write", "disable");
|
||||
fd->hints->fs_hints.pvfs2.listio_read = ADIOI_HINT_DISABLE;
|
||||
fd->hints->fs_hints.pvfs2.listio_write = ADIOI_HINT_DISABLE;
|
||||
|
||||
|
||||
/* any user-provided hints? */
|
||||
if (users_info != MPI_INFO_NULL) {
|
||||
/* pvfs2 debugging */
|
||||
value = (char *) ADIOI_Malloc( (MPI_MAX_INFO_VAL+1)*sizeof(char));
|
||||
MPI_Info_get(users_info, "romio_pvfs2_debugmask",
|
||||
ADIOI_Info_get(users_info, "romio_pvfs2_debugmask",
|
||||
MPI_MAX_INFO_VAL, value, &flag);
|
||||
if (flag) {
|
||||
tmp_value = fd->hints->fs_hints.pvfs2.debugmask =
|
||||
@ -46,11 +63,11 @@ void ADIOI_PVFS2_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
|
||||
}
|
||||
/* --END ERROR HANDLING-- */
|
||||
|
||||
MPI_Info_set(fd->info, "romio_pvfs2_debugmask", value);
|
||||
ADIOI_Info_set(fd->info, "romio_pvfs2_debugmask", value);
|
||||
}
|
||||
|
||||
/* the striping factor */
|
||||
MPI_Info_get(users_info, "striping_factor",
|
||||
ADIOI_Info_get(users_info, "striping_factor",
|
||||
MPI_MAX_INFO_VAL, value, &flag);
|
||||
if (flag) {
|
||||
tmp_value = fd->hints->striping_factor = atoi(value);
|
||||
@ -65,11 +82,11 @@ void ADIOI_PVFS2_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
|
||||
}
|
||||
/* --END ERROR HANDLING-- */
|
||||
|
||||
MPI_Info_set(fd->info, "striping_factor", value);
|
||||
ADIOI_Info_set(fd->info, "striping_factor", value);
|
||||
}
|
||||
|
||||
/* the striping unit */
|
||||
MPI_Info_get(users_info, "striping_unit",
|
||||
ADIOI_Info_get(users_info, "striping_unit",
|
||||
MPI_MAX_INFO_VAL, value, &flag);
|
||||
if (flag) {
|
||||
tmp_value = fd->hints->striping_unit = atoi(value);
|
||||
@ -83,16 +100,167 @@ void ADIOI_PVFS2_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
|
||||
}
|
||||
/* --END ERROR HANDLING-- */
|
||||
|
||||
MPI_Info_set(fd->info, "striping_unit", value);
|
||||
ADIOI_Info_set(fd->info, "striping_unit", value);
|
||||
}
|
||||
|
||||
/* distribution name */
|
||||
MPI_Info_get(users_info, "romio_pvfs2_distribution_name",
|
||||
ADIOI_Info_get(users_info, "romio_pvfs2_distribution_name",
|
||||
MPI_MAX_INFO_VAL, value, &flag);
|
||||
if (flag) {
|
||||
}
|
||||
|
||||
|
||||
/* POSIX read */
|
||||
ADIOI_Info_get(users_info, "romio_pvfs2_posix_read",
|
||||
MPI_MAX_INFO_VAL, value, &flag);
|
||||
if (flag) {
|
||||
if ( !strcmp(value, "enable") || !strcmp(value, "ENABLE"))
|
||||
{
|
||||
ADIOI_Info_set(fd->info, "romio_pvfs2_posix_read", value);
|
||||
fd->hints->fs_hints.pvfs2.posix_read = ADIOI_HINT_ENABLE;
|
||||
}
|
||||
else if ( !strcmp(value, "disable") ||
|
||||
!strcmp(value, "DISABLE"))
|
||||
{
|
||||
ADIOI_Info_set(fd->info , "romio_pvfs2_posix_read", value);
|
||||
fd->hints->fs_hints.pvfs2.posix_read = ADIOI_HINT_DISABLE;
|
||||
}
|
||||
tmp_value = fd->hints->fs_hints.pvfs2.posix_read;
|
||||
MPI_Bcast(&tmp_value, 1, MPI_INT, 0, fd->comm);
|
||||
if (tmp_value != fd->hints->fs_hints.pvfs2.posix_read) {
|
||||
MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
|
||||
"posix_read",
|
||||
error_code);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/* POSIX write */
|
||||
ADIOI_Info_get(users_info, "romio_pvfs2_posix_write",
|
||||
MPI_MAX_INFO_VAL, value, &flag);
|
||||
if (flag) {
|
||||
if ( !strcmp(value, "enable") || !strcmp(value, "ENABLE"))
|
||||
{
|
||||
ADIOI_Info_set(fd->info, "romio_pvfs2_posix_write", value);
|
||||
fd->hints->fs_hints.pvfs2.posix_write = ADIOI_HINT_ENABLE;
|
||||
}
|
||||
else if ( !strcmp(value, "disable") ||
|
||||
!strcmp(value, "DISABLE"))
|
||||
{
|
||||
ADIOI_Info_set(fd->info , "romio_pvfs2_posix_write", value);
|
||||
fd->hints->fs_hints.pvfs2.posix_write = ADIOI_HINT_DISABLE;
|
||||
}
|
||||
tmp_value = fd->hints->fs_hints.pvfs2.posix_write;
|
||||
MPI_Bcast(&tmp_value, 1, MPI_INT, 0, fd->comm);
|
||||
if (tmp_value != fd->hints->fs_hints.pvfs2.posix_write) {
|
||||
MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
|
||||
"posix_write",
|
||||
error_code);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/* Datatype read */
|
||||
ADIOI_Info_get(users_info, "romio_pvfs2_dtype_read",
|
||||
MPI_MAX_INFO_VAL, value, &flag);
|
||||
if (flag) {
|
||||
if ( !strcmp(value, "enable") || !strcmp(value, "ENABLE"))
|
||||
{
|
||||
ADIOI_Info_set(fd->info, "romio_pvfs2_dtype_read", value);
|
||||
fd->hints->fs_hints.pvfs2.dtype_read = ADIOI_HINT_ENABLE;
|
||||
}
|
||||
else if ( !strcmp(value, "disable") ||
|
||||
!strcmp(value, "DISABLE"))
|
||||
{
|
||||
ADIOI_Info_set(fd->info , "romio_pvfs2_dtype_read", value);
|
||||
fd->hints->fs_hints.pvfs2.dtype_read = ADIOI_HINT_DISABLE;
|
||||
}
|
||||
tmp_value = fd->hints->fs_hints.pvfs2.dtype_read;
|
||||
MPI_Bcast(&tmp_value, 1, MPI_INT, 0, fd->comm);
|
||||
if (tmp_value != fd->hints->fs_hints.pvfs2.dtype_read) {
|
||||
MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
|
||||
"dtype_read",
|
||||
error_code);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/* Datatype write */
|
||||
ADIOI_Info_get(users_info, "romio_pvfs2_dtype_write",
|
||||
MPI_MAX_INFO_VAL, value, &flag);
|
||||
if (flag) {
|
||||
if ( !strcmp(value, "enable") || !strcmp(value, "ENABLE"))
|
||||
{
|
||||
ADIOI_Info_set(fd->info, "romio_pvfs2_dtype_write", value);
|
||||
fd->hints->fs_hints.pvfs2.dtype_write = ADIOI_HINT_ENABLE;
|
||||
}
|
||||
else if ( !strcmp(value, "disable") ||
|
||||
!strcmp(value, "DISABLE"))
|
||||
{
|
||||
ADIOI_Info_set(fd->info , "romio_pvfs2_dtype_write", value);
|
||||
fd->hints->fs_hints.pvfs2.dtype_write = ADIOI_HINT_DISABLE;
|
||||
}
|
||||
tmp_value = fd->hints->fs_hints.pvfs2.dtype_write;
|
||||
MPI_Bcast(&tmp_value, 1, MPI_INT, 0, fd->comm);
|
||||
if (tmp_value != fd->hints->fs_hints.pvfs2.dtype_write) {
|
||||
MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
|
||||
"dtype_write",
|
||||
error_code);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/* Listio read */
|
||||
ADIOI_Info_get(users_info, "romio_pvfs2_listio_read",
|
||||
MPI_MAX_INFO_VAL, value, &flag);
|
||||
if (flag) {
|
||||
if ( !strcmp(value, "enable") || !strcmp(value, "ENABLE"))
|
||||
{
|
||||
ADIOI_Info_set(fd->info, "romio_pvfs2_listio_read", value);
|
||||
fd->hints->fs_hints.pvfs2.listio_read = ADIOI_HINT_ENABLE;
|
||||
}
|
||||
else if ( !strcmp(value, "disable") ||
|
||||
!strcmp(value, "DISABLE"))
|
||||
{
|
||||
ADIOI_Info_set(fd->info , "romio_pvfs2_listio_read", value);
|
||||
fd->hints->fs_hints.pvfs2.listio_read = ADIOI_HINT_DISABLE;
|
||||
}
|
||||
tmp_value = fd->hints->fs_hints.pvfs2.listio_read;
|
||||
MPI_Bcast(&tmp_value, 1, MPI_INT, 0, fd->comm);
|
||||
if (tmp_value != fd->hints->fs_hints.pvfs2.listio_read) {
|
||||
MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
|
||||
"listio_read",
|
||||
error_code);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/* Listio write */
|
||||
ADIOI_Info_get(users_info, "romio_pvfs2_listio_write",
|
||||
MPI_MAX_INFO_VAL, value, &flag);
|
||||
if (flag) {
|
||||
if ( !strcmp(value, "enable") || !strcmp(value, "ENABLE"))
|
||||
{
|
||||
ADIOI_Info_set(fd->info, "romio_pvfs2_listio_write", value);
|
||||
fd->hints->fs_hints.pvfs2.listio_write = ADIOI_HINT_ENABLE;
|
||||
}
|
||||
else if ( !strcmp(value, "disable") ||
|
||||
!strcmp(value, "DISABLE"))
|
||||
{
|
||||
ADIOI_Info_set(fd->info , "romio_pvfs2_listio_write", value);
|
||||
fd->hints->fs_hints.pvfs2.listio_write = ADIOI_HINT_DISABLE;
|
||||
}
|
||||
tmp_value = fd->hints->fs_hints.pvfs2.listio_write;
|
||||
MPI_Bcast(&tmp_value, 1, MPI_INT, 0, fd->comm);
|
||||
if (tmp_value != fd->hints->fs_hints.pvfs2.listio_write) {
|
||||
MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
|
||||
"listio_write",
|
||||
error_code);
|
||||
return;
|
||||
}
|
||||
}
|
||||
ADIOI_Free(value);
|
||||
|
||||
}
|
||||
}
|
||||
/* set the values for collective I/O and data sieving parameters */
|
||||
|
79
ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_io.h
Обычный файл
79
ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_io.h
Обычный файл
@ -0,0 +1,79 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; -*-
|
||||
* vim: ts=8 sts=4 sw=4 noexpandtab
|
||||
*
|
||||
* Copyright (C) 2006 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
/* I/O direction selectors passed as rw_type to the helpers below */
|
||||
|
||||
#define READ 0
|
||||
#define WRITE 1
|
||||
|
||||
/* #define DEBUG_CONTIG */
|
||||
/* #define DEBUG_LIST */
|
||||
/* #define DEBUG_DTYPE */
|
||||
|
||||
/* Contig I/O helper prototypes */
|
||||
int ADIOI_PVFS2_Contig(ADIO_File fd, void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Status *status,
|
||||
int *error_code, int rw_type);
|
||||
|
||||
/* List I/O helper prototypes */
|
||||
int ADIOI_PVFS2_StridedListIO(ADIO_File fd, void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Status *status,
|
||||
int *error_code, int rw_type);
|
||||
|
||||
int gen_listio_arr(ADIOI_Flatlist_node *flat_buf,
|
||||
int *flat_buf_index_p,
|
||||
int64_t *cur_flat_buf_reg_off_p,
|
||||
int flat_buf_size,
|
||||
int flat_buf_extent,
|
||||
ADIOI_Flatlist_node *flat_file,
|
||||
int *flat_file_index_p,
|
||||
int64_t *cur_flat_file_reg_off_p,
|
||||
int flat_file_size,
|
||||
int flat_file_extent,
|
||||
int max_ol_count,
|
||||
ADIO_Offset disp,
|
||||
int bytes_into_filetype,
|
||||
int64_t *bytes_completed,
|
||||
int64_t total_io_size,
|
||||
int64_t buf_off_arr[],
|
||||
int32_t buf_len_arr[],
|
||||
int32_t *buf_ol_count_p,
|
||||
int64_t file_off_arr[],
|
||||
int32_t file_len_arr[],
|
||||
int32_t *file_ol_count_p);
|
||||
|
||||
void print_buf_file_ol_pairs(int64_t buf_off_arr[],
|
||||
int32_t buf_len_arr[],
|
||||
int32_t buf_ol_count,
|
||||
int64_t file_off_arr[],
|
||||
int32_t file_len_arr[],
|
||||
int32_t file_ol_count,
|
||||
void *buf,
|
||||
int rw_type);
|
||||
|
||||
/* Datatype I/O helper prototypes */
|
||||
int ADIOI_PVFS2_StridedDtypeIO(ADIO_File fd, void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Status *status,
|
||||
int *error_code, int rw_type);
|
||||
|
||||
int convert_named(MPI_Datatype *mpi_dtype,
|
||||
PVFS_Request *pvfs_dtype, int combiner);
|
||||
|
||||
void print_dtype_info(int combiner,
|
||||
int num_int,
|
||||
int num_addr,
|
||||
int num_dtype,
|
||||
int *arr_int,
|
||||
MPI_Aint *arr_addr,
|
||||
MPI_Datatype *arr_dtype);
|
||||
|
||||
int convert_mpi_pvfs2_dtype(MPI_Datatype *mpi_dtype,
|
||||
PVFS_Request *pvfs_dtype);
|
||||
|
720
ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_io_dtype.c
Обычный файл
720
ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_io_dtype.c
Обычный файл
@ -0,0 +1,720 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; -*-
|
||||
* vim: ts=8 sts=4 sw=4 noexpandtab
|
||||
*
|
||||
* Copyright (C) 2006 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include "adio.h"
|
||||
#include "adio_extern.h"
|
||||
#include "ad_pvfs2.h"
|
||||
#include "ad_pvfs2_io.h"
|
||||
#include "ad_pvfs2_common.h"
|
||||
|
||||
int ADIOI_PVFS2_StridedDtypeIO(ADIO_File fd, void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Status *status, int
|
||||
*error_code,
|
||||
int rw_type)
|
||||
{
|
||||
int filetype_size = -1, ret = -1, filetype_is_contig = -1;
|
||||
int num_filetypes = 0, cur_flat_file_reg_off = 0;
|
||||
PVFS_Request tmp_mem_req, mem_req, tmp_file_req, file_req;
|
||||
PVFS_sysresp_io resp_io;
|
||||
ADIO_Offset off = -1, bytes_into_filetype = 0;
|
||||
MPI_Aint filetype_extent = -1;
|
||||
int etype_size = -1, i = -1;
|
||||
PVFS_size pvfs_disp = -1;
|
||||
ADIOI_Flatlist_node *flat_file_p = ADIOI_Flatlist;
|
||||
|
||||
/* Use for offseting the PVFS2 filetype */
|
||||
int pvfs_blk = 1;
|
||||
ADIOI_PVFS2_fs *pvfs_fs;
|
||||
static char myname[] = "ADIOI_PVFS2_STRIDED_DTYPE";
|
||||
|
||||
memset(&tmp_mem_req, 0, sizeof(PVFS_Request));
|
||||
memset(&mem_req, 0, sizeof(PVFS_Request));
|
||||
memset(&tmp_file_req, 0, sizeof(PVFS_Request));
|
||||
memset(&file_req, 0, sizeof(PVFS_Request));
|
||||
|
||||
pvfs_fs = (ADIOI_PVFS2_fs*)fd->fs_ptr;
|
||||
|
||||
ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);
|
||||
|
||||
/* changed below if error */
|
||||
*error_code = MPI_SUCCESS;
|
||||
|
||||
/* datatype is the memory type
|
||||
* fd->filetype is the file type */
|
||||
MPI_Type_size(fd->filetype, &filetype_size);
|
||||
if (filetype_size == 0) {
|
||||
*error_code = MPI_SUCCESS;
|
||||
return -1;
|
||||
}
|
||||
MPI_Type_extent(fd->filetype, &filetype_extent);
|
||||
MPI_Type_size(fd->etype, &etype_size);
|
||||
if (filetype_size == 0) {
|
||||
*error_code = MPI_SUCCESS;
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* offset is in units of etype relative to the filetype. We
|
||||
* convert this to off in terms of actual data bytes (the offset
|
||||
* minus the number of bytes that are not used). We are allowed
|
||||
* to do this since PVFS2 handles offsets with respect to a
|
||||
* file_req in bytes, otherwise we would have to convert into a
|
||||
* pure byte offset as is done in other methods. Explicit offset
|
||||
* case is handled by using fd->disp and byte-converted off. */
|
||||
|
||||
pvfs_disp = fd->disp;
|
||||
if (file_ptr_type == ADIO_INDIVIDUAL)
|
||||
{
|
||||
if (filetype_is_contig)
|
||||
{
|
||||
off = fd->fp_ind - fd->disp;
|
||||
}
|
||||
else
|
||||
{
|
||||
int flag = 0;
|
||||
/* Should have already been flattened in ADIO_Open*/
|
||||
while (flat_file_p->type != fd->filetype)
|
||||
{
|
||||
flat_file_p = flat_file_p->next;
|
||||
}
|
||||
num_filetypes = -1;
|
||||
while (!flag)
|
||||
{
|
||||
num_filetypes++;
|
||||
for (i = 0; i < flat_file_p->count; i++)
|
||||
{
|
||||
/* Start on a non zero-length region */
|
||||
if (flat_file_p->blocklens[i])
|
||||
{
|
||||
if (fd->disp + flat_file_p->indices[i] +
|
||||
(num_filetypes * filetype_extent) +
|
||||
flat_file_p->blocklens[i] > fd->fp_ind &&
|
||||
fd->disp + flat_file_p->indices[i] <=
|
||||
fd->fp_ind)
|
||||
{
|
||||
cur_flat_file_reg_off = fd->fp_ind -
|
||||
(fd->disp + flat_file_p->indices[i] +
|
||||
(num_filetypes * filetype_extent));
|
||||
flag = 1;
|
||||
break;
|
||||
}
|
||||
else
|
||||
bytes_into_filetype += flat_file_p->blocklens[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
/* Impossible that we don't find it in this datatype */
|
||||
assert(i != flat_file_p->count);
|
||||
off = bytes_into_filetype + cur_flat_file_reg_off;
|
||||
}
|
||||
}
|
||||
else /* ADIO_EXPLICIT */
|
||||
{
|
||||
off = etype_size * offset;
|
||||
}
|
||||
|
||||
#ifdef DEBUG_DTYPE
|
||||
fprintf(stderr, "ADIOI_PVFS2_StridedDtypeIO: (fd->fp_ind=%Ld,fd->disp=%Ld,"
|
||||
" offset=%Ld),(pvfs_disp=%Ld,off=%Ld)\n",
|
||||
fd->fp_ind, fd->disp, offset, pvfs_disp, off);
|
||||
#endif
|
||||
|
||||
|
||||
/* Convert the MPI memory and file datatypes into
|
||||
* PVFS2 datatypes */
|
||||
ret = convert_mpi_pvfs2_dtype(&datatype, &tmp_mem_req);
|
||||
if (ret < 0)
|
||||
{
|
||||
goto error_state;
|
||||
}
|
||||
ret = convert_mpi_pvfs2_dtype(&(fd->filetype), &tmp_file_req);
|
||||
if (ret < 0)
|
||||
{
|
||||
goto error_state;
|
||||
}
|
||||
|
||||
ret = PVFS_Request_contiguous(count, tmp_mem_req, &mem_req);
|
||||
if (ret != 0) /* TODO: convert this to MPIO error handling */
|
||||
fprintf(stderr, "ADIOI_PVFS2_stridedDtypeIO: error in final"
|
||||
" CONTIG memory type\n");
|
||||
PVFS_Request_free(&tmp_mem_req);
|
||||
|
||||
/* pvfs_disp is used to offset the filetype */
|
||||
ret = PVFS_Request_hindexed(1, &pvfs_blk, &pvfs_disp,
|
||||
tmp_file_req, &file_req);
|
||||
if (ret != 0)
|
||||
fprintf(stderr, "ADIOI_PVFS2_StridedDtypeIO: error in final"
|
||||
" HINDEXED file type\n");
|
||||
PVFS_Request_free(&tmp_file_req);
|
||||
|
||||
if (rw_type == READ)
|
||||
ret = PVFS_sys_read(pvfs_fs->object_ref, file_req, off, buf,
|
||||
mem_req, &(pvfs_fs->credentials), &resp_io);
|
||||
else
|
||||
ret = PVFS_sys_write(pvfs_fs->object_ref, file_req, off, buf,
|
||||
mem_req, &(pvfs_fs->credentials), &resp_io);
|
||||
|
||||
if (ret != 0) {
|
||||
fprintf(stderr, "ADIOI_PVFS2_StridedDtypeIO: Warning - PVFS_sys_"
|
||||
"read/write returned %d and completed %Ld bytes.\n",
|
||||
ret, resp_io.total_completed);
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
|
||||
MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__,
|
||||
ADIOI_PVFS2_error_convert(ret),
|
||||
"Error in PVFS_sys_io \n", 0);
|
||||
goto error_state;
|
||||
}
|
||||
|
||||
if (file_ptr_type == ADIO_INDIVIDUAL)
|
||||
{
|
||||
fd->fp_ind = off += resp_io.total_completed;
|
||||
}
|
||||
|
||||
error_state:
|
||||
fd->fp_sys_posn = -1; /* set it to null. */
|
||||
|
||||
PVFS_Request_free(&mem_req);
|
||||
PVFS_Request_free(&file_req);
|
||||
|
||||
#ifdef DEBUG_DTYPE
|
||||
fprintf(stderr, "ADIOI_PVFS2_StridedDtypeIO: "
|
||||
"resp_io.total_completed=%Ld,ret=%d\n",
|
||||
resp_io.total_completed, ret);
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_STATUS_SET_BYTES
|
||||
MPIR_Status_set_bytes(status, datatype, (int)resp_io.total_completed);
|
||||
/* This is a temporary way of filling in status. The right way is to
|
||||
* keep track of how much data was actually acccessed by
|
||||
* ADIOI_BUFFERED operations */
|
||||
#endif
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* convert_mpi_pvfs2_dtype - Convert a MPI datatype into
|
||||
* a PVFS2 datatype so that we can natively use the PVFS2
|
||||
* datatypes in the PVFS2 I/O calls instead of converting
|
||||
* all datatypes to the hindexed method
|
||||
* return 1 - a leaf node
|
||||
* return 0 - normal return
|
||||
* return -1 - problems */
|
||||
|
||||
int convert_mpi_pvfs2_dtype(MPI_Datatype *mpi_dtype,
|
||||
PVFS_Request *pvfs_dtype)
|
||||
{
|
||||
int num_int = -1, num_addr = -1, num_dtype = -1,
|
||||
combiner = -1, i = -1, ret = -1, leaf = -1;
|
||||
int *arr_int = NULL, *arr_addr = NULL;
|
||||
MPI_Datatype *arr_dtype = NULL;
|
||||
PVFS_Request *old_pvfs_dtype = NULL;
|
||||
PVFS_Request *old_pvfs_dtype_arr = NULL;
|
||||
int arr_count = -1;
|
||||
PVFS_size *pvfs_arr_disp = NULL;
|
||||
int *pvfs_arr_len = NULL;
|
||||
|
||||
MPI_Type_get_envelope(*mpi_dtype,
|
||||
&num_int,
|
||||
&num_addr,
|
||||
&num_dtype,
|
||||
&combiner);
|
||||
|
||||
/* Depending on type of datatype do the following
|
||||
* operations */
|
||||
|
||||
if (combiner == MPI_COMBINER_NAMED)
|
||||
{
|
||||
convert_named(mpi_dtype, pvfs_dtype, combiner);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Allocate space for the arrays necessary for
|
||||
* MPI_Type_get_contents */
|
||||
|
||||
if ((arr_int = ADIOI_Malloc(sizeof(int)*num_int)) == NULL)
|
||||
{
|
||||
fprintf(stderr, "Failed to allocate array_int\n");
|
||||
return -1;
|
||||
}
|
||||
if ((arr_addr = ADIOI_Malloc(sizeof(int)*num_addr)) == NULL)
|
||||
{
|
||||
ADIOI_Free(arr_int);
|
||||
fprintf(stderr, "Failed to allocate array_addr\n");
|
||||
return -1;
|
||||
}
|
||||
if ((arr_dtype = ADIOI_Malloc(sizeof(MPI_Datatype)*num_dtype)) == NULL)
|
||||
{
|
||||
ADIOI_Free(arr_int);
|
||||
ADIOI_Free(arr_addr);
|
||||
fprintf(stderr, "Failed to allocate array_dtypes\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
MPI_Type_get_contents(*mpi_dtype,
|
||||
num_int,
|
||||
num_addr,
|
||||
num_dtype,
|
||||
arr_int,
|
||||
arr_addr,
|
||||
arr_dtype);
|
||||
|
||||
/* If it's not a predefined datatype, it is either a
|
||||
* derived datatype or a structured datatype */
|
||||
|
||||
if (combiner != MPI_COMBINER_STRUCT)
|
||||
{
|
||||
if ((old_pvfs_dtype = ADIOI_Malloc(sizeof(PVFS_Request))) == NULL)
|
||||
fprintf(stderr, "convert_mpi_pvfs2_dtype: "
|
||||
"Failed to allocate PVFS_Request\n");
|
||||
switch (combiner)
|
||||
{
|
||||
case MPI_COMBINER_CONTIGUOUS:
|
||||
leaf = convert_mpi_pvfs2_dtype(&arr_dtype[0], old_pvfs_dtype);
|
||||
ret = PVFS_Request_contiguous(arr_int[0],
|
||||
*old_pvfs_dtype, pvfs_dtype);
|
||||
break;
|
||||
case MPI_COMBINER_VECTOR:
|
||||
leaf = convert_mpi_pvfs2_dtype(&arr_dtype[0], old_pvfs_dtype);
|
||||
ret = PVFS_Request_vector(arr_int[0], arr_int[1],
|
||||
arr_int[2], *old_pvfs_dtype,
|
||||
pvfs_dtype);
|
||||
break;
|
||||
case MPI_COMBINER_HVECTOR:
|
||||
leaf = convert_mpi_pvfs2_dtype(&arr_dtype[0], old_pvfs_dtype);
|
||||
ret = PVFS_Request_hvector(arr_int[0], arr_int[1],
|
||||
arr_addr[0], *old_pvfs_dtype,
|
||||
pvfs_dtype);
|
||||
break;
|
||||
/* Both INDEXED and HINDEXED types require PVFS_size
|
||||
* address arrays. Therefore, we need to copy and
|
||||
* convert the data from MPI_get_contents() into
|
||||
* a PVFS_size buffer */
|
||||
case MPI_COMBINER_INDEXED:
|
||||
leaf = convert_mpi_pvfs2_dtype(&arr_dtype[0], old_pvfs_dtype);
|
||||
if ((pvfs_arr_disp =
|
||||
ADIOI_Malloc(arr_int[0]*sizeof(PVFS_size))) == 0)
|
||||
{
|
||||
fprintf(stderr, "convert_mpi_pvfs2_dtype: "
|
||||
"Failed to allocate pvfs_arr_disp\n");
|
||||
}
|
||||
for (i = 0; i < arr_int[0]; i++)
|
||||
{
|
||||
pvfs_arr_disp[i] =
|
||||
(PVFS_size) arr_int[arr_int[0]+1+i];
|
||||
}
|
||||
ret = PVFS_Request_indexed(arr_int[0], &arr_int[1],
|
||||
pvfs_arr_disp,
|
||||
*old_pvfs_dtype, pvfs_dtype);
|
||||
ADIOI_Free(pvfs_arr_disp);
|
||||
break;
|
||||
case MPI_COMBINER_HINDEXED:
|
||||
leaf = convert_mpi_pvfs2_dtype(&arr_dtype[0], old_pvfs_dtype);
|
||||
if ((pvfs_arr_disp =
|
||||
ADIOI_Malloc(arr_int[0]*sizeof(PVFS_size))) == 0)
|
||||
{
|
||||
fprintf(stderr, "convert_mpi_pvfs2_dtype: "
|
||||
"Failed to allocate pvfs_arr_disp\n");
|
||||
}
|
||||
for (i = 0; i < arr_int[0]; i++)
|
||||
{
|
||||
pvfs_arr_disp[i] =
|
||||
(PVFS_size) arr_addr[i];
|
||||
}
|
||||
ret = PVFS_Request_hindexed(arr_int[0], &arr_int[1],
|
||||
(int64_t *)&arr_addr[0],
|
||||
*old_pvfs_dtype, pvfs_dtype);
|
||||
ADIOI_Free(pvfs_arr_disp);
|
||||
break;
|
||||
case MPI_COMBINER_DUP:
|
||||
leaf = convert_mpi_pvfs2_dtype(&arr_dtype[0], old_pvfs_dtype);
|
||||
ret = PVFS_Request_contiguous(1,
|
||||
*old_pvfs_dtype, pvfs_dtype);
|
||||
|
||||
break;
|
||||
case MPI_COMBINER_INDEXED_BLOCK:
|
||||
/* No native PVFS2 support for this operation currently */
|
||||
ADIOI_Free(old_pvfs_dtype);
|
||||
fprintf(stderr, "convert_mpi_pvfs2_dtype: "
|
||||
"INDEXED_BLOCK is unsupported\n");
|
||||
break;
|
||||
case MPI_COMBINER_HINDEXED_INTEGER:
|
||||
ADIOI_Free(old_pvfs_dtype);
|
||||
fprintf(stderr, "convert_mpi_pvfs2_dtype: "
|
||||
"HINDEXED_INTEGER is unsupported\n");
|
||||
break;
|
||||
case MPI_COMBINER_STRUCT_INTEGER:
|
||||
ADIOI_Free(old_pvfs_dtype);
|
||||
fprintf(stderr, "convert_mpi_pvfs2_dtype: "
|
||||
"STRUCT_INTEGER is unsupported\n");
|
||||
break;
|
||||
case MPI_COMBINER_SUBARRAY:
|
||||
ADIOI_Free(old_pvfs_dtype);
|
||||
fprintf(stderr, "convert_mpi_pvfs2_dtype: "
|
||||
"SUBARRAY is unsupported\n");
|
||||
break;
|
||||
case MPI_COMBINER_DARRAY:
|
||||
ADIOI_Free(old_pvfs_dtype);
|
||||
fprintf(stderr, "convert_mpi_pvfs2_dtype: "
|
||||
"DARRAY is unsupported\n");
|
||||
break;
|
||||
case MPI_COMBINER_F90_REAL:
|
||||
ADIOI_Free(old_pvfs_dtype);
|
||||
fprintf(stderr, "convert_mpi_pvfs2_dtype: "
|
||||
"F90_REAL is unsupported\n");
|
||||
break;
|
||||
case MPI_COMBINER_F90_COMPLEX:
|
||||
ADIOI_Free(old_pvfs_dtype);
|
||||
fprintf(stderr, "convert_mpi_pvfs2_dtype: "
|
||||
"F90_COMPLEX is unsupported\n");
|
||||
break;
|
||||
case MPI_COMBINER_F90_INTEGER:
|
||||
ADIOI_Free(old_pvfs_dtype);
|
||||
fprintf(stderr, "convert_mpi_pvfs2_dtype: "
|
||||
"F90_INTEGER is unsupported\n");
|
||||
break;
|
||||
case MPI_COMBINER_RESIZED:
|
||||
ADIOI_Free(old_pvfs_dtype);
|
||||
fprintf(stderr, "convert_mpi_pvfs2_dtype: "
|
||||
"RESIZED is unsupported\n");
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (ret != 0)
|
||||
fprintf(stderr, "Error in PVFS_Request_* "
|
||||
"for a derived datatype\n");
|
||||
|
||||
#ifdef DEBUG_DTYPE
|
||||
print_dtype_info(combiner,
|
||||
num_int,
|
||||
num_addr,
|
||||
num_dtype,
|
||||
arr_int,
|
||||
arr_addr,
|
||||
arr_dtype);
|
||||
#endif
|
||||
|
||||
if (leaf != 1 && combiner != MPI_COMBINER_DUP)
|
||||
MPI_Type_free(&arr_dtype[0]);
|
||||
|
||||
ADIOI_Free(arr_int);
|
||||
ADIOI_Free(arr_addr);
|
||||
ADIOI_Free(arr_dtype);
|
||||
|
||||
PVFS_Request_free(old_pvfs_dtype);
|
||||
ADIOI_Free(old_pvfs_dtype);
|
||||
|
||||
return ret;
|
||||
}
|
||||
else /* MPI_COMBINER_STRUCT */
|
||||
{
|
||||
MPI_Aint mpi_lb = -1, mpi_extent = -1;
|
||||
PVFS_offset pvfs_lb = -1;
|
||||
PVFS_size pvfs_extent = -1;
|
||||
int has_lb_ub = 0;
|
||||
|
||||
/* When converting into a PVFS_Request_struct, we no longer
|
||||
* can use MPI_LB and MPI_UB. Therfore, we have to do the
|
||||
* following.
|
||||
* We simply ignore all the MPI_LB and MPI_UB types and
|
||||
* get the lb and extent and pass it on through a
|
||||
* PVFS resized_req */
|
||||
|
||||
arr_count = 0;
|
||||
for (i = 0; i < arr_int[0]; i++)
|
||||
{
|
||||
if (arr_dtype[i] != MPI_LB &&
|
||||
arr_dtype[i] != MPI_UB)
|
||||
{
|
||||
arr_count++;
|
||||
}
|
||||
}
|
||||
|
||||
if (arr_int[0] != arr_count)
|
||||
{
|
||||
MPI_Type_get_extent(*mpi_dtype, &mpi_lb, &mpi_extent);
|
||||
pvfs_lb = mpi_lb;
|
||||
pvfs_extent = mpi_extent;
|
||||
if ((pvfs_arr_len = ADIOI_Malloc(arr_count*sizeof(int)))
|
||||
== NULL)
|
||||
{
|
||||
fprintf(stderr, "convert_mpi_pvfs2_dtype: "
|
||||
"Failed to allocate pvfs_arr_len\n");
|
||||
}
|
||||
has_lb_ub = 1;
|
||||
}
|
||||
|
||||
if ((old_pvfs_dtype_arr
|
||||
= ADIOI_Malloc(arr_count*sizeof(PVFS_Request))) == NULL)
|
||||
fprintf(stderr, "convert_mpi_pvfs2_dtype: "
|
||||
"Failed to allocate PVFS_Requests\n");
|
||||
|
||||
if ((pvfs_arr_disp = ADIOI_Malloc(arr_count*sizeof(PVFS_size)))
|
||||
== NULL)
|
||||
{
|
||||
fprintf(stderr, "convert_mpi_pvfs2_dtype: "
|
||||
"Failed to allocate pvfs_arr_disp\n");
|
||||
}
|
||||
|
||||
arr_count = 0;
|
||||
for (i = 0; i < arr_int[0]; i++)
|
||||
{
|
||||
if (arr_dtype[i] != MPI_LB &&
|
||||
arr_dtype[i] != MPI_UB)
|
||||
{
|
||||
leaf = convert_mpi_pvfs2_dtype(
|
||||
&arr_dtype[i], &old_pvfs_dtype_arr[arr_count]);
|
||||
if (leaf != 1)
|
||||
MPI_Type_free(&arr_dtype[i]);
|
||||
pvfs_arr_disp[arr_count] =
|
||||
(PVFS_size) arr_addr[i];
|
||||
if (has_lb_ub)
|
||||
{
|
||||
pvfs_arr_len[arr_count] =
|
||||
arr_int[i+1];
|
||||
}
|
||||
arr_count++;
|
||||
}
|
||||
}
|
||||
|
||||
/* If a MPI_UB or MPI_LB did exist, we have to
|
||||
* resize the datatype */
|
||||
if (has_lb_ub)
|
||||
{
|
||||
PVFS_Request *tmp_pvfs_dtype = NULL;
|
||||
if ((tmp_pvfs_dtype = ADIOI_Malloc(sizeof(PVFS_Request))) == NULL)
|
||||
fprintf(stderr, "convert_mpi_pvfs2_dtype: "
|
||||
"Failed to allocate PVFS_Request\n");
|
||||
|
||||
ret = PVFS_Request_struct(arr_count, pvfs_arr_len,
|
||||
pvfs_arr_disp,
|
||||
old_pvfs_dtype_arr, tmp_pvfs_dtype);
|
||||
if (ret != 0)
|
||||
fprintf(stderr, "Error in PVFS_Request_struct\n");
|
||||
|
||||
arr_count = 0;
|
||||
for (i = 0; i < arr_int[0]; i++)
|
||||
{
|
||||
if (arr_dtype[i] != MPI_LB &&
|
||||
arr_dtype[i] != MPI_UB)
|
||||
{
|
||||
PVFS_Request_free(&old_pvfs_dtype_arr[arr_count]);
|
||||
arr_count++;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef DEBUG_DTYPE
|
||||
fprintf(stderr, "STRUCT(WITHOUT %d LB or UB)(%d,[",
|
||||
arr_int[0] - arr_count, arr_count);
|
||||
for (i = 0; i < arr_count; i++)
|
||||
fprintf(stderr, "(%d,%Ld) ",
|
||||
pvfs_arr_len[i],
|
||||
pvfs_arr_disp[i]);
|
||||
fprintf(stderr, "]\n");
|
||||
fprintf(stderr, "RESIZED(LB = %Ld, EXTENT = %Ld)\n",
|
||||
pvfs_lb, pvfs_extent);
|
||||
#endif
|
||||
ret = PVFS_Request_resized(*tmp_pvfs_dtype,
|
||||
pvfs_lb, pvfs_extent, pvfs_dtype);
|
||||
if (ret != 0)
|
||||
fprintf(stderr, "Error in PVFS_Request_resize\n");
|
||||
|
||||
PVFS_Request_free(tmp_pvfs_dtype);
|
||||
ADIOI_Free(tmp_pvfs_dtype);
|
||||
}
|
||||
else /* No MPI_LB or MPI_UB datatypes */
|
||||
{
|
||||
ret = PVFS_Request_struct(arr_int[0], &arr_int[1],
|
||||
pvfs_arr_disp,
|
||||
old_pvfs_dtype_arr, pvfs_dtype);
|
||||
if (ret != 0)
|
||||
fprintf(stderr, "Error in PVFS_Request_struct\n");
|
||||
|
||||
for (i = 0; i < arr_int[0]; i++)
|
||||
{
|
||||
if (arr_dtype[i] != MPI_LB &&
|
||||
arr_dtype[i] != MPI_UB)
|
||||
PVFS_Request_free(&old_pvfs_dtype_arr[i]);
|
||||
}
|
||||
|
||||
#ifdef DEBUG_DTYPE
|
||||
print_dtype_info(combiner,
|
||||
num_int,
|
||||
num_addr,
|
||||
num_dtype,
|
||||
arr_int,
|
||||
arr_addr,
|
||||
arr_dtype);
|
||||
#endif
|
||||
}
|
||||
|
||||
ADIOI_Free(arr_int);
|
||||
ADIOI_Free(arr_addr);
|
||||
ADIOI_Free(arr_dtype);
|
||||
|
||||
ADIOI_Free(old_pvfs_dtype_arr);
|
||||
ADIOI_Free(pvfs_arr_disp);
|
||||
ADIOI_Free(pvfs_arr_len);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Shouldn't have gotten here */
|
||||
fprintf(stderr, "convert_mpi_pvfs2_dtype: SERIOUS ERROR\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
int convert_named(MPI_Datatype *mpi_dtype,
|
||||
PVFS_Request *pvfs_dtype, int combiner)
|
||||
{
|
||||
int ret = -1;
|
||||
#ifdef DEBUG_DTYPE
|
||||
fprintf(stderr, "NAMED");
|
||||
#endif
|
||||
|
||||
switch (*mpi_dtype)
|
||||
{
|
||||
case MPI_CHAR:
|
||||
ret = PVFS_Request_contiguous(1, PVFS_CHAR, pvfs_dtype);
|
||||
#ifdef DEBUG_DTYPE
|
||||
fprintf(stderr, "-MPI_CHAR\n");
|
||||
#endif
|
||||
break;
|
||||
case MPI_BYTE:
|
||||
ret = PVFS_Request_contiguous(1, PVFS_BYTE, pvfs_dtype);
|
||||
#ifdef DEBUG_DTYPE
|
||||
fprintf(stderr, "-MPI_BYTE\n");
|
||||
#endif
|
||||
break;
|
||||
case MPI_SHORT:
|
||||
ret = PVFS_Request_contiguous(1, PVFS_SHORT, pvfs_dtype);
|
||||
#ifdef DEBUG_DTYPE
|
||||
fprintf(stderr, "-MPI_SHORT\n");
|
||||
#endif
|
||||
break;
|
||||
case MPI_INT:
|
||||
ret = PVFS_Request_contiguous(1, PVFS_INT, pvfs_dtype);
|
||||
#ifdef DEBUG_DTYPE
|
||||
fprintf(stderr, "-MPI_INT\n");
|
||||
#endif
|
||||
break;
|
||||
case MPI_LONG:
|
||||
ret = PVFS_Request_contiguous(1, PVFS_LONG, pvfs_dtype);
|
||||
#ifdef DEBUG_DTYPE
|
||||
fprintf(stderr, "-MPI_LONG\n");
|
||||
#endif
|
||||
break;
|
||||
case MPI_FLOAT:
|
||||
ret = PVFS_Request_contiguous(1, PVFS_FLOAT, pvfs_dtype);
|
||||
#ifdef DEBUG_DTYPE
|
||||
fprintf(stderr, "-MPI_FLOAT\n");
|
||||
#endif
|
||||
break;
|
||||
case MPI_DOUBLE:
|
||||
ret = PVFS_Request_contiguous(1, PVFS_DOUBLE, pvfs_dtype);
|
||||
#ifdef DEBUG_DTYPE
|
||||
fprintf(stderr, "-MPI_DOUBLE\n");
|
||||
#endif
|
||||
break;
|
||||
case MPI_UNSIGNED_CHAR:
|
||||
ret = PVFS_Request_contiguous(1, PVFS_UNSIGNED_CHAR, pvfs_dtype);
|
||||
#ifdef DEBUG_DTYPE
|
||||
fprintf(stderr, "-MPI_UNSIGNED_CHAR\n");
|
||||
#endif
|
||||
break;
|
||||
case MPI_UNSIGNED_SHORT:
|
||||
ret = PVFS_Request_contiguous(1, PVFS_UNSIGNED, pvfs_dtype);
|
||||
#ifdef DEBUG_DTYPE
|
||||
fprintf(stderr, "-MPI_UNSIGNED_SHORT\n");
|
||||
#endif
|
||||
break;
|
||||
case MPI_UNSIGNED:
|
||||
ret = PVFS_Request_contiguous(1, PVFS_UNSIGNED, pvfs_dtype);
|
||||
#ifdef DEBUG_DTYPE
|
||||
fprintf(stderr, "-MPI_SHORT\n");
|
||||
#endif
|
||||
break;
|
||||
case MPI_UNSIGNED_LONG:
|
||||
ret = PVFS_Request_contiguous(1, PVFS_UNSIGNED_LONG, pvfs_dtype);
|
||||
#ifdef DEBUG_DTYPE
|
||||
fprintf(stderr, "-MPI_UNSIGNED_LONG\n");
|
||||
#endif
|
||||
break;
|
||||
case MPI_LONG_DOUBLE:
|
||||
ret = PVFS_Request_contiguous(1, PVFS_LONG_DOUBLE, pvfs_dtype);
|
||||
#ifdef DEBUG_DTYPE
|
||||
fprintf(stderr, "-MPI_LONG_DOUBLE\n");
|
||||
#endif
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr, "convert_named: predefined type not found");
|
||||
return -1;
|
||||
break;
|
||||
}
|
||||
if (ret != 0)
|
||||
fprintf(stderr, "convert_named: Datatype creation failed\n");
|
||||
return ret;
|
||||
}
|
||||
|
||||
void print_dtype_info(int combiner,
|
||||
int num_int,
|
||||
int num_addr,
|
||||
int num_dtype,
|
||||
int *arr_int,
|
||||
MPI_Aint *arr_addr,
|
||||
MPI_Datatype *arr_dtype)
|
||||
{
|
||||
int i = -1;
|
||||
switch (combiner)
|
||||
{
|
||||
case MPI_COMBINER_CONTIGUOUS:
|
||||
fprintf(stderr, "CONTIG(%d)\n", arr_int[0]);
|
||||
break;
|
||||
case MPI_COMBINER_VECTOR:
|
||||
fprintf(stderr, "VECTOR(%d,%d,%d)\n",
|
||||
arr_int[0], arr_int[1], arr_int[2]);
|
||||
break;
|
||||
case MPI_COMBINER_HVECTOR:
|
||||
fprintf(stderr, "HVECTOR(%d,%d,%d)\n",
|
||||
arr_int[0], arr_int[1],arr_addr[0]);
|
||||
break;
|
||||
case MPI_COMBINER_INDEXED:
|
||||
fprintf(stderr, "INDEXED(%d,[",
|
||||
arr_int[0]);
|
||||
for (i = 0; i < arr_int[0]; i++)
|
||||
fprintf(stderr, "(%d,%Ld) ",
|
||||
arr_int[1+i],
|
||||
(int64_t) arr_int[arr_int[0]+1+i]);
|
||||
fprintf(stderr, "]\n");
|
||||
break;
|
||||
case MPI_COMBINER_HINDEXED:
|
||||
fprintf(stderr, "HINDEXED(%d,[",
|
||||
arr_int[0]);
|
||||
for (i = 0; i < arr_int[0]; i++)
|
||||
fprintf(stderr, "(%d,%Ld) ",
|
||||
arr_int[1+i],
|
||||
(int64_t) arr_addr[i]);
|
||||
fprintf(stderr, "]\n");
|
||||
break;
|
||||
case MPI_COMBINER_STRUCT:
|
||||
fprintf(stderr, "STRUCT(%d,[",
|
||||
arr_int[0]);
|
||||
for (i = 0; i < arr_int[0]; i++)
|
||||
fprintf(stderr, "(%d,%Ld) ",
|
||||
arr_int[1+i],
|
||||
(int64_t) arr_addr[i]);
|
||||
fprintf(stderr, "]\n");
|
||||
break;
|
||||
case MPI_COMBINER_DUP:
|
||||
fprintf(stderr, "DUP\n");
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr, "no available information on this datatype");
|
||||
}
|
||||
}
|
665
ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_io_list.c
Обычный файл
665
ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_io_list.c
Обычный файл
@ -0,0 +1,665 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; -*-
|
||||
* vim: ts=8 sts=4 sw=4 noexpandtab
|
||||
*
|
||||
* Copyright (C) 2006 Unknown (TODO: fix this)
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include "adio.h"
|
||||
#include "adio_extern.h"
|
||||
#include "ad_pvfs2.h"
|
||||
#include "ad_pvfs2_io.h"
|
||||
#include "ad_pvfs2_common.h"
|
||||
|
||||
#define COALESCE_REGIONS /* TODO: would we ever want to *not* coalesce? */
|
||||
#define MAX_OL_COUNT 64
|
||||
int ADIOI_PVFS2_StridedListIO(ADIO_File fd, void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Status *status,
|
||||
int *error_code, int rw_type)
|
||||
{
|
||||
/* list I/O parameters */
|
||||
int i = -1, ret = -1;
|
||||
int tmp_filetype_size = -1;
|
||||
int64_t cur_io_size = 0, io_size = 0;
|
||||
int etype_size = -1;
|
||||
int num_etypes_in_filetype = -1, num_filetypes = -1;
|
||||
int etypes_in_filetype = -1, size_in_filetype = -1;
|
||||
int bytes_into_filetype = 0;
|
||||
MPI_Offset total_bytes_accessed = 0;
|
||||
|
||||
/* parameters for offset-length pairs arrays */
|
||||
int64_t buf_off_arr[MAX_OL_COUNT];
|
||||
int32_t buf_len_arr[MAX_OL_COUNT];
|
||||
int64_t file_off_arr[MAX_OL_COUNT];
|
||||
int32_t file_len_arr[MAX_OL_COUNT];
|
||||
int32_t buf_ol_count = 0;
|
||||
int32_t file_ol_count = 0;
|
||||
|
||||
/* parameters for flattened memory and file datatypes*/
|
||||
int flat_buf_index = 0;
|
||||
int flat_file_index = 0;
|
||||
int64_t cur_flat_buf_reg_off = 0;
|
||||
int64_t cur_flat_file_reg_off = 0;
|
||||
ADIOI_Flatlist_node *flat_buf_p, *flat_file_p;
|
||||
int buftype_size = -1, buftype_extent = -1,
|
||||
filetype_size = -1, filetype_extent = -1;
|
||||
int buftype_is_contig = -1, filetype_is_contig = -1;
|
||||
|
||||
/* PVFS2 specific parameters */
|
||||
PVFS_Request mem_req, file_req;
|
||||
ADIOI_PVFS2_fs * pvfs_fs;
|
||||
PVFS_sysresp_io resp_io;
|
||||
static char myname[] = "ADIOI_PVFS2_STRIDED_LISTIO";
|
||||
|
||||
if (fd->atomicity) {
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
|
||||
MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__,
|
||||
MPI_ERR_ARG,
|
||||
"Atomic noncontiguous writes"
|
||||
" are not supported by PVFS2", 0);
|
||||
return -1;
|
||||
}
|
||||
|
||||
MPI_Type_size(fd->filetype, &filetype_size);
|
||||
if (filetype_size == 0) {
|
||||
*error_code = MPI_SUCCESS;
|
||||
return -1;
|
||||
}
|
||||
MPI_Type_extent(fd->filetype, &filetype_extent);
|
||||
MPI_Type_size(datatype, &buftype_size);
|
||||
MPI_Type_extent(datatype, &buftype_extent);
|
||||
io_size = buftype_size*count;
|
||||
|
||||
pvfs_fs = (ADIOI_PVFS2_fs*)fd->fs_ptr;
|
||||
|
||||
/* Flatten the memory datatype
|
||||
* (file datatype has already been flattened in ADIO open
|
||||
* unless it is contibuous, then we need to flatten it manually)
|
||||
* and set the correct buffers for flat_buf and flat_file */
|
||||
ADIOI_Datatype_iscontig(datatype, &buftype_is_contig);
|
||||
ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);
|
||||
if (buftype_is_contig == 0)
|
||||
{
|
||||
ADIOI_Flatten_datatype(datatype);
|
||||
flat_buf_p = ADIOI_Flatlist;
|
||||
while (flat_buf_p->type != datatype)
|
||||
flat_buf_p = flat_buf_p->next;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* flatten and add to the list */
|
||||
flat_buf_p = (ADIOI_Flatlist_node *) ADIOI_Malloc
|
||||
(sizeof(ADIOI_Flatlist_node));
|
||||
flat_buf_p->blocklens = (ADIO_Offset*)ADIOI_Malloc(sizeof(ADIO_Offset));
|
||||
flat_buf_p->indices =
|
||||
(ADIO_Offset *) ADIOI_Malloc(sizeof(ADIO_Offset));
|
||||
/* For the buffer, we can optimize the buftype, this is not
|
||||
* possible with the filetype since it is tiled */
|
||||
buftype_size = buftype_size*count;
|
||||
buftype_extent = buftype_size*count;
|
||||
flat_buf_p->blocklens[0] = buftype_size;
|
||||
flat_buf_p->indices[0] = 0;
|
||||
flat_buf_p->count = 1;
|
||||
}
|
||||
if (filetype_is_contig == 0)
|
||||
{
|
||||
/* TODO: why does avery say this should already have been
|
||||
* flattened in Open, but also says contig types don't get
|
||||
* flattened */
|
||||
ADIOI_Flatten_datatype(fd->filetype);
|
||||
flat_file_p = ADIOI_Flatlist;
|
||||
while (flat_file_p->type != fd->filetype)
|
||||
flat_file_p = flat_file_p->next;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* flatten and add to the list */
|
||||
flat_file_p = (ADIOI_Flatlist_node *) ADIOI_Malloc
|
||||
(sizeof(ADIOI_Flatlist_node));
|
||||
flat_file_p->blocklens =(ADIO_Offset*)ADIOI_Malloc(sizeof(ADIO_Offset));
|
||||
flat_file_p->indices =
|
||||
(ADIO_Offset *) ADIOI_Malloc(sizeof(ADIO_Offset));
|
||||
flat_file_p->blocklens[0] = filetype_size;
|
||||
flat_file_p->indices[0] = 0;
|
||||
flat_file_p->count = 1;
|
||||
}
|
||||
|
||||
/* Find out where we are in the flattened filetype (the block index,
|
||||
* how far into the block, and how many bytes_into_filetype)
|
||||
* If the file_ptr_type == ADIO_INDIVIDUAL we will use disp, fp_ind
|
||||
* to figure this out (offset should always be zero)
|
||||
* If file_ptr_type == ADIO_EXPLICIT, we will use disp and offset
|
||||
* to figure this out. */
|
||||
|
||||
etype_size = fd->etype_size;
|
||||
num_etypes_in_filetype = filetype_size / etype_size;
|
||||
if (file_ptr_type == ADIO_INDIVIDUAL)
|
||||
{
|
||||
int flag = 0;
|
||||
/* Should have already been flattened in ADIO_Open*/
|
||||
num_filetypes = -1;
|
||||
while (!flag)
|
||||
{
|
||||
num_filetypes++;
|
||||
for (i = 0; i < flat_file_p->count; i++)
|
||||
{
|
||||
/* Start on a non zero-length region */
|
||||
if (flat_file_p->blocklens[i])
|
||||
{
|
||||
if (fd->disp + flat_file_p->indices[i] +
|
||||
(num_filetypes * filetype_extent) +
|
||||
flat_file_p->blocklens[i] > fd->fp_ind &&
|
||||
fd->disp + flat_file_p->indices[i] <=
|
||||
fd->fp_ind)
|
||||
{
|
||||
flat_file_index = i;
|
||||
cur_flat_file_reg_off = fd->fp_ind -
|
||||
(fd->disp + flat_file_p->indices[i] +
|
||||
(num_filetypes * filetype_extent));
|
||||
flag = 1;
|
||||
break;
|
||||
}
|
||||
else
|
||||
bytes_into_filetype += flat_file_p->blocklens[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
/* Impossible that we don't find it in this datatype */
|
||||
assert(i != flat_file_p->count);
|
||||
}
|
||||
else
|
||||
{
|
||||
num_filetypes = (int) (offset / num_etypes_in_filetype);
|
||||
etypes_in_filetype = (int) (offset % num_etypes_in_filetype);
|
||||
size_in_filetype = etypes_in_filetype * etype_size;
|
||||
|
||||
tmp_filetype_size = 0;
|
||||
for (i=0; i<flat_file_p->count; i++) {
|
||||
tmp_filetype_size += flat_file_p->blocklens[i];
|
||||
if (tmp_filetype_size > size_in_filetype)
|
||||
{
|
||||
flat_file_index = i;
|
||||
cur_flat_file_reg_off = flat_file_p->blocklens[i] -
|
||||
(tmp_filetype_size - size_in_filetype);
|
||||
bytes_into_filetype = offset * filetype_size -
|
||||
flat_file_p->blocklens[i];
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
#ifdef DEBUG_LIST
|
||||
fprintf(stderr, "ADIOI_PVFS2_StridedListIO: (fd->fp_ind=%Ld,fd->disp=%Ld,"
|
||||
" offset=%Ld)\n(flat_file_index=%d,cur_flat_file_reg_off=%Ld,"
|
||||
"bytes_into_filetype=%d)\n",
|
||||
fd->fp_ind, fd->disp, offset, flat_file_index,
|
||||
cur_flat_file_reg_off, bytes_into_filetype);
|
||||
#endif
|
||||
#ifdef DEBUG_LIST2
|
||||
fprintf(stderr, "flat_buf:\n");
|
||||
for (i = 0; i < flat_buf_p->count; i++)
|
||||
fprintf(stderr, "(offset, length) = (%Ld, %d)\n",
|
||||
flat_buf_p->indices[i],
|
||||
flat_buf_p->blocklens[i]);
|
||||
fprintf(stderr, "flat_file:\n");
|
||||
for (i = 0; i < flat_file_p->count; i++)
|
||||
fprintf(stderr, "(offset, length) = (%Ld, %d)\n",
|
||||
flat_file_p->indices[i],
|
||||
flat_file_p->blocklens[i]);
|
||||
#endif
|
||||
|
||||
/* total data written */
|
||||
cur_io_size = 0;
|
||||
while (cur_io_size != io_size)
|
||||
{
|
||||
/* Initialize the temporarily unrolling lists and
|
||||
* and associated variables */
|
||||
buf_ol_count = 0;
|
||||
file_ol_count = 0;
|
||||
for (i = 0; i < MAX_OL_COUNT; i++)
|
||||
{
|
||||
buf_off_arr[i] = 0;
|
||||
buf_len_arr[i] = 0;
|
||||
file_off_arr[i] = 0;
|
||||
file_len_arr[i] = 0;
|
||||
}
|
||||
|
||||
/* Generate the offset-length pairs for a
|
||||
* list I/O operation */
|
||||
gen_listio_arr(flat_buf_p,
|
||||
&flat_buf_index,
|
||||
&cur_flat_buf_reg_off,
|
||||
buftype_size,
|
||||
buftype_extent,
|
||||
flat_file_p,
|
||||
&flat_file_index,
|
||||
&cur_flat_file_reg_off,
|
||||
filetype_size,
|
||||
filetype_extent,
|
||||
MAX_OL_COUNT,
|
||||
fd->disp,
|
||||
bytes_into_filetype,
|
||||
&cur_io_size,
|
||||
io_size,
|
||||
buf_off_arr,
|
||||
buf_len_arr,
|
||||
&buf_ol_count,
|
||||
file_off_arr,
|
||||
file_len_arr,
|
||||
&file_ol_count);
|
||||
|
||||
assert(buf_ol_count <= MAX_OL_COUNT);
|
||||
assert(file_ol_count <= MAX_OL_COUNT);
|
||||
#ifdef DEBUG_LIST2
|
||||
print_buf_file_ol_pairs(buf_off_arr,
|
||||
buf_len_arr,
|
||||
buf_ol_count,
|
||||
file_off_arr,
|
||||
file_len_arr,
|
||||
file_ol_count,
|
||||
buf,
|
||||
rw_type);
|
||||
#endif
|
||||
#ifdef DEBUG_LIST2
|
||||
do {
|
||||
int y, z;
|
||||
fprintf(stderr, "ad_pvfs2_io_list.c::\n");
|
||||
for (y = 0; y < buf_ol_count; y++)
|
||||
{
|
||||
for (z = 0; z < buf_len_arr[y]; z++)
|
||||
{
|
||||
fprintf(stderr, "buf[%d][%d]=%c\n",
|
||||
y, z, ((char *) buf + buf_off_arr[y])[z]);
|
||||
}
|
||||
}
|
||||
} while (0);
|
||||
#endif
|
||||
|
||||
/* Run list I/O operation */
|
||||
ret = PVFS_Request_hindexed(buf_ol_count, buf_len_arr,
|
||||
buf_off_arr, PVFS_BYTE, &mem_req);
|
||||
|
||||
ret = PVFS_Request_hindexed(file_ol_count, file_len_arr,
|
||||
file_off_arr, PVFS_BYTE, &file_req);
|
||||
if (rw_type == READ)
|
||||
{
|
||||
ret = PVFS_sys_read(pvfs_fs->object_ref, file_req, 0,
|
||||
buf, mem_req,
|
||||
&(pvfs_fs->credentials), &resp_io);
|
||||
}
|
||||
else
|
||||
{
|
||||
ret = PVFS_sys_write(pvfs_fs->object_ref, file_req, 0,
|
||||
buf, mem_req,
|
||||
&(pvfs_fs->credentials), &resp_io);
|
||||
}
|
||||
if (ret != 0)
|
||||
{
|
||||
fprintf(stderr, "ADIOI_PVFS2_StridedListIO: Warning - PVFS_sys_"
|
||||
"read/write returned %d and completed %Ld bytes.\n",
|
||||
ret, resp_io.total_completed);
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
|
||||
MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__,
|
||||
ADIOI_PVFS2_error_convert(ret),
|
||||
"Error in PVFS_sys_io \n", 0);
|
||||
PVFS_Request_free(&mem_req);
|
||||
PVFS_Request_free(&file_req);
|
||||
goto error_state;
|
||||
}
|
||||
total_bytes_accessed += resp_io.total_completed;
|
||||
|
||||
PVFS_Request_free(&mem_req);
|
||||
PVFS_Request_free(&file_req);
|
||||
}
|
||||
|
||||
#ifdef DEBUG_LIST
|
||||
fprintf(stderr, "ADIOI_PVFS2_StridedListIO: "
|
||||
"total_bytes_accessed=%Ld,ret=%d\n",
|
||||
total_bytes_accessed, ret);
|
||||
#endif
|
||||
|
||||
if (file_ptr_type == ADIO_INDIVIDUAL)
|
||||
fd->fp_ind += total_bytes_accessed;
|
||||
*error_code = MPI_SUCCESS;
|
||||
|
||||
error_state:
|
||||
#ifdef HAVE_STATUS_SET_BYTES
|
||||
/* TODO: why the cast? */
|
||||
MPIR_Status_set_bytes(status, datatype, (int)total_bytes_accessed);
|
||||
/* This is a temporary way of filling in status. The right way is to
|
||||
keep track of how much data was actually written by ADIOI_BUFFERED_WRITE. */
|
||||
#endif
|
||||
if (buftype_is_contig == 0)
|
||||
ADIOI_Delete_flattened(datatype);
|
||||
else
|
||||
{
|
||||
ADIOI_Free(flat_buf_p->blocklens);
|
||||
ADIOI_Free(flat_buf_p->indices);
|
||||
ADIOI_Free(flat_buf_p);
|
||||
}
|
||||
|
||||
if (filetype_is_contig == 0)
|
||||
ADIOI_Delete_flattened(fd->filetype);
|
||||
else
|
||||
{
|
||||
ADIOI_Free(flat_file_p->blocklens);
|
||||
ADIOI_Free(flat_file_p->indices);
|
||||
ADIOI_Free(flat_file_p);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* To do: Fix the code to coalesce the offset-length pairs for memory
|
||||
* and file. */
|
||||
|
||||
/* gen_listio_arr - fills in offset-length pairs for memory and file
|
||||
* for list I/O */
|
||||
int gen_listio_arr(ADIOI_Flatlist_node *flat_buf_p,
|
||||
int *flat_buf_index_p,
|
||||
int64_t *cur_flat_buf_reg_off_p,
|
||||
int flat_buf_size,
|
||||
int flat_buf_extent,
|
||||
ADIOI_Flatlist_node *flat_file_p,
|
||||
int *flat_file_index_p,
|
||||
int64_t *cur_flat_file_reg_off_p,
|
||||
int flat_file_size,
|
||||
int flat_file_extent,
|
||||
int max_ol_count,
|
||||
ADIO_Offset disp,
|
||||
int bytes_into_filetype,
|
||||
int64_t *bytes_completed,
|
||||
int64_t total_io_size,
|
||||
int64_t buf_off_arr[],
|
||||
int32_t buf_len_arr[],
|
||||
int32_t *buf_ol_count_p,
|
||||
int64_t file_off_arr[],
|
||||
int32_t file_len_arr[],
|
||||
int32_t *file_ol_count_p)
|
||||
{
|
||||
int region_size = -1;
|
||||
|
||||
/* parameters for flattened memory and file datatypes*/
|
||||
int64_t cur_flat_buf_reg_left = 0;
|
||||
int64_t cur_flat_file_reg_left = 0;
|
||||
|
||||
#ifdef DEBUG_LIST2
|
||||
fprintf(stderr, "gen_list_arr:\n");
|
||||
#endif
|
||||
|
||||
if ((*buf_ol_count_p) != 0 ||(*file_ol_count_p) != 0)
|
||||
{
|
||||
fprintf(stderr, "buf_ol_count != 0 || file_ol_count != 0\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Start on a non-zero memory and file region
|
||||
* Note this does not affect the bytes_completed
|
||||
* since no data is in these regions. Initialize the
|
||||
* first memory and file offsets. */
|
||||
while (flat_buf_p->blocklens[(*flat_buf_index_p)] == 0)
|
||||
{
|
||||
(*flat_buf_index_p) = ((*flat_buf_index_p) + 1) %
|
||||
flat_buf_p->count;
|
||||
}
|
||||
buf_off_arr[*buf_ol_count_p] =
|
||||
(*bytes_completed / flat_buf_size) *
|
||||
flat_buf_extent +
|
||||
flat_buf_p->indices[*flat_buf_index_p] +
|
||||
*cur_flat_buf_reg_off_p;
|
||||
buf_len_arr[*buf_ol_count_p] = 0;
|
||||
|
||||
while (flat_file_p->blocklens[(*flat_file_index_p)] == 0)
|
||||
{
|
||||
(*flat_file_index_p) = ((*flat_file_index_p) + 1) %
|
||||
flat_file_p->count;
|
||||
}
|
||||
file_off_arr[*file_ol_count_p] = disp +
|
||||
(((bytes_into_filetype + *bytes_completed) / flat_file_size) *
|
||||
flat_file_extent) +
|
||||
flat_file_p->indices[*flat_file_index_p] +
|
||||
*cur_flat_file_reg_off_p;
|
||||
file_len_arr[*file_ol_count_p] = 0;
|
||||
|
||||
#ifdef DEBUG_LIST2
|
||||
fprintf(stderr, "initial buf_off_arr[%d] = %Ld\n", *buf_ol_count_p,
|
||||
buf_off_arr[*buf_ol_count_p]);
|
||||
fprintf(stderr, "initial file_off_arr[%d] = %Ld\n", *file_ol_count_p,
|
||||
file_off_arr[*file_ol_count_p]);
|
||||
#endif
|
||||
|
||||
while (*bytes_completed != total_io_size
|
||||
&& (*buf_ol_count_p) < max_ol_count
|
||||
&& (*file_ol_count_p) < max_ol_count)
|
||||
{
|
||||
/* How much data is left in the current piece in
|
||||
* the flattened datatypes */
|
||||
cur_flat_buf_reg_left = flat_buf_p->blocklens[*flat_buf_index_p]
|
||||
- *cur_flat_buf_reg_off_p;
|
||||
cur_flat_file_reg_left = flat_file_p->blocklens[*flat_file_index_p]
|
||||
- *cur_flat_file_reg_off_p;
|
||||
|
||||
#ifdef DEBUG_LIST2
|
||||
fprintf(stderr,
|
||||
"flat_buf_index=%d flat_buf->blocklens[%d]=%d\n"
|
||||
"cur_flat_buf_reg_left=%Ld "
|
||||
"*cur_flat_buf_reg_off_p=%Ld\n"
|
||||
"flat_file_index=%d flat_file->blocklens[%d]=%d\n"
|
||||
"cur_flat_file_reg_left=%Ld "
|
||||
"*cur_flat_file_reg_off_p=%Ld\n"
|
||||
"bytes_completed=%Ld\n"
|
||||
"buf_ol_count=%d file_ol_count=%d\n"
|
||||
"buf_len_arr[%d]=%d file_len_arr[%d]=%d\n\n",
|
||||
*flat_buf_index_p, *flat_buf_index_p,
|
||||
flat_buf_p->blocklens[*flat_buf_index_p],
|
||||
cur_flat_buf_reg_left,
|
||||
*cur_flat_buf_reg_off_p,
|
||||
*flat_file_index_p, *flat_file_index_p,
|
||||
flat_file_p->blocklens[*flat_file_index_p],
|
||||
cur_flat_file_reg_left,
|
||||
*cur_flat_file_reg_off_p,
|
||||
*bytes_completed,
|
||||
*buf_ol_count_p, *file_ol_count_p,
|
||||
*buf_ol_count_p,
|
||||
buf_len_arr[*buf_ol_count_p],
|
||||
*file_ol_count_p,
|
||||
file_len_arr[*file_ol_count_p]);
|
||||
#endif
|
||||
|
||||
/* What is the size of the next contiguous region agreed
|
||||
* upon by both memory and file regions that does not
|
||||
* surpass the file size */
|
||||
if (cur_flat_buf_reg_left > cur_flat_file_reg_left)
|
||||
region_size = cur_flat_file_reg_left;
|
||||
else
|
||||
region_size = cur_flat_buf_reg_left;
|
||||
|
||||
if (region_size > total_io_size - *bytes_completed)
|
||||
region_size = total_io_size - *bytes_completed;
|
||||
|
||||
/* Add this piece to both the mem and file arrays
|
||||
* coalescing offset-length pairs if possible and advance
|
||||
* the pointers through the flatten mem and file datatypes
|
||||
* as well Note: no more than a single piece can be done
|
||||
* since we take the smallest one possible */
|
||||
|
||||
if (cur_flat_buf_reg_left == region_size)
|
||||
{
|
||||
#ifdef DEBUG_LIST2
|
||||
fprintf(stderr, "reached end of memory block...\n");
|
||||
#endif
|
||||
(*flat_buf_index_p) = ((*flat_buf_index_p) + 1) %
|
||||
flat_buf_p->count;
|
||||
while (flat_buf_p->blocklens[(*flat_buf_index_p)] == 0)
|
||||
{
|
||||
(*flat_buf_index_p) = ((*flat_buf_index_p) + 1) %
|
||||
flat_buf_p->count;
|
||||
}
|
||||
*cur_flat_buf_reg_off_p = 0;
|
||||
|
||||
#ifdef COALESCE_REGIONS
|
||||
if (*buf_ol_count_p != 0)
|
||||
{
|
||||
if (buf_off_arr[(*buf_ol_count_p) - 1] +
|
||||
buf_len_arr[(*buf_ol_count_p) - 1] ==
|
||||
buf_off_arr[*buf_ol_count_p])
|
||||
{
|
||||
buf_len_arr[(*buf_ol_count_p) - 1] +=
|
||||
region_size;
|
||||
}
|
||||
else
|
||||
{
|
||||
buf_len_arr[*buf_ol_count_p] += region_size;
|
||||
(*buf_ol_count_p)++;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
#endif
|
||||
buf_len_arr[*buf_ol_count_p] += region_size;
|
||||
(*buf_ol_count_p)++;
|
||||
#ifdef COALESCE_REGIONS
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Don't prepare for the next piece if we have reached
|
||||
* the limit or else it will segment fault. */
|
||||
if ((*buf_ol_count_p) != max_ol_count)
|
||||
{
|
||||
buf_off_arr[*buf_ol_count_p] =
|
||||
((*bytes_completed + region_size) / flat_buf_size) *
|
||||
flat_buf_extent +
|
||||
flat_buf_p->indices[*flat_buf_index_p] +
|
||||
(*cur_flat_buf_reg_off_p);
|
||||
buf_len_arr[*buf_ol_count_p] = 0;
|
||||
}
|
||||
}
|
||||
else if (cur_flat_buf_reg_left > region_size)
|
||||
{
|
||||
#ifdef DEBUG_LIST2
|
||||
fprintf(stderr, "advanced %d in memory block...\n",
|
||||
region_size);
|
||||
#endif
|
||||
(*cur_flat_buf_reg_off_p) += region_size;
|
||||
buf_len_arr[*buf_ol_count_p] += region_size;
|
||||
}
|
||||
else
|
||||
{
|
||||
fprintf(stderr, "gen_listio_arr: Error\n");
|
||||
}
|
||||
|
||||
/* To calculate the absolute file offset we need to
|
||||
* add the disp, how many filetypes we have gone through,
|
||||
* the relative block offset in the filetype and how far
|
||||
* into the block we have gone. */
|
||||
if (cur_flat_file_reg_left == region_size)
|
||||
{
|
||||
#ifdef DEBUG_LIST2
|
||||
fprintf(stderr, "reached end of file block...\n");
|
||||
#endif
|
||||
(*flat_file_index_p) = ((*flat_file_index_p) + 1) %
|
||||
flat_file_p->count;
|
||||
while (flat_file_p->blocklens[(*flat_file_index_p)] == 0)
|
||||
{
|
||||
(*flat_file_index_p) = ((*flat_file_index_p) + 1) %
|
||||
flat_file_p->count;
|
||||
}
|
||||
(*cur_flat_file_reg_off_p) = 0;
|
||||
|
||||
#ifdef COALESCE_REGIONS
|
||||
if (*file_ol_count_p != 0)
|
||||
{
|
||||
if (file_off_arr[(*file_ol_count_p) - 1] +
|
||||
file_len_arr[(*file_ol_count_p) - 1] ==
|
||||
file_off_arr[*file_ol_count_p])
|
||||
{
|
||||
file_len_arr[(*file_ol_count_p) - 1] +=
|
||||
region_size;
|
||||
}
|
||||
else
|
||||
{
|
||||
file_len_arr[*file_ol_count_p] += region_size;
|
||||
(*file_ol_count_p)++;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
#endif
|
||||
file_len_arr[*file_ol_count_p] += region_size;
|
||||
(*file_ol_count_p)++;
|
||||
#ifdef COALESCE_REGIONS
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Don't prepare for the next piece if we have reached
|
||||
* the limit or else it will segment fault. */
|
||||
if ((*file_ol_count_p) != max_ol_count)
|
||||
{
|
||||
file_off_arr[*file_ol_count_p] = disp +
|
||||
(((bytes_into_filetype + *bytes_completed + region_size)
|
||||
/ flat_file_size) *
|
||||
flat_file_extent) +
|
||||
flat_file_p->indices[*flat_file_index_p] +
|
||||
(*cur_flat_file_reg_off_p);
|
||||
file_len_arr[*file_ol_count_p] = 0;
|
||||
}
|
||||
}
|
||||
else if (cur_flat_file_reg_left > region_size)
|
||||
{
|
||||
#ifdef DEBUG_LIST2
|
||||
fprintf(stderr, "advanced %d in file block...\n",
|
||||
region_size);
|
||||
#endif
|
||||
(*cur_flat_file_reg_off_p) += region_size;
|
||||
file_len_arr[*file_ol_count_p] += region_size;
|
||||
}
|
||||
else
|
||||
{
|
||||
fprintf(stderr, "gen_listio_arr: Error\n");
|
||||
}
|
||||
#ifdef DEBUG_LIST2
|
||||
fprintf(stderr,
|
||||
"------------------------------\n\n");
|
||||
#endif
|
||||
*bytes_completed += region_size;
|
||||
}
|
||||
/* Increment the count if we stopped in the middle of a
|
||||
* memory or file region */
|
||||
if (*cur_flat_buf_reg_off_p != 0)
|
||||
(*buf_ol_count_p)++;
|
||||
if (*cur_flat_file_reg_off_p != 0)
|
||||
(*file_ol_count_p)++;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * print_buf_file_ol_pairs - debugging aid: dump the memory and file
 * offset-length pair lists to stderr.
 *
 * buf_off_arr/buf_len_arr:   memory pairs, buf_ol_count entries
 * file_off_arr/file_len_arr: file pairs, file_ol_count entries
 * buf, rw_type:              not read by this function
 *
 * 64-bit offsets are cast to long long and printed with %lld ("%Ld" is not
 * a valid conversion for integer arguments).
 */
void print_buf_file_ol_pairs(int64_t buf_off_arr[],
                             int32_t buf_len_arr[],
                             int32_t buf_ol_count,
                             int64_t file_off_arr[],
                             int32_t file_len_arr[],
                             int32_t file_ol_count,
                             void *buf,
                             int rw_type)
{
    int i = -1;

    fprintf(stderr, "buf_ol_pairs(offset,length) count = %d\n",
            buf_ol_count);
    for (i = 0; i < buf_ol_count; i++)
    {
        fprintf(stderr, "(%lld, %d) ",
                (long long) buf_off_arr[i], buf_len_arr[i]);
    }
    fprintf(stderr, "\n");

    fprintf(stderr, "file_ol_pairs(offset,length) count = %d\n",
            file_ol_count);
    for (i = 0; i < file_ol_count; i++)
    {
        fprintf(stderr, "(%lld, %d) ",
                (long long) file_off_arr[i], file_len_arr[i]);
    }
    fprintf(stderr, "\n\n");
}
|
@ -8,7 +8,7 @@
|
||||
#include "adio.h"
|
||||
#include "adio_extern.h"
|
||||
#include "ad_pvfs2.h"
|
||||
|
||||
#include "ad_pvfs2_io.h"
|
||||
#include "ad_pvfs2_common.h"
|
||||
|
||||
void ADIOI_PVFS2_ReadContig(ADIO_File fd, void *buf, int count,
|
||||
@ -92,898 +92,76 @@ fn_exit:
|
||||
return;
|
||||
}
|
||||
|
||||
static int ADIOI_PVFS2_ReadStridedListIO(ADIO_File fd, void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Status *status,
|
||||
int *error_code)
|
||||
{
|
||||
return ADIOI_PVFS2_StridedListIO(fd, buf, count,
|
||||
datatype, file_ptr_type,
|
||||
offset, status,
|
||||
error_code, READ);
|
||||
}
|
||||
|
||||
static int ADIOI_PVFS2_ReadStridedDtypeIO(ADIO_File fd, void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Status *status,
|
||||
int *error_code)
|
||||
{
|
||||
return ADIOI_PVFS2_StridedDtypeIO(fd, buf, count,
|
||||
datatype, file_ptr_type,
|
||||
offset, status, error_code,
|
||||
READ);
|
||||
}
|
||||
|
||||
void ADIOI_PVFS2_ReadStrided(ADIO_File fd, void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Status *status, int
|
||||
*error_code)
|
||||
{
|
||||
/* offset is in units of etype relative to the filetype. */
|
||||
ADIOI_Flatlist_node *flat_buf, *flat_file;
|
||||
int i, j, k, brd_size, frd_size=0, st_index=0;
|
||||
int bufsize, sum, n_etypes_in_filetype, size_in_filetype;
|
||||
int n_filetypes, etype_in_filetype;
|
||||
ADIO_Offset abs_off_in_filetype=0;
|
||||
int filetype_size, etype_size, buftype_size;
|
||||
MPI_Aint filetype_extent, buftype_extent;
|
||||
int buf_count, buftype_is_contig, filetype_is_contig;
|
||||
ADIO_Offset off, disp, start_off, initial_off;
|
||||
int flag, st_frd_size, st_n_filetypes;
|
||||
/* four ways (to date) that we can carry out strided i/o accesses:
|
||||
* - naive posix
|
||||
* - 'true' Datatype (from avery)
|
||||
* - new List I/O (from avery)
|
||||
* - classic List I/O (the one that's always been in ROMIO)
|
||||
* I imagine we'll keep Datatype as an optional optimization, and afer a
|
||||
* release or two promote it to the default
|
||||
*/
|
||||
int ret = -1;
|
||||
|
||||
int mem_list_count, file_list_count;
|
||||
PVFS_size *mem_offsets;
|
||||
int64_t *file_offsets;
|
||||
int *mem_lengths;
|
||||
int32_t *file_lengths;
|
||||
int total_blks_to_read;
|
||||
|
||||
int max_mem_list, max_file_list;
|
||||
|
||||
int b_blks_read;
|
||||
int f_data_read;
|
||||
int size_read=0, n_read_lists, extra_blks;
|
||||
|
||||
int end_brd_size, end_frd_size;
|
||||
int start_k, start_j, new_file_read, new_buffer_read;
|
||||
int start_mem_offset;
|
||||
PVFS_Request mem_req, file_req;
|
||||
ADIOI_PVFS2_fs * pvfs_fs;
|
||||
PVFS_sysresp_io resp_io;
|
||||
int err_flag=0;
|
||||
MPI_Offset total_bytes_read = 0;
|
||||
static char myname[] = "ADIOI_PVFS2_ReadStrided";
|
||||
|
||||
#define MAX_ARRAY_SIZE 64
|
||||
|
||||
*error_code = MPI_SUCCESS; /* changed below if error */
|
||||
|
||||
ADIOI_Datatype_iscontig(datatype, &buftype_is_contig);
|
||||
ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);
|
||||
|
||||
/* the HDF5 tests showed a bug in this list processing code (see many many
|
||||
* lines down below). We added a workaround, but common HDF5 file types
|
||||
* are actually contiguous and do not need the expensive workaround */
|
||||
if (!filetype_is_contig) {
|
||||
flat_file = ADIOI_Flatlist;
|
||||
while (flat_file->type != fd->filetype) flat_file = flat_file->next;
|
||||
if (flat_file->count == 1 && !buftype_is_contig)
|
||||
filetype_is_contig = 1;
|
||||
}
|
||||
|
||||
MPI_Type_size(fd->filetype, &filetype_size);
|
||||
if ( ! filetype_size ) {
|
||||
*error_code = MPI_SUCCESS;
|
||||
if (fd->hints->fs_hints.pvfs2.posix_read == ADIOI_HINT_ENABLE) {
|
||||
ADIOI_GEN_ReadStrided(fd, buf, count, datatype,
|
||||
file_ptr_type, offset, status, error_code);
|
||||
return;
|
||||
}
|
||||
if (fd->hints->fs_hints.pvfs2.dtype_read == ADIOI_HINT_ENABLE) {
|
||||
ret = ADIOI_PVFS2_ReadStridedDtypeIO(fd, buf, count,
|
||||
datatype, file_ptr_type,
|
||||
offset, status, error_code);
|
||||
|
||||
MPI_Type_extent(fd->filetype, &filetype_extent);
|
||||
MPI_Type_size(datatype, &buftype_size);
|
||||
MPI_Type_extent(datatype, &buftype_extent);
|
||||
etype_size = fd->etype_size;
|
||||
|
||||
bufsize = buftype_size * count;
|
||||
|
||||
pvfs_fs = (ADIOI_PVFS2_fs*)fd->fs_ptr;
|
||||
|
||||
if (!buftype_is_contig && filetype_is_contig) {
|
||||
|
||||
/* noncontiguous in memory, contiguous in file. */
|
||||
int64_t file_offsets;
|
||||
int32_t file_lengths;
|
||||
|
||||
ADIOI_Flatten_datatype(datatype);
|
||||
flat_buf = ADIOI_Flatlist;
|
||||
while (flat_buf->type != datatype) flat_buf = flat_buf->next;
|
||||
|
||||
off = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind :
|
||||
fd->disp + etype_size * offset;
|
||||
|
||||
file_list_count = 1;
|
||||
file_offsets = off;
|
||||
file_lengths = 0;
|
||||
total_blks_to_read = count*flat_buf->count;
|
||||
b_blks_read = 0;
|
||||
|
||||
/* allocate arrays according to max usage */
|
||||
if (total_blks_to_read > MAX_ARRAY_SIZE)
|
||||
mem_list_count = MAX_ARRAY_SIZE;
|
||||
else mem_list_count = total_blks_to_read;
|
||||
mem_offsets = (PVFS_size*)ADIOI_Malloc(mem_list_count*sizeof(PVFS_size));
|
||||
mem_lengths = (int*)ADIOI_Malloc(mem_list_count*sizeof(int));
|
||||
|
||||
/* TODO: CHECK RESULTS OF MEMORY ALLOCATION */
|
||||
|
||||
j = 0;
|
||||
/* step through each block in memory, filling memory arrays */
|
||||
while (b_blks_read < total_blks_to_read) {
|
||||
for (i=0; i<flat_buf->count; i++) {
|
||||
mem_offsets[b_blks_read % MAX_ARRAY_SIZE] =
|
||||
/* TODO: fix this compiler warning */
|
||||
((PVFS_size)buf + j*buftype_extent + flat_buf->indices[i]);
|
||||
mem_lengths[b_blks_read % MAX_ARRAY_SIZE] =
|
||||
flat_buf->blocklens[i];
|
||||
file_lengths += flat_buf->blocklens[i];
|
||||
b_blks_read++;
|
||||
if (!(b_blks_read % MAX_ARRAY_SIZE) ||
|
||||
(b_blks_read == total_blks_to_read)) {
|
||||
|
||||
/* in the case of the last read list call,
|
||||
adjust mem_list_count */
|
||||
if (b_blks_read == total_blks_to_read) {
|
||||
mem_list_count = total_blks_to_read % MAX_ARRAY_SIZE;
|
||||
/* in case last read list call fills max arrays */
|
||||
if (!mem_list_count) mem_list_count = MAX_ARRAY_SIZE;
|
||||
}
|
||||
err_flag = PVFS_Request_hindexed(mem_list_count,
|
||||
mem_lengths, mem_offsets, PVFS_BYTE, &mem_req);
|
||||
if (err_flag < 0) break;
|
||||
err_flag = PVFS_Request_contiguous(file_lengths,
|
||||
PVFS_BYTE, &file_req);
|
||||
if (err_flag < 0) break;
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_read_a, 0, NULL );
|
||||
#endif
|
||||
err_flag = PVFS_sys_read(pvfs_fs->object_ref, file_req,
|
||||
file_offsets, PVFS_BOTTOM, mem_req,
|
||||
&(pvfs_fs->credentials), &resp_io);
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_read_b, 0, NULL );
|
||||
#endif
|
||||
/* --BEGIN ERROR HANDLING-- */
|
||||
if (err_flag != 0) {
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
|
||||
MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__,
|
||||
ADIOI_PVFS2_error_convert(err_flag),
|
||||
"Error in PVFS_sys_read", 0);
|
||||
goto error_state;
|
||||
}
|
||||
PVFS_Request_free(&mem_req);
|
||||
PVFS_Request_free(&file_req);
|
||||
total_bytes_read += resp_io.total_completed;
|
||||
/* --END ERROR HANDLING-- */
|
||||
|
||||
/* in the case of error or the last read list call,
|
||||
* leave here */
|
||||
if (err_flag || b_blks_read == total_blks_to_read) break;
|
||||
|
||||
file_offsets += file_lengths;
|
||||
file_lengths = 0;
|
||||
}
|
||||
} /* for (i=0; i<flat_buf->count; i++) */
|
||||
j++;
|
||||
} /* while (b_blks_read < total_blks_to_read) */
|
||||
ADIOI_Free(mem_offsets);
|
||||
ADIOI_Free(mem_lengths);
|
||||
|
||||
if (file_ptr_type == ADIO_INDIVIDUAL)
|
||||
fd->fp_ind += total_bytes_read;
|
||||
|
||||
fd->fp_sys_posn = -1; /* set it to null. */
|
||||
|
||||
#ifdef HAVE_STATUS_SET_BYTES
|
||||
MPIR_Status_set_bytes(status, datatype, bufsize);
|
||||
/* This is a temporary way of filling in status. The right way is to
|
||||
keep track of how much data was actually read and placed in buf
|
||||
by ADIOI_BUFFERED_READ. */
|
||||
#endif
|
||||
ADIOI_Delete_flattened(datatype);
|
||||
|
||||
return;
|
||||
} /* if (!buftype_is_contig && filetype_is_contig) */
|
||||
|
||||
/* know file is noncontiguous from above */
|
||||
/* noncontiguous in file */
|
||||
|
||||
/* filetype already flattened in ADIO_Open */
|
||||
flat_file = ADIOI_Flatlist;
|
||||
while (flat_file->type != fd->filetype) flat_file = flat_file->next;
|
||||
|
||||
disp = fd->disp;
|
||||
initial_off = offset;
|
||||
|
||||
|
||||
/* for each case - ADIO_Individual pointer or explicit, find the file
|
||||
offset in bytes (offset), n_filetypes (how many filetypes into
|
||||
file to start), frd_size (remaining amount of data in present
|
||||
file block), and st_index (start point in terms of blocks in
|
||||
starting filetype) */
|
||||
if (file_ptr_type == ADIO_INDIVIDUAL) {
|
||||
offset = fd->fp_ind; /* in bytes */
|
||||
n_filetypes = -1;
|
||||
flag = 0;
|
||||
while (!flag) {
|
||||
n_filetypes++;
|
||||
for (i=0; i<flat_file->count; i++) {
|
||||
if (disp + flat_file->indices[i] +
|
||||
((ADIO_Offset) n_filetypes)*filetype_extent +
|
||||
flat_file->blocklens[i] >= offset) {
|
||||
st_index = i;
|
||||
frd_size = (int) (disp + flat_file->indices[i] +
|
||||
((ADIO_Offset) n_filetypes)*filetype_extent
|
||||
+ flat_file->blocklens[i] - offset);
|
||||
flag = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
} /* while (!flag) */
|
||||
} /* if (file_ptr_type == ADIO_INDIVIDUAL) */
|
||||
else {
|
||||
n_etypes_in_filetype = filetype_size/etype_size;
|
||||
n_filetypes = (int) (offset / n_etypes_in_filetype);
|
||||
etype_in_filetype = (int) (offset % n_etypes_in_filetype);
|
||||
size_in_filetype = etype_in_filetype * etype_size;
|
||||
|
||||
sum = 0;
|
||||
for (i=0; i<flat_file->count; i++) {
|
||||
sum += flat_file->blocklens[i];
|
||||
if (sum > size_in_filetype) {
|
||||
st_index = i;
|
||||
frd_size = sum - size_in_filetype;
|
||||
abs_off_in_filetype = flat_file->indices[i] +
|
||||
size_in_filetype - (sum - flat_file->blocklens[i]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* abs. offset in bytes in the file */
|
||||
offset = disp + ((ADIO_Offset) n_filetypes)*filetype_extent +
|
||||
abs_off_in_filetype;
|
||||
} /* else [file_ptr_type != ADIO_INDIVIDUAL] */
|
||||
|
||||
start_off = offset;
|
||||
st_frd_size = frd_size;
|
||||
st_n_filetypes = n_filetypes;
|
||||
|
||||
if (buftype_is_contig && !filetype_is_contig) {
|
||||
|
||||
/* contiguous in memory, noncontiguous in file. should be the most
|
||||
common case. */
|
||||
|
||||
int mem_lengths;
|
||||
char *mem_offsets;
|
||||
|
||||
i = 0;
|
||||
j = st_index;
|
||||
n_filetypes = st_n_filetypes;
|
||||
|
||||
mem_list_count = 1;
|
||||
|
||||
/* determine how many blocks in file to read */
|
||||
f_data_read = ADIOI_MIN(st_frd_size, bufsize);
|
||||
total_blks_to_read = 1;
|
||||
if (j < (flat_file->count-1)) j++;
|
||||
else {
|
||||
j = 0;
|
||||
n_filetypes++;
|
||||
}
|
||||
while (f_data_read < bufsize) {
|
||||
f_data_read += flat_file->blocklens[j];
|
||||
total_blks_to_read++;
|
||||
if (j<(flat_file->count-1)) j++;
|
||||
else j = 0;
|
||||
}
|
||||
|
||||
j = st_index;
|
||||
n_filetypes = st_n_filetypes;
|
||||
n_read_lists = total_blks_to_read/MAX_ARRAY_SIZE;
|
||||
extra_blks = total_blks_to_read%MAX_ARRAY_SIZE;
|
||||
|
||||
mem_offsets = buf;
|
||||
mem_lengths = 0;
|
||||
|
||||
/* if at least one full readlist, allocate file arrays
|
||||
at max array size and don't free until very end */
|
||||
if (n_read_lists) {
|
||||
file_offsets = (int64_t*)ADIOI_Malloc(MAX_ARRAY_SIZE*
|
||||
sizeof(int64_t));
|
||||
file_lengths = (int32_t*)ADIOI_Malloc(MAX_ARRAY_SIZE*
|
||||
sizeof(int32_t));
|
||||
}
|
||||
/* if there's no full readlist allocate file arrays according
|
||||
to needed size (extra_blks) */
|
||||
else {
|
||||
file_offsets = (int64_t*)ADIOI_Malloc(extra_blks*
|
||||
sizeof(int64_t));
|
||||
file_lengths = (int32_t*)ADIOI_Malloc(extra_blks*
|
||||
sizeof(int32_t));
|
||||
}
|
||||
|
||||
/* for file arrays that are of MAX_ARRAY_SIZE, build arrays */
|
||||
for (i=0; i<n_read_lists; i++) {
|
||||
file_list_count = MAX_ARRAY_SIZE;
|
||||
if(!i) {
|
||||
file_offsets[0] = offset;
|
||||
file_lengths[0] = st_frd_size;
|
||||
mem_lengths = st_frd_size;
|
||||
}
|
||||
for (k=0; k<MAX_ARRAY_SIZE; k++) {
|
||||
if (i || k) {
|
||||
file_offsets[k] = disp +
|
||||
((ADIO_Offset)n_filetypes)*filetype_extent
|
||||
+ flat_file->indices[j];
|
||||
file_lengths[k] = flat_file->blocklens[j];
|
||||
mem_lengths += file_lengths[k];
|
||||
}
|
||||
if (j<(flat_file->count - 1)) j++;
|
||||
else {
|
||||
j = 0;
|
||||
n_filetypes++;
|
||||
}
|
||||
} /* for (k=0; k<MAX_ARRAY_SIZE; k++) */
|
||||
err_flag = PVFS_Request_contiguous(mem_lengths,
|
||||
PVFS_BYTE, &mem_req);
|
||||
/* --BEGIN ERROR HANDLING-- */
|
||||
if (err_flag != 0) {
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
|
||||
MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__,
|
||||
ADIOI_PVFS2_error_convert(err_flag),
|
||||
"Error in PVFS_Request_contiguous (memory)", 0);
|
||||
goto error_state;
|
||||
}
|
||||
/* --END ERROR HANDLING-- */
|
||||
|
||||
err_flag = PVFS_Request_hindexed(file_list_count, file_lengths,
|
||||
file_offsets, PVFS_BYTE,
|
||||
&file_req);
|
||||
/* --BEGIN ERROR HANDLING-- */
|
||||
if (err_flag != 0) {
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
|
||||
MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__,
|
||||
ADIOI_PVFS2_error_convert(err_flag),
|
||||
"Error in PVFS_Request_hindexed (file)", 0);
|
||||
goto error_state;
|
||||
}
|
||||
/* --END ERROR HANDLING-- */
|
||||
|
||||
/* PVFS_Request_hindexed already expresses the offsets into the
|
||||
* file, so we should not pass in an offset if we are using
|
||||
* hindexed for the file type */
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_read_a, 0, NULL );
|
||||
#endif
|
||||
err_flag = PVFS_sys_read(pvfs_fs->object_ref, file_req, 0,
|
||||
mem_offsets, mem_req,
|
||||
&(pvfs_fs->credentials), &resp_io);
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_read_b, 0, NULL );
|
||||
#endif
|
||||
/* --BEGIN ERROR HANDLING-- */
|
||||
if (err_flag != 0) {
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
|
||||
MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__,
|
||||
ADIOI_PVFS2_error_convert(err_flag),
|
||||
"Error in PVFS_sys_read", 0);
|
||||
goto error_state;
|
||||
}
|
||||
/* --END ERROR HANDLING-- */
|
||||
PVFS_Request_free(&mem_req);
|
||||
PVFS_Request_free(&file_req);
|
||||
|
||||
total_bytes_read += resp_io.total_completed;
|
||||
|
||||
mem_offsets += mem_lengths;
|
||||
mem_lengths = 0;
|
||||
} /* for (i=0; i<n_read_lists; i++) */
|
||||
|
||||
/* for file arrays smaller than MAX_ARRAY_SIZE (last read_list call) */
|
||||
if (extra_blks) {
|
||||
file_list_count = extra_blks;
|
||||
if(!i) {
|
||||
file_offsets[0] = offset;
|
||||
file_lengths[0] = st_frd_size;
|
||||
}
|
||||
for (k=0; k<extra_blks; k++) {
|
||||
if(i || k) {
|
||||
file_offsets[k] = disp +
|
||||
((ADIO_Offset)n_filetypes)*filetype_extent +
|
||||
flat_file->indices[j];
|
||||
if (k == (extra_blks - 1)) {
|
||||
file_lengths[k] = bufsize - (int32_t) mem_lengths
|
||||
- (int32_t) mem_offsets + (int32_t) buf;
|
||||
}
|
||||
else file_lengths[k] = flat_file->blocklens[j];
|
||||
} /* if(i || k) */
|
||||
mem_lengths += file_lengths[k];
|
||||
if (j<(flat_file->count - 1)) j++;
|
||||
else {
|
||||
j = 0;
|
||||
n_filetypes++;
|
||||
}
|
||||
} /* for (k=0; k<extra_blks; k++) */
|
||||
err_flag = PVFS_Request_contiguous(mem_lengths,
|
||||
PVFS_BYTE, &mem_req);
|
||||
/* --BEGIN ERROR HANDLING-- */
|
||||
if (err_flag != 0) {
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
|
||||
MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__,
|
||||
ADIOI_PVFS2_error_convert(err_flag),
|
||||
"Error in PVFS_Request_contiguous (memory)", 0);
|
||||
goto error_state;
|
||||
}
|
||||
/* --END ERROR HANDLING-- */
|
||||
|
||||
err_flag = PVFS_Request_hindexed(file_list_count, file_lengths,
|
||||
file_offsets, PVFS_BYTE, &file_req);
|
||||
/* --BEGIN ERROR HANDLING-- */
|
||||
if (err_flag != 0) {
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
|
||||
MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__,
|
||||
ADIOI_PVFS2_error_convert(err_flag),
|
||||
"Error in PVFS_Request_hindexed (file)", 0);
|
||||
goto error_state;
|
||||
}
|
||||
/* --END ERROR HANDLING-- */
|
||||
|
||||
/* as above, use 0 for 'offset' when using hindexed file type */
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_read_a, 0, NULL );
|
||||
#endif
|
||||
err_flag = PVFS_sys_read(pvfs_fs->object_ref, file_req, 0,
|
||||
mem_offsets, mem_req, &(pvfs_fs->credentials), &resp_io);
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_read_b, 0, NULL );
|
||||
#endif
|
||||
/* --BEGIN ERROR HANDLING-- */
|
||||
if (err_flag != 0) {
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
|
||||
MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__,
|
||||
ADIOI_PVFS2_error_convert(err_flag),
|
||||
"Error in PVFS_sys_read", 0);
|
||||
goto error_state;
|
||||
}
|
||||
/* --END ERROR HANDLING-- */
|
||||
PVFS_Request_free(&mem_req);
|
||||
PVFS_Request_free(&file_req);
|
||||
total_bytes_read += resp_io.total_completed;
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* noncontiguous in memory as well as in file */
|
||||
|
||||
ADIOI_Flatten_datatype(datatype);
|
||||
flat_buf = ADIOI_Flatlist;
|
||||
while (flat_buf->type != datatype) flat_buf = flat_buf->next;
|
||||
|
||||
size_read = 0;
|
||||
n_filetypes = st_n_filetypes;
|
||||
frd_size = st_frd_size;
|
||||
brd_size = flat_buf->blocklens[0];
|
||||
buf_count = 0;
|
||||
start_mem_offset = 0;
|
||||
start_k = k = 0;
|
||||
start_j = st_index;
|
||||
max_mem_list = 0;
|
||||
max_file_list = 0;
|
||||
|
||||
/* run through and find max_file_list and max_mem_list so that you
|
||||
can allocate the file and memory arrays less than MAX_ARRAY_SIZE
|
||||
if possible */
|
||||
|
||||
while (size_read < bufsize) {
|
||||
k = start_k;
|
||||
new_buffer_read = 0;
|
||||
mem_list_count = 0;
|
||||
while ((mem_list_count < MAX_ARRAY_SIZE) &&
|
||||
(new_buffer_read < bufsize-size_read)) {
|
||||
/* find mem_list_count and file_list_count such that both are
|
||||
less than MAX_ARRAY_SIZE, the sum of their lengths are
|
||||
equal, and the sum of all the data read and data to be
|
||||
read in the next immediate read list is less than
|
||||
bufsize */
|
||||
if(mem_list_count) {
|
||||
if((new_buffer_read + flat_buf->blocklens[k] +
|
||||
size_read) > bufsize) {
|
||||
end_brd_size = new_buffer_read +
|
||||
flat_buf->blocklens[k] - (bufsize - size_read);
|
||||
new_buffer_read = bufsize - size_read;
|
||||
}
|
||||
else {
|
||||
new_buffer_read += flat_buf->blocklens[k];
|
||||
end_brd_size = flat_buf->blocklens[k];
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (brd_size > (bufsize - size_read)) {
|
||||
new_buffer_read = bufsize - size_read;
|
||||
brd_size = new_buffer_read;
|
||||
}
|
||||
else new_buffer_read = brd_size;
|
||||
}
|
||||
mem_list_count++;
|
||||
k = (k + 1)%flat_buf->count;
|
||||
} /* while ((mem_list_count < MAX_ARRAY_SIZE) &&
|
||||
(new_buffer_read < bufsize-size_read)) */
|
||||
j = start_j;
|
||||
new_file_read = 0;
|
||||
file_list_count = 0;
|
||||
while ((file_list_count < MAX_ARRAY_SIZE) &&
|
||||
(new_file_read < new_buffer_read)) {
|
||||
if(file_list_count) {
|
||||
if((new_file_read + flat_file->blocklens[j]) >
|
||||
new_buffer_read) {
|
||||
end_frd_size = new_buffer_read - new_file_read;
|
||||
new_file_read = new_buffer_read;
|
||||
j--;
|
||||
}
|
||||
else {
|
||||
new_file_read += flat_file->blocklens[j];
|
||||
end_frd_size = flat_file->blocklens[j];
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (frd_size > new_buffer_read) {
|
||||
new_file_read = new_buffer_read;
|
||||
frd_size = new_file_read;
|
||||
}
|
||||
else new_file_read = frd_size;
|
||||
}
|
||||
file_list_count++;
|
||||
if (j < (flat_file->count - 1)) j++;
|
||||
else j = 0;
|
||||
|
||||
k = start_k;
|
||||
if ((new_file_read < new_buffer_read) &&
|
||||
(file_list_count == MAX_ARRAY_SIZE)) {
|
||||
new_buffer_read = 0;
|
||||
mem_list_count = 0;
|
||||
while (new_buffer_read < new_file_read) {
|
||||
if(mem_list_count) {
|
||||
if((new_buffer_read + flat_buf->blocklens[k]) >
|
||||
new_file_read) {
|
||||
end_brd_size = new_file_read - new_buffer_read;
|
||||
new_buffer_read = new_file_read;
|
||||
k--;
|
||||
}
|
||||
else {
|
||||
new_buffer_read += flat_buf->blocklens[k];
|
||||
end_brd_size = flat_buf->blocklens[k];
|
||||
}
|
||||
}
|
||||
else {
|
||||
new_buffer_read = brd_size;
|
||||
if (brd_size > (bufsize - size_read)) {
|
||||
new_buffer_read = bufsize - size_read;
|
||||
brd_size = new_buffer_read;
|
||||
}
|
||||
}
|
||||
mem_list_count++;
|
||||
k = (k + 1)%flat_buf->count;
|
||||
} /* while (new_buffer_read < new_file_read) */
|
||||
} /* if ((new_file_read < new_buffer_read) && (file_list_count
|
||||
== MAX_ARRAY_SIZE)) */
|
||||
} /* while ((mem_list_count < MAX_ARRAY_SIZE) &&
|
||||
(new_buffer_read < bufsize-size_read)) */
|
||||
|
||||
/* fakes filling the readlist arrays of lengths found above */
|
||||
k = start_k;
|
||||
j = start_j;
|
||||
for (i=0; i<mem_list_count; i++) {
|
||||
if(i) {
|
||||
if (i == (mem_list_count - 1)) {
|
||||
if (flat_buf->blocklens[k] == end_brd_size)
|
||||
brd_size = flat_buf->blocklens[(k+1)%
|
||||
flat_buf->count];
|
||||
else {
|
||||
brd_size = flat_buf->blocklens[k] - end_brd_size;
|
||||
k--;
|
||||
buf_count--;
|
||||
}
|
||||
}
|
||||
}
|
||||
buf_count++;
|
||||
k = (k + 1)%flat_buf->count;
|
||||
} /* for (i=0; i<mem_list_count; i++) */
|
||||
for (i=0; i<file_list_count; i++) {
|
||||
if (i) {
|
||||
if (i == (file_list_count - 1)) {
|
||||
if (flat_file->blocklens[j] == end_frd_size)
|
||||
frd_size = flat_file->blocklens[(j+1)%
|
||||
flat_file->count];
|
||||
else {
|
||||
frd_size = flat_file->blocklens[j] - end_frd_size;
|
||||
j--;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (j < flat_file->count - 1) j++;
|
||||
else {
|
||||
j = 0;
|
||||
n_filetypes++;
|
||||
}
|
||||
} /* for (i=0; i<file_list_count; i++) */
|
||||
size_read += new_buffer_read;
|
||||
start_k = k;
|
||||
start_j = j;
|
||||
if (max_mem_list < mem_list_count)
|
||||
max_mem_list = mem_list_count;
|
||||
if (max_file_list < file_list_count)
|
||||
max_file_list = file_list_count;
|
||||
} /* while (size_read < bufsize) */
|
||||
|
||||
/* one last check before we actually carry out the operation:
|
||||
* this code has hard-to-fix bugs when a noncontiguous file type has
|
||||
* such large pieces that the sum of the lengths of the memory type is
|
||||
* not larger than one of those pieces (and vice versa for large memory
|
||||
* types and many pieces of file types). In these cases, give up and
|
||||
* fall back to naive reads and writes. The testphdf5 test created a
|
||||
* type with two very large memory regions and 600 very small file
|
||||
* regions. The same test also created a type with one very large file
|
||||
* region and many (700) very small memory regions. both cases caused
|
||||
* problems for this code */
|
||||
|
||||
if ( ( (file_list_count == 1) &&
|
||||
(new_file_read < flat_file->blocklens[0] ) ) ||
|
||||
((mem_list_count == 1) &&
|
||||
(new_buffer_read < flat_buf->blocklens[0]) ) ||
|
||||
((file_list_count == MAX_ARRAY_SIZE) &&
|
||||
(new_file_read < flat_buf->blocklens[0]) ) ||
|
||||
( (mem_list_count == MAX_ARRAY_SIZE) &&
|
||||
(new_buffer_read < flat_file->blocklens[0])) )
|
||||
/* Fall back to list I/O if datatype I/O didn't work */
|
||||
if (ret != 0)
|
||||
{
|
||||
fprintf(stderr,
|
||||
"Falling back to list I/O since datatype I/O failed\n");
|
||||
ret = ADIOI_PVFS2_ReadStridedListIO(fd, buf, count,
|
||||
datatype, file_ptr_type,
|
||||
offset, status, error_code);
|
||||
}
|
||||
return;
|
||||
}
|
||||
if (fd->hints->fs_hints.pvfs2.listio_read == ADIOI_HINT_ENABLE) {
|
||||
ret = ADIOI_PVFS2_ReadStridedListIO(fd, buf, count, datatype,
|
||||
file_ptr_type, offset, status, error_code);
|
||||
return;
|
||||
}
|
||||
/* Base case: use classic list I/O if no hints were given */
|
||||
|
||||
ADIOI_Delete_flattened(datatype);
|
||||
ADIOI_GEN_ReadStrided_naive(fd, buf, count, datatype,
|
||||
file_ptr_type, initial_off, status, error_code);
|
||||
ADIOI_PVFS2_OldReadStrided(fd, buf, count, datatype,
|
||||
file_ptr_type, offset, status, error_code);
|
||||
return;
|
||||
}
|
||||
|
||||
mem_offsets = (PVFS_size*)ADIOI_Malloc(max_mem_list*sizeof(PVFS_size));
|
||||
mem_lengths = (int *)ADIOI_Malloc(max_mem_list*sizeof(int));
|
||||
file_offsets = (int64_t *)ADIOI_Malloc(max_file_list*sizeof(int64_t));
|
||||
file_lengths = (int32_t *)ADIOI_Malloc(max_file_list*sizeof(int32_t));
|
||||
|
||||
size_read = 0;
|
||||
n_filetypes = st_n_filetypes;
|
||||
frd_size = st_frd_size;
|
||||
brd_size = flat_buf->blocklens[0];
|
||||
buf_count = 0;
|
||||
start_mem_offset = 0;
|
||||
start_k = k = 0;
|
||||
start_j = st_index;
|
||||
|
||||
/* this section calculates mem_list_count and file_list_count
|
||||
and also finds the possibly odd sized last array elements
|
||||
in end_frd_size and end_brd_size */
|
||||
|
||||
while (size_read < bufsize) {
|
||||
k = start_k;
|
||||
new_buffer_read = 0;
|
||||
mem_list_count = 0;
|
||||
while ((mem_list_count < MAX_ARRAY_SIZE) &&
|
||||
(new_buffer_read < bufsize-size_read)) {
|
||||
/* find mem_list_count and file_list_count such that both are
|
||||
less than MAX_ARRAY_SIZE, the sum of their lengths are
|
||||
equal, and the sum of all the data read and data to be
|
||||
read in the next immediate read list is less than
|
||||
bufsize */
|
||||
if(mem_list_count) {
|
||||
if((new_buffer_read + flat_buf->blocklens[k] +
|
||||
size_read) > bufsize) {
|
||||
end_brd_size = new_buffer_read +
|
||||
flat_buf->blocklens[k] - (bufsize - size_read);
|
||||
new_buffer_read = bufsize - size_read;
|
||||
}
|
||||
else {
|
||||
new_buffer_read += flat_buf->blocklens[k];
|
||||
end_brd_size = flat_buf->blocklens[k];
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (brd_size > (bufsize - size_read)) {
|
||||
new_buffer_read = bufsize - size_read;
|
||||
brd_size = new_buffer_read;
|
||||
}
|
||||
else new_buffer_read = brd_size;
|
||||
}
|
||||
mem_list_count++;
|
||||
k = (k + 1)%flat_buf->count;
|
||||
} /* while ((mem_list_count < MAX_ARRAY_SIZE) &&
|
||||
(new_buffer_read < bufsize-size_read)) */
|
||||
j = start_j;
|
||||
new_file_read = 0;
|
||||
file_list_count = 0;
|
||||
while ((file_list_count < MAX_ARRAY_SIZE) &&
|
||||
(new_file_read < new_buffer_read)) {
|
||||
if(file_list_count) {
|
||||
if((new_file_read + flat_file->blocklens[j]) >
|
||||
new_buffer_read) {
|
||||
end_frd_size = new_buffer_read - new_file_read;
|
||||
new_file_read = new_buffer_read;
|
||||
j--;
|
||||
}
|
||||
else {
|
||||
new_file_read += flat_file->blocklens[j];
|
||||
end_frd_size = flat_file->blocklens[j];
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (frd_size > new_buffer_read) {
|
||||
new_file_read = new_buffer_read;
|
||||
frd_size = new_file_read;
|
||||
}
|
||||
else new_file_read = frd_size;
|
||||
}
|
||||
file_list_count++;
|
||||
if (j < (flat_file->count - 1)) j++;
|
||||
else j = 0;
|
||||
|
||||
k = start_k;
|
||||
if ((new_file_read < new_buffer_read) &&
|
||||
(file_list_count == MAX_ARRAY_SIZE)) {
|
||||
new_buffer_read = 0;
|
||||
mem_list_count = 0;
|
||||
while (new_buffer_read < new_file_read) {
|
||||
if(mem_list_count) {
|
||||
if((new_buffer_read + flat_buf->blocklens[k]) >
|
||||
new_file_read) {
|
||||
end_brd_size = new_file_read - new_buffer_read;
|
||||
new_buffer_read = new_file_read;
|
||||
k--;
|
||||
}
|
||||
else {
|
||||
new_buffer_read += flat_buf->blocklens[k];
|
||||
end_brd_size = flat_buf->blocklens[k];
|
||||
}
|
||||
}
|
||||
else {
|
||||
new_buffer_read = brd_size;
|
||||
if (brd_size > (bufsize - size_read)) {
|
||||
new_buffer_read = bufsize - size_read;
|
||||
brd_size = new_buffer_read;
|
||||
}
|
||||
}
|
||||
mem_list_count++;
|
||||
k = (k + 1)%flat_buf->count;
|
||||
} /* while (new_buffer_read < new_file_read) */
|
||||
} /* if ((new_file_read < new_buffer_read) && (file_list_count
|
||||
== MAX_ARRAY_SIZE)) */
|
||||
} /* while ((mem_list_count < MAX_ARRAY_SIZE) &&
|
||||
(new_buffer_read < bufsize-size_read)) */
|
||||
|
||||
/* fills the allocated readlist arrays */
|
||||
k = start_k;
|
||||
j = start_j;
|
||||
for (i=0; i<mem_list_count; i++) {
|
||||
mem_offsets[i] = ((PVFS_size)buf + buftype_extent*
|
||||
(buf_count/flat_buf->count) +
|
||||
(int)flat_buf->indices[k]);
|
||||
if(!i) {
|
||||
mem_lengths[0] = brd_size;
|
||||
mem_offsets[0] += flat_buf->blocklens[k] - brd_size;
|
||||
}
|
||||
else {
|
||||
if (i == (mem_list_count - 1)) {
|
||||
mem_lengths[i] = end_brd_size;
|
||||
if (flat_buf->blocklens[k] == end_brd_size)
|
||||
brd_size = flat_buf->blocklens[(k+1)%
|
||||
flat_buf->count];
|
||||
else {
|
||||
brd_size = flat_buf->blocklens[k] - end_brd_size;
|
||||
k--;
|
||||
buf_count--;
|
||||
}
|
||||
}
|
||||
else {
|
||||
mem_lengths[i] = flat_buf->blocklens[k];
|
||||
}
|
||||
}
|
||||
buf_count++;
|
||||
k = (k + 1)%flat_buf->count;
|
||||
} /* for (i=0; i<mem_list_count; i++) */
|
||||
for (i=0; i<file_list_count; i++) {
|
||||
file_offsets[i] = disp + flat_file->indices[j] +
|
||||
((ADIO_Offset)n_filetypes) * filetype_extent;
|
||||
if (!i) {
|
||||
file_lengths[0] = frd_size;
|
||||
file_offsets[0] += flat_file->blocklens[j] - frd_size;
|
||||
}
|
||||
else {
|
||||
if (i == (file_list_count - 1)) {
|
||||
file_lengths[i] = end_frd_size;
|
||||
if (flat_file->blocklens[j] == end_frd_size)
|
||||
frd_size = flat_file->blocklens[(j+1)%
|
||||
flat_file->count];
|
||||
else {
|
||||
frd_size = flat_file->blocklens[j] - end_frd_size;
|
||||
j--;
|
||||
}
|
||||
}
|
||||
else file_lengths[i] = flat_file->blocklens[j];
|
||||
}
|
||||
if (j < flat_file->count - 1) j++;
|
||||
else {
|
||||
j = 0;
|
||||
n_filetypes++;
|
||||
}
|
||||
} /* for (i=0; i<file_list_count; i++) */
|
||||
err_flag = PVFS_Request_hindexed(mem_list_count, mem_lengths,
|
||||
mem_offsets, PVFS_BYTE, &mem_req);
|
||||
/* --BEGIN ERROR HANDLING-- */
|
||||
if (err_flag != 0 ) {
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
|
||||
MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__,
|
||||
ADIOI_PVFS2_error_convert(err_flag),
|
||||
"Error in PVFS_Request_hindexed (memory)", 0);
|
||||
goto error_state;
|
||||
}
|
||||
/* -- END ERROR HANDLING-- */
|
||||
err_flag = PVFS_Request_hindexed(file_list_count, file_lengths,
|
||||
file_offsets, PVFS_BYTE, &file_req);
|
||||
/* --BEGIN ERROR HANDLING-- */
|
||||
if (err_flag != 0) {
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
|
||||
MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__,
|
||||
ADIOI_PVFS2_error_convert(err_flag),
|
||||
"Error in PVFS_Request_hindexed (file)", 0);
|
||||
goto error_state;
|
||||
}
|
||||
/* --END ERROR HANDLING-- */
|
||||
|
||||
/* offset will be expressed in memory and file datatypes */
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_read_a, 0, NULL );
|
||||
#endif
|
||||
err_flag = PVFS_sys_read(pvfs_fs->object_ref, file_req, 0,
|
||||
PVFS_BOTTOM, mem_req, &(pvfs_fs->credentials), &resp_io);
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_read_b, 0, NULL );
|
||||
#endif
|
||||
/* --BEGIN ERROR HANDLING-- */
|
||||
if (err_flag != 0) {
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
|
||||
MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__,
|
||||
ADIOI_PVFS2_error_convert(err_flag),
|
||||
"Error in PVFS_sys_read", 0);
|
||||
}
|
||||
/* --END ERROR HANDLING-- */
|
||||
PVFS_Request_free(&mem_req);
|
||||
PVFS_Request_free(&file_req);
|
||||
total_bytes_read += resp_io.total_completed;
|
||||
size_read += new_buffer_read;
|
||||
start_k = k;
|
||||
start_j = j;
|
||||
} /* while (size_read < bufsize) */
|
||||
ADIOI_Free(mem_offsets);
|
||||
ADIOI_Free(mem_lengths);
|
||||
}
|
||||
ADIOI_Free(file_offsets);
|
||||
ADIOI_Free(file_lengths);
|
||||
|
||||
/* Other ADIO routines will convert absolute bytes into counts of datatypes */
|
||||
/* when incrementing fp_ind, need to also take into account the file type:
|
||||
* consider an N-element 1-d subarray with a lb and ub: ( |---xxxxx-----|
|
||||
* if we wrote N elements, offset needs to point at beginning of type, not
|
||||
* at empty region at offset N+1) */
|
||||
if (file_ptr_type == ADIO_INDIVIDUAL) {
|
||||
/* this is closer, but still incorrect for the cases where a small
|
||||
* amount of a file type is "leftover" after a write */
|
||||
fd->fp_ind = disp + flat_file->indices[j] +
|
||||
((ADIO_Offset)n_filetypes)*filetype_extent;
|
||||
}
|
||||
if (err_flag == 0) *error_code = MPI_SUCCESS;
|
||||
|
||||
error_state:
|
||||
fd->fp_sys_posn = -1; /* set it to null. */
|
||||
|
||||
#ifdef HAVE_STATUS_SET_BYTES
|
||||
MPIR_Status_set_bytes(status, datatype, bufsize);
|
||||
/* This is a temporary way of filling in status. The right way is to
|
||||
keep track of how much data was actually read and placed in buf
|
||||
by ADIOI_BUFFERED_READ. */
|
||||
#endif
|
||||
|
||||
if (!buftype_is_contig) ADIOI_Delete_flattened(datatype);
|
||||
}
|
||||
|
||||
/*
|
||||
* vim: ts=8 sts=4 sw=4 noexpandtab
|
||||
|
909
ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_read_list_classic.c
Обычный файл
909
ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_read_list_classic.c
Обычный файл
@ -0,0 +1,909 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; -*-
|
||||
* vim: ts=8 sts=4 sw=4 noexpandtab
|
||||
*
|
||||
* Copyright (C) 2008 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
#include "adio.h"
|
||||
#include "adio_extern.h"
|
||||
#include "ad_pvfs2.h"
|
||||
|
||||
#include "ad_pvfs2_common.h"
|
||||
|
||||
void ADIOI_PVFS2_OldReadStrided(ADIO_File fd, void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Status *status, int
|
||||
*error_code)
|
||||
{
|
||||
/* offset is in units of etype relative to the filetype. */
|
||||
ADIOI_Flatlist_node *flat_buf, *flat_file;
|
||||
int i, j, k, brd_size, frd_size=0, st_index=0;
|
||||
int bufsize, sum, n_etypes_in_filetype, size_in_filetype;
|
||||
int n_filetypes, etype_in_filetype;
|
||||
ADIO_Offset abs_off_in_filetype=0;
|
||||
int filetype_size, etype_size, buftype_size;
|
||||
MPI_Aint filetype_extent, buftype_extent;
|
||||
int buf_count, buftype_is_contig, filetype_is_contig;
|
||||
ADIO_Offset off, disp, start_off, initial_off;
|
||||
int flag, st_frd_size, st_n_filetypes;
|
||||
|
||||
int mem_list_count, file_list_count;
|
||||
PVFS_size *mem_offsets;
|
||||
int64_t *file_offsets;
|
||||
int *mem_lengths;
|
||||
int32_t *file_lengths;
|
||||
int total_blks_to_read;
|
||||
|
||||
int max_mem_list, max_file_list;
|
||||
|
||||
int b_blks_read;
|
||||
int f_data_read;
|
||||
int size_read=0, n_read_lists, extra_blks;
|
||||
|
||||
int end_brd_size, end_frd_size;
|
||||
int start_k, start_j, new_file_read, new_buffer_read;
|
||||
int start_mem_offset;
|
||||
PVFS_Request mem_req, file_req;
|
||||
ADIOI_PVFS2_fs * pvfs_fs;
|
||||
PVFS_sysresp_io resp_io;
|
||||
int err_flag=0;
|
||||
MPI_Offset total_bytes_read = 0;
|
||||
static char myname[] = "ADIOI_PVFS2_ReadStrided";
|
||||
|
||||
#define MAX_ARRAY_SIZE 64
|
||||
|
||||
*error_code = MPI_SUCCESS; /* changed below if error */
|
||||
|
||||
ADIOI_Datatype_iscontig(datatype, &buftype_is_contig);
|
||||
ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);
|
||||
|
||||
/* the HDF5 tests showed a bug in this list processing code (see many many
|
||||
* lines down below). We added a workaround, but common HDF5 file types
|
||||
* are actually contiguous and do not need the expensive workarond */
|
||||
if (!filetype_is_contig) {
|
||||
flat_file = ADIOI_Flatlist;
|
||||
while (flat_file->type != fd->filetype) flat_file = flat_file->next;
|
||||
if (flat_file->count == 1 && !buftype_is_contig)
|
||||
filetype_is_contig = 1;
|
||||
}
|
||||
|
||||
MPI_Type_size(fd->filetype, &filetype_size);
|
||||
if ( ! filetype_size ) {
|
||||
*error_code = MPI_SUCCESS;
|
||||
return;
|
||||
}
|
||||
|
||||
MPI_Type_extent(fd->filetype, &filetype_extent);
|
||||
MPI_Type_size(datatype, &buftype_size);
|
||||
MPI_Type_extent(datatype, &buftype_extent);
|
||||
etype_size = fd->etype_size;
|
||||
|
||||
bufsize = buftype_size * count;
|
||||
|
||||
pvfs_fs = (ADIOI_PVFS2_fs*)fd->fs_ptr;
|
||||
|
||||
if (!buftype_is_contig && filetype_is_contig) {
|
||||
|
||||
/* noncontiguous in memory, contiguous in file. */
|
||||
int64_t file_offsets;
|
||||
int32_t file_lengths;
|
||||
|
||||
ADIOI_Flatten_datatype(datatype);
|
||||
flat_buf = ADIOI_Flatlist;
|
||||
while (flat_buf->type != datatype) flat_buf = flat_buf->next;
|
||||
|
||||
off = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind :
|
||||
fd->disp + etype_size * offset;
|
||||
|
||||
file_list_count = 1;
|
||||
file_offsets = off;
|
||||
file_lengths = 0;
|
||||
total_blks_to_read = count*flat_buf->count;
|
||||
b_blks_read = 0;
|
||||
|
||||
/* allocate arrays according to max usage */
|
||||
if (total_blks_to_read > MAX_ARRAY_SIZE)
|
||||
mem_list_count = MAX_ARRAY_SIZE;
|
||||
else mem_list_count = total_blks_to_read;
|
||||
mem_offsets = (PVFS_size*)ADIOI_Malloc(mem_list_count*sizeof(PVFS_size));
|
||||
mem_lengths = (int*)ADIOI_Malloc(mem_list_count*sizeof(int));
|
||||
|
||||
/* TODO: CHECK RESULTS OF MEMORY ALLOCATION */
|
||||
|
||||
j = 0;
|
||||
/* step through each block in memory, filling memory arrays */
|
||||
while (b_blks_read < total_blks_to_read) {
|
||||
for (i=0; i<flat_buf->count; i++) {
|
||||
mem_offsets[b_blks_read % MAX_ARRAY_SIZE] =
|
||||
/* TODO: fix this compiler warning */
|
||||
((PVFS_size)buf + j*buftype_extent + flat_buf->indices[i]);
|
||||
mem_lengths[b_blks_read % MAX_ARRAY_SIZE] =
|
||||
flat_buf->blocklens[i];
|
||||
file_lengths += flat_buf->blocklens[i];
|
||||
b_blks_read++;
|
||||
if (!(b_blks_read % MAX_ARRAY_SIZE) ||
|
||||
(b_blks_read == total_blks_to_read)) {
|
||||
|
||||
/* in the case of the last read list call,
|
||||
adjust mem_list_count */
|
||||
if (b_blks_read == total_blks_to_read) {
|
||||
mem_list_count = total_blks_to_read % MAX_ARRAY_SIZE;
|
||||
/* in case last read list call fills max arrays */
|
||||
if (!mem_list_count) mem_list_count = MAX_ARRAY_SIZE;
|
||||
}
|
||||
err_flag = PVFS_Request_hindexed(mem_list_count,
|
||||
mem_lengths, mem_offsets, PVFS_BYTE, &mem_req);
|
||||
if (err_flag < 0) break;
|
||||
err_flag = PVFS_Request_contiguous(file_lengths,
|
||||
PVFS_BYTE, &file_req);
|
||||
if (err_flag < 0) break;
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_read_a, 0, NULL );
|
||||
#endif
|
||||
err_flag = PVFS_sys_read(pvfs_fs->object_ref, file_req,
|
||||
file_offsets, PVFS_BOTTOM, mem_req,
|
||||
&(pvfs_fs->credentials), &resp_io);
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_read_b, 0, NULL );
|
||||
#endif
|
||||
/* --BEGIN ERROR HANDLING-- */
|
||||
if (err_flag != 0) {
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
|
||||
MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__,
|
||||
ADIOI_PVFS2_error_convert(err_flag),
|
||||
"Error in PVFS_sys_read", 0);
|
||||
goto error_state;
|
||||
}
|
||||
PVFS_Request_free(&mem_req);
|
||||
PVFS_Request_free(&file_req);
|
||||
total_bytes_read += resp_io.total_completed;
|
||||
/* --END ERROR HANDLING-- */
|
||||
|
||||
/* in the case of error or the last read list call,
|
||||
* leave here */
|
||||
if (err_flag || b_blks_read == total_blks_to_read) break;
|
||||
|
||||
file_offsets += file_lengths;
|
||||
file_lengths = 0;
|
||||
}
|
||||
} /* for (i=0; i<flat_buf->count; i++) */
|
||||
j++;
|
||||
} /* while (b_blks_read < total_blks_to_read) */
|
||||
ADIOI_Free(mem_offsets);
|
||||
ADIOI_Free(mem_lengths);
|
||||
|
||||
if (file_ptr_type == ADIO_INDIVIDUAL)
|
||||
fd->fp_ind += total_bytes_read;
|
||||
|
||||
fd->fp_sys_posn = -1; /* set it to null. */
|
||||
|
||||
#ifdef HAVE_STATUS_SET_BYTES
|
||||
MPIR_Status_set_bytes(status, datatype, bufsize);
|
||||
/* This isa temporary way of filling in status. The right way is to
|
||||
keep tracke of how much data was actually read adn placed in buf
|
||||
by ADIOI_BUFFERED_READ. */
|
||||
#endif
|
||||
ADIOI_Delete_flattened(datatype);
|
||||
|
||||
return;
|
||||
} /* if (!buftype_is_contig && filetype_is_contig) */
|
||||
|
||||
/* know file is noncontiguous from above */
|
||||
/* noncontiguous in file */
|
||||
|
||||
/* filetype already flattened in ADIO_Open */
|
||||
flat_file = ADIOI_Flatlist;
|
||||
while (flat_file->type != fd->filetype) flat_file = flat_file->next;
|
||||
|
||||
disp = fd->disp;
|
||||
initial_off = offset;
|
||||
|
||||
|
||||
/* for each case - ADIO_Individual pointer or explicit, find the file
|
||||
offset in bytes (offset), n_filetypes (how many filetypes into
|
||||
file to start), frd_size (remaining amount of data in present
|
||||
file block), and st_index (start point in terms of blocks in
|
||||
starting filetype) */
|
||||
if (file_ptr_type == ADIO_INDIVIDUAL) {
|
||||
offset = fd->fp_ind; /* in bytes */
|
||||
n_filetypes = -1;
|
||||
flag = 0;
|
||||
while (!flag) {
|
||||
n_filetypes++;
|
||||
for (i=0; i<flat_file->count; i++) {
|
||||
if (disp + flat_file->indices[i] +
|
||||
((ADIO_Offset) n_filetypes)*filetype_extent +
|
||||
flat_file->blocklens[i] >= offset) {
|
||||
st_index = i;
|
||||
frd_size = (int) (disp + flat_file->indices[i] +
|
||||
((ADIO_Offset) n_filetypes)*filetype_extent
|
||||
+ flat_file->blocklens[i] - offset);
|
||||
flag = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
} /* while (!flag) */
|
||||
} /* if (file_ptr_type == ADIO_INDIVIDUAL) */
|
||||
else {
|
||||
n_etypes_in_filetype = filetype_size/etype_size;
|
||||
n_filetypes = (int) (offset / n_etypes_in_filetype);
|
||||
etype_in_filetype = (int) (offset % n_etypes_in_filetype);
|
||||
size_in_filetype = etype_in_filetype * etype_size;
|
||||
|
||||
sum = 0;
|
||||
for (i=0; i<flat_file->count; i++) {
|
||||
sum += flat_file->blocklens[i];
|
||||
if (sum > size_in_filetype) {
|
||||
st_index = i;
|
||||
frd_size = sum - size_in_filetype;
|
||||
abs_off_in_filetype = flat_file->indices[i] +
|
||||
size_in_filetype - (sum - flat_file->blocklens[i]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* abs. offset in bytes in the file */
|
||||
offset = disp + ((ADIO_Offset) n_filetypes)*filetype_extent +
|
||||
abs_off_in_filetype;
|
||||
} /* else [file_ptr_type != ADIO_INDIVIDUAL] */
|
||||
|
||||
start_off = offset;
|
||||
st_frd_size = frd_size;
|
||||
st_n_filetypes = n_filetypes;
|
||||
|
||||
if (buftype_is_contig && !filetype_is_contig) {
|
||||
|
||||
/* contiguous in memory, noncontiguous in file. should be the most
|
||||
common case. */
|
||||
|
||||
int mem_lengths;
|
||||
char *mem_offsets;
|
||||
|
||||
i = 0;
|
||||
j = st_index;
|
||||
n_filetypes = st_n_filetypes;
|
||||
|
||||
mem_list_count = 1;
|
||||
|
||||
/* determine how many blocks in file to read */
|
||||
f_data_read = ADIOI_MIN(st_frd_size, bufsize);
|
||||
total_blks_to_read = 1;
|
||||
if (j < (flat_file->count-1)) j++;
|
||||
else {
|
||||
j = 0;
|
||||
n_filetypes++;
|
||||
}
|
||||
while (f_data_read < bufsize) {
|
||||
f_data_read += flat_file->blocklens[j];
|
||||
total_blks_to_read++;
|
||||
if (j<(flat_file->count-1)) j++;
|
||||
else j = 0;
|
||||
}
|
||||
|
||||
j = st_index;
|
||||
n_filetypes = st_n_filetypes;
|
||||
n_read_lists = total_blks_to_read/MAX_ARRAY_SIZE;
|
||||
extra_blks = total_blks_to_read%MAX_ARRAY_SIZE;
|
||||
|
||||
mem_offsets = buf;
|
||||
mem_lengths = 0;
|
||||
|
||||
/* if at least one full readlist, allocate file arrays
|
||||
at max array size and don't free until very end */
|
||||
if (n_read_lists) {
|
||||
file_offsets = (int64_t*)ADIOI_Malloc(MAX_ARRAY_SIZE*
|
||||
sizeof(int64_t));
|
||||
file_lengths = (int32_t*)ADIOI_Malloc(MAX_ARRAY_SIZE*
|
||||
sizeof(int32_t));
|
||||
}
|
||||
/* if there's no full readlist allocate file arrays according
|
||||
to needed size (extra_blks) */
|
||||
else {
|
||||
file_offsets = (int64_t*)ADIOI_Malloc(extra_blks*
|
||||
sizeof(int64_t));
|
||||
file_lengths = (int32_t*)ADIOI_Malloc(extra_blks*
|
||||
sizeof(int32_t));
|
||||
}
|
||||
|
||||
/* for file arrays that are of MAX_ARRAY_SIZE, build arrays */
|
||||
for (i=0; i<n_read_lists; i++) {
|
||||
file_list_count = MAX_ARRAY_SIZE;
|
||||
if(!i) {
|
||||
file_offsets[0] = offset;
|
||||
file_lengths[0] = st_frd_size;
|
||||
mem_lengths = st_frd_size;
|
||||
}
|
||||
for (k=0; k<MAX_ARRAY_SIZE; k++) {
|
||||
if (i || k) {
|
||||
file_offsets[k] = disp +
|
||||
((ADIO_Offset)n_filetypes)*filetype_extent
|
||||
+ flat_file->indices[j];
|
||||
file_lengths[k] = flat_file->blocklens[j];
|
||||
mem_lengths += file_lengths[k];
|
||||
}
|
||||
if (j<(flat_file->count - 1)) j++;
|
||||
else {
|
||||
j = 0;
|
||||
n_filetypes++;
|
||||
}
|
||||
} /* for (k=0; k<MAX_ARRAY_SIZE; k++) */
|
||||
err_flag = PVFS_Request_contiguous(mem_lengths,
|
||||
PVFS_BYTE, &mem_req);
|
||||
/* --BEGIN ERROR HANDLING-- */
|
||||
if (err_flag != 0) {
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
|
||||
MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__,
|
||||
ADIOI_PVFS2_error_convert(err_flag),
|
||||
"Error in PVFS_Request_contiguous (memory)", 0);
|
||||
goto error_state;
|
||||
}
|
||||
/* --END ERROR HANDLING-- */
|
||||
|
||||
err_flag = PVFS_Request_hindexed(file_list_count, file_lengths,
|
||||
file_offsets, PVFS_BYTE,
|
||||
&file_req);
|
||||
/* --BEGIN ERROR HANDLING-- */
|
||||
if (err_flag != 0) {
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
|
||||
MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__,
|
||||
ADIOI_PVFS2_error_convert(err_flag),
|
||||
"Error in PVFS_Request_hindexed (file)", 0);
|
||||
goto error_state;
|
||||
}
|
||||
/* --END ERROR HANDLING-- */
|
||||
|
||||
/* PVFS_Request_hindexed already expresses the offsets into the
|
||||
* file, so we should not pass in an offset if we are using
|
||||
* hindexed for the file type */
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_read_a, 0, NULL );
|
||||
#endif
|
||||
err_flag = PVFS_sys_read(pvfs_fs->object_ref, file_req, 0,
|
||||
mem_offsets, mem_req,
|
||||
&(pvfs_fs->credentials), &resp_io);
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_read_b, 0, NULL );
|
||||
#endif
|
||||
/* --BEGIN ERROR HANDLING-- */
|
||||
if (err_flag != 0) {
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
|
||||
MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__,
|
||||
ADIOI_PVFS2_error_convert(err_flag),
|
||||
"Error in PVFS_sys_read", 0);
|
||||
goto error_state;
|
||||
}
|
||||
/* --END ERROR HANDING-- */
|
||||
PVFS_Request_free(&mem_req);
|
||||
PVFS_Request_free(&file_req);
|
||||
|
||||
total_bytes_read += resp_io.total_completed;
|
||||
|
||||
mem_offsets += mem_lengths;
|
||||
mem_lengths = 0;
|
||||
} /* for (i=0; i<n_read_lists; i++) */
|
||||
|
||||
/* for file arrays smaller than MAX_ARRAY_SIZE (last read_list call) */
|
||||
if (extra_blks) {
|
||||
file_list_count = extra_blks;
|
||||
if(!i) {
|
||||
file_offsets[0] = offset;
|
||||
file_lengths[0] = ADIOI_MIN(st_frd_size, bufsize);
|
||||
}
|
||||
for (k=0; k<extra_blks; k++) {
|
||||
if(i || k) {
|
||||
file_offsets[k] = disp +
|
||||
((ADIO_Offset)n_filetypes)*filetype_extent +
|
||||
flat_file->indices[j];
|
||||
if (k == (extra_blks - 1)) {
|
||||
file_lengths[k] = bufsize - (int32_t) mem_lengths
|
||||
- (int32_t) mem_offsets + (int32_t) buf;
|
||||
}
|
||||
else file_lengths[k] = flat_file->blocklens[j];
|
||||
} /* if(i || k) */
|
||||
mem_lengths += file_lengths[k];
|
||||
if (j<(flat_file->count - 1)) j++;
|
||||
else {
|
||||
j = 0;
|
||||
n_filetypes++;
|
||||
}
|
||||
} /* for (k=0; k<extra_blks; k++) */
|
||||
err_flag = PVFS_Request_contiguous(mem_lengths,
|
||||
PVFS_BYTE, &mem_req);
|
||||
/* --BEGIN ERROR HANDLING-- */
|
||||
if (err_flag != 0) {
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
|
||||
MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__,
|
||||
ADIOI_PVFS2_error_convert(err_flag),
|
||||
"Error in PVFS_Request_contiguous (memory)", 0);
|
||||
goto error_state;
|
||||
}
|
||||
/* --END ERROR HANDLING-- */
|
||||
|
||||
err_flag = PVFS_Request_hindexed(file_list_count, file_lengths,
|
||||
file_offsets, PVFS_BYTE, &file_req);
|
||||
/* --BEGIN ERROR HANDLING-- */
|
||||
if (err_flag != 0) {
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
|
||||
MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__,
|
||||
ADIOI_PVFS2_error_convert(err_flag),
|
||||
"Error in PVFS_Request_hindexed (file)", 0);
|
||||
goto error_state;
|
||||
}
|
||||
/* --END ERROR HANDLING-- */
|
||||
|
||||
/* as above, use 0 for 'offset' when using hindexed file type */
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_read_a, 0, NULL );
|
||||
#endif
|
||||
err_flag = PVFS_sys_read(pvfs_fs->object_ref, file_req, 0,
|
||||
mem_offsets, mem_req, &(pvfs_fs->credentials), &resp_io);
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_read_b, 0, NULL );
|
||||
#endif
|
||||
/* --BEGIN ERROR HANDLING-- */
|
||||
if (err_flag != 0) {
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
|
||||
MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__,
|
||||
ADIOI_PVFS2_error_convert(err_flag),
|
||||
"Error in PVFS_sys_read", 0);
|
||||
goto error_state;
|
||||
}
|
||||
/* --END ERROR HANDLING-- */
|
||||
PVFS_Request_free(&mem_req);
|
||||
PVFS_Request_free(&file_req);
|
||||
total_bytes_read += resp_io.total_completed;
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* noncontiguous in memory as well as in file */
|
||||
|
||||
ADIOI_Flatten_datatype(datatype);
|
||||
flat_buf = ADIOI_Flatlist;
|
||||
while (flat_buf->type != datatype) flat_buf = flat_buf->next;
|
||||
|
||||
size_read = 0;
|
||||
n_filetypes = st_n_filetypes;
|
||||
frd_size = st_frd_size;
|
||||
brd_size = flat_buf->blocklens[0];
|
||||
buf_count = 0;
|
||||
start_mem_offset = 0;
|
||||
start_k = k = 0;
|
||||
start_j = st_index;
|
||||
max_mem_list = 0;
|
||||
max_file_list = 0;
|
||||
|
||||
/* run through and file max_file_list and max_mem_list so that you
|
||||
can allocate the file and memory arrays less than MAX_ARRAY_SIZE
|
||||
if possible */
|
||||
|
||||
while (size_read < bufsize) {
|
||||
k = start_k;
|
||||
new_buffer_read = 0;
|
||||
mem_list_count = 0;
|
||||
while ((mem_list_count < MAX_ARRAY_SIZE) &&
|
||||
(new_buffer_read < bufsize-size_read)) {
|
||||
/* find mem_list_count and file_list_count such that both are
|
||||
less than MAX_ARRAY_SIZE, the sum of their lengths are
|
||||
equal, and the sum of all the data read and data to be
|
||||
read in the next immediate read list is less than
|
||||
bufsize */
|
||||
if(mem_list_count) {
|
||||
if((new_buffer_read + flat_buf->blocklens[k] +
|
||||
size_read) > bufsize) {
|
||||
end_brd_size = new_buffer_read +
|
||||
flat_buf->blocklens[k] - (bufsize - size_read);
|
||||
new_buffer_read = bufsize - size_read;
|
||||
}
|
||||
else {
|
||||
new_buffer_read += flat_buf->blocklens[k];
|
||||
end_brd_size = flat_buf->blocklens[k];
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (brd_size > (bufsize - size_read)) {
|
||||
new_buffer_read = bufsize - size_read;
|
||||
brd_size = new_buffer_read;
|
||||
}
|
||||
else new_buffer_read = brd_size;
|
||||
}
|
||||
mem_list_count++;
|
||||
k = (k + 1)%flat_buf->count;
|
||||
} /* while ((mem_list_count < MAX_ARRAY_SIZE) &&
|
||||
(new_buffer_read < bufsize-size_read)) */
|
||||
j = start_j;
|
||||
new_file_read = 0;
|
||||
file_list_count = 0;
|
||||
while ((file_list_count < MAX_ARRAY_SIZE) &&
|
||||
(new_file_read < new_buffer_read)) {
|
||||
if(file_list_count) {
|
||||
if((new_file_read + flat_file->blocklens[j]) >
|
||||
new_buffer_read) {
|
||||
end_frd_size = new_buffer_read - new_file_read;
|
||||
new_file_read = new_buffer_read;
|
||||
j--;
|
||||
}
|
||||
else {
|
||||
new_file_read += flat_file->blocklens[j];
|
||||
end_frd_size = flat_file->blocklens[j];
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (frd_size > new_buffer_read) {
|
||||
new_file_read = new_buffer_read;
|
||||
frd_size = new_file_read;
|
||||
}
|
||||
else new_file_read = frd_size;
|
||||
}
|
||||
file_list_count++;
|
||||
if (j < (flat_file->count - 1)) j++;
|
||||
else j = 0;
|
||||
|
||||
k = start_k;
|
||||
if ((new_file_read < new_buffer_read) &&
|
||||
(file_list_count == MAX_ARRAY_SIZE)) {
|
||||
new_buffer_read = 0;
|
||||
mem_list_count = 0;
|
||||
while (new_buffer_read < new_file_read) {
|
||||
if(mem_list_count) {
|
||||
if((new_buffer_read + flat_buf->blocklens[k]) >
|
||||
new_file_read) {
|
||||
end_brd_size = new_file_read - new_buffer_read;
|
||||
new_buffer_read = new_file_read;
|
||||
k--;
|
||||
}
|
||||
else {
|
||||
new_buffer_read += flat_buf->blocklens[k];
|
||||
end_brd_size = flat_buf->blocklens[k];
|
||||
}
|
||||
}
|
||||
else {
|
||||
new_buffer_read = brd_size;
|
||||
if (brd_size > (bufsize - size_read)) {
|
||||
new_buffer_read = bufsize - size_read;
|
||||
brd_size = new_buffer_read;
|
||||
}
|
||||
}
|
||||
mem_list_count++;
|
||||
k = (k + 1)%flat_buf->count;
|
||||
} /* while (new_buffer_read < new_file_read) */
|
||||
} /* if ((new_file_read < new_buffer_read) && (file_list_count
|
||||
== MAX_ARRAY_SIZE)) */
|
||||
} /* while ((mem_list_count < MAX_ARRAY_SIZE) &&
|
||||
(new_buffer_read < bufsize-size_read)) */
|
||||
|
||||
/* fakes filling the readlist arrays of lengths found above */
|
||||
k = start_k;
|
||||
j = start_j;
|
||||
for (i=0; i<mem_list_count; i++) {
|
||||
if(i) {
|
||||
if (i == (mem_list_count - 1)) {
|
||||
if (flat_buf->blocklens[k] == end_brd_size)
|
||||
brd_size = flat_buf->blocklens[(k+1)%
|
||||
flat_buf->count];
|
||||
else {
|
||||
brd_size = flat_buf->blocklens[k] - end_brd_size;
|
||||
k--;
|
||||
buf_count--;
|
||||
}
|
||||
}
|
||||
}
|
||||
buf_count++;
|
||||
k = (k + 1)%flat_buf->count;
|
||||
} /* for (i=0; i<mem_list_count; i++) */
|
||||
for (i=0; i<file_list_count; i++) {
|
||||
if (i) {
|
||||
if (i == (file_list_count - 1)) {
|
||||
if (flat_file->blocklens[j] == end_frd_size)
|
||||
frd_size = flat_file->blocklens[(j+1)%
|
||||
flat_file->count];
|
||||
else {
|
||||
frd_size = flat_file->blocklens[j] - end_frd_size;
|
||||
j--;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (j < flat_file->count - 1) j++;
|
||||
else {
|
||||
j = 0;
|
||||
n_filetypes++;
|
||||
}
|
||||
} /* for (i=0; i<file_list_count; i++) */
|
||||
size_read += new_buffer_read;
|
||||
start_k = k;
|
||||
start_j = j;
|
||||
if (max_mem_list < mem_list_count)
|
||||
max_mem_list = mem_list_count;
|
||||
if (max_file_list < file_list_count)
|
||||
max_file_list = file_list_count;
|
||||
} /* while (size_read < bufsize) */
|
||||
|
||||
/* one last check before we actually carry out the operation:
|
||||
* this code has hard-to-fix bugs when a noncontiguous file type has
|
||||
* such large pieces that the sum of the lengths of the memory type is
|
||||
* not larger than one of those pieces (and vice versa for large memory
|
||||
* types and many pices of file types. In these cases, give up and
|
||||
* fall back to naive reads and writes. The testphdf5 test created a
|
||||
* type with two very large memory regions and 600 very small file
|
||||
* regions. The same test also created a type with one very large file
|
||||
* region and many (700) very small memory regions. both cases caused
|
||||
* problems for this code */
|
||||
|
||||
if ( ( (file_list_count == 1) &&
|
||||
(new_file_read < flat_file->blocklens[0] ) ) ||
|
||||
((mem_list_count == 1) &&
|
||||
(new_buffer_read < flat_buf->blocklens[0]) ) ||
|
||||
((file_list_count == MAX_ARRAY_SIZE) &&
|
||||
(new_file_read < flat_buf->blocklens[0]) ) ||
|
||||
( (mem_list_count == MAX_ARRAY_SIZE) &&
|
||||
(new_buffer_read < flat_file->blocklens[0])) )
|
||||
{
|
||||
|
||||
ADIOI_Delete_flattened(datatype);
|
||||
ADIOI_GEN_ReadStrided_naive(fd, buf, count, datatype,
|
||||
file_ptr_type, initial_off, status, error_code);
|
||||
return;
|
||||
}
|
||||
|
||||
mem_offsets = (PVFS_size*)ADIOI_Malloc(max_mem_list*sizeof(PVFS_size));
|
||||
mem_lengths = (int *)ADIOI_Malloc(max_mem_list*sizeof(int));
|
||||
file_offsets = (int64_t *)ADIOI_Malloc(max_file_list*sizeof(int64_t));
|
||||
file_lengths = (int32_t *)ADIOI_Malloc(max_file_list*sizeof(int32_t));
|
||||
|
||||
size_read = 0;
|
||||
n_filetypes = st_n_filetypes;
|
||||
frd_size = st_frd_size;
|
||||
brd_size = flat_buf->blocklens[0];
|
||||
buf_count = 0;
|
||||
start_mem_offset = 0;
|
||||
start_k = k = 0;
|
||||
start_j = st_index;
|
||||
|
||||
/* this section calculates mem_list_count and file_list_count
|
||||
and also finds the possibly odd sized last array elements
|
||||
in new_frd_size and new_brd_size */
|
||||
|
||||
while (size_read < bufsize) {
|
||||
k = start_k;
|
||||
new_buffer_read = 0;
|
||||
mem_list_count = 0;
|
||||
while ((mem_list_count < MAX_ARRAY_SIZE) &&
|
||||
(new_buffer_read < bufsize-size_read)) {
|
||||
/* find mem_list_count and file_list_count such that both are
|
||||
less than MAX_ARRAY_SIZE, the sum of their lengths are
|
||||
equal, and the sum of all the data read and data to be
|
||||
read in the next immediate read list is less than
|
||||
bufsize */
|
||||
if(mem_list_count) {
|
||||
if((new_buffer_read + flat_buf->blocklens[k] +
|
||||
size_read) > bufsize) {
|
||||
end_brd_size = new_buffer_read +
|
||||
flat_buf->blocklens[k] - (bufsize - size_read);
|
||||
new_buffer_read = bufsize - size_read;
|
||||
}
|
||||
else {
|
||||
new_buffer_read += flat_buf->blocklens[k];
|
||||
end_brd_size = flat_buf->blocklens[k];
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (brd_size > (bufsize - size_read)) {
|
||||
new_buffer_read = bufsize - size_read;
|
||||
brd_size = new_buffer_read;
|
||||
}
|
||||
else new_buffer_read = brd_size;
|
||||
}
|
||||
mem_list_count++;
|
||||
k = (k + 1)%flat_buf->count;
|
||||
} /* while ((mem_list_count < MAX_ARRAY_SIZE) &&
|
||||
(new_buffer_read < bufsize-size_read)) */
|
||||
j = start_j;
|
||||
new_file_read = 0;
|
||||
file_list_count = 0;
|
||||
while ((file_list_count < MAX_ARRAY_SIZE) &&
|
||||
(new_file_read < new_buffer_read)) {
|
||||
if(file_list_count) {
|
||||
if((new_file_read + flat_file->blocklens[j]) >
|
||||
new_buffer_read) {
|
||||
end_frd_size = new_buffer_read - new_file_read;
|
||||
new_file_read = new_buffer_read;
|
||||
j--;
|
||||
}
|
||||
else {
|
||||
new_file_read += flat_file->blocklens[j];
|
||||
end_frd_size = flat_file->blocklens[j];
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (frd_size > new_buffer_read) {
|
||||
new_file_read = new_buffer_read;
|
||||
frd_size = new_file_read;
|
||||
}
|
||||
else new_file_read = frd_size;
|
||||
}
|
||||
file_list_count++;
|
||||
if (j < (flat_file->count - 1)) j++;
|
||||
else j = 0;
|
||||
|
||||
k = start_k;
|
||||
if ((new_file_read < new_buffer_read) &&
|
||||
(file_list_count == MAX_ARRAY_SIZE)) {
|
||||
new_buffer_read = 0;
|
||||
mem_list_count = 0;
|
||||
while (new_buffer_read < new_file_read) {
|
||||
if(mem_list_count) {
|
||||
if((new_buffer_read + flat_buf->blocklens[k]) >
|
||||
new_file_read) {
|
||||
end_brd_size = new_file_read - new_buffer_read;
|
||||
new_buffer_read = new_file_read;
|
||||
k--;
|
||||
}
|
||||
else {
|
||||
new_buffer_read += flat_buf->blocklens[k];
|
||||
end_brd_size = flat_buf->blocklens[k];
|
||||
}
|
||||
}
|
||||
else {
|
||||
new_buffer_read = brd_size;
|
||||
if (brd_size > (bufsize - size_read)) {
|
||||
new_buffer_read = bufsize - size_read;
|
||||
brd_size = new_buffer_read;
|
||||
}
|
||||
}
|
||||
mem_list_count++;
|
||||
k = (k + 1)%flat_buf->count;
|
||||
} /* while (new_buffer_read < new_file_read) */
|
||||
} /* if ((new_file_read < new_buffer_read) && (file_list_count
|
||||
== MAX_ARRAY_SIZE)) */
|
||||
} /* while ((mem_list_count < MAX_ARRAY_SIZE) &&
|
||||
(new_buffer_read < bufsize-size_read)) */
|
||||
|
||||
/* fills the allocated readlist arrays */
|
||||
k = start_k;
|
||||
j = start_j;
|
||||
for (i=0; i<mem_list_count; i++) {
|
||||
mem_offsets[i] = ((PVFS_size)buf + buftype_extent*
|
||||
(buf_count/flat_buf->count) +
|
||||
(int)flat_buf->indices[k]);
|
||||
if(!i) {
|
||||
mem_lengths[0] = brd_size;
|
||||
mem_offsets[0] += flat_buf->blocklens[k] - brd_size;
|
||||
}
|
||||
else {
|
||||
if (i == (mem_list_count - 1)) {
|
||||
mem_lengths[i] = end_brd_size;
|
||||
if (flat_buf->blocklens[k] == end_brd_size)
|
||||
brd_size = flat_buf->blocklens[(k+1)%
|
||||
flat_buf->count];
|
||||
else {
|
||||
brd_size = flat_buf->blocklens[k] - end_brd_size;
|
||||
k--;
|
||||
buf_count--;
|
||||
}
|
||||
}
|
||||
else {
|
||||
mem_lengths[i] = flat_buf->blocklens[k];
|
||||
}
|
||||
}
|
||||
buf_count++;
|
||||
k = (k + 1)%flat_buf->count;
|
||||
} /* for (i=0; i<mem_list_count; i++) */
|
||||
for (i=0; i<file_list_count; i++) {
|
||||
file_offsets[i] = disp + flat_file->indices[j] +
|
||||
((ADIO_Offset)n_filetypes) * filetype_extent;
|
||||
if (!i) {
|
||||
file_lengths[0] = frd_size;
|
||||
file_offsets[0] += flat_file->blocklens[j] - frd_size;
|
||||
}
|
||||
else {
|
||||
if (i == (file_list_count - 1)) {
|
||||
file_lengths[i] = end_frd_size;
|
||||
if (flat_file->blocklens[j] == end_frd_size)
|
||||
frd_size = flat_file->blocklens[(j+1)%
|
||||
flat_file->count];
|
||||
else {
|
||||
frd_size = flat_file->blocklens[j] - end_frd_size;
|
||||
j--;
|
||||
}
|
||||
}
|
||||
else file_lengths[i] = flat_file->blocklens[j];
|
||||
}
|
||||
if (j < flat_file->count - 1) j++;
|
||||
else {
|
||||
j = 0;
|
||||
n_filetypes++;
|
||||
}
|
||||
} /* for (i=0; i<file_list_count; i++) */
|
||||
err_flag = PVFS_Request_hindexed(mem_list_count, mem_lengths,
|
||||
mem_offsets, PVFS_BYTE, &mem_req);
|
||||
/* --BEGIN ERROR HANDLING-- */
|
||||
if (err_flag != 0 ) {
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
|
||||
MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__,
|
||||
ADIOI_PVFS2_error_convert(err_flag),
|
||||
"Error in PVFS_Request_hindexed (memory)", 0);
|
||||
goto error_state;
|
||||
}
|
||||
/* -- END ERROR HANDLING-- */
|
||||
err_flag = PVFS_Request_hindexed(file_list_count, file_lengths,
|
||||
file_offsets, PVFS_BYTE, &file_req);
|
||||
/* --BEGIN ERROR HANDLING-- */
|
||||
if (err_flag != 0) {
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
|
||||
MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__,
|
||||
ADIOI_PVFS2_error_convert(err_flag),
|
||||
"Error in PVFS_Request_hindexed (file)", 0);
|
||||
goto error_state;
|
||||
}
|
||||
/* --END ERROR HANDLING-- */
|
||||
|
||||
/* offset will be expressed in memory and file datatypes */
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_read_a, 0, NULL );
|
||||
#endif
|
||||
err_flag = PVFS_sys_read(pvfs_fs->object_ref, file_req, 0,
|
||||
PVFS_BOTTOM, mem_req, &(pvfs_fs->credentials), &resp_io);
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_read_b, 0, NULL );
|
||||
#endif
|
||||
/* --BEGIN ERROR HANDLING-- */
|
||||
if (err_flag != 0) {
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
|
||||
MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__,
|
||||
ADIOI_PVFS2_error_convert(err_flag),
|
||||
"Error in PVFS_sys_read", 0);
|
||||
}
|
||||
/* --END ERROR HANDLING-- */
|
||||
PVFS_Request_free(&mem_req);
|
||||
PVFS_Request_free(&file_req);
|
||||
total_bytes_read += resp_io.total_completed;
|
||||
size_read += new_buffer_read;
|
||||
start_k = k;
|
||||
start_j = j;
|
||||
} /* while (size_read < bufsize) */
|
||||
ADIOI_Free(mem_offsets);
|
||||
ADIOI_Free(mem_lengths);
|
||||
}
|
||||
/* Other ADIO routines will convert absolute bytes into counts of datatypes */
|
||||
/* when incrementing fp_ind, need to also take into account the file type:
|
||||
* consider an N-element 1-d subarray with a lb and ub: ( |---xxxxx-----|
|
||||
* if we wrote N elements, offset needs to point at beginning of type, not
|
||||
* at empty region at offset N+1)
|
||||
*
|
||||
* As we discussed on mpich-discuss in may/june 2009, the code below might
|
||||
* look wierd, but by putting fp_ind at the last byte written, the next
|
||||
* time we run through the strided code we'll update the fp_ind to the
|
||||
* right location. */
|
||||
if (file_ptr_type == ADIO_INDIVIDUAL) {
|
||||
fd->fp_ind = file_offsets[file_list_count-1]+
|
||||
file_lengths[file_list_count-1];
|
||||
}
|
||||
|
||||
ADIOI_Free(file_offsets);
|
||||
ADIOI_Free(file_lengths);
|
||||
|
||||
if (err_flag == 0) *error_code = MPI_SUCCESS;
|
||||
|
||||
error_state:
|
||||
fd->fp_sys_posn = -1; /* set it to null. */
|
||||
|
||||
#ifdef HAVE_STATUS_SET_BYTES
|
||||
MPIR_Status_set_bytes(status, datatype, bufsize);
|
||||
/* This is a temporary way of filling in status. The right way is to
|
||||
keep track of how much data was actually read and placed in buf
|
||||
by ADIOI_BUFFERED_READ. */
|
||||
#endif
|
||||
|
||||
if (!buftype_is_contig) ADIOI_Delete_flattened(datatype);
|
||||
}
|
||||
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
963
ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_write_list_classic.c
Обычный файл
963
ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_write_list_classic.c
Обычный файл
@ -0,0 +1,963 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; -*-
|
||||
* vim: ts=8 sts=4 sw=4 noexpandtab
|
||||
*
|
||||
* Copyright (C) 2008 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
#include "adio.h"
|
||||
#include "adio_extern.h"
|
||||
#include "ad_pvfs2.h"
|
||||
|
||||
#include "ad_pvfs2_common.h"
|
||||
|
||||
void ADIOI_PVFS2_OldWriteStrided(ADIO_File fd, void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Status *status,
|
||||
int *error_code)
|
||||
{
|
||||
/* as with all the other WriteStrided functions, offset is in units of
|
||||
* etype relative to the filetype */
|
||||
|
||||
/* Since PVFS2 does not support file locking, can't do buffered writes
|
||||
as on Unix */
|
||||
|
||||
ADIOI_Flatlist_node *flat_buf, *flat_file;
|
||||
int i, j, k, bwr_size, fwr_size=0, st_index=0;
|
||||
int bufsize, sum, n_etypes_in_filetype, size_in_filetype;
|
||||
int n_filetypes, etype_in_filetype;
|
||||
ADIO_Offset abs_off_in_filetype=0;
|
||||
int filetype_size, etype_size, buftype_size;
|
||||
MPI_Aint filetype_extent, buftype_extent;
|
||||
int buf_count, buftype_is_contig, filetype_is_contig;
|
||||
ADIO_Offset off, disp, start_off, initial_off;
|
||||
int flag, st_fwr_size, st_n_filetypes;
|
||||
int err_flag=0;
|
||||
|
||||
int mem_list_count, file_list_count;
|
||||
PVFS_size * mem_offsets;
|
||||
int64_t *file_offsets;
|
||||
int *mem_lengths;
|
||||
int32_t *file_lengths;
|
||||
int total_blks_to_write;
|
||||
|
||||
int max_mem_list, max_file_list;
|
||||
|
||||
int b_blks_wrote;
|
||||
int f_data_wrote;
|
||||
int size_wrote=0, n_write_lists, extra_blks;
|
||||
|
||||
int end_bwr_size, end_fwr_size;
|
||||
int start_k, start_j, new_file_write, new_buffer_write;
|
||||
int start_mem_offset;
|
||||
PVFS_Request mem_req, file_req;
|
||||
ADIOI_PVFS2_fs * pvfs_fs;
|
||||
PVFS_sysresp_io resp_io;
|
||||
MPI_Offset total_bytes_written=0;
|
||||
static char myname[] = "ADIOI_PVFS2_WRITESTRIDED";
|
||||
|
||||
/* note: don't increase this: several parts of PVFS2 now
|
||||
* assume this limit*/
|
||||
#define MAX_ARRAY_SIZE 64
|
||||
|
||||
/* --BEGIN ERROR HANDLING-- */
|
||||
if (fd->atomicity) {
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
|
||||
MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__,
|
||||
MPI_ERR_ARG,
|
||||
"Atomic noncontiguous writes are not supported by PVFS2", 0);
|
||||
return;
|
||||
}
|
||||
/* --END ERROR HANDLING-- */
|
||||
|
||||
ADIOI_Datatype_iscontig(datatype, &buftype_is_contig);
|
||||
ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);
|
||||
|
||||
/* the HDF5 tests showed a bug in this list processing code (see many many
|
||||
* lines down below). We added a workaround, but common HDF5 file types
|
||||
* are actually contiguous and do not need the expensive workarond */
|
||||
if (!filetype_is_contig) {
|
||||
flat_file = ADIOI_Flatlist;
|
||||
while (flat_file->type != fd->filetype) flat_file = flat_file->next;
|
||||
if (flat_file->count == 1 && !buftype_is_contig)
|
||||
filetype_is_contig = 1;
|
||||
}
|
||||
|
||||
MPI_Type_size(fd->filetype, &filetype_size);
|
||||
if ( ! filetype_size ) {
|
||||
*error_code = MPI_SUCCESS;
|
||||
return;
|
||||
}
|
||||
|
||||
MPI_Type_extent(fd->filetype, &filetype_extent);
|
||||
MPI_Type_size(datatype, &buftype_size);
|
||||
MPI_Type_extent(datatype, &buftype_extent);
|
||||
etype_size = fd->etype_size;
|
||||
|
||||
bufsize = buftype_size * count;
|
||||
|
||||
pvfs_fs = (ADIOI_PVFS2_fs*)fd->fs_ptr;
|
||||
|
||||
if (!buftype_is_contig && filetype_is_contig) {
|
||||
|
||||
/* noncontiguous in memory, contiguous in file. */
|
||||
int64_t file_offsets;
|
||||
int32_t file_lengths;
|
||||
|
||||
ADIOI_Flatten_datatype(datatype);
|
||||
flat_buf = ADIOI_Flatlist;
|
||||
while (flat_buf->type != datatype) flat_buf = flat_buf->next;
|
||||
|
||||
if (file_ptr_type == ADIO_EXPLICIT_OFFSET) {
|
||||
off = fd->disp + etype_size * offset;
|
||||
}
|
||||
else off = fd->fp_ind;
|
||||
|
||||
file_list_count = 1;
|
||||
file_offsets = off;
|
||||
file_lengths = 0;
|
||||
total_blks_to_write = count*flat_buf->count;
|
||||
b_blks_wrote = 0;
|
||||
|
||||
/* allocate arrays according to max usage */
|
||||
if (total_blks_to_write > MAX_ARRAY_SIZE)
|
||||
mem_list_count = MAX_ARRAY_SIZE;
|
||||
else mem_list_count = total_blks_to_write;
|
||||
mem_offsets = (PVFS_size*)ADIOI_Malloc(mem_list_count*sizeof(PVFS_size));
|
||||
mem_lengths = (int*)ADIOI_Malloc(mem_list_count*sizeof(int));
|
||||
|
||||
j = 0;
|
||||
/* step through each block in memory, filling memory arrays */
|
||||
while (b_blks_wrote < total_blks_to_write) {
|
||||
for (i=0; i<flat_buf->count; i++) {
|
||||
mem_offsets[b_blks_wrote % MAX_ARRAY_SIZE] =
|
||||
/* TODO: fix this warning by casting to an integer that's
|
||||
* the same size as a char * and /then/ casting to
|
||||
* PVFS_size */
|
||||
((PVFS_size)buf + j*buftype_extent + flat_buf->indices[i]);
|
||||
mem_lengths[b_blks_wrote % MAX_ARRAY_SIZE] =
|
||||
flat_buf->blocklens[i];
|
||||
file_lengths += flat_buf->blocklens[i];
|
||||
b_blks_wrote++;
|
||||
if (!(b_blks_wrote % MAX_ARRAY_SIZE) ||
|
||||
(b_blks_wrote == total_blks_to_write)) {
|
||||
|
||||
/* in the case of the last write list call,
|
||||
adjust mem_list_count */
|
||||
if (b_blks_wrote == total_blks_to_write) {
|
||||
mem_list_count = total_blks_to_write % MAX_ARRAY_SIZE;
|
||||
/* in case last write list call fills max arrays */
|
||||
if (!mem_list_count) mem_list_count = MAX_ARRAY_SIZE;
|
||||
}
|
||||
err_flag = PVFS_Request_hindexed(mem_list_count,
|
||||
mem_lengths, mem_offsets,
|
||||
PVFS_BYTE, &mem_req);
|
||||
/* --BEGIN ERROR HANDLING-- */
|
||||
if (err_flag != 0) {
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
|
||||
MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__,
|
||||
ADIOI_PVFS2_error_convert(err_flag),
|
||||
"Error in PVFS_Request_hindexed (memory)", 0);
|
||||
break;
|
||||
}
|
||||
/* --END ERROR HANDLING-- */
|
||||
|
||||
err_flag = PVFS_Request_contiguous(file_lengths,
|
||||
PVFS_BYTE, &file_req);
|
||||
/* --BEGIN ERROR HANDLING-- */
|
||||
if (err_flag != 0) {
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
|
||||
MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__,
|
||||
ADIOI_PVFS2_error_convert(err_flag),
|
||||
"Error in PVFS_Request_contiguous (file)", 0);
|
||||
break;
|
||||
}
|
||||
/* --END ERROR HANDLING-- */
|
||||
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_write_a, 0, NULL );
|
||||
#endif
|
||||
err_flag = PVFS_sys_write(pvfs_fs->object_ref, file_req,
|
||||
file_offsets, PVFS_BOTTOM,
|
||||
mem_req,
|
||||
&(pvfs_fs->credentials),
|
||||
&resp_io);
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
|
||||
#endif
|
||||
total_bytes_written += resp_io.total_completed;
|
||||
|
||||
/* in the case of error or the last write list call,
|
||||
* leave here */
|
||||
/* --BEGIN ERROR HANDLING-- */
|
||||
if (err_flag) {
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
|
||||
MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__,
|
||||
ADIOI_PVFS2_error_convert(err_flag),
|
||||
"Error in PVFS_sys_write", 0);
|
||||
break;
|
||||
}
|
||||
/* --END ERROR HANDLING-- */
|
||||
if (b_blks_wrote == total_blks_to_write) break;
|
||||
|
||||
file_offsets += file_lengths;
|
||||
file_lengths = 0;
|
||||
PVFS_Request_free(&mem_req);
|
||||
PVFS_Request_free(&file_req);
|
||||
}
|
||||
} /* for (i=0; i<flat_buf->count; i++) */
|
||||
j++;
|
||||
} /* while (b_blks_wrote < total_blks_to_write) */
|
||||
ADIOI_Free(mem_offsets);
|
||||
ADIOI_Free(mem_lengths);
|
||||
|
||||
if (file_ptr_type == ADIO_INDIVIDUAL)
|
||||
fd->fp_ind += total_bytes_written;
|
||||
|
||||
if (!err_flag) *error_code = MPI_SUCCESS;
|
||||
|
||||
fd->fp_sys_posn = -1; /* clear this. */
|
||||
|
||||
#ifdef HAVE_STATUS_SET_BYTES
|
||||
MPIR_Status_set_bytes(status, datatype, bufsize);
|
||||
/* This is a temporary way of filling in status. The right way is to
|
||||
keep track of how much data was actually written by ADIOI_BUFFERED_WRITE. */
|
||||
#endif
|
||||
|
||||
ADIOI_Delete_flattened(datatype);
|
||||
return;
|
||||
} /* if (!buftype_is_contig && filetype_is_contig) */
|
||||
|
||||
/* already know that file is noncontiguous from above */
|
||||
/* noncontiguous in file */
|
||||
|
||||
/* filetype already flattened in ADIO_Open */
|
||||
flat_file = ADIOI_Flatlist;
|
||||
while (flat_file->type != fd->filetype) flat_file = flat_file->next;
|
||||
|
||||
disp = fd->disp;
|
||||
initial_off = offset;
|
||||
|
||||
/* for each case - ADIO_Individual pointer or explicit, find offset
|
||||
(file offset in bytes), n_filetypes (how many filetypes into file
|
||||
to start), fwr_size (remaining amount of data in present file
|
||||
block), and st_index (start point in terms of blocks in starting
|
||||
filetype) */
|
||||
if (file_ptr_type == ADIO_INDIVIDUAL) {
|
||||
offset = fd->fp_ind; /* in bytes */
|
||||
n_filetypes = -1;
|
||||
flag = 0;
|
||||
while (!flag) {
|
||||
n_filetypes++;
|
||||
for (i=0; i<flat_file->count; i++) {
|
||||
if (disp + flat_file->indices[i] +
|
||||
((ADIO_Offset) n_filetypes)*filetype_extent +
|
||||
flat_file->blocklens[i] >= offset) {
|
||||
st_index = i;
|
||||
fwr_size = disp + flat_file->indices[i] +
|
||||
((ADIO_Offset) n_filetypes)*filetype_extent
|
||||
+ flat_file->blocklens[i] - offset;
|
||||
flag = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
} /* while (!flag) */
|
||||
} /* if (file_ptr_type == ADIO_INDIVIDUAL) */
|
||||
else {
|
||||
n_etypes_in_filetype = filetype_size/etype_size;
|
||||
n_filetypes = (int) (offset / n_etypes_in_filetype);
|
||||
etype_in_filetype = (int) (offset % n_etypes_in_filetype);
|
||||
size_in_filetype = etype_in_filetype * etype_size;
|
||||
|
||||
sum = 0;
|
||||
for (i=0; i<flat_file->count; i++) {
|
||||
sum += flat_file->blocklens[i];
|
||||
if (sum > size_in_filetype) {
|
||||
st_index = i;
|
||||
fwr_size = sum - size_in_filetype;
|
||||
abs_off_in_filetype = flat_file->indices[i] +
|
||||
size_in_filetype - (sum - flat_file->blocklens[i]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* abs. offset in bytes in the file */
|
||||
offset = disp + ((ADIO_Offset) n_filetypes)*filetype_extent +
|
||||
abs_off_in_filetype;
|
||||
} /* else [file_ptr_type != ADIO_INDIVIDUAL] */
|
||||
|
||||
start_off = offset;
|
||||
st_fwr_size = fwr_size;
|
||||
st_n_filetypes = n_filetypes;
|
||||
|
||||
if (buftype_is_contig && !filetype_is_contig) {
|
||||
|
||||
/* contiguous in memory, noncontiguous in file. should be the most
|
||||
common case. */
|
||||
|
||||
int mem_lengths;
|
||||
char *mem_offsets;
|
||||
|
||||
i = 0;
|
||||
j = st_index;
|
||||
off = offset;
|
||||
n_filetypes = st_n_filetypes;
|
||||
|
||||
mem_list_count = 1;
|
||||
|
||||
/* determine how many blocks in file to write */
|
||||
f_data_wrote = ADIOI_MIN(st_fwr_size, bufsize);
|
||||
total_blks_to_write = 1;
|
||||
if (j < (flat_file->count -1)) j++;
|
||||
else {
|
||||
j = 0;
|
||||
n_filetypes++;
|
||||
}
|
||||
while (f_data_wrote < bufsize) {
|
||||
f_data_wrote += flat_file->blocklens[j];
|
||||
total_blks_to_write++;
|
||||
if (j<(flat_file->count-1)) j++;
|
||||
else j = 0;
|
||||
}
|
||||
|
||||
j = st_index;
|
||||
n_filetypes = st_n_filetypes;
|
||||
n_write_lists = total_blks_to_write/MAX_ARRAY_SIZE;
|
||||
extra_blks = total_blks_to_write%MAX_ARRAY_SIZE;
|
||||
|
||||
mem_offsets = buf;
|
||||
mem_lengths = 0;
|
||||
|
||||
/* if at least one full writelist, allocate file arrays
|
||||
at max array size and don't free until very end */
|
||||
if (n_write_lists) {
|
||||
file_offsets = (int64_t*)ADIOI_Malloc(MAX_ARRAY_SIZE*
|
||||
sizeof(int64_t));
|
||||
file_lengths = (int32_t*)ADIOI_Malloc(MAX_ARRAY_SIZE*
|
||||
sizeof(int32_t));
|
||||
}
|
||||
/* if there's no full writelist allocate file arrays according
|
||||
to needed size (extra_blks) */
|
||||
else {
|
||||
file_offsets = (int64_t*)ADIOI_Malloc(extra_blks*
|
||||
sizeof(int64_t));
|
||||
file_lengths = (int32_t*)ADIOI_Malloc(extra_blks*
|
||||
sizeof(int32_t));
|
||||
}
|
||||
|
||||
/* for file arrays that are of MAX_ARRAY_SIZE, build arrays */
|
||||
for (i=0; i<n_write_lists; i++) {
|
||||
file_list_count = MAX_ARRAY_SIZE;
|
||||
if(!i) {
|
||||
file_offsets[0] = offset;
|
||||
file_lengths[0] = st_fwr_size;
|
||||
mem_lengths = st_fwr_size;
|
||||
}
|
||||
for (k=0; k<MAX_ARRAY_SIZE; k++) {
|
||||
if (i || k) {
|
||||
file_offsets[k] = disp +
|
||||
((ADIO_Offset)n_filetypes)*filetype_extent
|
||||
+ flat_file->indices[j];
|
||||
file_lengths[k] = flat_file->blocklens[j];
|
||||
mem_lengths += file_lengths[k];
|
||||
}
|
||||
if (j<(flat_file->count - 1)) j++;
|
||||
else {
|
||||
j = 0;
|
||||
n_filetypes++;
|
||||
}
|
||||
} /* for (k=0; k<MAX_ARRAY_SIZE; k++) */
|
||||
|
||||
err_flag = PVFS_Request_contiguous(mem_lengths,
|
||||
PVFS_BYTE, &mem_req);
|
||||
/* --BEGIN ERROR HANDLING-- */
|
||||
if (err_flag != 0) {
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
|
||||
MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__,
|
||||
ADIOI_PVFS2_error_convert(err_flag),
|
||||
"Error in PVFS_Request_contiguous (memory)", 0);
|
||||
goto error_state;
|
||||
}
|
||||
/* --END ERROR HANDLING-- */
|
||||
|
||||
err_flag = PVFS_Request_hindexed(file_list_count, file_lengths,
|
||||
file_offsets, PVFS_BYTE,
|
||||
&file_req);
|
||||
/* --BEGIN ERROR HANDLING-- */
|
||||
if (err_flag != 0) {
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
|
||||
MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__,
|
||||
ADIOI_PVFS2_error_convert(err_flag),
|
||||
"Error in PVFS_Request_hindexed (file)", 0);
|
||||
goto error_state;
|
||||
}
|
||||
/* --END ERROR HANDLING-- */
|
||||
|
||||
/* PVFS_Request_hindexed already expresses the offsets into the
|
||||
* file, so we should not pass in an offset if we are using
|
||||
* hindexed for the file type */
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_write_a, 0, NULL );
|
||||
#endif
|
||||
err_flag = PVFS_sys_write(pvfs_fs->object_ref, file_req, 0,
|
||||
mem_offsets, mem_req,
|
||||
&(pvfs_fs->credentials), &resp_io);
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
|
||||
#endif
|
||||
/* --BEGIN ERROR HANDLING-- */
|
||||
if (err_flag != 0) {
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
|
||||
MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__,
|
||||
ADIOI_PVFS2_error_convert(err_flag),
|
||||
"Error in PVFS_sys_write", 0);
|
||||
goto error_state;
|
||||
}
|
||||
/* --END ERROR HANDLING-- */
|
||||
total_bytes_written += resp_io.total_completed;
|
||||
|
||||
mem_offsets += mem_lengths;
|
||||
mem_lengths = 0;
|
||||
PVFS_Request_free(&file_req);
|
||||
PVFS_Request_free(&mem_req);
|
||||
|
||||
} /* for (i=0; i<n_write_lists; i++) */
|
||||
|
||||
/* for file arrays smaller than MAX_ARRAY_SIZE (last write_list call) */
|
||||
if (extra_blks) {
|
||||
file_list_count = extra_blks;
|
||||
if(!i) {
|
||||
file_offsets[0] = offset;
|
||||
file_lengths[0] = ADIOI_MIN(st_fwr_size, bufsize);
|
||||
}
|
||||
for (k=0; k<extra_blks; k++) {
|
||||
if(i || k) {
|
||||
file_offsets[k] = disp +
|
||||
((ADIO_Offset)n_filetypes)*filetype_extent +
|
||||
flat_file->indices[j];
|
||||
if (k == (extra_blks - 1)) {
|
||||
file_lengths[k] = bufsize - (int32_t) mem_lengths
|
||||
- (int32_t) mem_offsets + (int32_t) buf;
|
||||
}
|
||||
else file_lengths[k] = flat_file->blocklens[j];
|
||||
} /* if(i || k) */
|
||||
mem_lengths += file_lengths[k];
|
||||
if (j<(flat_file->count - 1)) j++;
|
||||
else {
|
||||
j = 0;
|
||||
n_filetypes++;
|
||||
}
|
||||
} /* for (k=0; k<extra_blks; k++) */
|
||||
|
||||
err_flag = PVFS_Request_contiguous(mem_lengths,
|
||||
PVFS_BYTE, &mem_req);
|
||||
/* --BEGIN ERROR HANDLING-- */
|
||||
if (err_flag != 0) {
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
|
||||
MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__,
|
||||
ADIOI_PVFS2_error_convert(err_flag),
|
||||
"Error in PVFS_Request_contiguous (memory)", 0);
|
||||
goto error_state;
|
||||
}
|
||||
/* --END ERROR HANDLING-- */
|
||||
|
||||
err_flag = PVFS_Request_hindexed(file_list_count, file_lengths,
|
||||
file_offsets, PVFS_BYTE,
|
||||
&file_req);
|
||||
/* --BEGIN ERROR HANDLING-- */
|
||||
if (err_flag != 0) {
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
|
||||
MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__,
|
||||
ADIOI_PVFS2_error_convert(err_flag),
|
||||
"Error in PVFS_Request_hindexed(file)", 0);
|
||||
goto error_state;
|
||||
}
|
||||
/* --END ERROR HANDLING-- */
|
||||
|
||||
/* as above, use 0 for 'offset' when using hindexed file type*/
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_write_a, 0, NULL );
|
||||
#endif
|
||||
err_flag = PVFS_sys_write(pvfs_fs->object_ref, file_req, 0,
|
||||
mem_offsets, mem_req,
|
||||
&(pvfs_fs->credentials), &resp_io);
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
|
||||
#endif
|
||||
/* --BEGIN ERROR HANDLING-- */
|
||||
if (err_flag != 0) {
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
|
||||
MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__,
|
||||
ADIOI_PVFS2_error_convert(err_flag),
|
||||
"Error in PVFS_sys_write", 0);
|
||||
goto error_state;
|
||||
}
|
||||
/* --END ERROR HANDLING-- */
|
||||
total_bytes_written += resp_io.total_completed;
|
||||
PVFS_Request_free(&mem_req);
|
||||
PVFS_Request_free(&file_req);
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* noncontiguous in memory as well as in file */
|
||||
|
||||
ADIOI_Flatten_datatype(datatype);
|
||||
flat_buf = ADIOI_Flatlist;
|
||||
while (flat_buf->type != datatype) flat_buf = flat_buf->next;
|
||||
|
||||
size_wrote = 0;
|
||||
n_filetypes = st_n_filetypes;
|
||||
fwr_size = st_fwr_size;
|
||||
bwr_size = flat_buf->blocklens[0];
|
||||
buf_count = 0;
|
||||
start_mem_offset = 0;
|
||||
start_k = k = 0;
|
||||
start_j = st_index;
|
||||
max_mem_list = 0;
|
||||
max_file_list = 0;
|
||||
|
||||
/* run through and file max_file_list and max_mem_list so that you
|
||||
can allocate the file and memory arrays less than MAX_ARRAY_SIZE
|
||||
if possible */
|
||||
|
||||
while (size_wrote < bufsize) {
|
||||
k = start_k;
|
||||
new_buffer_write = 0;
|
||||
mem_list_count = 0;
|
||||
while ((mem_list_count < MAX_ARRAY_SIZE) &&
|
||||
(new_buffer_write < bufsize-size_wrote)) {
|
||||
/* find mem_list_count and file_list_count such that both are
|
||||
less than MAX_ARRAY_SIZE, the sum of their lengths are
|
||||
equal, and the sum of all the data written and data to be
|
||||
written in the next immediate write list is less than
|
||||
bufsize */
|
||||
if(mem_list_count) {
|
||||
if((new_buffer_write + flat_buf->blocklens[k] +
|
||||
size_wrote) > bufsize) {
|
||||
end_bwr_size = new_buffer_write +
|
||||
flat_buf->blocklens[k] - (bufsize - size_wrote);
|
||||
new_buffer_write = bufsize - size_wrote;
|
||||
}
|
||||
else {
|
||||
new_buffer_write += flat_buf->blocklens[k];
|
||||
end_bwr_size = flat_buf->blocklens[k];
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (bwr_size > (bufsize - size_wrote)) {
|
||||
new_buffer_write = bufsize - size_wrote;
|
||||
bwr_size = new_buffer_write;
|
||||
}
|
||||
else new_buffer_write = bwr_size;
|
||||
}
|
||||
mem_list_count++;
|
||||
k = (k + 1)%flat_buf->count;
|
||||
} /* while ((mem_list_count < MAX_ARRAY_SIZE) &&
|
||||
(new_buffer_write < bufsize-size_wrote)) */
|
||||
j = start_j;
|
||||
new_file_write = 0;
|
||||
file_list_count = 0;
|
||||
while ((file_list_count < MAX_ARRAY_SIZE) &&
|
||||
(new_file_write < new_buffer_write)) {
|
||||
if(file_list_count) {
|
||||
if((new_file_write + flat_file->blocklens[j]) >
|
||||
new_buffer_write) {
|
||||
end_fwr_size = new_buffer_write - new_file_write;
|
||||
new_file_write = new_buffer_write;
|
||||
j--;
|
||||
}
|
||||
else {
|
||||
new_file_write += flat_file->blocklens[j];
|
||||
end_fwr_size = flat_file->blocklens[j];
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (fwr_size > new_buffer_write) {
|
||||
new_file_write = new_buffer_write;
|
||||
fwr_size = new_file_write;
|
||||
}
|
||||
else new_file_write = fwr_size;
|
||||
}
|
||||
file_list_count++;
|
||||
if (j < (flat_file->count - 1)) j++;
|
||||
else j = 0;
|
||||
|
||||
k = start_k;
|
||||
if ((new_file_write < new_buffer_write) &&
|
||||
(file_list_count == MAX_ARRAY_SIZE)) {
|
||||
new_buffer_write = 0;
|
||||
mem_list_count = 0;
|
||||
while (new_buffer_write < new_file_write) {
|
||||
if(mem_list_count) {
|
||||
if((new_buffer_write + flat_buf->blocklens[k]) >
|
||||
new_file_write) {
|
||||
end_bwr_size = new_file_write -
|
||||
new_buffer_write;
|
||||
new_buffer_write = new_file_write;
|
||||
k--;
|
||||
}
|
||||
else {
|
||||
new_buffer_write += flat_buf->blocklens[k];
|
||||
end_bwr_size = flat_buf->blocklens[k];
|
||||
}
|
||||
}
|
||||
else {
|
||||
new_buffer_write = bwr_size;
|
||||
if (bwr_size > (bufsize - size_wrote)) {
|
||||
new_buffer_write = bufsize - size_wrote;
|
||||
bwr_size = new_buffer_write;
|
||||
}
|
||||
}
|
||||
mem_list_count++;
|
||||
k = (k + 1)%flat_buf->count;
|
||||
} /* while (new_buffer_write < new_file_write) */
|
||||
} /* if ((new_file_write < new_buffer_write) &&
|
||||
(file_list_count == MAX_ARRAY_SIZE)) */
|
||||
} /* while ((mem_list_count < MAX_ARRAY_SIZE) &&
|
||||
(new_buffer_write < bufsize-size_wrote)) */
|
||||
|
||||
/* fakes filling the writelist arrays of lengths found above */
|
||||
k = start_k;
|
||||
j = start_j;
|
||||
for (i=0; i<mem_list_count; i++) {
|
||||
if(i) {
|
||||
if (i == (mem_list_count - 1)) {
|
||||
if (flat_buf->blocklens[k] == end_bwr_size)
|
||||
bwr_size = flat_buf->blocklens[(k+1)%
|
||||
flat_buf->count];
|
||||
else {
|
||||
bwr_size = flat_buf->blocklens[k] - end_bwr_size;
|
||||
k--;
|
||||
buf_count--;
|
||||
}
|
||||
}
|
||||
}
|
||||
buf_count++;
|
||||
k = (k + 1)%flat_buf->count;
|
||||
} /* for (i=0; i<mem_list_count; i++) */
|
||||
for (i=0; i<file_list_count; i++) {
|
||||
if (i) {
|
||||
if (i == (file_list_count - 1)) {
|
||||
if (flat_file->blocklens[j] == end_fwr_size)
|
||||
fwr_size = flat_file->blocklens[(j+1)%
|
||||
flat_file->count];
|
||||
else {
|
||||
fwr_size = flat_file->blocklens[j] - end_fwr_size;
|
||||
j--;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (j < flat_file->count - 1) j++;
|
||||
else {
|
||||
j = 0;
|
||||
n_filetypes++;
|
||||
}
|
||||
} /* for (i=0; i<file_list_count; i++) */
|
||||
size_wrote += new_buffer_write;
|
||||
start_k = k;
|
||||
start_j = j;
|
||||
if (max_mem_list < mem_list_count)
|
||||
max_mem_list = mem_list_count;
|
||||
if (max_file_list < file_list_count)
|
||||
max_file_list = file_list_count;
|
||||
} /* while (size_wrote < bufsize) */
|
||||
|
||||
/* one last check before we actually carry out the operation:
|
||||
* this code has hard-to-fix bugs when a noncontiguous file type has
|
||||
* such large pieces that the sum of the lengths of the memory type is
|
||||
* not larger than one of those pieces (and vice versa for large memory
|
||||
* types and many pices of file types. In these cases, give up and
|
||||
* fall back to naive reads and writes. The testphdf5 test created a
|
||||
* type with two very large memory regions and 600 very small file
|
||||
* regions. The same test also created a type with one very large file
|
||||
* region and many (700) very small memory regions. both cases caused
|
||||
* problems for this code */
|
||||
|
||||
if ( ( (file_list_count == 1) &&
|
||||
(new_file_write < flat_file->blocklens[0] ) ) ||
|
||||
((mem_list_count == 1) &&
|
||||
(new_buffer_write < flat_buf->blocklens[0]) ) ||
|
||||
((file_list_count == MAX_ARRAY_SIZE) &&
|
||||
(new_file_write < flat_buf->blocklens[0]) ) ||
|
||||
( (mem_list_count == MAX_ARRAY_SIZE) &&
|
||||
(new_buffer_write < flat_file->blocklens[0])) )
|
||||
{
|
||||
ADIOI_Delete_flattened(datatype);
|
||||
ADIOI_GEN_WriteStrided_naive(fd, buf, count, datatype,
|
||||
file_ptr_type, initial_off, status, error_code);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
mem_offsets = (PVFS_size*)ADIOI_Malloc(max_mem_list*sizeof(PVFS_size));
|
||||
mem_lengths = (int *)ADIOI_Malloc(max_mem_list*sizeof(int));
|
||||
file_offsets = (int64_t *)ADIOI_Malloc(max_file_list*sizeof(int64_t));
|
||||
file_lengths = (int32_t *)ADIOI_Malloc(max_file_list*sizeof(int32_t));
|
||||
|
||||
size_wrote = 0;
|
||||
n_filetypes = st_n_filetypes;
|
||||
fwr_size = st_fwr_size;
|
||||
bwr_size = flat_buf->blocklens[0];
|
||||
buf_count = 0;
|
||||
start_mem_offset = 0;
|
||||
start_k = k = 0;
|
||||
start_j = st_index;
|
||||
|
||||
/* this section calculates mem_list_count and file_list_count
|
||||
and also finds the possibly odd sized last array elements
|
||||
in new_fwr_size and new_bwr_size */
|
||||
|
||||
while (size_wrote < bufsize) {
|
||||
k = start_k;
|
||||
new_buffer_write = 0;
|
||||
mem_list_count = 0;
|
||||
while ((mem_list_count < MAX_ARRAY_SIZE) &&
|
||||
(new_buffer_write < bufsize-size_wrote)) {
|
||||
/* find mem_list_count and file_list_count such that both are
|
||||
less than MAX_ARRAY_SIZE, the sum of their lengths are
|
||||
equal, and the sum of all the data written and data to be
|
||||
written in the next immediate write list is less than
|
||||
bufsize */
|
||||
if(mem_list_count) {
|
||||
if((new_buffer_write + flat_buf->blocklens[k] +
|
||||
size_wrote) > bufsize) {
|
||||
end_bwr_size = new_buffer_write +
|
||||
flat_buf->blocklens[k] - (bufsize - size_wrote);
|
||||
new_buffer_write = bufsize - size_wrote;
|
||||
}
|
||||
else {
|
||||
new_buffer_write += flat_buf->blocklens[k];
|
||||
end_bwr_size = flat_buf->blocklens[k];
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (bwr_size > (bufsize - size_wrote)) {
|
||||
new_buffer_write = bufsize - size_wrote;
|
||||
bwr_size = new_buffer_write;
|
||||
}
|
||||
else new_buffer_write = bwr_size;
|
||||
}
|
||||
mem_list_count++;
|
||||
k = (k + 1)%flat_buf->count;
|
||||
} /* while ((mem_list_count < MAX_ARRAY_SIZE) &&
|
||||
(new_buffer_write < bufsize-size_wrote)) */
|
||||
j = start_j;
|
||||
new_file_write = 0;
|
||||
file_list_count = 0;
|
||||
while ((file_list_count < MAX_ARRAY_SIZE) &&
|
||||
(new_file_write < new_buffer_write)) {
|
||||
if(file_list_count) {
|
||||
if((new_file_write + flat_file->blocklens[j]) >
|
||||
new_buffer_write) {
|
||||
end_fwr_size = new_buffer_write - new_file_write;
|
||||
new_file_write = new_buffer_write;
|
||||
j--;
|
||||
}
|
||||
else {
|
||||
new_file_write += flat_file->blocklens[j];
|
||||
end_fwr_size = flat_file->blocklens[j];
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (fwr_size > new_buffer_write) {
|
||||
new_file_write = new_buffer_write;
|
||||
fwr_size = new_file_write;
|
||||
}
|
||||
else new_file_write = fwr_size;
|
||||
}
|
||||
file_list_count++;
|
||||
if (j < (flat_file->count - 1)) j++;
|
||||
else j = 0;
|
||||
|
||||
k = start_k;
|
||||
if ((new_file_write < new_buffer_write) &&
|
||||
(file_list_count == MAX_ARRAY_SIZE)) {
|
||||
new_buffer_write = 0;
|
||||
mem_list_count = 0;
|
||||
while (new_buffer_write < new_file_write) {
|
||||
if(mem_list_count) {
|
||||
if((new_buffer_write + flat_buf->blocklens[k]) >
|
||||
new_file_write) {
|
||||
end_bwr_size = new_file_write -
|
||||
new_buffer_write;
|
||||
new_buffer_write = new_file_write;
|
||||
k--;
|
||||
}
|
||||
else {
|
||||
new_buffer_write += flat_buf->blocklens[k];
|
||||
end_bwr_size = flat_buf->blocklens[k];
|
||||
}
|
||||
}
|
||||
else {
|
||||
new_buffer_write = bwr_size;
|
||||
if (bwr_size > (bufsize - size_wrote)) {
|
||||
new_buffer_write = bufsize - size_wrote;
|
||||
bwr_size = new_buffer_write;
|
||||
}
|
||||
}
|
||||
mem_list_count++;
|
||||
k = (k + 1)%flat_buf->count;
|
||||
} /* while (new_buffer_write < new_file_write) */
|
||||
} /* if ((new_file_write < new_buffer_write) &&
|
||||
(file_list_count == MAX_ARRAY_SIZE)) */
|
||||
} /* while ((mem_list_count < MAX_ARRAY_SIZE) &&
|
||||
(new_buffer_write < bufsize-size_wrote)) */
|
||||
|
||||
/* fills the allocated writelist arrays */
|
||||
k = start_k;
|
||||
j = start_j;
|
||||
for (i=0; i<mem_list_count; i++) {
|
||||
/* TODO: fix this warning by casting to an integer that's the
|
||||
* same size as a char * and /then/ casting to PVFS_size */
|
||||
mem_offsets[i] = ((PVFS_size)buf + buftype_extent*
|
||||
(buf_count/flat_buf->count) +
|
||||
(int)flat_buf->indices[k]);
|
||||
|
||||
if(!i) {
|
||||
mem_lengths[0] = bwr_size;
|
||||
mem_offsets[0] += flat_buf->blocklens[k] - bwr_size;
|
||||
}
|
||||
else {
|
||||
if (i == (mem_list_count - 1)) {
|
||||
mem_lengths[i] = end_bwr_size;
|
||||
if (flat_buf->blocklens[k] == end_bwr_size)
|
||||
bwr_size = flat_buf->blocklens[(k+1)%
|
||||
flat_buf->count];
|
||||
else {
|
||||
bwr_size = flat_buf->blocklens[k] - end_bwr_size;
|
||||
k--;
|
||||
buf_count--;
|
||||
}
|
||||
}
|
||||
else {
|
||||
mem_lengths[i] = flat_buf->blocklens[k];
|
||||
}
|
||||
}
|
||||
buf_count++;
|
||||
k = (k + 1)%flat_buf->count;
|
||||
} /* for (i=0; i<mem_list_count; i++) */
|
||||
for (i=0; i<file_list_count; i++) {
|
||||
file_offsets[i] = disp + flat_file->indices[j] +
|
||||
((ADIO_Offset)n_filetypes) * filetype_extent;
|
||||
if (!i) {
|
||||
file_lengths[0] = fwr_size;
|
||||
file_offsets[0] += flat_file->blocklens[j] - fwr_size;
|
||||
}
|
||||
else {
|
||||
if (i == (file_list_count - 1)) {
|
||||
file_lengths[i] = end_fwr_size;
|
||||
if (flat_file->blocklens[j] == end_fwr_size)
|
||||
fwr_size = flat_file->blocklens[(j+1)%
|
||||
flat_file->count];
|
||||
else {
|
||||
fwr_size = flat_file->blocklens[j] - end_fwr_size;
|
||||
j--;
|
||||
}
|
||||
}
|
||||
else file_lengths[i] = flat_file->blocklens[j];
|
||||
}
|
||||
if (j < flat_file->count - 1) j++;
|
||||
else {
|
||||
j = 0;
|
||||
n_filetypes++;
|
||||
}
|
||||
} /* for (i=0; i<file_list_count; i++) */
|
||||
|
||||
err_flag = PVFS_Request_hindexed(mem_list_count, mem_lengths,
|
||||
mem_offsets, PVFS_BYTE, &mem_req);
|
||||
/* --BEGIN ERROR HANDLING-- */
|
||||
if (err_flag != 0 ) {
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
|
||||
MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__,
|
||||
ADIOI_PVFS2_error_convert(err_flag),
|
||||
"Error in PVFS_Request_hindexed (memory)", 0);
|
||||
goto error_state;
|
||||
}
|
||||
/* --END ERROR HANDLING-- */
|
||||
|
||||
err_flag = PVFS_Request_hindexed(file_list_count, file_lengths,
|
||||
file_offsets, PVFS_BYTE,
|
||||
&file_req);
|
||||
/* --BEGIN ERROR HANDLING-- */
|
||||
if (err_flag != 0) {
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
|
||||
MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__,
|
||||
ADIOI_PVFS2_error_convert(err_flag),
|
||||
"Error in PVFS_Request_hindexed", 0);
|
||||
goto error_state;
|
||||
}
|
||||
/* --END ERROR HANDLING-- */
|
||||
|
||||
/* offset will be expressed in memory and file datatypes */
|
||||
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_write_a, 0, NULL );
|
||||
#endif
|
||||
err_flag = PVFS_sys_write(pvfs_fs->object_ref, file_req, 0,
|
||||
PVFS_BOTTOM, mem_req,
|
||||
&(pvfs_fs->credentials), &resp_io);
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
|
||||
#endif
|
||||
/* --BEGIN ERROR HANDLING-- */
|
||||
if (err_flag != 0) {
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
|
||||
MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__,
|
||||
ADIOI_PVFS2_error_convert(err_flag),
|
||||
"Error in PVFS_sys_write", 0);
|
||||
goto error_state;
|
||||
}
|
||||
/* --END ERROR HANDLING-- */
|
||||
|
||||
size_wrote += new_buffer_write;
|
||||
total_bytes_written += resp_io.total_completed;
|
||||
start_k = k;
|
||||
start_j = j;
|
||||
PVFS_Request_free(&mem_req);
|
||||
PVFS_Request_free(&file_req);
|
||||
} /* while (size_wrote < bufsize) */
|
||||
ADIOI_Free(mem_offsets);
|
||||
ADIOI_Free(mem_lengths);
|
||||
}
|
||||
/* when incrementing fp_ind, need to also take into account the file type:
|
||||
* consider an N-element 1-d subarray with a lb and ub: ( |---xxxxx-----|
|
||||
* if we wrote N elements, offset needs to point at beginning of type, not
|
||||
* at empty region at offset N+1).
|
||||
*
|
||||
* As we discussed on mpich-discuss in may/june 2009, the code below might
|
||||
* look wierd, but by putting fp_ind at the last byte written, the next
|
||||
* time we run through the strided code we'll update the fp_ind to the
|
||||
* right location. */
|
||||
if (file_ptr_type == ADIO_INDIVIDUAL) {
|
||||
fd->fp_ind = file_offsets[file_list_count-1]+
|
||||
file_lengths[file_list_count-1];
|
||||
}
|
||||
ADIOI_Free(file_offsets);
|
||||
ADIOI_Free(file_lengths);
|
||||
|
||||
*error_code = MPI_SUCCESS;
|
||||
|
||||
error_state:
|
||||
fd->fp_sys_posn = -1; /* set it to null. */
|
||||
|
||||
#ifdef HAVE_STATUS_SET_BYTES
|
||||
MPIR_Status_set_bytes(status, datatype, bufsize);
|
||||
/* This is a temporary way of filling in status. The right way is to
|
||||
keep track of how much data was actually written by ADIOI_BUFFERED_WRITE. */
|
||||
#endif
|
||||
|
||||
if (!buftype_is_contig) ADIOI_Delete_flattened(datatype);
|
||||
}
|
@ -12,6 +12,7 @@
|
||||
|
||||
struct ADIOI_Fns_struct ADIO_TESTFS_operations = {
|
||||
ADIOI_TESTFS_Open, /* Open */
|
||||
ADIOI_GEN_OpenColl, /* OpenColl */
|
||||
ADIOI_TESTFS_ReadContig, /* ReadContig */
|
||||
ADIOI_TESTFS_WriteContig, /* WriteContig */
|
||||
ADIOI_TESTFS_ReadStridedColl, /* ReadStridedColl */
|
||||
@ -33,4 +34,5 @@ struct ADIOI_Fns_struct ADIO_TESTFS_operations = {
|
||||
ADIOI_TESTFS_Flush, /* Flush */
|
||||
ADIOI_TESTFS_Resize, /* Resize */
|
||||
ADIOI_TESTFS_Delete, /* Delete */
|
||||
ADIOI_GEN_Feature, /* Features */
|
||||
};
|
||||
|
@ -7,7 +7,9 @@
|
||||
|
||||
#include "ad_testfs.h"
|
||||
#include "adioi.h"
|
||||
|
||||
#ifdef ROMIO_BGL
|
||||
#include "../ad_bgl/ad_bgl.h"
|
||||
#endif
|
||||
void ADIOI_TESTFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
|
||||
{
|
||||
int myrank, nprocs;
|
||||
@ -21,5 +23,10 @@ void ADIOI_TESTFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
|
||||
FPRINTF(stdout, "[%d/%d] calling ADIOI_GEN_SetInfo\n",
|
||||
myrank, nprocs);
|
||||
|
||||
#ifdef ROMIO_BGL /* BlueGene support for pvfs through ufs */
|
||||
/* BlueGene hack: force testfs to mimic BlueGene hints */
|
||||
ADIOI_BGL_SetInfo(fd, users_info, error_code);
|
||||
#else
|
||||
ADIOI_GEN_SetInfo(fd, users_info, error_code);
|
||||
#endif
|
||||
}
|
||||
|
@ -26,10 +26,6 @@ void ADIOI_TESTFS_ReadContig(ADIO_File fd, void *buf, int count,
|
||||
offset = fd->fp_ind;
|
||||
fd->fp_ind += datatype_size * count;
|
||||
fd->fp_sys_posn = fd->fp_ind;
|
||||
#if 0
|
||||
FPRINTF(stdout, "[%d/%d] new file position is %lld\n", myrank,
|
||||
nprocs, (long long) fd->fp_ind);
|
||||
#endif
|
||||
}
|
||||
else {
|
||||
fd->fp_sys_posn = offset + datatype_size * count;
|
||||
|
@ -26,8 +26,8 @@ ADIO_Offset ADIOI_TESTFS_SeekIndividual(ADIO_File fd, ADIO_Offset offset,
|
||||
ADIO_Offset off;
|
||||
ADIOI_Flatlist_node *flat_file;
|
||||
int i, n_etypes_in_filetype, n_filetypes, etype_in_filetype;
|
||||
ADIO_Offset abs_off_in_filetype=0;
|
||||
int size_in_filetype, sum;
|
||||
ADIO_Offset abs_off_in_filetype=0, sum;
|
||||
int size_in_filetype;
|
||||
int filetype_size, etype_size, filetype_is_contig;
|
||||
MPI_Aint filetype_extent;
|
||||
|
||||
@ -54,6 +54,7 @@ ADIO_Offset ADIOI_TESTFS_SeekIndividual(ADIO_File fd, ADIO_Offset offset,
|
||||
}
|
||||
|
||||
n_etypes_in_filetype = filetype_size/etype_size;
|
||||
ADIOI_Assert((offset / n_etypes_in_filetype) == (int) (offset / n_etypes_in_filetype));
|
||||
n_filetypes = (int) (offset / n_etypes_in_filetype);
|
||||
etype_in_filetype = (int) (offset % n_etypes_in_filetype);
|
||||
size_in_filetype = etype_in_filetype * etype_size;
|
||||
@ -70,7 +71,7 @@ ADIO_Offset ADIOI_TESTFS_SeekIndividual(ADIO_File fd, ADIO_Offset offset,
|
||||
}
|
||||
|
||||
/* abs. offset in bytes in the file */
|
||||
off = fd->disp + (ADIO_Offset) n_filetypes * filetype_extent +
|
||||
off = fd->disp + (ADIO_Offset)n_filetypes * (ADIO_Offset)filetype_extent +
|
||||
abs_off_in_filetype;
|
||||
}
|
||||
|
||||
|
@ -23,7 +23,7 @@ void ADIOI_TESTFS_WriteContig(ADIO_File fd, void *buf, int count,
|
||||
nprocs, fd->filename);
|
||||
FPRINTF(stdout, "[%d/%d] writing (buf = %p, loc = %lld, sz = %lld)\n",
|
||||
myrank, nprocs, buf, (long long) offset,
|
||||
(long long) datatype_size * count);
|
||||
(long long)datatype_size * (long long)count);
|
||||
|
||||
if (file_ptr_type != ADIO_EXPLICIT_OFFSET)
|
||||
{
|
||||
|
@ -12,6 +12,7 @@
|
||||
|
||||
struct ADIOI_Fns_struct ADIO_UFS_operations = {
|
||||
ADIOI_UFS_Open, /* Open */
|
||||
ADIOI_GEN_OpenColl, /* OpenColl */
|
||||
ADIOI_GEN_ReadContig, /* ReadContig */
|
||||
ADIOI_GEN_WriteContig, /* WriteContig */
|
||||
ADIOI_GEN_ReadStridedColl, /* ReadStridedColl */
|
||||
@ -38,4 +39,5 @@ struct ADIOI_Fns_struct ADIO_UFS_operations = {
|
||||
ADIOI_GEN_Flush, /* Flush */
|
||||
ADIOI_GEN_Resize, /* Resize */
|
||||
ADIOI_GEN_Delete, /* Delete */
|
||||
ADIOI_GEN_Feature, /* Features */
|
||||
};
|
||||
|
@ -22,13 +22,9 @@ noinst_LTLIBRARIES = libadio_xfs.la
|
||||
libadio_xfs_la_SOURCES = \
|
||||
ad_xfs.c \
|
||||
ad_xfs.h \
|
||||
ad_xfs_done.c \
|
||||
ad_xfs_fcntl.c \
|
||||
ad_xfs_hints.c \
|
||||
ad_xfs_iread.c \
|
||||
ad_xfs_iwrite.c \
|
||||
ad_xfs_open.c \
|
||||
ad_xfs_read.c \
|
||||
ad_xfs_resize.c \
|
||||
ad_xfs_wait.c \
|
||||
ad_xfs_write.c
|
||||
|
@ -12,6 +12,7 @@
|
||||
|
||||
struct ADIOI_Fns_struct ADIO_XFS_operations = {
|
||||
ADIOI_XFS_Open, /* Open */
|
||||
ADIOI_GEN_OpenColl, /* OpenColl */
|
||||
ADIOI_XFS_ReadContig, /* ReadContig */
|
||||
ADIOI_XFS_WriteContig, /* WriteContig */
|
||||
ADIOI_GEN_ReadStridedColl, /* ReadStridedColl */
|
||||
@ -22,15 +23,21 @@ struct ADIOI_Fns_struct ADIO_XFS_operations = {
|
||||
ADIOI_GEN_ReadStrided, /* ReadStrided */
|
||||
ADIOI_GEN_WriteStrided, /* WriteStrided */
|
||||
ADIOI_GEN_Close, /* Close */
|
||||
ADIOI_XFS_IreadContig, /* IreadContig */
|
||||
ADIOI_XFS_IwriteContig, /* IwriteContig */
|
||||
ADIOI_XFS_ReadDone, /* ReadDone */
|
||||
ADIOI_XFS_WriteDone, /* WriteDone */
|
||||
ADIOI_XFS_ReadComplete, /* ReadComplete */
|
||||
ADIOI_XFS_WriteComplete, /* WriteComplete */
|
||||
#if defined(ROMIO_HAVE_WORKING_AIO)
|
||||
ADIOI_GEN_IreadContig, /* IreadContig */
|
||||
ADIOI_GEN_IwriteContig, /* IwriteContig */
|
||||
#else
|
||||
ADIOI_FAKE_IreadContig, /* IreadContig */
|
||||
ADIOI_FAKE_IwriteContig, /* IwriteContig */
|
||||
#endif /* ROMIO_HAVE_WORKING_AIO */
|
||||
ADIOI_GEN_IODone, /* ReadDone */
|
||||
ADIOI_GEN_IODone, /* WriteDone */
|
||||
ADIOI_GEN_IOComplete, /* ReadComplete */
|
||||
ADIOI_GEN_IOComplete, /* WriteComplete */
|
||||
ADIOI_GEN_IreadStrided, /* IreadStrided */
|
||||
ADIOI_GEN_IwriteStrided, /* IwriteStrided */
|
||||
ADIOI_GEN_Flush, /* Flush */
|
||||
ADIOI_XFS_Resize, /* Resize */
|
||||
ADIOI_GEN_Delete, /* Delete */
|
||||
ADIOI_GEN_Feature, /* Features */
|
||||
};
|
||||
|
@ -8,20 +8,19 @@
|
||||
#ifndef AD_XFS_INCLUDE
|
||||
#define AD_XFS_INCLUDE
|
||||
|
||||
#define _XOPEN_SOURCE 500
|
||||
#include <unistd.h>
|
||||
#include <sys/types.h>
|
||||
#include <fcntl.h>
|
||||
#include "adio.h"
|
||||
#include <aio.h>
|
||||
|
||||
int ADIOI_XFS_aio(ADIO_File fd, void *buf, int len, ADIO_Offset offset,
|
||||
int wr, void *handle);
|
||||
|
||||
#if (defined(HAVE_PREAD64) && (_ABIO32 == 1))
|
||||
# define pread pread64
|
||||
# define pwrite pwrite64
|
||||
#if defined(MPISGI)
|
||||
#include "xfs/xfs_fs.h"
|
||||
#ifndef __USE_LARGEFILE64
|
||||
#define __USE_LARGEFILE64
|
||||
#endif
|
||||
typedef struct aiocb64 aiocb64_t;
|
||||
#endif
|
||||
/* above needed for IRIX 6.5 */
|
||||
|
||||
void ADIOI_XFS_Open(ADIO_File fd, int *error_code);
|
||||
void ADIOI_XFS_Close(ADIO_File fd, int *error_code);
|
||||
@ -33,22 +32,6 @@ void ADIOI_XFS_WriteContig(ADIO_File fd, void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Status *status, int
|
||||
*error_code);
|
||||
void ADIOI_XFS_IwriteContig(ADIO_File fd, void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Request *request, int
|
||||
*error_code);
|
||||
void ADIOI_XFS_IreadContig(ADIO_File fd, void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Request *request, int
|
||||
*error_code);
|
||||
int ADIOI_XFS_ReadDone(ADIO_Request *request, ADIO_Status *status, int
|
||||
*error_code);
|
||||
int ADIOI_XFS_WriteDone(ADIO_Request *request, ADIO_Status *status, int
|
||||
*error_code);
|
||||
void ADIOI_XFS_ReadComplete(ADIO_Request *request, ADIO_Status *status, int
|
||||
*error_code);
|
||||
void ADIOI_XFS_WriteComplete(ADIO_Request *request, ADIO_Status *status,
|
||||
int *error_code);
|
||||
void ADIOI_XFS_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int
|
||||
*error_code);
|
||||
void ADIOI_XFS_Resize(ADIO_File fd, ADIO_Offset size, int *error_code);
|
||||
|
@ -1,69 +0,0 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; -*- */
|
||||
/*
|
||||
*
|
||||
* Copyright (C) 1997 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
#include "ad_xfs.h"
|
||||
|
||||
/*
 * Test whether a nonblocking XFS request has completed, without blocking.
 *
 * request    - in/out: the request to test. If the operation is finished
 *              the request is released and *request is set to
 *              ADIO_REQUEST_NULL.
 * status     - filled with the byte count (only when HAVE_STATUS_SET_BYTES
 *              is defined and the recorded nbytes is not -1).
 * error_code - set to MPI_SUCCESS, or to an MPI_ERR_IO-class code when the
 *              asynchronous operation returned -1.
 *
 * Returns 1 if the request is complete (or was already ADIO_REQUEST_NULL),
 * 0 if the operation is still in progress.
 */
int ADIOI_XFS_ReadDone(ADIO_Request *request, ADIO_Status *status,
                       int *error_code)
{
    int err, aio_status, done = 0;
    static char myname[] = "ADIOI_XFS_READDONE";

    if (*request == ADIO_REQUEST_NULL) {
        *error_code = MPI_SUCCESS;
        return 1;
    }

    if ((*request)->queued) {
        aiocb64_t *aiocbp = (aiocb64_t *) (*request)->handle;

        /* The error status must be read BEFORE aio_return64(): once the
           return status has been retrieved, using the same control block
           with aio_error64() again is not guaranteed to work. */
        aio_status = aio_error64(aiocbp);
        if (aio_status == EINPROGRESS) {
            done = 0;
            *error_code = MPI_SUCCESS;
        }
        else {
            err = aio_return64(aiocbp);
            (*request)->nbytes = err;

            done = 1;
            if (err == -1) {
                /* keep errno describing the failed operation for callers
                   that inspect it, as the previous code did */
                errno = aio_status;
                *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                                   MPIR_ERR_RECOVERABLE, myname,
                                                   __LINE__, MPI_ERR_IO, "**io",
                                                   "**io %s", strerror(aio_status));
            }
            else *error_code = MPI_SUCCESS;
        }
    } /* if ((*request)->queued) */
    else {
        /* never handed to the aio subsystem: nothing to wait for */
        done = 1;
        *error_code = MPI_SUCCESS;
    }
#ifdef HAVE_STATUS_SET_BYTES
    if (done && ((*request)->nbytes != -1))
        MPIR_Status_set_bytes(status, (*request)->datatype, (*request)->nbytes);
#endif

    if (done) {
        /* if request is still queued in the system, it is also there
           on ADIOI_Async_list. Delete it from there. */
        if ((*request)->queued) ADIOI_Del_req_from_list(request);

        (*request)->fd->async_count--;
        if ((*request)->handle) ADIOI_Free((*request)->handle);
        ADIOI_Free_request((ADIOI_Req_node *) (*request));
        *request = ADIO_REQUEST_NULL;
        /* status to be filled */
    }
    return done;
}
|
||||
|
||||
|
||||
/*
 * Completion test for a nonblocking write. Forwards all arguments unchanged
 * to ADIOI_XFS_ReadDone and returns its result.
 */
int ADIOI_XFS_WriteDone(ADIO_Request *request, ADIO_Status *status, int *error_code)
{
    int finished;

    finished = ADIOI_XFS_ReadDone(request, status, error_code);
    return finished;
}
|
@ -7,6 +7,11 @@
|
||||
|
||||
#include "ad_xfs.h"
|
||||
#include "adio_extern.h"
|
||||
#include <sys/ioctl.h>
|
||||
|
||||
#ifndef HAVE_LSEEK64
|
||||
#define lseek64 lseek
|
||||
#endif
|
||||
|
||||
void ADIOI_XFS_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int *error_code)
|
||||
{
|
||||
@ -37,7 +42,7 @@ void ADIOI_XFS_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int *er
|
||||
fl.l_len = fcntl_struct->diskspace;
|
||||
|
||||
#if defined(LINUX) && defined(MPISGI)
|
||||
err = fcntl(fd->fd_sys, XFS_IOC_RESVSP64, &fl);
|
||||
err = ioctl(fd->fd_sys, XFS_IOC_RESVSP64, &fl);
|
||||
#else
|
||||
err = fcntl(fd->fd_sys, F_RESVSP64, &fl);
|
||||
#endif
|
||||
|
@ -8,36 +8,76 @@
|
||||
#include "ad_xfs.h"
|
||||
#include "adio_extern.h"
|
||||
|
||||
static unsigned xfs_direct_read_chunk_size;
|
||||
static unsigned xfs_direct_write_chunk_size;
|
||||
|
||||
void ADIOI_XFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
|
||||
{
|
||||
char *value;
|
||||
char *value, * c;
|
||||
int flag;
|
||||
static char xfs_initialized = 0;
|
||||
|
||||
if (fd->info == MPI_INFO_NULL) MPI_Info_create(&(fd->info));
|
||||
|
||||
/* the nightly builds say something is calling MPI_Info_set w/ a null info,
|
||||
* so protect the calls to MPI_Info_set */
|
||||
if (fd->info != MPI_INFO_NULL ) {
|
||||
MPI_Info_set(fd->info, "direct_read", "false");
|
||||
MPI_Info_set(fd->info, "direct_write", "false");
|
||||
ADIOI_Info_set(fd->info, "direct_read", "false");
|
||||
ADIOI_Info_set(fd->info, "direct_write", "false");
|
||||
fd->direct_read = fd->direct_write = 0;
|
||||
|
||||
if (!xfs_initialized) {
|
||||
xfs_initialized = 1;
|
||||
c = getenv("MPIO_DIRECT_READ_CHUNK_SIZE");
|
||||
if (c) {
|
||||
int io;
|
||||
io = atoi(c);
|
||||
if (io <= 0) {
|
||||
fprintf(stderr,
|
||||
"MPI: Ignoring an invalid setting for MPIO_DIRECT_READ_CHUNK_SIZE.\n"
|
||||
" It must be set to a positive integer value.\n");
|
||||
} else {
|
||||
xfs_direct_read_chunk_size = io;
|
||||
}
|
||||
} else {
|
||||
xfs_direct_read_chunk_size = 0;
|
||||
}
|
||||
|
||||
/* has user specified values for keys "direct_read" and "direct wirte"? */
|
||||
c = getenv("MPIO_DIRECT_WRITE_CHUNK_SIZE");
|
||||
if (c) {
|
||||
int io;
|
||||
io = atoi(c);
|
||||
if (io <= 0) {
|
||||
fprintf(stderr,
|
||||
"MPI: Ignoring an invalid setting for MPIO_DIRECT_WRITE_CHUNK_SIZE.\n"
|
||||
" It must be set to a positive integer value.\n");
|
||||
} else {
|
||||
xfs_direct_write_chunk_size = io;
|
||||
}
|
||||
} else {
|
||||
xfs_direct_write_chunk_size = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (!fd->hints->initialized) {
|
||||
fd->hints->fs_hints.xfs.read_chunk_sz =
|
||||
xfs_direct_read_chunk_size;
|
||||
fd->hints->fs_hints.xfs.write_chunk_sz =
|
||||
xfs_direct_write_chunk_size;
|
||||
}
|
||||
|
||||
/* has user specified values for keys "direct_read" and "direct write"? */
|
||||
if (users_info != MPI_INFO_NULL) {
|
||||
value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
|
||||
|
||||
MPI_Info_get(users_info, "direct_read", MPI_MAX_INFO_VAL,
|
||||
ADIOI_Info_get(users_info, "direct_read", MPI_MAX_INFO_VAL,
|
||||
value, &flag);
|
||||
if (flag && !strcmp(value, "true")) {
|
||||
MPI_Info_set(fd->info, "direct_read", "true");
|
||||
ADIOI_Info_set(fd->info, "direct_read", "true");
|
||||
fd->direct_read = 1;
|
||||
}
|
||||
|
||||
MPI_Info_get(users_info, "direct_write", MPI_MAX_INFO_VAL,
|
||||
ADIOI_Info_get(users_info, "direct_write", MPI_MAX_INFO_VAL,
|
||||
value, &flag);
|
||||
if (flag && !strcmp(value, "true")) {
|
||||
MPI_Info_set(fd->info, "direct_write", "true");
|
||||
ADIOI_Info_set(fd->info, "direct_write", "true");
|
||||
fd->direct_write = 1;
|
||||
}
|
||||
|
||||
@ -47,8 +87,10 @@ void ADIOI_XFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
|
||||
/* set the values for collective I/O and data sieving parameters */
|
||||
ADIOI_GEN_SetInfo(fd, users_info, error_code);
|
||||
|
||||
/* Environment variables override MPI_Info hints */
|
||||
if (ADIOI_Direct_read) fd->direct_read = 1;
|
||||
if (ADIOI_Direct_write) fd->direct_write = 1;
|
||||
|
||||
/* environment variables checked in ADIO_Init */
|
||||
|
||||
*error_code = MPI_SUCCESS;
|
||||
|
@ -1,42 +0,0 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; -*- */
|
||||
/*
|
||||
* Copyright (C) 1997 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
#include "ad_xfs.h"
|
||||
|
||||
void ADIOI_XFS_IreadContig(ADIO_File fd, void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Request *request, int *error_code)
|
||||
{
|
||||
int len, typesize, aio_errno = 0;
|
||||
static char myname[] = "ADIOI_XFS_IREADCONTIG";
|
||||
|
||||
(*request) = ADIOI_Malloc_request();
|
||||
(*request)->optype = ADIOI_READ;
|
||||
(*request)->fd = fd;
|
||||
(*request)->datatype = datatype;
|
||||
|
||||
MPI_Type_size(datatype, &typesize);
|
||||
len = count * typesize;
|
||||
|
||||
if (file_ptr_type == ADIO_INDIVIDUAL) offset = fd->fp_ind;
|
||||
aio_errno = ADIOI_XFS_aio(fd, buf, len, offset, 0, &((*request)->handle));
|
||||
if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind += len;
|
||||
|
||||
(*request)->queued = 1;
|
||||
ADIOI_Add_req_to_list(request);
|
||||
|
||||
fd->fp_sys_posn = -1;
|
||||
|
||||
/* --BEGIN ERROR HANDLING-- */
|
||||
if (aio_errno != 0) {
|
||||
MPIO_ERR_CREATE_CODE_ERRNO(myname, aio_errno, error_code);
|
||||
return;
|
||||
}
|
||||
/* --END ERROR HANDLING-- */
|
||||
|
||||
*error_code = MPI_SUCCESS;
|
||||
fd->async_count++;
|
||||
}
|
@ -1,145 +0,0 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; -*- */
|
||||
/*
|
||||
*
|
||||
* Copyright (C) 1997 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
#include "ad_xfs.h"
|
||||
|
||||
void ADIOI_XFS_IwriteContig(ADIO_File fd, void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Request *request,
|
||||
int *error_code)
|
||||
{
|
||||
int len, typesize, aio_errno = 0;
|
||||
static char myname[] = "ADIOI_XFS_IWRITECONTIG";
|
||||
|
||||
*request = ADIOI_Malloc_request();
|
||||
(*request)->optype = ADIOI_WRITE;
|
||||
(*request)->fd = fd;
|
||||
(*request)->datatype = datatype;
|
||||
|
||||
MPI_Type_size(datatype, &typesize);
|
||||
len = count * typesize;
|
||||
|
||||
if (file_ptr_type == ADIO_INDIVIDUAL) offset = fd->fp_ind;
|
||||
aio_errno = ADIOI_XFS_aio(fd, buf, len, offset, 1, &((*request)->handle));
|
||||
if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind += len;
|
||||
|
||||
(*request)->queued = 1;
|
||||
ADIOI_Add_req_to_list(request);
|
||||
|
||||
fd->fp_sys_posn = -1;
|
||||
|
||||
/* --BEGIN ERROR HANDLING-- */
|
||||
if (aio_errno != 0) {
|
||||
MPIO_ERR_CREATE_CODE_ERRNO(myname, aio_errno, error_code);
|
||||
return;
|
||||
}
|
||||
/* --END ERROR HANDLING-- */
|
||||
|
||||
*error_code = MPI_SUCCESS;
|
||||
fd->async_count++;
|
||||
}
|
||||
|
||||
|
||||
void ADIOI_XFS_IwriteStrided(ADIO_File fd, void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Request *request, int
|
||||
*error_code)
|
||||
{
|
||||
ADIO_Status status;
|
||||
#ifdef HAVE_STATUS_SET_BYTES
|
||||
int typesize;
|
||||
#endif
|
||||
|
||||
*request = ADIOI_Malloc_request();
|
||||
(*request)->optype = ADIOI_WRITE;
|
||||
(*request)->fd = fd;
|
||||
(*request)->datatype = datatype;
|
||||
(*request)->queued = 0;
|
||||
(*request)->handle = 0;
|
||||
|
||||
/* call the blocking version. It is faster because it does data sieving. */
|
||||
ADIO_WriteStrided(fd, buf, count, datatype, file_ptr_type,
|
||||
offset, &status, error_code);
|
||||
|
||||
fd->async_count++;
|
||||
|
||||
#ifdef HAVE_STATUS_SET_BYTES
|
||||
if (*error_code == MPI_SUCCESS) {
|
||||
MPI_Type_size(datatype, &typesize);
|
||||
(*request)->nbytes = count * typesize;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
/* This function is for implementation convenience. It is not user-visible.
|
||||
* It takes care of the differences in the interface for nonblocking I/O
|
||||
* on various Unix machines! If wr==1 write, wr==0 read.
|
||||
*
|
||||
* Returns 0 on success, -errno on failure.
|
||||
*/
|
||||
int ADIOI_XFS_aio(ADIO_File fd, void *buf, int len, ADIO_Offset offset,
|
||||
int wr, void *handle)
|
||||
{
|
||||
int err, error_code;
|
||||
aiocb64_t *aiocbp;
|
||||
|
||||
aiocbp = (aiocb64_t *) ADIOI_Calloc(sizeof(aiocb64_t), 1);
|
||||
|
||||
if (((wr && fd->direct_write) || (!wr && fd->direct_read))
|
||||
&& !(((long) buf) % fd->d_mem) && !(offset % fd->d_miniosz) &&
|
||||
!(len % fd->d_miniosz) && (len >= fd->d_miniosz) &&
|
||||
(len <= fd->d_maxiosz))
|
||||
aiocbp->aio_fildes = fd->fd_direct;
|
||||
else aiocbp->aio_fildes = fd->fd_sys;
|
||||
|
||||
aiocbp->aio_offset = offset;
|
||||
aiocbp->aio_buf = buf;
|
||||
aiocbp->aio_nbytes = len;
|
||||
aiocbp->aio_reqprio = 0;
|
||||
|
||||
#ifdef AIO_SIGNOTIFY_NONE
|
||||
/* SGI IRIX 6 */
|
||||
aiocbp->aio_sigevent.sigev_notify = SIGEV_NONE;
|
||||
#else
|
||||
aiocbp->aio_sigevent.sigev_signo = 0;
|
||||
#endif
|
||||
|
||||
if (wr) err = aio_write64(aiocbp);
|
||||
else err = aio_read64(aiocbp);
|
||||
|
||||
if (err != 0) {
|
||||
if (errno == EAGAIN) {
|
||||
/* exceeded the max. no. of outstanding requests.
|
||||
complete all previous async. requests and try again. */
|
||||
|
||||
/* ADIOI_Complete_async(&error_code); */
|
||||
if (error_code != MPI_SUCCESS) return -EIO;
|
||||
|
||||
if (wr) err = aio_write64(aiocbp);
|
||||
else err = aio_read64(aiocbp);
|
||||
|
||||
while (err != 0) {
|
||||
if (errno == EAGAIN) {
|
||||
/* sleep and try again */
|
||||
sleep(1);
|
||||
if (wr) err = aio_write64(aiocbp);
|
||||
else err = aio_read64(aiocbp);
|
||||
}
|
||||
else {
|
||||
return -errno;
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
return -errno;
|
||||
}
|
||||
}
|
||||
|
||||
*((aiocb64_t **) handle) = aiocbp;
|
||||
return 0;
|
||||
}
|
@ -5,22 +5,26 @@
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
#define _GNU_SOURCE // for O_DIRECT
|
||||
|
||||
#include "ad_xfs.h"
|
||||
#include <sys/ioctl.h>
|
||||
#ifdef HAVE_STDDEF_H
|
||||
#include <stddef.h>
|
||||
#endif
|
||||
|
||||
#if defined(MPISGI)
|
||||
#include <mpitypedefs.h>
|
||||
#include <mpifunctions.h>
|
||||
#ifndef HAVE_LSEEK64
|
||||
#define lseek64 lseek
|
||||
#endif
|
||||
|
||||
void ADIOI_XFS_Open(ADIO_File fd, int *error_code)
|
||||
{
|
||||
int perm, amode, amode_direct;
|
||||
int perm, amode, amode_direct, factor;
|
||||
unsigned int old_mask;
|
||||
struct dioattr st;
|
||||
static char myname[] = "ADIOI_XFS_OPEN";
|
||||
unsigned read_chunk_sz = fd->hints->fs_hints.xfs.read_chunk_sz;
|
||||
unsigned write_chunk_sz = fd->hints->fs_hints.xfs.write_chunk_sz;
|
||||
|
||||
if (fd->perm == ADIO_PERM_NULL) {
|
||||
old_mask = umask(022);
|
||||
@ -49,7 +53,7 @@ void ADIOI_XFS_Open(ADIO_File fd, int *error_code)
|
||||
fd->fd_direct = open(fd->filename, amode_direct, perm);
|
||||
if (fd->fd_direct != -1) {
|
||||
|
||||
#if defined(LINUX) && defined(MPISGI)
|
||||
#if defined(MPISGI)
|
||||
ioctl(fd->fd_direct, XFS_IOC_DIOINFO, &st);
|
||||
#else
|
||||
fcntl(fd->fd_direct, F_DIOINFO, &st);
|
||||
@ -57,7 +61,34 @@ void ADIOI_XFS_Open(ADIO_File fd, int *error_code)
|
||||
|
||||
fd->d_mem = st.d_mem;
|
||||
fd->d_miniosz = st.d_miniosz;
|
||||
fd->d_maxiosz = st.d_maxiosz;
|
||||
|
||||
if (read_chunk_sz == 0) {
|
||||
fd->hints->fs_hints.xfs.read_chunk_sz = st.d_maxiosz;
|
||||
} else {
|
||||
/*
|
||||
* MPIO_DIRECT_READ_CHUNK_SIZE was set.
|
||||
* Make read_chunk_sz a multiple of d_miniosz.
|
||||
*/
|
||||
factor = read_chunk_sz / fd->d_miniosz;
|
||||
if (factor == 0 || read_chunk_sz != fd->d_miniosz * factor) {
|
||||
fd->hints->fs_hints.xfs.read_chunk_sz =
|
||||
fd->d_miniosz * (factor + 1);
|
||||
}
|
||||
}
|
||||
|
||||
if (write_chunk_sz == 0) {
|
||||
fd->hints->fs_hints.xfs.write_chunk_sz = st.d_maxiosz;
|
||||
} else {
|
||||
/*
|
||||
* MPIO_DIRECT_WRITE_CHUNK_SIZE was set.
|
||||
* Make write_chunk_sz a multiple of d_miniosz.
|
||||
*/
|
||||
factor = write_chunk_sz / fd->d_miniosz;
|
||||
if (factor == 0 || write_chunk_sz != fd->d_miniosz * factor) {
|
||||
fd->hints->fs_hints.xfs.write_chunk_sz =
|
||||
fd->d_miniosz * (factor + 1);
|
||||
}
|
||||
}
|
||||
|
||||
if (fd->d_mem > XFS_MEMALIGN) {
|
||||
FPRINTF(stderr, "MPI: Run-time Direct-IO memory alignment, %d, does not match compile-time value, %d.\n",
|
||||
|
Некоторые файлы не были показаны из-за слишком большого количества измененных файлов Показать больше
Загрузка…
x
Ссылка в новой задаче
Block a user