diff --git a/ompi/mca/io/romio/romio/.config_params b/ompi/mca/io/romio/romio/.config_params index 6a875db219..07ac7a9c00 100644 --- a/ompi/mca/io/romio/romio/.config_params +++ b/ompi/mca/io/romio/romio/.config_params @@ -35,4 +35,5 @@ __sgi_mpi __hp_mpi __cray_mpi __lam_mpi +__Darwin __open_mpi diff --git a/ompi/mca/io/romio/romio/adio/ad_bgl/.state-cache b/ompi/mca/io/romio/romio/adio/ad_bgl/.state-cache deleted file mode 100644 index 10ea8a7bf6..0000000000 --- a/ompi/mca/io/romio/romio/adio/ad_bgl/.state-cache +++ /dev/null @@ -1,58 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/ompi/mca/io/romio/romio/adio/ad_bgl/Makefile.am b/ompi/mca/io/romio/romio/adio/ad_bgl/Makefile.am index ac55cf7d76..b0ac2f8b32 100644 --- a/ompi/mca/io/romio/romio/adio/ad_bgl/Makefile.am +++ b/ompi/mca/io/romio/romio/adio/ad_bgl/Makefile.am @@ -26,6 +26,7 @@ libadio_bgl_la_SOURCES = \ ad_bgl.c \ ad_bgl_close.c \ ad_bgl_fcntl.c \ + ad_bgl_flush.c \ ad_bgl_getsh.c \ ad_bgl.h \ ad_bgl_hints.c \ diff --git a/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl.c b/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl.c index 2911543cd9..e17cf31fa1 100644 --- a/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl.c +++ b/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl.c @@ -1,5 +1,6 @@ /* ---------------------------------------------------------------- */ /* (C)Copyright IBM Corp. 2007, 2008 */ +/* ---------------------------------------------------------------- */ /** * \file ad_bgl.c * \brief ??? 
@@ -18,6 +19,7 @@ struct ADIOI_Fns_struct ADIO_BGL_operations = { ADIOI_BGL_Open, /* Open */ + ADIOI_GEN_OpenColl, /* Collective open */ ADIOI_BGL_ReadContig, /* ReadContig */ ADIOI_BGL_WriteContig, /* WriteContig */ #if BGL_OPTIM_STEP1_2 @@ -51,7 +53,8 @@ struct ADIOI_Fns_struct ADIO_BGL_operations = { ADIOI_GEN_IOComplete, /* WriteComplete */ ADIOI_GEN_IreadStrided, /* IreadStrided */ ADIOI_GEN_IwriteStrided, /* IwriteStrided */ - ADIOI_GEN_Flush, /* Flush */ + ADIOI_BGL_Flush, /* Flush */ ADIOI_GEN_Resize, /* Resize */ ADIOI_GEN_Delete, /* Delete */ + ADIOI_GEN_Feature, /* Features */ }; diff --git a/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl.h b/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl.h index 1209785be1..89d247b0df 100644 --- a/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl.h +++ b/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl.h @@ -28,8 +28,10 @@ #include #endif +#if 0 int ADIOI_BGL_aio(ADIO_File fd, void *buf, int len, ADIO_Offset offset, int wr, void *handle); +#endif void ADIOI_BGL_Open(ADIO_File fd, int *error_code); @@ -87,6 +89,7 @@ void ADIOI_BGL_WriteStridedColl(ADIO_File fd, void *buf, int count, void ADIOI_BGL_Get_shared_fp(ADIO_File fd, int size, ADIO_Offset *shared_fp, int *error_code); void ADIOI_BGL_Set_shared_fp(ADIO_File fd, ADIO_Offset offset, int *error_code); +void ADIOI_BGL_Flush(ADIO_File fd, int *error_code); #include "ad_bgl_tuning.h" diff --git a/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_aggrs.c b/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_aggrs.c index 5e14af114c..39aeb2c05f 100644 --- a/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_aggrs.c +++ b/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_aggrs.c @@ -1,5 +1,6 @@ /* ---------------------------------------------------------------- */ /* (C)Copyright IBM Corp. 
2007, 2008 */ +/* ---------------------------------------------------------------- */ /** * \file ad_bgl_aggrs.c * \brief The externally used function from this file is is declared in ad_bgl_aggrs.h @@ -7,7 +8,7 @@ /* -*- Mode: C; c-basic-offset:4 ; -*- */ /* - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997-2001 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -16,10 +17,49 @@ #include "ad_bgl.h" #include "ad_bgl_pset.h" #include "ad_bgl_aggrs.h" +#ifdef AGGREGATION_PROFILE +#include "mpe.h" +#endif + +#ifdef USE_DBG_LOGGING + #define AGG_DEBUG 1 +#endif -int aggrsInPsetSize=0; -int *aggrsInPset=NULL; + +static int aggrsInPsetSize=0; +static int *aggrsInPset=NULL; + +/* Comments copied from common: + * This file contains four functions: + * + * ADIOI_Calc_aggregator() + * ADIOI_Calc_file_domains() + * ADIOI_Calc_my_req() + * ADIOI_Calc_others_req() + * + * The last three of these were originally in ad_read_coll.c, but they are + * also shared with ad_write_coll.c. I felt that they were better kept with + * the rest of the shared aggregation code. + */ + +/* Discussion of values available from above: + * + * ADIO_Offset st_offsets[0..nprocs-1] + * ADIO_Offset end_offsets[0..nprocs-1] + * These contain a list of start and end offsets for each process in + * the communicator. For example, an access at loc 10, size 10 would + * have a start offset of 10 and end offset of 19. + * int nprocs + * number of processors in the collective I/O communicator + * ADIO_Offset min_st_offset + * ADIO_Offset fd_start[0..nprocs_for_coll-1] + * starting location of "file domain"; region that a given process will + * perform aggregation for (i.e. 
actually do I/O) + * ADIO_Offset fd_end[0..nprocs_for_coll-1] + * start + size - 1 roughly, but it can be less, or 0, in the case of + * uneven distributions + */ /* forward declaration */ static void @@ -219,8 +259,7 @@ ADIOI_BGL_compute_agg_ranklist_serial ( ADIO_File fd, ADIOI_BGL_ProcInfo_t *all_procInfo, int *aggrsInPset ) { -# define DEBUG 0 -# if DEBUG +# if AGG_DEBUG int i; # endif int naggs; @@ -229,9 +268,10 @@ ADIOI_BGL_compute_agg_ranklist_serial ( ADIO_File fd, /* compute the ranklist of IO aggregators and put into tmp_ranklist */ tmp_ranklist = (int *) ADIOI_Malloc (confInfo->nProcs * sizeof(int)); -# if DEBUG - for (i=0; inProcs; i++) - printf( "\tcpuid %1d, rank = %6d\n", all_procInfo[i].cpuid, all_procInfo[i].rank ); +# if AGG_DEBUG + for (i=0; inProcs; i++) { + DBG_FPRINTF(stderr, "\tcpuid %1d, rank = %6d\n", all_procInfo[i].cpuid, all_procInfo[i].rank ); + } # endif naggs = @@ -239,7 +279,7 @@ ADIOI_BGL_compute_agg_ranklist_serial ( ADIO_File fd, # define VERIFY 0 # if VERIFY - printf( "\tconfInfo = %3d,%3d,%3d,%3d,%3d,%3d,%.4f; naggs = %d\n", + DBG_FPRINTF(stderr, "\tconfInfo = %3d,%3d,%3d,%3d,%3d,%3d,%.4f; naggs = %d\n", confInfo->PsetSize , confInfo->numPsets , confInfo->isVNM , @@ -250,9 +290,10 @@ ADIOI_BGL_compute_agg_ranklist_serial ( ADIO_File fd, naggs ); # endif -# if DEBUG - for (i=0; ihints */ @@ -267,293 +308,34 @@ ADIOI_BGL_compute_agg_ranklist_serial ( ADIO_File fd, return; } - - -/* - * Compute a dynamic access range based file domain partition among I/O aggregators, - * which align to the GPFS block size - * Divide the I/O workload among "nprocs_for_coll" processes. This is - * done by (logically) dividing the file into file domains (FDs); each - * process may directly access only its own file domain. - * Additional effort is to make sure that each I/O aggregator get - * a file domain that aligns to the GPFS block size. So, there will - * not be any false sharing of GPFS file blocks among multiple I/O nodes. 
- */ -void ADIOI_BGL_GPFS_Calc_file_domains(ADIO_Offset *st_offsets, - ADIO_Offset *end_offsets, - int nprocs, - int nprocs_for_coll, - ADIO_Offset *min_st_offset_ptr, - ADIO_Offset **fd_start_ptr, - ADIO_Offset **fd_end_ptr, - ADIO_Offset *fd_size_ptr, - void *fs_ptr) -{ - ADIO_Offset min_st_offset, max_end_offset, *fd_start, *fd_end, *fd_size; - int i, aggr; - static char myname[] = "ADIOI_BGL_GPFS_Calc_file_domains"; - __blksize_t blksize = 1048576; /* default to 1M */ - if(fs_ptr && ((ADIOI_BGL_fs*)fs_ptr)->blksize) /* ignore null ptr or 0 blksize */ - blksize = ((ADIOI_BGL_fs*)fs_ptr)->blksize; -/* FPRINTF(stderr,"%s(%d): Blocksize=%ld\n",myname,__LINE__,blksize);*/ - - /* find the range of all the requests */ - min_st_offset = st_offsets [0]; - max_end_offset = end_offsets[0]; - for (i=1; ihints->cb_nodes; i++) - if (fd->hints->ranklist[i] == myrank) return i; - return -1; -} - /* * This is more general aggregator search function which does not base on the assumption * that each aggregator hosts the file domain with the same size @@ -574,6 +356,21 @@ int ADIOI_BGL_Calc_aggregator(ADIO_File fd, /* binary search --> rank_index is returned */ int ub = fd->hints->cb_nodes; int lb = 0; + /* get an index into our array of aggregators */ + /* Common code for striping - bgl doesn't use it but it's + here to make diff'ing easier. 
+ rank_index = (int) ((off - min_off + fd_size)/ fd_size - 1); + + if (fd->hints->striping_unit > 0) { + * wkliao: implementation for file domain alignment + fd_start[] and fd_end[] have been aligned with file lock + boundaries when returned from ADIOI_Calc_file_domains() so cannot + just use simple arithmetic as above * + rank_index = 0; + while (off > fd_end[rank_index]) rank_index++; + } + bgl does its own striping below + */ rank_index = fd->hints->cb_nodes / 2; while ( off < fd_start[rank_index] || off > fd_end[rank_index] ) { if ( off > fd_end [rank_index] ) { @@ -586,8 +383,15 @@ int ADIOI_BGL_Calc_aggregator(ADIO_File fd, rank_index = (rank_index + lb) / 2; } } - - // printf ("ADIOI_BGL_Calc_aggregator: rank_index = %d\n", rank_index ); + /* we index into fd_end with rank_index, and fd_end was allocated to be no + * bigger than fd->hints->cb_nodes. If we ever violate that, we're + * overrunning arrays. Obviously, we should never ever hit this abort */ + if (rank_index >= fd->hints->cb_nodes || rank_index < 0) { + FPRINTF(stderr, "Error in ADIOI_Calc_aggregator(): rank_index(%d) >= fd->hints->cb_nodes (%d) fd_size=%lld off=%lld\n", + rank_index,fd->hints->cb_nodes,fd_size,off); + MPI_Abort(MPI_COMM_WORLD, 1); + } + // DBG_FPRINTF ("ADIOI_BGL_Calc_aggregator: rank_index = %d\n", rank_index ); /* * remember here that even in Rajeev's original code it was the case that @@ -611,16 +415,161 @@ int ADIOI_BGL_Calc_aggregator(ADIO_File fd, return rank; } +/* + * Compute a dynamic access range based file domain partition among I/O aggregators, + * which align to the GPFS block size + * Divide the I/O workload among "nprocs_for_coll" processes. This is + * done by (logically) dividing the file into file domains (FDs); each + * process may directly access only its own file domain. + * Additional effort is to make sure that each I/O aggregator gets + * a file domain that aligns to the GPFS block size.
So, there will + * not be any false sharing of GPFS file blocks among multiple I/O nodes. + * + * The common version of this now accepts a min_fd_size and striping_unit. + * It doesn't seem necessary here (using GPFS block sizes) but keep it in mind + * (e.g. we could pass striping unit instead of using fs_ptr->blksize). + */ +void ADIOI_BGL_GPFS_Calc_file_domains(ADIO_Offset *st_offsets, + ADIO_Offset *end_offsets, + int nprocs, + int nprocs_for_coll, + ADIO_Offset *min_st_offset_ptr, + ADIO_Offset **fd_start_ptr, + ADIO_Offset **fd_end_ptr, + ADIO_Offset *fd_size_ptr, + void *fs_ptr) +{ + ADIO_Offset min_st_offset, max_end_offset, *fd_start, *fd_end, *fd_size; + int i, aggr; + +#ifdef AGGREGATION_PROFILE + MPE_Log_event (5004, 0, NULL); +#endif + +# if AGG_DEBUG + static char myname[] = "ADIOI_BGL_GPFS_Calc_file_domains"; + DBG_FPRINTF(stderr, "%s(%d): %d aggregator(s)\n", + myname,__LINE__,nprocs_for_coll); +# endif + __blksize_t blksize = 1048576; /* default to 1M */ + if(fs_ptr && ((ADIOI_BGL_fs*)fs_ptr)->blksize) /* ignore null ptr or 0 blksize */ + blksize = ((ADIOI_BGL_fs*)fs_ptr)->blksize; +# if AGG_DEBUG + DBG_FPRINTF(stderr,"%s(%d): Blocksize=%ld\n",myname,__LINE__,blksize); +# endif +/* find min of start offsets and max of end offsets of all processes */ + min_st_offset = st_offsets [0]; + max_end_offset = end_offsets[0]; + for (i=1; ihints->cb_nodes; i++) + if (fd->hints->ranklist[i] == myrank) return i; + return -1; +} /* * ADIOI_BGL_Calc_my_req() overrides ADIOI_Calc_my_req for the default implementation * is specific for static file domain partitioning. 
* - * ADIOI_Calc_my_req() calculate what portions of the access requests + * ADIOI_Calc_my_req() - calculate what portions of the access requests * of this process are located in the file domains of various processes * (including this one) */ -void ADIOI_BGL_Calc_my_req(ADIO_File fd, ADIO_Offset *offset_list, int *len_list, +void ADIOI_BGL_Calc_my_req(ADIO_File fd, ADIO_Offset *offset_list, ADIO_Offset *len_list, int contig_access_count, ADIO_Offset min_st_offset, ADIO_Offset *fd_start, ADIO_Offset *fd_end, ADIO_Offset fd_size, @@ -629,12 +578,17 @@ void ADIOI_BGL_Calc_my_req(ADIO_File fd, ADIO_Offset *offset_list, int *len_list int **count_my_req_per_proc_ptr, ADIOI_Access **my_req_ptr, int **buf_idx_ptr) +/* Possibly reconsider if buf_idx's are ok as int's, or should they be aints/offsets? + They are used as memory buffer indices so it seems like the 2G limit is in effect */ { int *count_my_req_per_proc, count_my_req_procs, *buf_idx; int i, l, proc; ADIO_Offset fd_len, rem_len, curr_idx, off; ADIOI_Access *my_req; +#ifdef AGGREGATION_PROFILE + MPE_Log_event (5024, 0, NULL); +#endif *count_my_req_per_proc_ptr = (int *) ADIOI_Calloc(nprocs,sizeof(int)); count_my_req_per_proc = *count_my_req_per_proc_ptr; @@ -656,10 +610,10 @@ void ADIOI_BGL_Calc_my_req(ADIO_File fd, ADIO_Offset *offset_list, int *len_list * contig_access_count was calculated way back in ADIOI_Calc_my_off_len() */ for (i=0; i < contig_access_count; i++) { - - /* When there is no data being processed, bypass this loop */ - if (len_list[i] == 0) continue; - + /* short circuit offset/len processing if len == 0 + * (zero-byte read/write) */ + if (len_list[i] == 0) + continue; off = offset_list[i]; fd_len = len_list[i]; /* note: we set fd_len to be the total size of the access. then
then @@ -710,20 +664,24 @@ void ADIOI_BGL_Calc_my_req(ADIO_File fd, ADIO_Offset *offset_list, int *len_list /* now fill in my_req */ curr_idx = 0; for (i=0; i 0) { - FPRINTF(stdout, "data needed from %d (count = %d):\n", i, + DBG_FPRINTF(stderr, "data needed from %d (count = %d):\n", i, my_req[i].count); for (l=0; l < my_req[i].count; l++) { - FPRINTF(stdout, " off[%d] = %Ld, len[%d] = %d\n", l, + DBG_FPRINTF(stderr, " off[%d] = %lld, len[%d] = %d\n", l, my_req[i].offsets[l], l, my_req[i].lens[l]); } } + DBG_FPRINTF(stderr, "buf_idx[%d] = 0x%x\n", i, buf_idx[i]); } -#if 0 - for (i=0; i - extern int *aggrsInPset; /* defined in ad_bgl_aggrs.c */ +#if !defined(GPFS_SUPER_MAGIC) + #define GPFS_SUPER_MAGIC (0x47504653) +#endif +#if !defined(PVFS2_SUPER_MAGIC) + #define PVFS2_SUPER_MAGIC (0x20030528) +#endif /* File system (BGL) specific information - hung off of ADIOI_FileD file descriptor (fd->fs_ptr) at open */ typedef struct ADIOI_BGL_fs_s { __blksize_t blksize; + int fsync_aggr; /* "fsync aggregation" flags (below) */ +#define ADIOI_BGL_FSYNC_AGGREGATION_DISABLED 0x00 +#define ADIOI_BGL_FSYNC_AGGREGATION_ENABLED 0x01 +#define ADIOI_BGL_FSYNC_AGGREGATOR 0x10 /* This rank is an aggregator */ } ADIOI_BGL_fs; /* generate a list of I/O aggregators that utilizes BGL-PSET orginization. 
*/ @@ -60,7 +70,7 @@ /* overriding ADIOI_Calc_my_req for the default implementation is specific for static file domain partitioning */ - void ADIOI_BGL_Calc_my_req ( ADIO_File fd, ADIO_Offset *offset_list, int *len_list, + void ADIOI_BGL_Calc_my_req ( ADIO_File fd, ADIO_Offset *offset_list, ADIO_Offset *len_list, int contig_access_count, ADIO_Offset min_st_offset, ADIO_Offset *fd_start, ADIO_Offset *fd_end, ADIO_Offset fd_size, diff --git a/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_close.c b/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_close.c index 63f620446f..8fcf857507 100644 --- a/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_close.c +++ b/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_close.c @@ -1,7 +1,8 @@ /* ---------------------------------------------------------------- */ /* (C)Copyright IBM Corp. 2007, 2008 */ +/* ---------------------------------------------------------------- */ /** - * \file ad_bgl_open.c + * \file ad_bgl_close.c * \brief ??? */ diff --git a/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_fcntl.c b/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_fcntl.c index c47c2aa5b5..afae2c2988 100644 --- a/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_fcntl.c +++ b/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_fcntl.c @@ -1,5 +1,6 @@ /* ---------------------------------------------------------------- */ /* (C)Copyright IBM Corp. 2007, 2008 */ +/* ---------------------------------------------------------------- */ /** * \file ad_bgl_fcntl.c * \brief ??? diff --git a/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_flush.c b/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_flush.c new file mode 100644 index 0000000000..97fd2ca99e --- /dev/null +++ b/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_flush.c @@ -0,0 +1,90 @@ +/* ---------------------------------------------------------------- */ +/* (C)Copyright IBM Corp. 
2007, 2008 */ +/* ---------------------------------------------------------------- */ +/** + * \file ad_bgl_flush.c + * \brief Scalable flush based on underlying filesystem and psets + */ + +/* -*- Mode: C; c-basic-offset:4 ; -*- */ +/* + * + * Copyright (C) 1997 University of Chicago. + * See COPYRIGHT notice in top-level directory. + */ + +#include "ad_bgl.h" +#include "ad_bgl_aggrs.h" + +void ADIOI_BGL_Flush(ADIO_File fd, int *error_code) +{ + int err=0; + static char myname[] = "ADIOI_BGL_FLUSH"; + + + if(((ADIOI_BGL_fs*)fd->fs_ptr)->fsync_aggr & ADIOI_BGL_FSYNC_AGGREGATION_ENABLED) + { + int rank; + + /* Barrier so we can collectively do fewer fsync's */ + MPI_Barrier(fd->comm); + + MPI_Comm_rank(fd->comm, &rank); + + /* All ranks marked as "fsync aggregators" should fsync. + (We currently only do one fsync on rank 0 but this is general + enough to support >1 aggregator using allreduce to get the + results instead of simply bcast'ing the results from rank 0.)*/ + if(((ADIOI_BGL_fs*)fd->fs_ptr)->fsync_aggr & ADIOI_BGL_FSYNC_AGGREGATOR) + { + err = fsync(fd->fd_sys); + DBG_FPRINTF(stderr,"aggregation:fsync %s, err=%#X, errno=%#X\n",fd->filename, err, errno); + /* We want errno, not the return code if it failed */ + if (err == -1) err = errno; + else err = 0; + } + /* Just pick an errno (using unsigned MPI_MAX) from any failures */ + MPI_Allreduce( MPI_IN_PLACE, (unsigned*)&err, 1, MPI_UNSIGNED, MPI_MAX, fd->comm); + DBGV_FPRINTF(stderr,"aggregation result:fsync %s, errno %#X,\n",fd->filename, err); + + if (err) /* if it's non-zero, it must be an errno */ + { + errno = err; + err = -1; + } + } + else /* Non-aggregated fsync */ + { +#ifdef USE_DBG_LOGGING + int rank; +#endif + err = fsync(fd->fd_sys); +#ifdef USE_DBG_LOGGING + MPI_Comm_rank(fd->comm, &rank); + + if(rank == 0) + { + DBG_FPRINTF(stderr,"no aggregation:fsync %s, err=%#X, errno=%#X\n",fd->filename, err, errno); + } + else + { + DBGV_FPRINTF(stderr,"no aggregation:fsync %s, err=%#X, 
errno=%#X\n",fd->filename, err, errno); + } +#endif + } + + /* --BEGIN ERROR HANDLING-- */ + if (err == -1) + { + *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, + myname, __LINE__, MPI_ERR_IO, + "**io", + "**io %s", strerror(errno)); + DBGT_FPRINTF(stderr,"fsync %s, err=%#X, errno=%#X\n",fd->filename, err, errno); + return; + } + /* --END ERROR HANDLING-- */ + + *error_code = MPI_SUCCESS; +} + diff --git a/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_hints.c b/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_hints.c index 8a233b2783..614f2cf618 100644 --- a/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_hints.c +++ b/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_hints.c @@ -38,8 +38,8 @@ void ADIOI_BGL_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) MPI_Info info; char *value; - int flag, intval, tmp_val, nprocs, nprocs_is_valid = 0; - static char myname[] = "ADIOI_GEN_SETINFO"; + int flag, intval, tmp_val, nprocs=0, nprocs_is_valid = 0; + static char myname[] = "ADIOI_BGL_SETINFO"; int did_anything = 0; @@ -61,15 +61,15 @@ void ADIOI_BGL_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) did_anything = 1; /* buffer size for collective I/O */ - MPI_Info_set(info, "cb_buffer_size", ADIOI_BGL_CB_BUFFER_SIZE_DFLT); + ADIOI_Info_set(info, "cb_buffer_size", ADIOI_BGL_CB_BUFFER_SIZE_DFLT); fd->hints->cb_buffer_size = atoi(ADIOI_BGL_CB_BUFFER_SIZE_DFLT); /* default is to let romio automatically decide when to use * collective buffering */ - MPI_Info_set(info, "romio_cb_read", "enable"); + ADIOI_Info_set(info, "romio_cb_read", "enable"); fd->hints->cb_read = ADIOI_HINT_ENABLE; - MPI_Info_set(info, "romio_cb_write", "enable"); + ADIOI_Info_set(info, "romio_cb_write", "enable"); fd->hints->cb_write = ADIOI_HINT_ENABLE; if ( fd->hints->cb_config_list != NULL ) ADIOI_Free (fd->hints->cb_config_list); @@ -78,30 +78,54 @@ void ADIOI_BGL_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) /* number of processes that perform I/O in collective 
I/O */ MPI_Comm_size(fd->comm, &nprocs); nprocs_is_valid = 1; - sprintf(value, "%d", nprocs); - MPI_Info_set(info, "cb_nodes", value); + ADIOI_Snprintf(value, MPI_MAX_INFO_VAL+1, "%d", nprocs); + ADIOI_Info_set(info, "cb_nodes", value); fd->hints->cb_nodes = -1; /* hint indicating that no indep. I/O will be performed on this file */ - MPI_Info_set(info, "romio_no_indep_rw", "false"); + ADIOI_Info_set(info, "romio_no_indep_rw", "false"); fd->hints->no_indep_rw = 0; - /* deferred_open derrived from no_indep_rw and cb_{read,write} */ + + /* bgl is not implementing file realms (ADIOI_IOStridedColl), + initialize to disabled it. */ + /* hint instructing the use of persistent file realms */ + ADIOI_Info_set(info, "romio_cb_pfr", "disable"); + fd->hints->cb_pfr = ADIOI_HINT_DISABLE; + + /* hint guiding the assignment of persistent file realms */ + ADIOI_Info_set(info, "romio_cb_fr_types", "aar"); + fd->hints->cb_fr_type = ADIOI_FR_AAR; + + /* hint to align file realms with a certain byte value */ + ADIOI_Info_set(info, "romio_cb_fr_alignment", "1"); + fd->hints->cb_fr_alignment = 1; + + /* hint to set a threshold percentage for a datatype's size/extent at + * which data sieving should be done in collective I/O */ + ADIOI_Info_set(info, "romio_cb_ds_threshold", "0"); + fd->hints->cb_ds_threshold = 0; + + /* hint to switch between point-to-point or all-to-all for two-phase */ + ADIOI_Info_set(info, "romio_cb_alltoall", "automatic"); + fd->hints->cb_alltoall = ADIOI_HINT_AUTO; + + /* deferred_open derived from no_indep_rw and cb_{read,write} */ fd->hints->deferred_open = 0; /* buffer size for data sieving in independent reads */ - MPI_Info_set(info, "ind_rd_buffer_size", ADIOI_BGL_IND_RD_BUFFER_SIZE_DFLT); + ADIOI_Info_set(info, "ind_rd_buffer_size", ADIOI_BGL_IND_RD_BUFFER_SIZE_DFLT); fd->hints->ind_rd_buffer_size = atoi(ADIOI_BGL_IND_RD_BUFFER_SIZE_DFLT); /* buffer size for data sieving in independent writes */ - MPI_Info_set(info, "ind_wr_buffer_size", 
ADIOI_BGL_IND_WR_BUFFER_SIZE_DFLT); + ADIOI_Info_set(info, "ind_wr_buffer_size", ADIOI_BGL_IND_WR_BUFFER_SIZE_DFLT); fd->hints->ind_wr_buffer_size = atoi(ADIOI_BGL_IND_WR_BUFFER_SIZE_DFLT); if(fd->file_system == ADIO_UFS) { /* default for ufs/pvfs is to disable data sieving */ - MPI_Info_set(info, "romio_ds_read", "disable"); + ADIOI_Info_set(info, "romio_ds_read", "disable"); fd->hints->ds_read = ADIOI_HINT_DISABLE; - MPI_Info_set(info, "romio_ds_write", "disable"); + ADIOI_Info_set(info, "romio_ds_write", "disable"); fd->hints->ds_write = ADIOI_HINT_DISABLE; } else @@ -109,18 +133,23 @@ void ADIOI_BGL_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) /* default is to let romio automatically decide when to use data * sieving */ - MPI_Info_set(info, "romio_ds_read", "automatic"); + ADIOI_Info_set(info, "romio_ds_read", "automatic"); fd->hints->ds_read = ADIOI_HINT_AUTO; - MPI_Info_set(info, "romio_ds_write", "automatic"); + ADIOI_Info_set(info, "romio_ds_write", "automatic"); fd->hints->ds_write = ADIOI_HINT_AUTO; } - fd->hints->initialized = 1; + /* still to do: tune this a bit for a variety of file systems. there's + * no good default value so just leave it unset */ + fd->hints->min_fdomain_size = 0; + fd->hints->striping_unit = 0; + + fd->hints->initialized = 1; } /* add in user's info if supplied */ if (users_info != MPI_INFO_NULL) { - MPI_Info_get(users_info, "cb_buffer_size", MPI_MAX_INFO_VAL, + ADIOI_Info_get(users_info, "cb_buffer_size", MPI_MAX_INFO_VAL, value, &flag); if (flag && ((intval=atoi(value)) > 0)) { tmp_val = intval; @@ -135,30 +164,106 @@ void ADIOI_BGL_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) } /* --END ERROR HANDLING-- */ - MPI_Info_set(info, "cb_buffer_size", value); + ADIOI_Info_set(info, "cb_buffer_size", value); fd->hints->cb_buffer_size = intval; } +#if 0 + /* bgl is not implementing file realms (ADIOI_IOStridedColl) ... */ + /* aligning file realms to certain sizes (e.g. 
stripe sizes) + * may benefit I/O performance */ + ADIOI_Info_get(users_info, "romio_cb_fr_alignment", MPI_MAX_INFO_VAL, + value, &flag); + if (flag && ((intval=atoi(value)) > 0)) { + tmp_val = intval; + MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm); + /* --BEGIN ERROR HANDLING-- */ + if (tmp_val != intval) { + MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname, + "romio_cb_fr_alignment", + error_code); + return; + } + /* --END ERROR HANDLING-- */ + + ADIOI_Info_set(info, "romio_cb_fr_alignment", value); + fd->hints->cb_fr_alignment = intval; + + } + + /* for collective I/O, try to be smarter about when to do data sieving + * using a specific threshold for the datatype size/extent + * (percentage 0-100%) */ + ADIOI_Info_get(users_info, "romio_cb_ds_threshold", MPI_MAX_INFO_VAL, + value, &flag); + if (flag && ((intval=atoi(value)) > 0)) { + tmp_val = intval; + + MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm); + /* --BEGIN ERROR HANDLING-- */ + if (tmp_val != intval) { + MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname, + "romio_cb_ds_threshold", + error_code); + return; + } + /* --END ERROR HANDLING-- */ + + ADIOI_Info_set(info, "romio_cb_ds_threshold", value); + fd->hints->cb_ds_threshold = intval; + + } + ADIOI_Info_get(users_info, "romio_cb_alltoall", MPI_MAX_INFO_VAL, value, + &flag); + if (flag) { + if (!strcmp(value, "enable") || !strcmp(value, "ENABLE")) { + ADIOI_Info_set(info, "romio_cb_alltoall", value); + fd->hints->cb_read = ADIOI_HINT_ENABLE; + } + else if (!strcmp(value, "disable") || !strcmp(value, "DISABLE")) { + ADIOI_Info_set(info, "romio_cb_alltoall", value); + fd->hints->cb_read = ADIOI_HINT_DISABLE; + } + else if (!strcmp(value, "automatic") || !strcmp(value, "AUTOMATIC")) + { + ADIOI_Info_set(info, "romio_cb_alltoall", value); + fd->hints->cb_read = ADIOI_HINT_AUTO; + } + + tmp_val = fd->hints->cb_alltoall; + + MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm); + /* --BEGIN ERROR HANDLING-- */ + if (tmp_val != fd->hints->cb_alltoall) { + 
MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname, + "romio_cb_alltoall", + error_code); + return; + } + /* --END ERROR HANDLING-- */ + } +#endif /* new hints for enabling/disabling coll. buffering on * reads/writes */ - MPI_Info_get(users_info, "romio_cb_read", MPI_MAX_INFO_VAL, value, &flag); + ADIOI_Info_get(users_info, "romio_cb_read", MPI_MAX_INFO_VAL, value, + &flag); if (flag) { if (!strcmp(value, "enable") || !strcmp(value, "ENABLE")) { - MPI_Info_set(info, "romio_cb_read", value); + ADIOI_Info_set(info, "romio_cb_read", value); fd->hints->cb_read = ADIOI_HINT_ENABLE; } else if (!strcmp(value, "disable") || !strcmp(value, "DISABLE")) { /* romio_cb_read overrides no_indep_rw */ - MPI_Info_set(info, "romio_cb_read", value); - MPI_Info_set(info, "romio_no_indep_rw", "false"); + ADIOI_Info_set(info, "romio_cb_read", value); + ADIOI_Info_set(info, "romio_no_indep_rw", "false"); fd->hints->cb_read = ADIOI_HINT_DISABLE; fd->hints->no_indep_rw = ADIOI_HINT_DISABLE; } else if (!strcmp(value, "automatic") || !strcmp(value, "AUTOMATIC")) { - MPI_Info_set(info, "romio_cb_read", value); + ADIOI_Info_set(info, "romio_cb_read", value); fd->hints->cb_read = ADIOI_HINT_AUTO; } @@ -174,24 +279,25 @@ void ADIOI_BGL_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) } /* --END ERROR HANDLING-- */ } - MPI_Info_get(users_info, "romio_cb_write", MPI_MAX_INFO_VAL, value, &flag); + ADIOI_Info_get(users_info, "romio_cb_write", MPI_MAX_INFO_VAL, value, + &flag); if (flag) { if (!strcmp(value, "enable") || !strcmp(value, "ENABLE")) { - MPI_Info_set(info, "romio_cb_write", value); + ADIOI_Info_set(info, "romio_cb_write", value); fd->hints->cb_write = ADIOI_HINT_ENABLE; } else if (!strcmp(value, "disable") || !strcmp(value, "DISABLE")) { /* romio_cb_write overrides no_indep_rw, too */ - MPI_Info_set(info, "romio_cb_write", value); - MPI_Info_set(info, "romio_no_indep_rw", "false"); + ADIOI_Info_set(info, "romio_cb_write", value); + ADIOI_Info_set(info, "romio_no_indep_rw", "false"); 
fd->hints->cb_write = ADIOI_HINT_DISABLE; fd->hints->no_indep_rw = ADIOI_HINT_DISABLE; } else if (!strcmp(value, "automatic") || !strcmp(value, "AUTOMATIC")) { - MPI_Info_set(info, "romio_cb_write", value); + ADIOI_Info_set(info, "romio_cb_write", value); fd->hints->cb_write = ADIOI_HINT_AUTO; } @@ -208,23 +314,81 @@ void ADIOI_BGL_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) /* --END ERROR HANDLING-- */ } +#if 0 + /* bgl is not implementing file realms (ADIOI_IOStridedColl) ... */ + /* enable/disable persistent file realms for collective I/O */ + /* may want to check for no_indep_rdwr hint as well */ + ADIOI_Info_get(users_info, "romio_cb_pfr", MPI_MAX_INFO_VAL, value, + &flag); + if (flag) { + if (!strcmp(value, "enable") || !strcmp(value, "ENABLE")) { + ADIOI_Info_set(info, "romio_cb_pfr", value); + fd->hints->cb_pfr = ADIOI_HINT_ENABLE; + } + else if (!strcmp(value, "disable") || !strcmp(value, "DISABLE")) { + ADIOI_Info_set(info, "romio_cb_pfr", value); + fd->hints->cb_pfr = ADIOI_HINT_DISABLE; + } + else if (!strcmp(value, "automatic") || !strcmp(value, "AUTOMATIC")) + { + ADIOI_Info_set(info, "romio_cb_pfr", value); + fd->hints->cb_pfr = ADIOI_HINT_AUTO; + } + + tmp_val = fd->hints->cb_pfr; + + MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm); + /* --BEGIN ERROR HANDLING-- */ + if (tmp_val != fd->hints->cb_pfr) { + MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname, + "romio_cb_pfr", + error_code); + return; + } + /* --END ERROR HANDLING-- */ + } + + /* file realm assignment types ADIOI_FR_AAR(0), + ADIOI_FR_FSZ(-1), ADIOI_FR_USR_REALMS(-2), all others specify + a regular fr size in bytes. probably not the best way... 
*/ + ADIOI_Info_get(users_info, "romio_cb_fr_type", MPI_MAX_INFO_VAL, + value, &flag); + if (flag && ((intval=atoi(value)) >= -2)) { + tmp_val = intval; + + MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm); + /* --BEGIN ERROR HANDLING-- */ + if (tmp_val != intval) { + MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname, + "romio_cb_fr_type", + error_code); + return; + } + /* --END ERROR HANDLING-- */ + + ADIOI_Info_set(info, "romio_cb_fr_type", value); + fd->hints->cb_fr_type = intval; + + } +#endif /* new hint for specifying no indep. read/write will be performed */ - MPI_Info_get(users_info, "romio_no_indep_rw", MPI_MAX_INFO_VAL, value, &flag); + ADIOI_Info_get(users_info, "romio_no_indep_rw", MPI_MAX_INFO_VAL, value, + &flag); if (flag) { if (!strcmp(value, "true") || !strcmp(value, "TRUE")) { /* if 'no_indep_rw' set, also hint that we will do * collective buffering: if we aren't doing independent io, * then we have to do collective */ - MPI_Info_set(info, "romio_no_indep_rw", value); - MPI_Info_set(info, "romio_cb_write", "enable"); - MPI_Info_set(info, "romio_cb_read", "enable"); + ADIOI_Info_set(info, "romio_no_indep_rw", value); + ADIOI_Info_set(info, "romio_cb_write", "enable"); + ADIOI_Info_set(info, "romio_cb_read", "enable"); fd->hints->no_indep_rw = 1; fd->hints->cb_read = 1; fd->hints->cb_write = 1; tmp_val = 1; } else if (!strcmp(value, "false") || !strcmp(value, "FALSE")) { - MPI_Info_set(info, "romio_no_indep_rw", value); + ADIOI_Info_set(info, "romio_no_indep_rw", value); fd->hints->no_indep_rw = 0; tmp_val = 0; } @@ -246,64 +410,80 @@ void ADIOI_BGL_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) /* new hints for enabling/disabling data sieving on * reads/writes */ - MPI_Info_get(users_info, "romio_ds_read", MPI_MAX_INFO_VAL, value, + ADIOI_Info_get(users_info, "romio_ds_read", MPI_MAX_INFO_VAL, value, &flag); if (flag) { if (!strcmp(value, "enable") || !strcmp(value, "ENABLE")) { - MPI_Info_set(info, "romio_ds_read", value); + ADIOI_Info_set(info, 
"romio_ds_read", value); fd->hints->ds_read = ADIOI_HINT_ENABLE; } else if (!strcmp(value, "disable") || !strcmp(value, "DISABLE")) { - MPI_Info_set(info, "romio_ds_read", value); + ADIOI_Info_set(info, "romio_ds_read", value); fd->hints->ds_read = ADIOI_HINT_DISABLE; } else if (!strcmp(value, "automatic") || !strcmp(value, "AUTOMATIC")) { - MPI_Info_set(info, "romio_ds_read", value); + ADIOI_Info_set(info, "romio_ds_read", value); fd->hints->ds_read = ADIOI_HINT_AUTO; } /* otherwise ignore */ } - MPI_Info_get(users_info, "romio_ds_write", MPI_MAX_INFO_VAL, value, + ADIOI_Info_get(users_info, "romio_ds_write", MPI_MAX_INFO_VAL, value, &flag); if (flag) { if (!strcmp(value, "enable") || !strcmp(value, "ENABLE")) { - MPI_Info_set(info, "romio_ds_write", value); + ADIOI_Info_set(info, "romio_ds_write", value); fd->hints->ds_write = ADIOI_HINT_ENABLE; } else if (!strcmp(value, "disable") || !strcmp(value, "DISABLE")) { - MPI_Info_set(info, "romio_ds_write", value); + ADIOI_Info_set(info, "romio_ds_write", value); fd->hints->ds_write = ADIOI_HINT_DISABLE; } else if (!strcmp(value, "automatic") || !strcmp(value, "AUTOMATIC")) { - MPI_Info_set(info, "romio_ds_write", value); + ADIOI_Info_set(info, "romio_ds_write", value); fd->hints->ds_write = ADIOI_HINT_AUTO; } /* otherwise ignore */ } - MPI_Info_get(users_info, "ind_wr_buffer_size", MPI_MAX_INFO_VAL, + ADIOI_Info_get(users_info, "ind_wr_buffer_size", MPI_MAX_INFO_VAL, value, &flag); if (flag && ((intval = atoi(value)) > 0)) { - MPI_Info_set(info, "ind_wr_buffer_size", value); + ADIOI_Info_set(info, "ind_wr_buffer_size", value); fd->hints->ind_wr_buffer_size = intval; } - MPI_Info_get(users_info, "ind_rd_buffer_size", MPI_MAX_INFO_VAL, + ADIOI_Info_get(users_info, "ind_rd_buffer_size", MPI_MAX_INFO_VAL, value, &flag); if (flag && ((intval = atoi(value)) > 0)) { - MPI_Info_set(info, "ind_rd_buffer_size", value); + ADIOI_Info_set(info, "ind_rd_buffer_size", value); fd->hints->ind_rd_buffer_size = intval; } memset( value, 
0, MPI_MAX_INFO_VAL+1 ); - MPI_Info_get(users_info, ADIOI_BGL_NAGG_IN_PSET_HINT_NAME, MPI_MAX_INFO_VAL, + ADIOI_Info_get(users_info, "romio_min_fdomain_size", MPI_MAX_INFO_VAL, + value, &flag); + if ( flag && ((intval = atoi(value)) > 0) ) { + ADIOI_Info_set(info, "romio_min_fdomain_size", value); + fd->hints->min_fdomain_size = intval; + } + /* Now we use striping unit in common code so we should + process hints for it. */ + ADIOI_Info_get(users_info, "striping_unit", MPI_MAX_INFO_VAL, + value, &flag); + if ( flag && ((intval = atoi(value)) > 0) ) { + ADIOI_Info_set(info, "striping_unit", value); + fd->hints->striping_unit = intval; + } + + memset( value, 0, MPI_MAX_INFO_VAL+1 ); + ADIOI_Info_get(users_info, ADIOI_BGL_NAGG_IN_PSET_HINT_NAME, MPI_MAX_INFO_VAL, value, &flag); if (flag && ((intval = atoi(value)) > 0)) { did_anything = 1; - MPI_Info_set(info, ADIOI_BGL_NAGG_IN_PSET_HINT_NAME, value); + ADIOI_Info_set(info, ADIOI_BGL_NAGG_IN_PSET_HINT_NAME, value); fd->hints->cb_nodes = intval; } } @@ -312,24 +492,30 @@ void ADIOI_BGL_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) if (did_anything) { ADIOI_BGL_gen_agg_ranklist(fd, fd->hints->cb_nodes); } + /* ignore defered open hints and do not enable it for bluegene: need all + * processors in the open path so we can stat-and-broadcast the blocksize + */ + ADIOI_Info_set(info, "romio_no_indep_rw", "false"); + fd->hints->no_indep_rw = 0; + fd->hints->deferred_open = 0; - /* deferred_open won't be set by callers, but if the user doesn't - * explicitly disable collecitve buffering (two-phase) and does hint that - * io w/o independent io is going on, we'll set this internal hint as a - * convenience */ - if ( ( (fd->hints->cb_read != ADIOI_HINT_DISABLE) - && (fd->hints->cb_write != ADIOI_HINT_DISABLE) - && fd->hints->no_indep_rw ) ) - { - fd->hints->deferred_open = 1; - } else { - /* setting romio_no_indep_rw enable and romio_cb_{read,write} - * disable at the same time doesn't make sense. 
honor - * romio_cb_{read,write} and force the no_indep_rw hint to - * 'disable' */ - MPI_Info_set(info, "romio_no_indep_rw", "false"); - fd->hints->no_indep_rw = 0; - fd->hints->deferred_open = 0; + /* BobC commented this out, but since hint processing runs on both bgl and + * bglockless, we need to keep DS writes enabled on gpfs and disabled on + * PVFS */ + if (ADIO_Feature(fd, ADIO_DATA_SIEVING_WRITES) == 0) { + /* disable data sieving for fs that do not + support file locking */ + ADIOI_Info_get(info, "ind_wr_buffer_size", MPI_MAX_INFO_VAL, + value, &flag); + if (flag) { + /* get rid of this value if it is set */ + ADIOI_Info_delete(info, "ind_wr_buffer_size"); + } + /* note: leave ind_wr_buffer_size alone; used for other cases + * as well. -- Rob Ross, 04/22/2003 + */ + ADIOI_Info_set(info, "romio_ds_write", "disable"); + fd->hints->ds_write = ADIOI_HINT_DISABLE; } ADIOI_Free(value); diff --git a/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_open.c b/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_open.c index 3c7119291c..eeb40fbc0a 100644 --- a/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_open.c +++ b/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_open.c @@ -15,6 +15,181 @@ #include "ad_bgl.h" #include "ad_bgl_aggrs.h" +#include +#include + +/* COPIED FROM ad_fstype.c since it is static in that file + + ADIO_FileSysType_parentdir - determines a string pathname for the + parent directory of a given filename. + +Input Parameters: +. filename - pointer to file name character array + +Output Parameters: +. dirnamep - pointer to location in which to store a pointer to a string + + Note that the caller should free the memory located at the pointer returned + after the string is no longer needed. +*/ + +#ifndef PATH_MAX +#define PATH_MAX 65535 +#endif + +/* In a strict ANSI environment, S_ISLNK may not be defined. Fix that + here. We assume that S_ISLNK is *always* defined as a macro. 
If + that is not universally true, then add a test to the romio + configure that trys to link a program that references S_ISLNK */ +#if !defined(S_ISLNK) +# if defined(S_IFLNK) + /* Check for the link bit */ +# define S_ISLNK(mode) ((mode) & S_IFLNK) +# else + /* no way to check if it is a link, so say false */ +# define S_ISLNK(mode) 0 +# endif +#endif /* !(S_ISLNK) */ + +/* ADIO_FileSysType_parentdir + * + * Returns pointer to string in dirnamep; that string is allocated with + * strdup and must be free()'d. + */ +static void ADIO_FileSysType_parentdir(char *filename, char **dirnamep) +{ + int err; + char *dir = NULL, *slash; + struct stat statbuf; + + err = lstat(filename, &statbuf); + + if (err || (!S_ISLNK(statbuf.st_mode))) { + /* no such file, or file is not a link; these are the "normal" + * cases where we can just return the parent directory. + */ + dir = ADIOI_Strdup(filename); + } + else { + /* filename is a symlink. we've presumably already tried + * to stat it and found it to be missing (dangling link), + * but this code doesn't care if the target is really there + * or not. + */ + int namelen; + char *linkbuf; + + linkbuf = ADIOI_Malloc(PATH_MAX+1); + namelen = readlink(filename, linkbuf, PATH_MAX+1); + if (namelen == -1) { + /* something strange has happened between the time that + * we determined that this was a link and the time that + * we attempted to read it; punt and use the old name. 
+ */ + dir = ADIOI_Strdup(filename); + } + else { + /* successfully read the link */ + linkbuf[namelen] = '\0'; /* readlink doesn't null terminate */ + dir = ADIOI_Strdup(linkbuf); + ADIOI_Free(linkbuf); + } + } + + slash = strrchr(dir, '/'); + if (!slash) ADIOI_Strncpy(dir, ".", 2); + else { + if (slash == dir) *(dir + 1) = '\0'; + else *slash = '\0'; + } + + *dirnamep = dir; + return; +} + +static void scaleable_stat(ADIO_File fd) +{ + struct stat64 bgl_stat; + struct statfs bgl_statfs; + int rank, rc; + char * dir; + long buf[2]; + MPI_Comm_rank(fd->comm, &rank); + + if (rank == 0) { + /* Get the (real) underlying file system block size */ + rc = stat64(fd->filename, &bgl_stat); + if (rc >= 0) + { + buf[0] = bgl_stat.st_blksize; + DBGV_FPRINTF(stderr,"Successful stat '%s'. Blocksize=%ld\n", + fd->filename,bgl_stat.st_blksize); + } + else + { + DBGV_FPRINTF(stderr,"Stat '%s' failed with rc=%d, errno=%d\n", + fd->filename,rc,errno); + } + /* Get the (real) underlying file system type so we can + * plan our fsync scaling strategy */ + rc = statfs(fd->filename,&bgl_statfs); + if (rc >= 0) + { + DBGV_FPRINTF(stderr,"Successful statfs '%s'. Magic number=%#X\n", + fd->filename,bgl_statfs.f_type); + buf[1] = bgl_statfs.f_type; + } + else + { + DBGV_FPRINTF(stderr,"Statfs '%s' failed with rc=%d, errno=%d\n", + fd->filename,rc,errno); + ADIO_FileSysType_parentdir(fd->filename, &dir); + rc = statfs(dir,&bgl_statfs); + if (rc >= 0) + { + DBGV_FPRINTF(stderr,"Successful statfs '%s'. Magic number=%#X\n",dir,bgl_statfs.f_type); + buf[1] = bgl_statfs.f_type; + } + else + { + /* Hmm. 
Guess we'll assume the worst-case, that it's not GPFS + * or PVFS2 below */ + buf[1] = -1; /* bogus magic number */ + DBGV_FPRINTF(stderr,"Statfs '%s' failed with rc=%d, errno=%d\n",dir,rc,errno); + } + free(dir); + } + } + /* now we can broadcast the stat/statfs data to everyone else */ + MPI_Bcast(buf, 2, MPI_LONG, 0, fd->comm); + bgl_stat.st_blksize = buf[0]; + bgl_statfs.f_type = buf[1]; + + /* data from stat64 */ + /* store the blksize in the file system specific storage */ + ((ADIOI_BGL_fs*)fd->fs_ptr)->blksize = bgl_stat.st_blksize; + + /* data from statfs */ + if ((bgl_statfs.f_type == GPFS_SUPER_MAGIC) || + (bgl_statfs.f_type == PVFS2_SUPER_MAGIC)) + { + ((ADIOI_BGL_fs*)fd->fs_ptr)->fsync_aggr = + ADIOI_BGL_FSYNC_AGGREGATION_ENABLED; + + /* Only one rank is an "fsync aggregator" because only one + * fsync is needed */ + if (rank == 0) + { + ((ADIOI_BGL_fs*)fd->fs_ptr)->fsync_aggr |= + ADIOI_BGL_FSYNC_AGGREGATOR; + DBG_FPRINTF(stderr,"fsync aggregator %d\n",rank); + } + else ; /* aggregation enabled but this rank is not an aggregator*/ + } + else; /* Other filesystems default to no fsync aggregation */ +} + + void ADIOI_BGL_Open(ADIO_File fd, int *error_code) { int perm, old_mask, amode; @@ -41,8 +216,14 @@ void ADIOI_BGL_Open(ADIO_File fd, int *error_code) amode = amode | O_RDWR; if (fd->access_mode & ADIO_EXCL) amode = amode | O_EXCL; - +#ifdef ADIOI_MPE_LOGGING + MPE_Log_event(ADIOI_MPE_open_a, 0, NULL); +#endif fd->fd_sys = open(fd->filename, amode, perm); +#ifdef ADIOI_MPE_LOGGING + MPE_Log_event(ADIOI_MPE_open_b, 0, NULL); +#endif + DBG_FPRINTF(stderr,"open('%s',%#X,%#X) rc=%d, errno=%d\n",fd->filename,amode,perm,fd->fd_sys,errno); fd->fd_direct = -1; if ((fd->fd_sys != -1) && (fd->access_mode & ADIO_APPEND)) @@ -50,18 +231,29 @@ void ADIOI_BGL_Open(ADIO_File fd, int *error_code) if(fd->fd_sys != -1) { - struct stat64 bgl_stat; - int rc = stat64(fd->filename,&bgl_stat); - if (rc >= 0) - { - /* store the blksize in the file system specific storage */ + 
struct stat64 bgl_stat; + struct statfs bgl_statfs; + char* dir; + int rc; + + /* Initialize the ad_bgl file system specific information */ AD_BGL_assert(fd->fs_ptr == NULL); fd->fs_ptr = (ADIOI_BGL_fs*) ADIOI_Malloc(sizeof(ADIOI_BGL_fs)); - ((ADIOI_BGL_fs*)fd->fs_ptr)->blksize = bgl_stat.st_blksize; -/* FPRINTF(stderr,"%s(%d):Successful stat '%s'. Blocksize=%ld\n",myname,__LINE__,fd->filename,bgl_stat.st_blksize);*/ - } -/* else - FPRINTF(stderr,"%s(%d):Stat '%s' failed with rc=%d, errno=%d\n",myname,__LINE__,fd->filename,rc,errno);*/ + + ((ADIOI_BGL_fs*)fd->fs_ptr)->blksize = 1048576; /* default to 1M */ + + /* default is no fsync aggregation */ + ((ADIOI_BGL_fs*)fd->fs_ptr)->fsync_aggr = + ADIOI_BGL_FSYNC_AGGREGATION_DISABLED; + + +#ifdef ADIOI_MPE_LOGGING + MPE_Log_event(ADIOI_MPE_stat_a, 0, NULL); +#endif + scaleable_stat(fd); +#ifdef ADIOI_MPE_LOGGING + MPE_Log_event(ADIOI_MPE_stat_b, 0, NULL); +#endif } if (fd->fd_sys == -1) { @@ -112,3 +304,6 @@ void ADIOI_BGL_Open(ADIO_File fd, int *error_code) } else *error_code = MPI_SUCCESS; } +/* + *vim: ts=8 sts=4 sw=4 noexpandtab + */ diff --git a/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_rdcoll.c b/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_rdcoll.c index 0c441ab518..7f8e029d50 100644 --- a/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_rdcoll.c +++ b/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_rdcoll.c @@ -8,6 +8,7 @@ /* -*- Mode: C; c-basic-offset:4 ; -*- */ /* + * * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -22,18 +23,25 @@ #include "mpe.h" #endif +#ifdef USE_DBG_LOGGING + #define RDCOLL_DEBUG 1 +#endif +#ifdef AGGREGATION_PROFILE +#include "mpe.h" +#endif + /* prototypes of functions used for collective reads only. 
*/ static void ADIOI_Read_and_exch(ADIO_File fd, void *buf, MPI_Datatype datatype, int nprocs, int myrank, ADIOI_Access *others_req, ADIO_Offset *offset_list, - int *len_list, int contig_access_count, + ADIO_Offset *len_list, int contig_access_count, ADIO_Offset min_st_offset, ADIO_Offset fd_size, ADIO_Offset *fd_start, ADIO_Offset *fd_end, int *buf_idx, int *error_code); static void ADIOI_R_Exchange_data(ADIO_File fd, void *buf, ADIOI_Flatlist_node - *flat_buf, ADIO_Offset *offset_list, int + *flat_buf, ADIO_Offset *offset_list, ADIO_Offset *len_list, int *send_size, int *recv_size, int *count, int *start_pos, int *partial_send, @@ -47,7 +55,7 @@ static void ADIOI_R_Exchange_data(ADIO_File fd, void *buf, ADIOI_Flatlist_node int iter, MPI_Aint buftype_extent, int *buf_idx); static void ADIOI_R_Exchange_data_alltoallv(ADIO_File fd, void *buf, ADIOI_Flatlist_node - *flat_buf, ADIO_Offset *offset_list, int + *flat_buf, ADIO_Offset *offset_list, ADIO_Offset *len_list, int *send_size, int *recv_size, int *count, int *start_pos, int *partial_send, @@ -62,8 +70,8 @@ static void ADIOI_R_Exchange_data_alltoallv(ADIO_File fd, void *buf, ADIOI_Flatl MPI_Aint buftype_extent, int *buf_idx); static void ADIOI_Fill_user_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node *flat_buf, char **recv_buf, ADIO_Offset - *offset_list, int *len_list, - int *recv_size, + *offset_list, ADIO_Offset *len_list, + unsigned *recv_size, MPI_Request *requests, MPI_Status *statuses, int *recd_from_proc, int nprocs, int contig_access_count, @@ -74,7 +82,7 @@ static void ADIOI_Fill_user_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node extern void ADIOI_Calc_my_off_len(ADIO_File fd, int bufcount, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset - offset, ADIO_Offset **offset_list_ptr, int + offset, ADIO_Offset **offset_list_ptr, ADIO_Offset **len_list_ptr, ADIO_Offset *start_offset_ptr, ADIO_Offset *end_offset_ptr, int *contig_access_count_ptr); @@ -99,25 +107,15 @@ void 
ADIOI_BGL_ReadStridedColl(ADIO_File fd, void *buf, int count, whose request lies in this process's file domain. */ int i, filetype_is_contig, nprocs, nprocs_for_coll, myrank; - int contig_access_count, interleave_count = 0, buftype_is_contig; + int contig_access_count=0, interleave_count = 0, buftype_is_contig; int *count_my_req_per_proc, count_my_req_procs, count_others_req_procs; ADIO_Offset start_offset, end_offset, orig_fp, fd_size, min_st_offset, off; ADIO_Offset *offset_list = NULL, *st_offsets = NULL, *fd_start = NULL, *fd_end = NULL, *end_offsets = NULL; ADIO_Offset *bgl_offsets0 = NULL, *bgl_offsets = NULL; int ii; - int *len_list = NULL, *buf_idx = NULL; - - double io_time = 0., all_time, max_all_time; - double tstep1, max_tstep1; - double tstep1_1, max_tstep1_1; - double tstep1_2, max_tstep1_2; - double tstep1_3, max_tstep1_3; - double tstep2, max_tstep2; - double tstep3, max_tstep3; - double tstep4, max_tstep4; - double sum_sz; - + ADIO_Offset *len_list = NULL; + int *buf_idx = NULL; #if BGL_PROFILE BGLMPIO_T_CIO_RESET( 0, r ) #endif @@ -126,6 +124,14 @@ void ADIOI_BGL_ReadStridedColl(ADIO_File fd, void *buf, int count, int bufsize, size; #endif +#if 0 +/* From common code - not implemented for bgl. 
*/ + if (fd->hints->cb_pfr != ADIOI_HINT_DISABLE) { + ADIOI_IOStridedColl (fd, buf, count, ADIOI_READ, datatype, + file_ptr_type, offset, status, error_code); + return; + } */ +#endif #ifdef PROFILE MPE_Log_event(13, 0, "start computation"); #endif @@ -157,14 +163,16 @@ void ADIOI_BGL_ReadStridedColl(ADIO_File fd, void *buf, int count, BGLMPIO_T_CIO_SET_GET( 0, r, 1, 1, 1, BGLMPIO_CIO_GATHER, BGLMPIO_CIO_LCOMP ) #endif - /* for (i=0; idisp + (fd->etype_size) * offset; + off = fd->disp + (ADIO_Offset)(fd->etype_size) * offset; ADIO_ReadContig(fd, buf, count, datatype, ADIO_EXPLICIT_OFFSET, off, status, error_code); } @@ -263,7 +273,9 @@ void ADIOI_BGL_ReadStridedColl(ADIO_File fd, void *buf, int count, else ADIOI_Calc_file_domains(st_offsets, end_offsets, nprocs, nprocs_for_coll, &min_st_offset, - &fd_start, &fd_end, &fd_size); + &fd_start, &fd_end, + fd->hints->min_fdomain_size, &fd_size, + fd->hints->striping_unit); #if BGL_PROFILE BGLMPIO_T_CIO_SET_GET( 0, r, 0, 1, 1, BGLMPIO_CIO_MYREQ, BGLMPIO_CIO_FD_PART ) @@ -381,205 +393,11 @@ void ADIOI_BGL_ReadStridedColl(ADIO_File fd, void *buf, int count, fd->fp_sys_posn = -1; /* set it to null. 
*/ } -#if 0 -void ADIOI_Calc_my_off_len(ADIO_File fd, int bufcount, MPI_Datatype - datatype, int file_ptr_type, ADIO_Offset - offset, ADIO_Offset **offset_list_ptr, int - **len_list_ptr, ADIO_Offset *start_offset_ptr, - ADIO_Offset *end_offset_ptr, int - *contig_access_count_ptr) -{ - int filetype_size, buftype_size, etype_size; - int i, j, k, frd_size=0, old_frd_size=0, st_index=0; - int n_filetypes, etype_in_filetype; - ADIO_Offset abs_off_in_filetype=0; - int bufsize, sum, n_etypes_in_filetype, size_in_filetype; - int contig_access_count, *len_list, flag, filetype_is_contig; - MPI_Aint filetype_extent, filetype_lb; - ADIOI_Flatlist_node *flat_file; - ADIO_Offset *offset_list, off, end_offset=0, disp; - -/* For this process's request, calculate the list of offsets and - lengths in the file and determine the start and end offsets. */ - - ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig); - - MPI_Type_size(fd->filetype, &filetype_size); - MPI_Type_extent(fd->filetype, &filetype_extent); - MPI_Type_lb(fd->filetype, &filetype_lb); - MPI_Type_size(datatype, &buftype_size); - etype_size = fd->etype_size; - - if ( ! filetype_size ) { - *contig_access_count_ptr = 0; - *offset_list_ptr = (ADIO_Offset *) ADIOI_Malloc(2*sizeof(ADIO_Offset)); - *len_list_ptr = (int *) ADIOI_Malloc(2*sizeof(int)); - /* 2 is for consistency. everywhere I malloc one more than needed */ - - offset_list = *offset_list_ptr; - len_list = *len_list_ptr; - offset_list[0] = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind : - fd->disp + etype_size * offset; - len_list[0] = 0; - *start_offset_ptr = offset_list[0]; - *end_offset_ptr = offset_list[0] + len_list[0] - 1; - - return; - } - - if (filetype_is_contig) { - *contig_access_count_ptr = 1; - *offset_list_ptr = (ADIO_Offset *) ADIOI_Malloc(2*sizeof(ADIO_Offset)); - *len_list_ptr = (int *) ADIOI_Malloc(2*sizeof(int)); - /* 2 is for consistency. 
everywhere I malloc one more than needed */ - - offset_list = *offset_list_ptr; - len_list = *len_list_ptr; - offset_list[0] = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind : - fd->disp + etype_size * offset; - len_list[0] = bufcount * buftype_size; - *start_offset_ptr = offset_list[0]; - *end_offset_ptr = offset_list[0] + len_list[0] - 1; - - /* update file pointer */ - if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind = *end_offset_ptr + 1; - } - - else { - - /* First calculate what size of offset_list and len_list to allocate */ - - /* filetype already flattened in ADIO_Open or ADIO_Fcntl */ - flat_file = ADIOI_Flatlist; - while (flat_file->type != fd->filetype) flat_file = flat_file->next; - disp = fd->disp; - - if (file_ptr_type == ADIO_INDIVIDUAL) { - offset = fd->fp_ind; /* in bytes */ - n_filetypes = -1; - flag = 0; - while (!flag) { - n_filetypes++; - for (i=0; icount; i++) { - if (disp + flat_file->indices[i] + - (ADIO_Offset) n_filetypes*filetype_extent + - flat_file->blocklens[i] >= offset) - { - st_index = i; - frd_size = (int) (disp + flat_file->indices[i] + - (ADIO_Offset) n_filetypes*filetype_extent - + flat_file->blocklens[i] - offset); - flag = 1; - break; - } - } - } - } - else { - n_etypes_in_filetype = filetype_size/etype_size; - n_filetypes = (int) (offset / n_etypes_in_filetype); - etype_in_filetype = (int) (offset % n_etypes_in_filetype); - size_in_filetype = etype_in_filetype * etype_size; - - sum = 0; - for (i=0; icount; i++) { - sum += flat_file->blocklens[i]; - if (sum > size_in_filetype) { - st_index = i; - frd_size = sum - size_in_filetype; - abs_off_in_filetype = flat_file->indices[i] + - size_in_filetype - (sum - flat_file->blocklens[i]); - break; - } - } - - /* abs. 
offset in bytes in the file */ - offset = disp + (ADIO_Offset) n_filetypes*filetype_extent + - abs_off_in_filetype; - } - - /* calculate how much space to allocate for offset_list, len_list */ - - old_frd_size = frd_size; - contig_access_count = i = 0; - j = st_index; - bufsize = buftype_size * bufcount; - frd_size = ADIOI_MIN(frd_size, bufsize); - while (i < bufsize) { - if (frd_size) contig_access_count++; - i += frd_size; - j = (j + 1) % flat_file->count; - frd_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i); - } - - /* allocate space for offset_list and len_list */ - - *offset_list_ptr = (ADIO_Offset *) - ADIOI_Malloc((contig_access_count+1)*sizeof(ADIO_Offset)); - *len_list_ptr = (int *) ADIOI_Malloc((contig_access_count+1)*sizeof(int)); - /* +1 to avoid a 0-size malloc */ - - offset_list = *offset_list_ptr; - len_list = *len_list_ptr; - - /* find start offset, end offset, and fill in offset_list and len_list */ - - *start_offset_ptr = offset; /* calculated above */ - - i = k = 0; - j = st_index; - off = offset; - frd_size = ADIOI_MIN(old_frd_size, bufsize); - while (i < bufsize) { - if (frd_size) { - offset_list[k] = off; - len_list[k] = frd_size; - k++; - } - i += frd_size; - end_offset = off + frd_size - 1; - - /* Note: end_offset points to the last byte-offset that will be accessed. - e.g., if start_offset=0 and 100 bytes to be read, end_offset=99*/ - - if (off + frd_size < disp + flat_file->indices[j] + - flat_file->blocklens[j] + - (ADIO_Offset) n_filetypes*filetype_extent) - { - off += frd_size; - /* did not reach end of contiguous block in filetype. - * no more I/O needed. off is incremented by frd_size. 
- */ - } - else { - if (j < (flat_file->count - 1)) j++; - else { - /* hit end of flattened filetype; - * start at beginning again - */ - j = 0; - n_filetypes++; - } - off = disp + flat_file->indices[j] + - (ADIO_Offset) n_filetypes*filetype_extent; - frd_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i); - } - } - - /* update file pointer */ - if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind = off; - - *contig_access_count_ptr = contig_access_count; - *end_offset_ptr = end_offset; - } -} -#endif - static void ADIOI_Read_and_exch(ADIO_File fd, void *buf, MPI_Datatype datatype, int nprocs, int myrank, ADIOI_Access *others_req, ADIO_Offset *offset_list, - int *len_list, int contig_access_count, ADIO_Offset + ADIO_Offset *len_list, int contig_access_count, ADIO_Offset min_st_offset, ADIO_Offset fd_size, ADIO_Offset *fd_start, ADIO_Offset *fd_end, int *buf_idx, int *error_code) @@ -594,19 +412,21 @@ static void ADIOI_Read_and_exch(ADIO_File fd, void *buf, MPI_Datatype array from a file, where each local array is 8Mbytes, requiring at least another 8Mbytes of temp space is unacceptable. 
*/ - int i, j, m, size, ntimes, max_ntimes, buftype_is_contig; + int i, j, m, ntimes, max_ntimes, buftype_is_contig; ADIO_Offset st_loc=-1, end_loc=-1, off, done, real_off, req_off; char *read_buf = NULL, *tmp_buf; int *curr_offlen_ptr, *count, *send_size, *recv_size; - int *partial_send, *recd_from_proc, *start_pos, for_next_iter; - int real_size, req_len, flag, for_curr_iter, rank; + int *partial_send, *recd_from_proc, *start_pos; + /* Not convinced end_loc-st_loc couldn't be > int, so make these offsets*/ + ADIO_Offset real_size, size, for_curr_iter, for_next_iter; + int req_len, flag, rank; MPI_Status status; ADIOI_Flatlist_node *flat_buf=NULL; MPI_Aint buftype_extent; int coll_bufsize; - +#ifdef RDCOLL_DEBUG int iii; - +#endif *error_code = MPI_SUCCESS; /* changed below if error */ /* only I/O errors are currently reported */ @@ -738,7 +558,7 @@ static void ADIOI_Read_and_exch(ADIO_File fd, void *buf, MPI_Datatype #ifdef PROFILE MPE_Log_event(13, 0, "start computation"); #endif - size = (int) (ADIOI_MIN(coll_bufsize, end_loc-st_loc+1-done)); + size = ADIOI_MIN((unsigned)coll_bufsize, end_loc-st_loc+1-done); real_off = off - for_curr_iter; real_size = size + for_curr_iter; @@ -746,7 +566,9 @@ static void ADIOI_Read_and_exch(ADIO_File fd, void *buf, MPI_Datatype for_next_iter = 0; for (i=0; icomm, requests+j); j++; @@ -960,8 +789,10 @@ static void ADIOI_R_Exchange_data(ADIO_File fd, void *buf, ADIOI_Flatlist_node MPI_Irecv(recv_buf[i], recv_size[i], MPI_BYTE, i, myrank+i+100*iter, fd->comm, requests+j); j++; - /* FPRINTF(stderr, "node %d, recv_size %d, tag %d \n", - myrank, recv_size[i], myrank+i+100*iter); */ +#ifdef RDCOLL_DEBUG + DBG_FPRINTF(stderr, "node %d, recv_size %d, tag %d \n", + myrank, recv_size[i], myrank+i+100*iter); +#endif } } @@ -1006,7 +837,7 @@ static void ADIOI_R_Exchange_data(ADIO_File fd, void *buf, ADIOI_Flatlist_node /* if noncontiguous, to the copies from the recv buffers */ if (!buftype_is_contig) ADIOI_Fill_user_buffer(fd, buf, 
flat_buf, recv_buf, - offset_list, len_list, recv_size, + offset_list, len_list, (unsigned*)recv_size, requests, statuses, recd_from_proc, nprocs, contig_access_count, min_st_offset, fd_size, fd_start, fd_end, @@ -1024,9 +855,11 @@ static void ADIOI_R_Exchange_data(ADIO_File fd, void *buf, ADIOI_Flatlist_node if (recv_size[i]) ADIOI_Free(recv_buf[i]); ADIOI_Free(recv_buf); } +#ifdef AGGREGATION_PROFILE + MPE_Log_event (5033, 0, NULL); +#endif } - #define ADIOI_BUF_INCR \ { \ while (buf_incr) { \ @@ -1040,7 +873,7 @@ static void ADIOI_R_Exchange_data(ADIO_File fd, void *buf, ADIOI_Flatlist_node n_buftypes++; \ } \ user_buf_idx = flat_buf->indices[flat_buf_idx] + \ - n_buftypes*buftype_extent; \ + (ADIO_Offset)n_buftypes*(ADIO_Offset)buftype_extent; \ flat_buf_sz = flat_buf->blocklens[flat_buf_idx]; \ } \ buf_incr -= size_in_buf; \ @@ -1052,9 +885,11 @@ static void ADIOI_R_Exchange_data(ADIO_File fd, void *buf, ADIOI_Flatlist_node { \ while (size) { \ size_in_buf = ADIOI_MIN(size, flat_buf_sz); \ + ADIOI_Assert((((ADIO_Offset)(MPIR_Upint)buf) + user_buf_idx) == (ADIO_Offset)(MPIR_Upint)(buf + user_buf_idx)); \ + ADIOI_Assert(size_in_buf == (size_t)size_in_buf); \ memcpy(((char *) buf) + user_buf_idx, \ &(recv_buf[p][recv_buf_idx[p]]), size_in_buf); \ - recv_buf_idx[p] += size_in_buf; \ + recv_buf_idx[p] += size_in_buf; /* already tested (size_t)size_in_buf*/ \ user_buf_idx += size_in_buf; \ flat_buf_sz -= size_in_buf; \ if (!flat_buf_sz) { \ @@ -1064,7 +899,7 @@ static void ADIOI_R_Exchange_data(ADIO_File fd, void *buf, ADIOI_Flatlist_node n_buftypes++; \ } \ user_buf_idx = flat_buf->indices[flat_buf_idx] + \ - n_buftypes*buftype_extent; \ + (ADIO_Offset)n_buftypes*(ADIO_Offset)buftype_extent; \ flat_buf_sz = flat_buf->blocklens[flat_buf_idx]; \ } \ size -= size_in_buf; \ @@ -1073,11 +908,10 @@ static void ADIOI_R_Exchange_data(ADIO_File fd, void *buf, ADIOI_Flatlist_node ADIOI_BUF_INCR \ } - static void ADIOI_Fill_user_buffer(ADIO_File fd, void *buf, 
ADIOI_Flatlist_node *flat_buf, char **recv_buf, ADIO_Offset - *offset_list, int *len_list, - int *recv_size, + *offset_list, ADIO_Offset *len_list, + unsigned *recv_size, MPI_Request *requests, MPI_Status *statuses, int *recd_from_proc, int nprocs, int contig_access_count, @@ -1086,13 +920,18 @@ static void ADIOI_Fill_user_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node ADIO_Offset *fd_end, MPI_Aint buftype_extent) { + /* this function is only called if buftype is not contig */ - int i, p, flat_buf_idx, size, buf_incr; - int flat_buf_sz, size_in_buf, n_buftypes; + int i, p, flat_buf_idx; + ADIO_Offset flat_buf_sz, size_in_buf, buf_incr, size; + int n_buftypes; ADIO_Offset off, len, rem_len, user_buf_idx; + /* Not sure unsigned is necessary, but it makes the math safer */ + unsigned *curr_from_proc, *done_from_proc, *recv_buf_idx; - int *curr_from_proc, *done_from_proc, *recv_buf_idx; + ADIOI_UNREFERENCED_ARG(requests); + ADIOI_UNREFERENCED_ARG(statuses); /* curr_from_proc[p] = amount of data recd from proc. p that has already been accounted for so far @@ -1100,9 +939,9 @@ static void ADIOI_Fill_user_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node filled into user buffer in previous iterations user_buf_idx = current location in user buffer recv_buf_idx[p] = current location in recv_buf of proc. 
p */ - curr_from_proc = (int *) ADIOI_Malloc(nprocs * sizeof(int)); - done_from_proc = (int *) ADIOI_Malloc(nprocs * sizeof(int)); - recv_buf_idx = (int *) ADIOI_Malloc(nprocs * sizeof(int)); + curr_from_proc = (unsigned *) ADIOI_Malloc(nprocs * sizeof(unsigned)); + done_from_proc = (unsigned *) ADIOI_Malloc(nprocs * sizeof(unsigned)); + recv_buf_idx = (unsigned *) ADIOI_Malloc(nprocs * sizeof(unsigned)); for (i=0; i < nprocs; i++) { recv_buf_idx[i] = curr_from_proc[i] = 0; @@ -1120,7 +959,7 @@ static void ADIOI_Fill_user_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node for (i=0; i 0) { @@ -1140,29 +979,32 @@ static void ADIOI_Fill_user_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node if (recv_buf_idx[p] < recv_size[p]) { if (curr_from_proc[p]+len > done_from_proc[p]) { if (done_from_proc[p] > curr_from_proc[p]) { - size = (int)ADIOI_MIN(curr_from_proc[p] + len - + size = ADIOI_MIN(curr_from_proc[p] + len - done_from_proc[p], recv_size[p]-recv_buf_idx[p]); buf_incr = done_from_proc[p] - curr_from_proc[p]; ADIOI_BUF_INCR - buf_incr = (int)(curr_from_proc[p]+len-done_from_proc[p]); + buf_incr = curr_from_proc[p]+len-done_from_proc[p]; + ADIOI_Assert((done_from_proc[p] + size) == (unsigned)((ADIO_Offset)done_from_proc[p] + size)); curr_from_proc[p] = done_from_proc[p] + size; ADIOI_BUF_COPY } else { - size = (int)ADIOI_MIN(len,recv_size[p]-recv_buf_idx[p]); - buf_incr = (int)len; - curr_from_proc[p] += size; + size = ADIOI_MIN(len,recv_size[p]-recv_buf_idx[p]); + buf_incr = len; + ADIOI_Assert((curr_from_proc[p] + size) == (unsigned)((ADIO_Offset)curr_from_proc[p] + size)); + curr_from_proc[p] += (unsigned) size; ADIOI_BUF_COPY } } else { - curr_from_proc[p] += (int)len; - buf_incr = (int)len; + ADIOI_Assert((curr_from_proc[p] + len) == (unsigned)((ADIO_Offset)curr_from_proc[p] + len)); + curr_from_proc[p] += (unsigned) len; + buf_incr = len; ADIOI_BUF_INCR } } else { - buf_incr = (int)len; + buf_incr = len; ADIOI_BUF_INCR } off += len; @@ -1179,7 +1021,7 @@ 
static void ADIOI_Fill_user_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node static void ADIOI_R_Exchange_data_alltoallv( ADIO_File fd, void *buf, ADIOI_Flatlist_node - *flat_buf, ADIO_Offset *offset_list, int + *flat_buf, ADIO_Offset *offset_list, ADIO_Offset *len_list, int *send_size, int *recv_size, int *count, int *start_pos, int *partial_send, int *recd_from_proc, int nprocs, @@ -1192,9 +1034,8 @@ static void ADIOI_R_Exchange_data_alltoallv( { int i, j, k=0, tmp=0, nprocs_recv, nprocs_send; char **recv_buf = NULL; - MPI_Request *requests; - MPI_Datatype send_type; - MPI_Status *statuses; + MPI_Request *requests=NULL; + MPI_Status *statuses=NULL; int rtail, stail; char *sbuf_ptr, *from_ptr; int len; @@ -1238,7 +1079,8 @@ static void ADIOI_R_Exchange_data_alltoallv( } sbuf_ptr = all_send_buf + sdispls[i]; for (j=0; jcomm ); #if 0 - printf( "\tall_recv_buf = " ); - for (i=131072; i<131073; i++) { printf( "%2d,", all_recv_buf [i] ); } - printf( "\n" ); + DBG_FPRINTF(stderr, "\tall_recv_buf = " ); + for (i=131072; i<131073; i++) { DBG_FPRINTF(stderr, "%2d,", all_recv_buf [i] ); } + DBG_FPRINTF(stderr, "\n" ); #endif /* unpack at the receiver side */ if (nprocs_recv) { if (!buftype_is_contig) ADIOI_Fill_user_buffer(fd, buf, flat_buf, recv_buf, - offset_list, len_list, recv_size, + offset_list, len_list, (unsigned*)recv_size, requests, statuses, /* never used inside */ recd_from_proc, nprocs, contig_access_count, diff --git a/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_read.c b/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_read.c index 363d8c3436..b62c8f4fcf 100644 --- a/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_read.c +++ b/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_read.c @@ -21,9 +21,9 @@ void ADIOI_BGL_ReadContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int *error_code) { - int err=-1, datatype_size, len; + int err=-1, datatype_size; + ADIO_Offset len; static char myname[] = 
"ADIOI_BGL_READCONTIG"; - #if BGL_PROFILE /* timing */ double io_time, io_time2; @@ -35,7 +35,8 @@ void ADIOI_BGL_ReadContig(ADIO_File fd, void *buf, int count, #endif MPI_Type_size(datatype, &datatype_size); - len = datatype_size * count; + len = (ADIO_Offset)datatype_size * (ADIO_Offset)count; + ADIOI_Assert(len == (unsigned int) len); /* read takes an unsigned int parm */ #if BGL_PROFILE @@ -48,7 +49,7 @@ void ADIOI_BGL_ReadContig(ADIO_File fd, void *buf, int count, ADIOI_WRITE_LOCK(fd, offset, SEEK_SET, len); else ADIOI_READ_LOCK(fd, offset, SEEK_SET, len); if (bglmpio_timing2) io_time2 = MPI_Wtime(); - err = read(fd->fd_sys, buf, len); + err = read(fd->fd_sys, buf, (unsigned int)len); if (bglmpio_timing2) bglmpio_prof_cr[ BGLMPIO_CIO_T_POSI_RW ] += (MPI_Wtime() - io_time2); ADIOI_UNLOCK(fd, offset, SEEK_SET, len); fd->fp_sys_posn = offset + err; @@ -64,7 +65,7 @@ void ADIOI_BGL_ReadContig(ADIO_File fd, void *buf, int count, ADIOI_WRITE_LOCK(fd, offset, SEEK_SET, len); else ADIOI_READ_LOCK(fd, offset, SEEK_SET, len); if (bglmpio_timing2) io_time2 = MPI_Wtime(); - err = read(fd->fd_sys, buf, len); + err = read(fd->fd_sys, buf, (unsigned int)len); if (bglmpio_timing2) bglmpio_prof_cr[ BGLMPIO_CIO_T_POSI_RW ] += (MPI_Wtime() - io_time2); ADIOI_UNLOCK(fd, offset, SEEK_SET, len); fd->fp_ind += err; @@ -79,7 +80,7 @@ void ADIOI_BGL_ReadContig(ADIO_File fd, void *buf, int count, if (fd->atomicity) ADIOI_WRITE_LOCK(fd, offset, SEEK_SET, len); else ADIOI_READ_LOCK(fd, offset, SEEK_SET, len); - err = read(fd->fd_sys, buf, len); + err = read(fd->fd_sys, buf, (unsigned int)len); ADIOI_UNLOCK(fd, offset, SEEK_SET, len); fd->fp_sys_posn = offset + err; /* individual file pointer not updated */ @@ -91,7 +92,7 @@ void ADIOI_BGL_ReadContig(ADIO_File fd, void *buf, int count, if (fd->atomicity) ADIOI_WRITE_LOCK(fd, offset, SEEK_SET, len); else ADIOI_READ_LOCK(fd, offset, SEEK_SET, len); - err = read(fd->fd_sys, buf, len); + err = read(fd->fd_sys, buf, (unsigned int)len); 
ADIOI_UNLOCK(fd, offset, SEEK_SET, len); fd->fp_ind += err; fd->fp_sys_posn = fd->fp_ind; @@ -120,12 +121,11 @@ void ADIOI_BGL_ReadContig(ADIO_File fd, void *buf, int count, } - #define ADIOI_BUFFERED_READ \ { \ if (req_off >= readbuf_off + readbuf_len) { \ readbuf_off = req_off; \ - readbuf_len = (int) (ADIOI_MIN(max_bufsize, end_offset-readbuf_off+1));\ + readbuf_len = (unsigned) (ADIOI_MIN(max_bufsize, end_offset-readbuf_off+1));\ lseek(fd->fd_sys, readbuf_off, SEEK_SET);\ if (!(fd->atomicity)) ADIOI_READ_LOCK(fd, readbuf_off, SEEK_SET, readbuf_len);\ err = read(fd->fd_sys, readbuf, readbuf_len);\ @@ -133,6 +133,7 @@ void ADIOI_BGL_ReadContig(ADIO_File fd, void *buf, int count, if (err == -1) err_flag = 1; \ } \ while (req_len > readbuf_off + readbuf_len - req_off) { \ + ADIOI_Assert((readbuf_off + readbuf_len - req_off) == (int) (readbuf_off + readbuf_len - req_off));\ partial_read = (int) (readbuf_off + readbuf_len - req_off); \ tmp_buf = (char *) ADIOI_Malloc(partial_read); \ memcpy(tmp_buf, readbuf+readbuf_len-partial_read, partial_read); \ @@ -141,7 +142,7 @@ void ADIOI_BGL_ReadContig(ADIO_File fd, void *buf, int count, memcpy(readbuf, tmp_buf, partial_read); \ ADIOI_Free(tmp_buf); \ readbuf_off += readbuf_len-partial_read; \ - readbuf_len = (int) (partial_read + ADIOI_MIN(max_bufsize, \ + readbuf_len = (unsigned) (partial_read + ADIOI_MIN(max_bufsize, \ end_offset-readbuf_off+1)); \ lseek(fd->fd_sys, readbuf_off+partial_read, SEEK_SET);\ if (!(fd->atomicity)) ADIOI_READ_LOCK(fd, readbuf_off+partial_read, SEEK_SET, readbuf_len-partial_read);\ @@ -149,6 +150,7 @@ void ADIOI_BGL_ReadContig(ADIO_File fd, void *buf, int count, if (!(fd->atomicity)) ADIOI_UNLOCK(fd, readbuf_off+partial_read, SEEK_SET, readbuf_len-partial_read);\ if (err == -1) err_flag = 1; \ } \ + ADIOI_Assert(req_len == (size_t)req_len); \ memcpy((char *)buf + userbuf_off, readbuf+req_off-readbuf_off, req_len); \ } @@ -160,20 +162,23 @@ void ADIOI_BGL_ReadStrided(ADIO_File fd, void *buf, int 
count, { /* offset is in units of etype relative to the filetype. */ + ADIOI_Flatlist_node *flat_buf, *flat_file; - int i, j, k, err=-1, brd_size, frd_size=0, st_index=0; - int bufsize, num, size, sum, n_etypes_in_filetype, size_in_filetype; - int n_filetypes, etype_in_filetype; + ADIO_Offset i_offset, new_brd_size, brd_size, size; + int i, j, k, err=-1, st_index=0; + ADIO_Offset frd_size=0, new_frd_size, st_frd_size; + unsigned num, bufsize; + int n_etypes_in_filetype; + ADIO_Offset n_filetypes, etype_in_filetype, st_n_filetypes, size_in_filetype; ADIO_Offset abs_off_in_filetype=0; - int filetype_size, etype_size, buftype_size, req_len, partial_read; + int filetype_size, etype_size, buftype_size, partial_read; MPI_Aint filetype_extent, buftype_extent; int buf_count, buftype_is_contig, filetype_is_contig; - ADIO_Offset userbuf_off; + ADIO_Offset userbuf_off, req_len, sum; ADIO_Offset off, req_off, disp, end_offset=0, readbuf_off, start_off; char *readbuf, *tmp_buf, *value; - int flag, st_frd_size, st_n_filetypes, readbuf_len; - int new_brd_size, new_frd_size, err_flag=0, info_flag, max_bufsize; - + int err_flag=0, info_flag; + unsigned max_bufsize, readbuf_len; static char myname[] = "ADIOI_BGL_READSTRIDED"; if (fd->hints->ds_read == ADIOI_HINT_DISABLE) { @@ -207,12 +212,13 @@ void ADIOI_BGL_ReadStrided(ADIO_File fd, void *buf, int count, MPI_Type_extent(datatype, &buftype_extent); etype_size = fd->etype_size; + ADIOI_Assert((buftype_size * count) == ((ADIO_Offset)(unsigned)buftype_size * (ADIO_Offset)count)); bufsize = buftype_size * count; /* get max_bufsize from the info object. 
*/ value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char)); - MPI_Info_get(fd->info, "ind_rd_buffer_size", MPI_MAX_INFO_VAL, value, + ADIOI_Info_get(fd->info, "ind_rd_buffer_size", MPI_MAX_INFO_VAL, value, &info_flag); max_bufsize = atoi(value); ADIOI_Free(value); @@ -226,13 +232,13 @@ void ADIOI_BGL_ReadStrided(ADIO_File fd, void *buf, int count, while (flat_buf->type != datatype) flat_buf = flat_buf->next; off = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind : - fd->disp + etype_size * offset; + fd->disp + (ADIO_Offset)etype_size * offset; start_off = off; end_offset = off + bufsize - 1; readbuf_off = off; readbuf = (char *) ADIOI_Malloc(max_bufsize); - readbuf_len = (int) (ADIOI_MIN(max_bufsize, end_offset-readbuf_off+1)); + readbuf_len = (unsigned) (ADIOI_MIN(max_bufsize, end_offset-readbuf_off+1)); /* if atomicity is true, lock (exclusive) the region to be accessed */ if (fd->atomicity) @@ -245,13 +251,16 @@ void ADIOI_BGL_ReadStrided(ADIO_File fd, void *buf, int count, if (err == -1) err_flag = 1; for (j=0; jcount; i++) { - userbuf_off = j*buftype_extent + flat_buf->indices[i]; - req_off = off; - req_len = flat_buf->blocklens[i]; - ADIOI_BUFFERED_READ - off += flat_buf->blocklens[i]; - } + { + int i; + for (i=0; icount; i++) { + userbuf_off = (ADIO_Offset)j*(ADIO_Offset)buftype_extent + flat_buf->indices[i]; + req_off = off; + req_len = flat_buf->blocklens[i]; + ADIOI_BUFFERED_READ + off += flat_buf->blocklens[i]; + } + } if (fd->atomicity) ADIOI_UNLOCK(fd, start_off, SEEK_SET, end_offset-start_off+1); @@ -277,29 +286,36 @@ void ADIOI_BGL_ReadStrided(ADIO_File fd, void *buf, int count, disp = fd->disp; if (file_ptr_type == ADIO_INDIVIDUAL) { - offset = fd->fp_ind; /* in bytes */ - n_filetypes = -1; - flag = 0; - while (!flag) { - n_filetypes++; - for (i=0; icount; i++) { - if (disp + flat_file->indices[i] + - (ADIO_Offset) n_filetypes*filetype_extent + flat_file->blocklens[i] - >= offset) { - st_index = i; - frd_size = (int) (disp + 
flat_file->indices[i] + - (ADIO_Offset) n_filetypes*filetype_extent - + flat_file->blocklens[i] - offset); - flag = 1; - break; - } + /* Wei-keng reworked type processing to be a bit more efficient */ + offset = fd->fp_ind - disp; + n_filetypes = (offset - flat_file->indices[0]) / filetype_extent; + offset -= (ADIO_Offset)n_filetypes * filetype_extent; + /* now offset is local to this extent */ + + /* find the block where offset is located, skip blocklens[i]==0 */ + for (i=0; icount; i++) { + ADIO_Offset dist; + if (flat_file->blocklens[i] == 0) continue; + dist = flat_file->indices[i] + flat_file->blocklens[i] - offset; + /* frd_size is from offset to the end of block i */ + if (dist == 0) { + i++; + offset = flat_file->indices[i]; + frd_size = flat_file->blocklens[i]; + break; + } + if (dist > 0) { + frd_size = dist; + break; } } + st_index = i; /* starting index in flat_file->indices[] */ + offset += disp + (ADIO_Offset)n_filetypes*filetype_extent; } else { n_etypes_in_filetype = filetype_size/etype_size; - n_filetypes = (int) (offset / n_etypes_in_filetype); - etype_in_filetype = (int) (offset % n_etypes_in_filetype); + n_filetypes = offset / n_etypes_in_filetype; + etype_in_filetype = offset % n_etypes_in_filetype; size_in_filetype = etype_in_filetype * etype_size; sum = 0; @@ -315,32 +331,63 @@ void ADIOI_BGL_ReadStrided(ADIO_File fd, void *buf, int count, } /* abs. offset in bytes in the file */ - offset = disp + (ADIO_Offset) n_filetypes*filetype_extent + abs_off_in_filetype; + offset = disp + (ADIO_Offset) n_filetypes*filetype_extent + + abs_off_in_filetype; } start_off = offset; + /* Wei-keng Liao: read request is within a single flat_file contig + * block e.g. 
with subarray types that actually describe the whole + * array */ + if (buftype_is_contig && bufsize <= frd_size) { + ADIO_ReadContig(fd, buf, bufsize, MPI_BYTE, ADIO_EXPLICIT_OFFSET, + offset, status, error_code); + + if (file_ptr_type == ADIO_INDIVIDUAL) { + /* update MPI-IO file pointer to point to the first byte that + * can be accessed in the fileview. */ + fd->fp_ind = offset + bufsize; + if (bufsize == frd_size) { + do { + st_index++; + if (st_index == flat_file->count) { + st_index = 0; + n_filetypes++; + } + } while (flat_file->blocklens[st_index] == 0); + fd->fp_ind = disp + flat_file->indices[st_index] + + n_filetypes*filetype_extent; + } + } + fd->fp_sys_posn = -1; /* set it to null. */ +#ifdef HAVE_STATUS_SET_BYTES + MPIR_Status_set_bytes(status, datatype, bufsize); +#endif + return; + } + /* Calculate end_offset, the last byte-offset that will be accessed. e.g., if start_offset=0 and 100 bytes to be read, end_offset=99*/ st_frd_size = frd_size; st_n_filetypes = n_filetypes; - i = 0; + i_offset = 0; j = st_index; off = offset; frd_size = ADIOI_MIN(st_frd_size, bufsize); - while (i < bufsize) { - i += frd_size; + while (i_offset < bufsize) { + i_offset += frd_size; end_offset = off + frd_size - 1; - if (j < (flat_file->count - 1)) j++; - else { - j = 0; - n_filetypes++; + j = (j+1) % flat_file->count; + n_filetypes += (j == 0) ? 1 : 0; + while (flat_file->blocklens[j]==0) { + j = (j+1) % flat_file->count; + n_filetypes += (j == 0) ? 
1 : 0; } - - off = disp + flat_file->indices[j] + (ADIO_Offset) n_filetypes*filetype_extent; - frd_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i); + off = disp + flat_file->indices[j] + n_filetypes*(ADIO_Offset)filetype_extent; + frd_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i_offset); } /* if atomicity is true, lock (exclusive) the region to be accessed */ @@ -350,7 +397,7 @@ void ADIOI_BGL_ReadStrided(ADIO_File fd, void *buf, int count, /* initial read into readbuf */ readbuf_off = offset; readbuf = (char *) ADIOI_Malloc(max_bufsize); - readbuf_len = (int) (ADIOI_MIN(max_bufsize, end_offset-readbuf_off+1)); + readbuf_len = (unsigned) (ADIOI_MIN(max_bufsize, end_offset-readbuf_off+1)); lseek(fd->fd_sys, offset, SEEK_SET); if (!(fd->atomicity)) ADIOI_READ_LOCK(fd, offset, SEEK_SET, readbuf_len); @@ -364,12 +411,12 @@ void ADIOI_BGL_ReadStrided(ADIO_File fd, void *buf, int count, /* contiguous in memory, noncontiguous in file. should be the most common case. */ - i = 0; + i_offset = 0; j = st_index; off = offset; n_filetypes = st_n_filetypes; frd_size = ADIOI_MIN(st_frd_size, bufsize); - while (i < bufsize) { + while (i_offset < bufsize) { if (frd_size) { /* TYPE_UB and TYPE_LB can result in frd_size = 0. save system call in such cases */ @@ -378,25 +425,26 @@ void ADIOI_BGL_ReadStrided(ADIO_File fd, void *buf, int count, req_off = off; req_len = frd_size; - userbuf_off = i; + userbuf_off = i_offset; ADIOI_BUFFERED_READ } - i += frd_size; + i_offset += frd_size; if (off + frd_size < disp + flat_file->indices[j] + - flat_file->blocklens[j] + (ADIO_Offset) n_filetypes*filetype_extent) + flat_file->blocklens[j] + n_filetypes*(ADIO_Offset)filetype_extent) off += frd_size; /* did not reach end of contiguous block in filetype. no more I/O needed. off is incremented by frd_size. */ else { - if (j < (flat_file->count - 1)) j++; - else { - j = 0; - n_filetypes++; + j = (j+1) % flat_file->count; + n_filetypes += (j == 0) ? 
1 : 0; + while (flat_file->blocklens[j]==0) { + j = (j+1) % flat_file->count; + n_filetypes += (j == 0) ? 1 : 0; } off = disp + flat_file->indices[j] + - (ADIO_Offset) n_filetypes*filetype_extent; - frd_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i); + n_filetypes*(ADIO_Offset)filetype_extent; + frd_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i_offset); } } } @@ -408,7 +456,7 @@ void ADIOI_BGL_ReadStrided(ADIO_File fd, void *buf, int count, while (flat_buf->type != datatype) flat_buf = flat_buf->next; k = num = buf_count = 0; - i = (int) (flat_buf->indices[0]); + i_offset = flat_buf->indices[0]; j = st_index; off = offset; n_filetypes = st_n_filetypes; @@ -423,7 +471,7 @@ void ADIOI_BGL_ReadStrided(ADIO_File fd, void *buf, int count, req_off = off; req_len = size; - userbuf_off = i; + userbuf_off = i_offset; ADIOI_BUFFERED_READ } @@ -432,18 +480,19 @@ void ADIOI_BGL_ReadStrided(ADIO_File fd, void *buf, int count, if (size == frd_size) { /* reached end of contiguous block in file */ - if (j < (flat_file->count - 1)) j++; - else { - j = 0; - n_filetypes++; + j = (j+1) % flat_file->count; + n_filetypes += (j == 0) ? 1 : 0; + while (flat_file->blocklens[j]==0) { + j = (j+1) % flat_file->count; + n_filetypes += (j == 0) ? 
1 : 0; } off = disp + flat_file->indices[j] + - (ADIO_Offset) n_filetypes*filetype_extent; + n_filetypes*(ADIO_Offset)filetype_extent; new_frd_size = flat_file->blocklens[j]; if (size != brd_size) { - i += size; + i_offset += size; new_brd_size -= size; } } @@ -453,7 +502,7 @@ void ADIOI_BGL_ReadStrided(ADIO_File fd, void *buf, int count, k = (k + 1)%flat_buf->count; buf_count++; - i = (int) (buftype_extent*(buf_count/flat_buf->count) + + i_offset = ((ADIO_Offset)buftype_extent*(ADIO_Offset)(buf_count/flat_buf->count) + flat_buf->indices[k]); new_brd_size = flat_buf->blocklens[k]; if (size != frd_size) { @@ -461,6 +510,7 @@ void ADIOI_BGL_ReadStrided(ADIO_File fd, void *buf, int count, new_frd_size -= size; } } + ADIOI_Assert(((ADIO_Offset)num + size) == (unsigned)(num + size)); num += size; frd_size = new_frd_size; brd_size = new_brd_size; diff --git a/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_tuning.c b/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_tuning.c index 4959fa3eeb..074186efe0 100644 --- a/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_tuning.c +++ b/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_tuning.c @@ -3,7 +3,13 @@ /* ---------------------------------------------------------------- */ /** * \file ad_bgl_tuning.c - * \brief ??? + * \brief defines ad_bgl performance tuning + */ + +/* -*- Mode: C; c-basic-offset:4 ; -*- */ +/* + * Copyright (C) 2008 University of Chicago. + * See COPYRIGHT notice in top-level directory. */ /*--------------------------------------------------------------------- @@ -26,6 +32,40 @@ double bglmpio_prof_cw [BGLMPIO_CIO_LAST]; double bglmpio_prof_cr [BGLMPIO_CIO_LAST]; /* set internal variables for tuning environment variables */ +/** \page env_vars Environment Variables + * - BGLMPIO_COMM - Define how data is exchanged on collective + * reads and writes. Possible values: + * - 0 - Use MPI_Alltoallv. + * - 1 - Use MPI_Isend/MPI_Irecv. + * - Default is 0. 
+ * + * - BGLMPIO_TIMING - collect timing breakdown for MPI I/O collective calls. + * Must also compile the library with BGL_PROFILE defined. Possible values: + * - 0 - Do not collect/report timing. + * - 1 - Collect/report timing. + * - Default is 0. + * + * - BGLMPIO_TIMING2 - collect additional averages for MPI I/O collective calls. + * Must also compile the library with BGL_PROFILE defined. Possible values: + * - 0 - Do not collect/report averages. + * - 1 - Collect/report averages. + * - Default is 0. + * + * - BGLMPIO_TUNEGATHER - Tune how starting and ending offsets are communicated + * for aggregator collective i/o. Possible values: + * - 0 - Use two MPI_Allgather's to collect starting and ending offsets. + * - 1 - Use MPI_Allreduce(MPI_MAX) to collect starting and ending offsets. + * - Default is 1. + * + * - BGLMPIO_TUNEBLOCKING - Tune how aggregate file domains are + * calculated (block size). Possible values: + * - 0 - Evenly calculate file domains across aggregators. Also use + * MPI_Isend/MPI_Irecv to exchange domain information. + * - 1 - Align file domains with the underlying file system's block size. Also use + * MPI_Alltoallv to exchange domain information. + * - Default is 1. 
+ * +*/ void ad_bgl_get_env_vars() { char *x; diff --git a/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_wrcoll.c b/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_wrcoll.c index c8fabc1818..c7e32df6b7 100644 --- a/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_wrcoll.c +++ b/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_wrcoll.c @@ -18,6 +18,9 @@ #include "ad_bgl_pset.h" #include "ad_bgl_aggrs.h" +#ifdef AGGREGATION_PROFILE +#include "mpe.h" +#endif #ifdef PROFILE #include "mpe.h" #endif @@ -26,13 +29,13 @@ static void ADIOI_Exch_and_write(ADIO_File fd, void *buf, MPI_Datatype datatype, int nprocs, int myrank, ADIOI_Access *others_req, ADIO_Offset *offset_list, - int *len_list, int contig_access_count, ADIO_Offset + ADIO_Offset *len_list, int contig_access_count, ADIO_Offset min_st_offset, ADIO_Offset fd_size, ADIO_Offset *fd_start, ADIO_Offset *fd_end, int *buf_idx, int *error_code); static void ADIOI_W_Exchange_data(ADIO_File fd, void *buf, char *write_buf, ADIOI_Flatlist_node *flat_buf, ADIO_Offset - *offset_list, int *len_list, int *send_size, + *offset_list, ADIO_Offset *len_list, int *send_size, int *recv_size, ADIO_Offset off, int size, int *count, int *start_pos, int *partial_recv, int *sent_to_proc, int nprocs, @@ -49,7 +52,7 @@ static void ADIOI_W_Exchange_data_alltoallv( char *write_buf, /* 1 */ ADIOI_Flatlist_node *flat_buf, ADIO_Offset *offset_list, - int *len_list, int *send_size, int *recv_size, + ADIO_Offset *len_list, int *send_size, int *recv_size, ADIO_Offset off, int size, /* 2 */ int *count, int *start_pos, int *partial_recv, int *sent_to_proc, int nprocs, int myrank, @@ -65,7 +68,7 @@ static void ADIOI_W_Exchange_data_alltoallv( int *error_code); static void ADIOI_Fill_send_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node *flat_buf, char **send_buf, ADIO_Offset - *offset_list, int *len_list, int *send_size, + *offset_list, ADIO_Offset *len_list, int *send_size, MPI_Request *requests, int *sent_to_proc, int nprocs, int myrank, int contig_access_count, 
ADIO_Offset @@ -76,7 +79,7 @@ static void ADIOI_Fill_send_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node MPI_Aint buftype_extent); static void ADIOI_Fill_send_buffer_nosend(ADIO_File fd, void *buf, ADIOI_Flatlist_node *flat_buf, char **send_buf, ADIO_Offset - *offset_list, int *len_list, int *send_size, + *offset_list, ADIO_Offset *len_list, int *send_size, MPI_Request *requests, int *sent_to_proc, int nprocs, int myrank, int contig_access_count, ADIO_Offset @@ -118,26 +121,27 @@ void ADIOI_BGL_WriteStridedColl(ADIO_File fd, void *buf, int count, ADIO_Offset *bgl_offsets0 = NULL, *bgl_offsets = NULL; int ii; - int *buf_idx = NULL, *len_list = NULL; - - double io_time = 0, all_time, max_all_time; - double tstep1, max_tstep1; - double tstep1_1, max_tstep1_1; - double tstep1_2, max_tstep1_2; - double tstep1_3, max_tstep1_3; - double tstep2, max_tstep2; - double tstep3, max_tstep3; - double tstep4, max_tstep4; - double sum_sz; - + int *buf_idx = NULL; + ADIO_Offset *len_list = NULL; #if BGL_PROFILE BGLMPIO_T_CIO_RESET( 0, w ) #endif - +#if 0 + /* From common code - not implemented for bgl.*/ + int old_error, tmp_error; +#endif #ifdef PROFILE MPE_Log_event(13, 0, "start computation"); #endif +#if 0 +/* From common code - not implemented for bgl. */ + if (fd->hints->cb_pfr != ADIOI_HINT_DISABLE) { + ADIOI_IOStridedColl (fd, buf, count, ADIOI_WRITE, datatype, + file_ptr_type, offset, status, error_code); + return; + } +#endif MPI_Comm_size(fd->comm, &nprocs); MPI_Comm_rank(fd->comm, &myrank); @@ -207,7 +211,8 @@ void ADIOI_BGL_WriteStridedColl(ADIO_File fd, void *buf, int count, /* are the accesses of different processes interleaved? 
*/ for (i=1; idisp + (fd->etype_size) * offset; + off = fd->disp + (ADIO_Offset)(fd->etype_size) * offset; ADIO_WriteContig(fd, buf, count, datatype, ADIO_EXPLICIT_OFFSET, off, status, error_code); @@ -260,7 +265,9 @@ void ADIOI_BGL_WriteStridedColl(ADIO_File fd, void *buf, int count, else ADIOI_Calc_file_domains(st_offsets, end_offsets, nprocs, nprocs_for_coll, &min_st_offset, - &fd_start, &fd_end, &fd_size); + &fd_start, &fd_end, + fd->hints->min_fdomain_size, &fd_size, + fd->hints->striping_unit); #if BGL_PROFILE BGLMPIO_T_CIO_SET_GET( 0, w, 0, 1, 1, BGLMPIO_CIO_MYREQ, BGLMPIO_CIO_FD_PART ) @@ -329,9 +336,50 @@ void ADIOI_BGL_WriteStridedColl(ADIO_File fd, void *buf, int count, BGLMPIO_T_CIO_REPORT( 0, w, fd, myrank ) #endif - +#if 0 + /* From common code - not implemented for bgl. + * + * If this collective write is followed by an independent write, + * it's possible to have those subsequent writes on other processes + * race ahead and sneak in before the read-modify-write completes. + * We carry out a collective communication at the end here so no one + * can start independent i/o before collective I/O completes. 
+ * + * need to do some gymnastics with the error codes so that if something + * went wrong, all processes report error, but if a process has a more + * specific error code, we can still have that process report the + * additional information */ + old_error = *error_code; + if (*error_code != MPI_SUCCESS) *error_code = MPI_ERR_IO; + + /* optimization: if only one process performing i/o, we can perform + * a less-expensive Bcast */ +#ifdef ADIOI_MPE_LOGGING + MPE_Log_event( ADIOI_MPE_postwrite_a, 0, NULL ); +#endif + if (fd->hints->cb_nodes == 1) + MPI_Bcast(error_code, 1, MPI_INT, + fd->hints->ranklist[0], fd->comm); + else { + tmp_error = *error_code; + MPI_Allreduce(&tmp_error, error_code, 1, MPI_INT, + MPI_MAX, fd->comm); + } +#ifdef ADIOI_MPE_LOGGING + MPE_Log_event( ADIOI_MPE_postwrite_b, 0, NULL ); +#endif +#ifdef AGGREGATION_PROFILE + MPE_Log_event (5012, 0, NULL); +#endif + + if ( (old_error != MPI_SUCCESS) && (old_error != MPI_ERR_IO) ) + *error_code = old_error; + + +#endif /* free all memory allocated for collective I/O */ + if (!buftype_is_contig) ADIOI_Delete_flattened(datatype); for (i=0; ifp_sys_posn = -1; /* set it to null. */ +#ifdef AGGREGATION_PROFILE + MPE_Log_event (5013, 0, NULL); +#endif } @@ -371,12 +422,12 @@ void ADIOI_BGL_WriteStridedColl(ADIO_File fd, void *buf, int count, * code is created and returned in error_code. 
*/ static void ADIOI_Exch_and_write(ADIO_File fd, void *buf, MPI_Datatype - datatype, int nprocs, int myrank, + datatype, int nprocs, + int myrank, ADIOI_Access *others_req, ADIO_Offset *offset_list, - int *len_list, int contig_access_count, - ADIO_Offset - min_st_offset, ADIO_Offset fd_size, + ADIO_Offset *len_list, int contig_access_count, + ADIO_Offset min_st_offset, ADIO_Offset fd_size, ADIO_Offset *fd_start, ADIO_Offset *fd_end, int *buf_idx, int *error_code) { @@ -389,7 +440,9 @@ static void ADIOI_Exch_and_write(ADIO_File fd, void *buf, MPI_Datatype array to a file, where each local array is 8Mbytes, requiring at least another 8Mbytes of temp space is unacceptable. */ - int hole, i, j, m, size=0, ntimes, max_ntimes, buftype_is_contig; + /* Not convinced end_loc-st_loc couldn't be > int, so make these offsets*/ + ADIO_Offset size=0; + int hole, i, j, m, ntimes, max_ntimes, buftype_is_contig; ADIO_Offset st_loc=-1, end_loc=-1, off, done, req_off; char *write_buf=NULL; int *curr_offlen_ptr, *count, *send_size, req_len, *recv_size; @@ -410,7 +463,7 @@ static void ADIOI_Exch_and_write(ADIO_File fd, void *buf, MPI_Datatype That gives the no. of communication phases as well. 
*/ value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char)); - MPI_Info_get(fd->info, "cb_buffer_size", MPI_MAX_INFO_VAL, value, + ADIOI_Info_get(fd->info, "cb_buffer_size", MPI_MAX_INFO_VAL, value, &info_flag); coll_bufsize = atoi(value); ADIOI_Free(value); @@ -526,7 +579,7 @@ static void ADIOI_Exch_and_write(ADIO_File fd, void *buf, MPI_Datatype #endif for (i=0; i < nprocs; i++) count[i] = recv_size[i] = 0; - size = (int) (ADIOI_MIN(coll_bufsize, end_loc-st_loc+1-done)); + size = ADIOI_MIN((unsigned)coll_bufsize, end_loc-st_loc+1-done); for (i=0; i < nprocs; i++) { if (others_req[i].count) { @@ -550,12 +603,14 @@ static void ADIOI_Exch_and_write(ADIO_File fd, void *buf, MPI_Datatype } if (req_off < off + size) { count[i]++; + ADIOI_Assert((((ADIO_Offset)(MPIR_Upint)write_buf)+req_off-off) == (ADIO_Offset)(MPIR_Upint)(write_buf+req_off-off)); MPI_Address(write_buf+req_off-off, &(others_req[i].mem_ptrs[j])); - recv_size[i] += (int)(ADIOI_MIN(off + (ADIO_Offset)size - - req_off, req_len)); + ADIOI_Assert((off + size - req_off) == (int)(off + size - req_off)); + recv_size[i] += (int)(ADIOI_MIN(off + size - req_off, + (unsigned)req_len)); - if (off+size-req_off < req_len) + if (off+size-req_off < (unsigned)req_len) { partial_recv[i] = (int) (off + size - req_off); @@ -618,7 +673,8 @@ static void ADIOI_Exch_and_write(ADIO_File fd, void *buf, MPI_Datatype if (count[i]) flag = 1; if (flag) { - ADIO_WriteContig(fd, write_buf, size, MPI_BYTE, ADIO_EXPLICIT_OFFSET, + ADIOI_Assert(size == (int)size); + ADIO_WriteContig(fd, write_buf, (int)size, MPI_BYTE, ADIO_EXPLICIT_OFFSET, off, &status, error_code); if (*error_code != MPI_SUCCESS) return; } @@ -678,7 +734,7 @@ static void ADIOI_Exch_and_write(ADIO_File fd, void *buf, MPI_Datatype */ static void ADIOI_W_Exchange_data(ADIO_File fd, void *buf, char *write_buf, ADIOI_Flatlist_node *flat_buf, ADIO_Offset - *offset_list, int *len_list, int *send_size, + *offset_list, ADIO_Offset *len_list, int *send_size, int 
*recv_size, ADIO_Offset off, int size, int *count, int *start_pos, int *partial_recv, @@ -758,19 +814,26 @@ static void ADIOI_W_Exchange_data(ADIO_File fd, void *buf, char *write_buf, } ADIOI_Free(tmp_len); -/* check if there are any holes */ + /* check if there are any holes. If yes, must do read-modify-write. + * holes can be in three places. 'middle' is what you'd expect: the + * processes are operating on noncontiguous data. But holes can also show + * up at the beginning or end of the file domain (see John Bent ROMIO REQ + * #835). Missing these holes would result in us writing more data than + * received by everyone else. */ *hole = 0; - /* See if there are holes before the first request or after the last request*/ - if((srt_off[0] > off) || - ((srt_off[sum-1] + srt_len[sum-1]) < (off + size))) - { - *hole = 1; - } - else /* See if there are holes between the requests, if there are more than one */ - for (i=0; i srt_len[0]) srt_len[0] = new_len; + } + else + break; + } + if (i < sum || size != srt_len[0]) /* hole in middle or end */ + *hole = 1; } ADIOI_Free(srt_off); @@ -821,6 +884,9 @@ static void ADIOI_W_Exchange_data(ADIO_File fd, void *buf, char *write_buf, /* post sends. if buftype_is_contig, data can be directly sent from user buf at location given by buf_idx. else use send_buf.
*/ +#ifdef AGGREGATION_PROFILE + MPE_Log_event (5032, 0, NULL); +#endif if (buftype_is_contig) { j = 0; for (i=0; i < nprocs; i++) @@ -895,6 +961,9 @@ static void ADIOI_W_Exchange_data(ADIO_File fd, void *buf, char *write_buf, MPI_Waitall(nprocs_send+nprocs_recv, requests, statuses); #endif +#ifdef AGGREGATION_PROFILE + MPE_Log_event (5033, 0, NULL); +#endif ADIOI_Free(statuses); ADIOI_Free(requests); if (!buftype_is_contig && nprocs_send) { @@ -918,7 +987,7 @@ static void ADIOI_W_Exchange_data(ADIO_File fd, void *buf, char *write_buf, n_buftypes++; \ } \ user_buf_idx = flat_buf->indices[flat_buf_idx] + \ - n_buftypes*buftype_extent; \ + (ADIO_Offset)n_buftypes*(ADIO_Offset)buftype_extent; \ flat_buf_sz = flat_buf->blocklens[flat_buf_idx]; \ } \ buf_incr -= size_in_buf; \ @@ -930,6 +999,8 @@ static void ADIOI_W_Exchange_data(ADIO_File fd, void *buf, char *write_buf, { \ while (size) { \ size_in_buf = ADIOI_MIN(size, flat_buf_sz); \ + ADIOI_Assert((((ADIO_Offset)(MPIR_Upint)buf) + user_buf_idx) == (ADIO_Offset)(MPIR_Upint)((MPIR_Upint)buf + user_buf_idx)); \ + ADIOI_Assert(size_in_buf == (size_t)size_in_buf); \ memcpy(&(send_buf[p][send_buf_idx[p]]), \ ((char *) buf) + user_buf_idx, size_in_buf); \ send_buf_idx[p] += size_in_buf; \ @@ -942,7 +1013,7 @@ static void ADIOI_W_Exchange_data(ADIO_File fd, void *buf, char *write_buf, n_buftypes++; \ } \ user_buf_idx = flat_buf->indices[flat_buf_idx] + \ - n_buftypes*buftype_extent; \ + (ADIO_Offset)n_buftypes*(ADIO_Offset)buftype_extent; \ flat_buf_sz = flat_buf->blocklens[flat_buf_idx]; \ } \ size -= size_in_buf; \ @@ -951,11 +1022,9 @@ static void ADIOI_W_Exchange_data(ADIO_File fd, void *buf, char *write_buf, ADIOI_BUF_INCR \ } - - static void ADIOI_Fill_send_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node *flat_buf, char **send_buf, ADIO_Offset - *offset_list, int *len_list, int *send_size, + *offset_list, ADIO_Offset *len_list, int *send_size, MPI_Request *requests, int *sent_to_proc, int nprocs, int myrank, int 
contig_access_count, @@ -967,8 +1036,9 @@ static void ADIOI_Fill_send_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node { /* this function is only called if buftype is not contig */ - int i, p, flat_buf_idx, size; - int flat_buf_sz, buf_incr, size_in_buf, jj, n_buftypes; + int i, p, flat_buf_idx; + ADIO_Offset flat_buf_sz, size_in_buf, buf_incr, size; + int jj, n_buftypes; ADIO_Offset off, len, rem_len, user_buf_idx; /* curr_to_proc[p] = amount of data sent to proc. p that has already @@ -995,7 +1065,7 @@ static void ADIOI_Fill_send_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node for (i=0; i done_to_proc[p]) { if (done_to_proc[p] > curr_to_proc[p]) { - size = (int)ADIOI_MIN(curr_to_proc[p] + len - + size = ADIOI_MIN(curr_to_proc[p] + len - done_to_proc[p], send_size[p]-send_buf_idx[p]); buf_incr = done_to_proc[p] - curr_to_proc[p]; ADIOI_BUF_INCR - buf_incr = (int)(curr_to_proc[p] + len - done_to_proc[p]); + ADIOI_Assert((curr_to_proc[p] + len - done_to_proc[p]) == (unsigned)(curr_to_proc[p] + len - done_to_proc[p])); + buf_incr = curr_to_proc[p] + len - done_to_proc[p]; + ADIOI_Assert((done_to_proc[p] + size) == (unsigned)(done_to_proc[p] + size)); curr_to_proc[p] = done_to_proc[p] + size; ADIOI_BUF_COPY } else { - size = (int)ADIOI_MIN(len,send_size[p]-send_buf_idx[p]); - buf_incr = (int)len; + size = ADIOI_MIN(len,send_size[p]-send_buf_idx[p]); + buf_incr = len; + ADIOI_Assert((curr_to_proc[p] + size) == (unsigned)((ADIO_Offset)curr_to_proc[p] + size)); curr_to_proc[p] += size; ADIOI_BUF_COPY } @@ -1036,13 +1109,14 @@ static void ADIOI_Fill_send_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node } } else { - curr_to_proc[p] += (int)len; - buf_incr = (int)len; + ADIOI_Assert((curr_to_proc[p] + len) == (unsigned)((ADIO_Offset)curr_to_proc[p] + len)); + curr_to_proc[p] += len; + buf_incr = len; ADIOI_BUF_INCR } } else { - buf_incr = (int)len; + buf_incr = len; ADIOI_BUF_INCR } off += len; @@ -1181,7 +1255,7 @@ static void ADIOI_W_Exchange_data_alltoallv( 
char *write_buf, /* 1 */ ADIOI_Flatlist_node *flat_buf, ADIO_Offset *offset_list, - int *len_list, int *send_size, int *recv_size, + ADIO_Offset *len_list, int *send_size, int *recv_size, ADIO_Offset off, int size, /* 2 */ int *count, int *start_pos, int *partial_recv, int *sent_to_proc, int nprocs, int myrank, @@ -1196,11 +1270,10 @@ static void ADIOI_W_Exchange_data_alltoallv( int iter, MPI_Aint buftype_extent, int *buf_idx, int *error_code) { - int i, j, k=0, tmp=0, nprocs_recv, nprocs_send, erri, *tmp_len, err; + int i, j, k=0, nprocs_recv, nprocs_send, *tmp_len, err; char **send_buf = NULL; - MPI_Request *requests, *send_req; - MPI_Datatype recv_type; - MPI_Status *statuses, status; + MPI_Request *send_req=NULL; + MPI_Status status; int rtail, stail; char *sbuf_ptr, *to_ptr; int len; @@ -1324,7 +1397,8 @@ static void ADIOI_W_Exchange_data_alltoallv( sbuf_ptr = all_recv_buf + rdispls[i]; for (j=0; j done_to_proc[p]) { if (done_to_proc[p] > curr_to_proc[p]) { - size = (int)ADIOI_MIN(curr_to_proc[p] + len - + size = ADIOI_MIN(curr_to_proc[p] + len - done_to_proc[p], send_size[p]-send_buf_idx[p]); buf_incr = done_to_proc[p] - curr_to_proc[p]; ADIOI_BUF_INCR - buf_incr = (int)(curr_to_proc[p] + len - done_to_proc[p]); + ADIOI_Assert((curr_to_proc[p] + len - done_to_proc[p]) == (unsigned)(curr_to_proc[p] + len - done_to_proc[p])); + buf_incr = curr_to_proc[p] + len - done_to_proc[p]; + ADIOI_Assert((done_to_proc[p] + size) == (unsigned)(done_to_proc[p] + size)); curr_to_proc[p] = done_to_proc[p] + size; ADIOI_BUF_COPY } else { - size = (int)ADIOI_MIN(len,send_size[p]-send_buf_idx[p]); - buf_incr = (int)len; + size = ADIOI_MIN(len,send_size[p]-send_buf_idx[p]); + buf_incr = len; + ADIOI_Assert((curr_to_proc[p] + size) == (unsigned)((ADIO_Offset)curr_to_proc[p] + size)); curr_to_proc[p] += size; ADIOI_BUF_COPY } @@ -1433,13 +1511,14 @@ static void ADIOI_Fill_send_buffer_nosend(ADIO_File fd, void *buf, ADIOI_Flatlis */ } else { + ADIOI_Assert((curr_to_proc[p] + len) == 
(unsigned)((ADIO_Offset)curr_to_proc[p] + len)); curr_to_proc[p] += (int)len; - buf_incr = (int)len; + buf_incr = len; ADIOI_BUF_INCR } } else { - buf_incr = (int)len; + buf_incr = len; ADIOI_BUF_INCR } off += len; diff --git a/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_write.c b/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_write.c index b482bb3ac3..6fcd569fef 100644 --- a/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_write.c +++ b/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_write.c @@ -17,13 +17,20 @@ #include "ad_bgl_tuning.h" +#ifdef AGGREGATION_PROFILE +#include "mpe.h" +#endif + void ADIOI_BGL_WriteContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int *error_code) { - int err=-1, datatype_size, len; + int err=-1, datatype_size; + ADIO_Offset len; static char myname[] = "ADIOI_BGL_WRITECONTIG"; - +#ifdef AGGREGATION_PROFILE + MPE_Log_event (5036, 0, NULL); +#endif #if BGL_PROFILE /* timing */ double io_time, io_time2; @@ -35,7 +42,8 @@ void ADIOI_BGL_WriteContig(ADIO_File fd, void *buf, int count, #endif MPI_Type_size(datatype, &datatype_size); - len = datatype_size * count; + len = (ADIO_Offset)datatype_size * (ADIO_Offset)count; + ADIOI_Assert(len == (unsigned int) len); /* write takes an unsigned int parm */ #if BGL_PROFILE @@ -46,7 +54,7 @@ void ADIOI_BGL_WriteContig(ADIO_File fd, void *buf, int count, if (bglmpio_timing2) bglmpio_prof_cw[ BGLMPIO_CIO_T_SEEK ] += (MPI_Wtime() - io_time2); ADIOI_WRITE_LOCK(fd, offset, SEEK_SET, len); if (bglmpio_timing2) io_time2 = MPI_Wtime(); - err = write(fd->fd_sys, buf, len); + err = write(fd->fd_sys, buf, (unsigned int)len); if (bglmpio_timing2) bglmpio_prof_cw[ BGLMPIO_CIO_T_POSI_RW ] += (MPI_Wtime() - io_time2); ADIOI_UNLOCK(fd, offset, SEEK_SET, len); fd->fp_sys_posn = offset + err; @@ -60,7 +68,7 @@ void ADIOI_BGL_WriteContig(ADIO_File fd, void *buf, int count, if (bglmpio_timing2) bglmpio_prof_cw[ BGLMPIO_CIO_T_SEEK ] += (MPI_Wtime() - 
io_time2); ADIOI_WRITE_LOCK(fd, offset, SEEK_SET, len); if (bglmpio_timing2) io_time2 = MPI_Wtime(); - err = write(fd->fd_sys, buf, len); + err = write(fd->fd_sys, buf, (unsigned int)len); if (bglmpio_timing2) bglmpio_prof_cw[ BGLMPIO_CIO_T_POSI_RW ] += (MPI_Wtime() - io_time2); ADIOI_UNLOCK(fd, offset, SEEK_SET, len); fd->fp_ind += err; @@ -73,7 +81,7 @@ void ADIOI_BGL_WriteContig(ADIO_File fd, void *buf, int count, if (fd->fp_sys_posn != offset) lseek(fd->fd_sys, offset, SEEK_SET); ADIOI_WRITE_LOCK(fd, offset, SEEK_SET, len); - err = write(fd->fd_sys, buf, len); + err = write(fd->fd_sys, buf, (unsigned int)len); ADIOI_UNLOCK(fd, offset, SEEK_SET, len); fd->fp_sys_posn = offset + err; /* individual file pointer not updated */ @@ -83,7 +91,7 @@ void ADIOI_BGL_WriteContig(ADIO_File fd, void *buf, int count, if (fd->fp_sys_posn != fd->fp_ind) lseek(fd->fd_sys, fd->fp_ind, SEEK_SET); ADIOI_WRITE_LOCK(fd, offset, SEEK_SET, len); - err = write(fd->fd_sys, buf, len); + err = write(fd->fd_sys, buf, (unsigned int)len); ADIOI_UNLOCK(fd, offset, SEEK_SET, len); fd->fp_ind += err; fd->fp_sys_posn = fd->fp_ind; @@ -110,11 +118,12 @@ void ADIOI_BGL_WriteContig(ADIO_File fd, void *buf, int count, #endif *error_code = MPI_SUCCESS; +#ifdef AGGREGATION_PROFILE + MPE_Log_event (5037, 0, NULL); +#endif } - - #define ADIOI_BUFFERED_WRITE \ { \ if (req_off >= writebuf_off + writebuf_len) { \ @@ -123,7 +132,7 @@ void ADIOI_BGL_WriteContig(ADIO_File fd, void *buf, int count, if (!(fd->atomicity)) ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \ if (err == -1) err_flag = 1; \ writebuf_off = req_off; \ - writebuf_len = (int) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\ + writebuf_len = (unsigned) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\ if (!(fd->atomicity)) ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \ lseek(fd->fd_sys, writebuf_off, SEEK_SET); \ err = read(fd->fd_sys, writebuf, writebuf_len); \ @@ -135,7 +144,8 @@ void 
ADIOI_BGL_WriteContig(ADIO_File fd, void *buf, int count, return; \ } \ } \ - write_sz = (int) (ADIOI_MIN(req_len, writebuf_off + writebuf_len - req_off)); \ + write_sz = (unsigned) (ADIOI_MIN(req_len, writebuf_off + writebuf_len - req_off)); \ + ADIOI_Assert((ADIO_Offset)write_sz == ADIOI_MIN(req_len, writebuf_off + writebuf_len - req_off));\ memcpy(writebuf+req_off-writebuf_off, (char *)buf +userbuf_off, write_sz);\ while (write_sz != req_len) { \ lseek(fd->fd_sys, writebuf_off, SEEK_SET); \ @@ -145,7 +155,7 @@ void ADIOI_BGL_WriteContig(ADIO_File fd, void *buf, int count, req_len -= write_sz; \ userbuf_off += write_sz; \ writebuf_off += writebuf_len; \ - writebuf_len = (int) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\ + writebuf_len = (unsigned) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\ if (!(fd->atomicity)) ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \ lseek(fd->fd_sys, writebuf_off, SEEK_SET); \ err = read(fd->fd_sys, writebuf, writebuf_len); \ @@ -173,9 +183,10 @@ void ADIOI_BGL_WriteContig(ADIO_File fd, void *buf, int count, if (!(fd->atomicity)) ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \ if (err == -1) err_flag = 1; \ writebuf_off = req_off; \ - writebuf_len = (int) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\ + writebuf_len = (unsigned) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\ } \ - write_sz = (int) (ADIOI_MIN(req_len, writebuf_off + writebuf_len - req_off)); \ + write_sz = (unsigned) (ADIOI_MIN(req_len, writebuf_off + writebuf_len - req_off)); \ + ADIOI_Assert((ADIO_Offset)write_sz == ADIOI_MIN(req_len, writebuf_off + writebuf_len - req_off));\ memcpy(writebuf+req_off-writebuf_off, (char *)buf +userbuf_off, write_sz);\ while (write_sz != req_len) { \ lseek(fd->fd_sys, writebuf_off, SEEK_SET); \ @@ -186,7 +197,7 @@ void ADIOI_BGL_WriteContig(ADIO_File fd, void *buf, int count, req_len -= write_sz; \ userbuf_off += write_sz; \ writebuf_off += writebuf_len; \ - writebuf_len = (int) 
(ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\ + writebuf_len = (unsigned) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\ write_sz = ADIOI_MIN(req_len, writebuf_len); \ memcpy(writebuf, (char *)buf + userbuf_off, write_sz);\ } \ @@ -201,19 +212,23 @@ void ADIOI_BGL_WriteStrided(ADIO_File fd, void *buf, int count, { /* offset is in units of etype relative to the filetype. */ + + ADIOI_Flatlist_node *flat_buf, *flat_file; - int i, j, k, err=-1, bwr_size, fwr_size=0, st_index=0; - int bufsize, num, size, sum, n_etypes_in_filetype, size_in_filetype; - int n_filetypes, etype_in_filetype; + ADIO_Offset i_offset, sum, size_in_filetype; + int i, j, k, err=-1, st_index=0; + int n_etypes_in_filetype; + ADIO_Offset num, size, n_filetypes, etype_in_filetype, st_n_filetypes; ADIO_Offset abs_off_in_filetype=0; - int filetype_size, etype_size, buftype_size, req_len; + int filetype_size, etype_size, buftype_size; MPI_Aint filetype_extent, buftype_extent; int buf_count, buftype_is_contig, filetype_is_contig; ADIO_Offset userbuf_off; ADIO_Offset off, req_off, disp, end_offset=0, writebuf_off, start_off; char *writebuf, *value; - int flag, st_fwr_size, st_n_filetypes, writebuf_len, write_sz; - int new_bwr_size, new_fwr_size, err_flag=0, info_flag, max_bufsize; + unsigned bufsize, writebuf_len, max_bufsize, write_sz; + int err_flag=0, info_flag; + ADIO_Offset new_bwr_size, new_fwr_size, st_fwr_size, fwr_size=0, bwr_size, req_len; static char myname[] = "ADIOI_BGL_WRITESTRIDED"; if (fd->hints->ds_write == ADIOI_HINT_DISABLE) { @@ -247,12 +262,13 @@ void ADIOI_BGL_WriteStrided(ADIO_File fd, void *buf, int count, MPI_Type_extent(datatype, &buftype_extent); etype_size = fd->etype_size; + ADIOI_Assert((buftype_size * count) == ((ADIO_Offset)(unsigned)buftype_size * (ADIO_Offset)count)); bufsize = buftype_size * count; /* get max_bufsize from the info object. 
*/ value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char)); - MPI_Info_get(fd->info, "ind_wr_buffer_size", MPI_MAX_INFO_VAL, value, + ADIOI_Info_get(fd->info, "ind_wr_buffer_size", MPI_MAX_INFO_VAL, value, &info_flag); max_bufsize = atoi(value); ADIOI_Free(value); @@ -272,20 +288,23 @@ void ADIOI_BGL_WriteStrided(ADIO_File fd, void *buf, int count, end_offset = off + bufsize - 1; writebuf_off = off; writebuf = (char *) ADIOI_Malloc(max_bufsize); - writebuf_len = (int) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1)); + writebuf_len = (unsigned) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1)); /* if atomicity is true, lock the region to be accessed */ if (fd->atomicity) ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1); for (j=0; jcount; i++) { - userbuf_off = j*buftype_extent + flat_buf->indices[i]; + userbuf_off = (ADIO_Offset)j*(ADIO_Offset)buftype_extent + flat_buf->indices[i]; req_off = off; req_len = flat_buf->blocklens[i]; ADIOI_BUFFERED_WRITE_WITHOUT_READ off += flat_buf->blocklens[i]; } + } /* write the buffer out finally */ lseek(fd->fd_sys, writebuf_off, SEEK_SET); @@ -317,29 +336,37 @@ void ADIOI_BGL_WriteStrided(ADIO_File fd, void *buf, int count, disp = fd->disp; if (file_ptr_type == ADIO_INDIVIDUAL) { - offset = fd->fp_ind; /* in bytes */ - n_filetypes = -1; - flag = 0; - while (!flag) { - n_filetypes++; - for (i=0; icount; i++) { - if (disp + flat_file->indices[i] + - (ADIO_Offset) n_filetypes*filetype_extent + flat_file->blocklens[i] - >= offset) { - st_index = i; - fwr_size = (int) (disp + flat_file->indices[i] + - (ADIO_Offset) n_filetypes*filetype_extent - + flat_file->blocklens[i] - offset); - flag = 1; - break; - } - } - } + /* Wei-keng reworked type processing to be a bit more efficient */ + offset = fd->fp_ind - disp; + n_filetypes = (offset - flat_file->indices[0]) / filetype_extent; + offset -= (ADIO_Offset)n_filetypes * filetype_extent; + /* now offset is local to this extent */ + + /* find the block where 
offset is located, skip blocklens[i]==0 */ + for (i=0; icount; i++) { + ADIO_Offset dist; + if (flat_file->blocklens[i] == 0) continue; + dist = flat_file->indices[i] + flat_file->blocklens[i] - offset; + /* fwr_size is from offset to the end of block i */ + if (dist == 0) { + i++; + offset = flat_file->indices[i]; + fwr_size = flat_file->blocklens[i]; + break; + } + if (dist > 0) { + fwr_size = dist; + break; + } + } + st_index = i; /* starting index in flat_file->indices[] */ + offset += disp + (ADIO_Offset)n_filetypes*filetype_extent; } else { + int i; n_etypes_in_filetype = filetype_size/etype_size; - n_filetypes = (int) (offset / n_etypes_in_filetype); - etype_in_filetype = (int) (offset % n_etypes_in_filetype); + n_filetypes = offset / n_etypes_in_filetype; + etype_in_filetype = offset % n_etypes_in_filetype; size_in_filetype = etype_in_filetype * etype_size; sum = 0; @@ -355,32 +382,64 @@ void ADIOI_BGL_WriteStrided(ADIO_File fd, void *buf, int count, } /* abs. offset in bytes in the file */ - offset = disp + (ADIO_Offset) n_filetypes*filetype_extent + abs_off_in_filetype; + offset = disp + (ADIO_Offset) n_filetypes*filetype_extent + + abs_off_in_filetype; } start_off = offset; + /* Wei-keng Liao:write request is within single flat_file contig block*/ + /* this could happen, for example, with subarray types that are + * actually fairly contiguous */ + if (buftype_is_contig && bufsize <= fwr_size) { + ADIO_WriteContig(fd, buf, bufsize, MPI_BYTE, ADIO_EXPLICIT_OFFSET, + offset, status, error_code); + + if (file_ptr_type == ADIO_INDIVIDUAL) { + /* update MPI-IO file pointer to point to the first byte + * that can be accessed in the fileview. 
*/ + fd->fp_ind = offset + bufsize; + if (bufsize == fwr_size) { + do { + st_index++; + if (st_index == flat_file->count) { + st_index = 0; + n_filetypes++; + } + } while (flat_file->blocklens[st_index] == 0); + fd->fp_ind = disp + flat_file->indices[st_index] + + (ADIO_Offset)n_filetypes*filetype_extent; + } + } + fd->fp_sys_posn = -1; /* set it to null. */ +#ifdef HAVE_STATUS_SET_BYTES + MPIR_Status_set_bytes(status, datatype, bufsize); +#endif + return; + } /* Calculate end_offset, the last byte-offset that will be accessed. e.g., if start_offset=0 and 100 bytes to be write, end_offset=99*/ st_fwr_size = fwr_size; st_n_filetypes = n_filetypes; - i = 0; + i_offset = 0; j = st_index; off = offset; fwr_size = ADIOI_MIN(st_fwr_size, bufsize); - while (i < bufsize) { - i += fwr_size; + while (i_offset < bufsize) { + i_offset += fwr_size; end_offset = off + fwr_size - 1; - if (j < (flat_file->count - 1)) j++; - else { - j = 0; - n_filetypes++; - } + j = (j+1) % flat_file->count; + n_filetypes += (j == 0) ? 1 : 0; + while (flat_file->blocklens[j]==0) { + j = (j+1) % flat_file->count; + n_filetypes += (j == 0) ? 
1 : 0; + } - off = disp + flat_file->indices[j] + (ADIO_Offset) n_filetypes*filetype_extent; - fwr_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i); + off = disp + flat_file->indices[j] + + n_filetypes*(ADIO_Offset)filetype_extent; + fwr_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i_offset); } /* if atomicity is true, lock the region to be accessed */ @@ -390,7 +449,7 @@ void ADIOI_BGL_WriteStrided(ADIO_File fd, void *buf, int count, /* initial read for the read-modify-write */ writebuf_off = offset; writebuf = (char *) ADIOI_Malloc(max_bufsize); - writebuf_len = (int)(ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1)); + writebuf_len = (unsigned)(ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1)); if (!(fd->atomicity)) ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len); lseek(fd->fd_sys, writebuf_off, SEEK_SET); err = read(fd->fd_sys, writebuf, writebuf_len); @@ -408,39 +467,41 @@ void ADIOI_BGL_WriteStrided(ADIO_File fd, void *buf, int count, /* contiguous in memory, noncontiguous in file. should be the most common case. */ - i = 0; + i_offset = 0; j = st_index; off = offset; n_filetypes = st_n_filetypes; fwr_size = ADIOI_MIN(st_fwr_size, bufsize); - while (i < bufsize) { + while (i_offset < bufsize) { if (fwr_size) { /* TYPE_UB and TYPE_LB can result in fwr_size = 0. save system call in such cases */ /* lseek(fd->fd_sys, off, SEEK_SET); - err = write(fd->fd_sys, ((char *) buf) + i, fwr_size);*/ + err = write(fd->fd_sys, ((char *) buf) + i_offset, fwr_size);*/ req_off = off; req_len = fwr_size; - userbuf_off = i; + userbuf_off = i_offset; ADIOI_BUFFERED_WRITE } - i += fwr_size; + i_offset += fwr_size; if (off + fwr_size < disp + flat_file->indices[j] + - flat_file->blocklens[j] + (ADIO_Offset) n_filetypes*filetype_extent) + flat_file->blocklens[j] + n_filetypes*(ADIO_Offset)filetype_extent) off += fwr_size; /* did not reach end of contiguous block in filetype. no more I/O needed. off is incremented by fwr_size. 
*/ else { - if (j < (flat_file->count - 1)) j++; - else { - j = 0; - n_filetypes++; - } + j = (j+1) % flat_file->count; + n_filetypes += (j == 0) ? 1 : 0; + while (flat_file->blocklens[j]==0) { + j = (j+1) % flat_file->count; + n_filetypes += (j == 0) ? 1 : 0; + } off = disp + flat_file->indices[j] + - (ADIO_Offset) n_filetypes*filetype_extent; - fwr_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i); + n_filetypes*(ADIO_Offset)filetype_extent; + fwr_size = ADIOI_MIN(flat_file->blocklens[j], + bufsize-i_offset); } } } @@ -452,7 +513,7 @@ void ADIOI_BGL_WriteStrided(ADIO_File fd, void *buf, int count, while (flat_buf->type != datatype) flat_buf = flat_buf->next; k = num = buf_count = 0; - i = (int) (flat_buf->indices[0]); + i_offset = flat_buf->indices[0]; j = st_index; off = offset; n_filetypes = st_n_filetypes; @@ -463,11 +524,11 @@ void ADIOI_BGL_WriteStrided(ADIO_File fd, void *buf, int count, size = ADIOI_MIN(fwr_size, bwr_size); if (size) { /* lseek(fd->fd_sys, off, SEEK_SET); - err = write(fd->fd_sys, ((char *) buf) + i, size); */ + err = write(fd->fd_sys, ((char *) buf) + i_offset, size); */ req_off = off; req_len = size; - userbuf_off = i; + userbuf_off = i_offset; ADIOI_BUFFERED_WRITE } @@ -476,18 +537,19 @@ void ADIOI_BGL_WriteStrided(ADIO_File fd, void *buf, int count, if (size == fwr_size) { /* reached end of contiguous block in file */ - if (j < (flat_file->count - 1)) j++; - else { - j = 0; - n_filetypes++; + j = (j+1) % flat_file->count; + n_filetypes += (j == 0) ? 1 : 0; + while (flat_file->blocklens[j]==0) { + j = (j+1) % flat_file->count; + n_filetypes += (j == 0) ? 
1 : 0; } off = disp + flat_file->indices[j] + - (ADIO_Offset) n_filetypes*filetype_extent; + n_filetypes*(ADIO_Offset)filetype_extent; new_fwr_size = flat_file->blocklens[j]; if (size != bwr_size) { - i += size; + i_offset += size; new_bwr_size -= size; } } @@ -497,8 +559,8 @@ void ADIOI_BGL_WriteStrided(ADIO_File fd, void *buf, int count, k = (k + 1)%flat_buf->count; buf_count++; - i = (int) (buftype_extent*(buf_count/flat_buf->count) + - flat_buf->indices[k]); + i_offset = (ADIO_Offset)buftype_extent*(ADIO_Offset)(buf_count/flat_buf->count) + + flat_buf->indices[k]; new_bwr_size = flat_buf->blocklens[k]; if (size != fwr_size) { off += size; diff --git a/ompi/mca/io/romio/romio/adio/ad_bglockless/.state-cache b/ompi/mca/io/romio/romio/adio/ad_bglockless/.state-cache deleted file mode 100644 index 7b79bb6e95..0000000000 --- a/ompi/mca/io/romio/romio/adio/ad_bglockless/.state-cache +++ /dev/null @@ -1,7 +0,0 @@ - - - - - - - diff --git a/ompi/mca/io/romio/romio/adio/ad_bglockless/Makefile.am b/ompi/mca/io/romio/romio/adio/ad_bglockless/Makefile.am index 991f779520..ec0f951f41 100644 --- a/ompi/mca/io/romio/romio/adio/ad_bglockless/Makefile.am +++ b/ompi/mca/io/romio/romio/adio/ad_bglockless/Makefile.am @@ -21,4 +21,6 @@ include $(top_srcdir)/Makefile.options noinst_LTLIBRARIES = libadio_bglockless.la libadio_bglockless_la_SOURCES = \ - ad_bglockless.c + ad_bglockless.c \ + ad_bglockless.h \ + ad_bglockless_features.c diff --git a/ompi/mca/io/romio/romio/adio/ad_bglockless/ad_bglockless.c b/ompi/mca/io/romio/romio/adio/ad_bglockless/ad_bglockless.c index 725590d1b9..759616523c 100644 --- a/ompi/mca/io/romio/romio/adio/ad_bglockless/ad_bglockless.c +++ b/ompi/mca/io/romio/romio/adio/ad_bglockless/ad_bglockless.c @@ -6,12 +6,14 @@ */ #include "../ad_bgl/ad_bgl.h" +#include "ad_bglockless.h" /* adioi.h has the ADIOI_Fns_struct define */ #include "adioi.h" struct ADIOI_Fns_struct ADIO_BGLOCKLESS_operations = { ADIOI_BGL_Open, /* Open */ + ADIOI_GEN_OpenColl, /* 
Collective open */ ADIOI_GEN_ReadContig, /* ReadContig */ ADIOI_GEN_WriteContig, /* WriteContig */ ADIOI_BGL_ReadStridedColl, /* ReadStridedColl */ @@ -35,7 +37,8 @@ struct ADIOI_Fns_struct ADIO_BGLOCKLESS_operations = { ADIOI_GEN_IOComplete, /* WriteComplete */ ADIOI_GEN_IreadStrided, /* IreadStrided */ ADIOI_GEN_IwriteStrided, /* IwriteStrided */ - ADIOI_GEN_Flush, /* Flush */ + ADIOI_BGL_Flush, /* Flush */ ADIOI_GEN_Resize, /* Resize */ ADIOI_GEN_Delete, /* Delete */ + ADIOI_BGLOCKLESS_Feature /* Features */ }; diff --git a/ompi/mca/io/romio/romio/adio/ad_bglockless/ad_bglockless.h b/ompi/mca/io/romio/romio/adio/ad_bglockless/ad_bglockless.h new file mode 100644 index 0000000000..5b6a1fc40f --- /dev/null +++ b/ompi/mca/io/romio/romio/adio/ad_bglockless/ad_bglockless.h @@ -0,0 +1,14 @@ +/* -*- Mode: C; c-basic-offset:4 ; -*- */ +/* + * + * Copyright (C) 2008 Uchicago Argonne LLC + * See COPYRIGHT notice in top-level directory. + */ + +#ifndef AD_BGLOCKLESS_INCLUDE +#define AD_PVFS2_INCLUDE + +int ADIOI_BGLOCKLESS_Feature(ADIO_File fd, int flag); + +#endif + diff --git a/ompi/mca/io/romio/romio/adio/ad_bglockless/ad_bglockless_features.c b/ompi/mca/io/romio/romio/adio/ad_bglockless/ad_bglockless_features.c new file mode 100644 index 0000000000..4153c5e409 --- /dev/null +++ b/ompi/mca/io/romio/romio/adio/ad_bglockless/ad_bglockless_features.c @@ -0,0 +1,15 @@ +#include "adio.h" + +int ADIOI_BGLOCKLESS_Feature(ADIO_File fd, int flag) +{ + switch(flag) { + case ADIO_SCALABLE_OPEN: + return 1; + case ADIO_SHARED_FP: + case ADIO_LOCKS: + case ADIO_SEQUENTIAL: + case ADIO_DATA_SIEVING_WRITES: + default: + return 0; + } +} diff --git a/ompi/mca/io/romio/romio/adio/ad_gridftp/Makefile.am b/ompi/mca/io/romio/romio/adio/ad_gridftp/Makefile.am index f2905f0894..9e0dd5dff9 100644 --- a/ompi/mca/io/romio/romio/adio/ad_gridftp/Makefile.am +++ b/ompi/mca/io/romio/romio/adio/ad_gridftp/Makefile.am @@ -25,6 +25,7 @@ libadio_gridftp_la_SOURCES = \ ad_gridftp_close.c \ 
ad_gridftp_delete.c \ ad_gridftp_fcntl.c \ + ad_gridftp_features.c \ ad_gridftp_flush.c \ ad_gridftp_hints.c \ ad_gridftp_open.c \ diff --git a/ompi/mca/io/romio/romio/adio/ad_gridftp/ad_gridftp.c b/ompi/mca/io/romio/romio/adio/ad_gridftp/ad_gridftp.c index 734f93628f..fe429c9e4b 100644 --- a/ompi/mca/io/romio/romio/adio/ad_gridftp/ad_gridftp.c +++ b/ompi/mca/io/romio/romio/adio/ad_gridftp/ad_gridftp.c @@ -33,4 +33,5 @@ struct ADIOI_Fns_struct ADIO_GRIDFTP_operations = { ADIOI_GRIDFTP_Flush, /* Flush */ ADIOI_GRIDFTP_Resize, /* Resize */ ADIOI_GRIDFTP_Delete, /* Delete */ + ADIOI_GRIDFTP_Feature, /* Features */ }; diff --git a/ompi/mca/io/romio/romio/adio/ad_gridftp/ad_gridftp_features.c b/ompi/mca/io/romio/romio/adio/ad_gridftp/ad_gridftp_features.c new file mode 100644 index 0000000000..6eea52734b --- /dev/null +++ b/ompi/mca/io/romio/romio/adio/ad_gridftp/ad_gridftp_features.c @@ -0,0 +1,12 @@ +int ADIOI_GRIDFTP_Feature (ADIO_File fd, int flag) +{ + switch(flag) { + case ADIO_SCALABLE_OPEN: + case ADIO_SHARED_FP: + case ADIO_LOCKS: + case ADIO_SEQUENTIAL: + case ADIO_DATA_SIEVING_WRITES: + default: + return 0; + } +} diff --git a/ompi/mca/io/romio/romio/adio/ad_gridftp/ad_gridftp_hints.c b/ompi/mca/io/romio/romio/adio/ad_gridftp/ad_gridftp_hints.c index ca96e6f26d..124bef6174 100644 --- a/ompi/mca/io/romio/romio/adio/ad_gridftp/ad_gridftp_hints.c +++ b/ompi/mca/io/romio/romio/adio/ad_gridftp/ad_gridftp_hints.c @@ -56,8 +56,8 @@ void ADIOI_GRIDFTP_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) MPI_Info_get_valuelen(users_info,key,&valuelen,&flag); if (flag) { - MPI_Info_get(users_info,key,valuelen,value,&flag); - if (flag) MPI_Info_set(fd->info,key,value); + ADIOI_Info_get(users_info,key,valuelen,value,&flag); + if (flag) ADIOI_Info_set(fd->info,key,value); } } } diff --git a/ompi/mca/io/romio/romio/adio/ad_gridftp/ad_gridftp_open.c b/ompi/mca/io/romio/romio/adio/ad_gridftp/ad_gridftp_open.c index 26a7afd7cc..d7b175ed86 100644 --- 
a/ompi/mca/io/romio/romio/adio/ad_gridftp/ad_gridftp_open.c +++ b/ompi/mca/io/romio/romio/adio/ad_gridftp/ad_gridftp_open.c @@ -136,7 +136,7 @@ void ADIOI_GRIDFTP_Open(ADIO_File fd, int *error_code) oattr[] (eg. parallelism, striping, etc.) goes here */ if ( fd->info!=MPI_INFO_NULL ) { - MPI_Info_get(fd->info,"ftp_control_mode",MPI_MAX_INFO_VAL,hintval,&keyfound); + ADIOI_Info_get(fd->info,"ftp_control_mode",MPI_MAX_INFO_VAL,hintval,&keyfound); if ( keyfound ) { if ( ( !strcmp(hintval,"extended") || !strcmp(hintval,"extended_block") ) && @@ -153,7 +153,7 @@ void ADIOI_GRIDFTP_Open(ADIO_File fd, int *error_code) globus_err_handler("globus_ftp_client_operationattr_set_mode",myname,result); } - MPI_Info_get(fd->info,"parallelism",MPI_MAX_INFO_VAL,hintval,&keyfound); + ADIOI_Info_get(fd->info,"parallelism",MPI_MAX_INFO_VAL,hintval,&keyfound); if ( keyfound ) { int nftpthreads; @@ -170,14 +170,14 @@ void ADIOI_GRIDFTP_Open(ADIO_File fd, int *error_code) } } - MPI_Info_get(fd->info,"striped_ftp",MPI_MAX_INFO_VAL,hintval,&keyfound); + ADIOI_Info_get(fd->info,"striped_ftp",MPI_MAX_INFO_VAL,hintval,&keyfound); if ( keyfound ) { /* if set to "true" or "enable", set up round-robin block layout */ if ( !strncmp("true",hintval,4) || !strncmp("TRUE",hintval,4) || !strncmp("enable",hintval,4) || !strncmp("ENABLE",hintval,4) ) { - MPI_Info_get(fd->info,"striping_factor",MPI_MAX_INFO_VAL,hintval,&keyfound); + ADIOI_Info_get(fd->info,"striping_factor",MPI_MAX_INFO_VAL,hintval,&keyfound); if ( keyfound ) { int striping_factor; @@ -197,7 +197,7 @@ void ADIOI_GRIDFTP_Open(ADIO_File fd, int *error_code) } } - MPI_Info_get(fd->info,"tcp_buffer",MPI_MAX_INFO_VAL,hintval,&keyfound); + ADIOI_Info_get(fd->info,"tcp_buffer",MPI_MAX_INFO_VAL,hintval,&keyfound); if ( keyfound ) { /* set tcp buffer size */ @@ -214,7 +214,7 @@ void ADIOI_GRIDFTP_Open(ADIO_File fd, int *error_code) } } - MPI_Info_get(fd->info,"transfer_type",MPI_MAX_INFO_VAL,hintval,&keyfound); + 
ADIOI_Info_get(fd->info,"transfer_type",MPI_MAX_INFO_VAL,hintval,&keyfound); if ( keyfound ) { globus_ftp_control_type_t filetype; @@ -340,84 +340,4 @@ void ADIOI_GRIDFTP_Open(ADIO_File fd, int *error_code) } } num_gridftp_handles++; - -#if 0 - /* Debugging info for testing PASV mode behind firewalls */ - if ( myrank==0 ) - { - globus_bool_t striped; - globus_ftp_control_mode_t mode; - globus_ftp_control_type_t filetype; - globus_ftp_control_parallelism_t parallelism; - - FPRINTF(stderr,"--gridftp details for %s--\n", - fd->filename); - - /* - FPRINTF(stderr,"Connection caching: "); - globus_ftp_client_handleattr_get_cache_all(&hattr,&cached); - if ( cached==GLOBUS_TRUE ) - FPRINTF(stderr,"Y\n"); - else - FPRINTF(stderr,"N\n"); - */ - - FPRINTF(stderr,"Control mode: "); - globus_ftp_client_operationattr_get_mode(&(oattr[fd->fd_sys]),&mode); - if ( mode==GLOBUS_FTP_CONTROL_MODE_BLOCK ) - FPRINTF(stderr,"block\n"); - else if ( mode==GLOBUS_FTP_CONTROL_MODE_COMPRESSED ) - FPRINTF(stderr,"compressed\n"); - else if ( mode==GLOBUS_FTP_CONTROL_MODE_EXTENDED_BLOCK ) - FPRINTF(stderr,"extended block\n"); - else if ( mode==GLOBUS_FTP_CONTROL_MODE_STREAM ) - FPRINTF(stderr,"stream\n"); - else - FPRINTF(stderr,"unknown\n"); - - FPRINTF(stderr,"File type: "); - globus_ftp_client_operationattr_get_type(&(oattr[fd->fd_sys]),&filetype); - if ( filetype==GLOBUS_FTP_CONTROL_TYPE_ASCII ) - FPRINTF(stderr,"ASCII\n"); - else if ( filetype==GLOBUS_FTP_CONTROL_TYPE_IMAGE ) - FPRINTF(stderr,"binary\n"); - else if ( filetype==GLOBUS_FTP_CONTROL_TYPE_EBCDIC ) - FPRINTF(stderr,"EBCDIC\n"); - else - FPRINTF(stderr,"unknown\n"); - - FPRINTF(stderr,"Parallelism: "); - globus_ftp_client_operationattr_get_parallelism(&(oattr[fd->fd_sys]),¶llelism); - if ( parallelism.mode==GLOBUS_FTP_CONTROL_PARALLELISM_NONE ) - FPRINTF(stderr,"none\n"); - else if ( parallelism.mode==GLOBUS_FTP_CONTROL_PARALLELISM_FIXED ) - FPRINTF(stderr,"fixed with %d streams\n",parallelism.fixed.size); - else - 
FPRINTF(stderr,"unknown\n"); - - FPRINTF(stderr,"Striping: "); - globus_ftp_client_operationattr_get_striped(&(oattr[fd->fd_sys]),&striped); - if ( striped==GLOBUS_TRUE ) - { - globus_ftp_control_layout_t layout; - - FPRINTF(stderr,"Y\nLayout: "); - globus_ftp_client_operationattr_get_layout(&(oattr[fd->fd_sys]), - &layout); - if ( layout.mode==GLOBUS_FTP_CONTROL_STRIPING_NONE ) - FPRINTF(stderr,"none\n"); - else if ( layout.mode==GLOBUS_FTP_CONTROL_STRIPING_PARTITIONED ) - FPRINTF(stderr,"partitioned, size=%d\n",layout.partitioned.size); - else if ( layout.mode==GLOBUS_FTP_CONTROL_STRIPING_BLOCKED_ROUND_ROBIN ) - FPRINTF(stderr,"round-robin, block size=%d\n",layout.round_robin.block_size); - else - FPRINTF(stderr,"unknown\n"); - } - else - FPRINTF(stderr,"N\n"); - - fflush(stderr); - } -#endif - } diff --git a/ompi/mca/io/romio/romio/adio/ad_gridftp/ad_gridftp_read.c b/ompi/mca/io/romio/romio/adio/ad_gridftp/ad_gridftp_read.c index 31a93beaa4..a6af99d50e 100644 --- a/ompi/mca/io/romio/romio/adio/ad_gridftp/ad_gridftp_read.c +++ b/ompi/mca/io/romio/romio/adio/ad_gridftp/ad_gridftp_read.c @@ -50,10 +50,6 @@ static void readcontig_data_cb(void *myargs, globus_ftp_client_handle_t *handle, readcontig_data_cb: buffer 0x404c0008 length 65536 offset 32112640 eof 0 readcontig_data_cb: buffer 0x404d0008 length 65536 offset 32178176 eof 0 */ -#if 0 - FPRINTF(stderr, "%s: buffer %p length %d offset %Ld eof %d\n", - __func__, buffer, length, offset, eof); -#endif if ( !eof ) globus_ftp_client_register_read(handle, buffer+length, diff --git a/ompi/mca/io/romio/romio/adio/ad_gridftp/ad_gridftp_write.c b/ompi/mca/io/romio/romio/adio/ad_gridftp/ad_gridftp_write.c index 1dd25c7f26..869ecef8d6 100644 --- a/ompi/mca/io/romio/romio/adio/ad_gridftp/ad_gridftp_write.c +++ b/ompi/mca/io/romio/romio/adio/ad_gridftp/ad_gridftp_write.c @@ -364,10 +364,6 @@ void ADIOI_GRIDFTP_WriteDiscontig(ADIO_File fd, void *buf, int count, { fd->fp_ind += extent; fd->fp_sys_posn = fd->fp_ind; -#if 0 - 
FPRINTF(stdout, "[%d/%d] new file position is %Ld\n", myrank, - nprocs, (long long) fd->fp_ind); -#endif } else { fd->fp_sys_posn = offset + extent; diff --git a/ompi/mca/io/romio/romio/adio/ad_hfs/ad_hfs_fcntl.c b/ompi/mca/io/romio/romio/adio/ad_hfs/ad_hfs_fcntl.c index db42414c64..b17227fd41 100644 --- a/ompi/mca/io/romio/romio/adio/ad_hfs/ad_hfs_fcntl.c +++ b/ompi/mca/io/romio/romio/adio/ad_hfs/ad_hfs_fcntl.c @@ -8,6 +8,9 @@ #include "ad_hfs.h" #include "adio_extern.h" +#ifndef HAVE_LSEEK64 +#define lseek64 lseek +#endif void ADIOI_HFS_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int *error_code) { int i, ntimes, err; diff --git a/ompi/mca/io/romio/romio/adio/ad_hfs/ad_hfs_open.c b/ompi/mca/io/romio/romio/adio/ad_hfs/ad_hfs_open.c index 1c04c7af09..8e56a4571e 100644 --- a/ompi/mca/io/romio/romio/adio/ad_hfs/ad_hfs_open.c +++ b/ompi/mca/io/romio/romio/adio/ad_hfs/ad_hfs_open.c @@ -7,6 +7,10 @@ #include "ad_hfs.h" +#ifndef HAVE_LSEEK64 +#define lseek64 lseek +#endif + void ADIOI_HFS_Open(ADIO_File fd, int *error_code) { int perm, old_mask, amode; diff --git a/ompi/mca/io/romio/romio/adio/ad_hfs/ad_hfs_read.c b/ompi/mca/io/romio/romio/adio/ad_hfs/ad_hfs_read.c index b28e3f6ee9..06e7fec896 100644 --- a/ompi/mca/io/romio/romio/adio/ad_hfs/ad_hfs_read.c +++ b/ompi/mca/io/romio/romio/adio/ad_hfs/ad_hfs_read.c @@ -7,6 +7,10 @@ #include "ad_hfs.h" +#ifndef HAVE_LSEEK64 +#define lseek64 lseek +#endif + void ADIOI_HFS_ReadContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int *error_code) diff --git a/ompi/mca/io/romio/romio/adio/ad_hfs/ad_hfs_write.c b/ompi/mca/io/romio/romio/adio/ad_hfs/ad_hfs_write.c index a4a41a0aa8..ab42d31bb6 100644 --- a/ompi/mca/io/romio/romio/adio/ad_hfs/ad_hfs_write.c +++ b/ompi/mca/io/romio/romio/adio/ad_hfs/ad_hfs_write.c @@ -7,6 +7,10 @@ #include "ad_hfs.h" +#ifndef HAVE_LSEEK64 +#define lseek64 lseek +#endif + void ADIOI_HFS_WriteContig(ADIO_File fd, void 
*buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int *error_code) diff --git a/ompi/mca/io/romio/romio/adio/ad_lustre/.state-cache b/ompi/mca/io/romio/romio/adio/ad_lustre/.state-cache deleted file mode 100644 index 3e2ff262a8..0000000000 --- a/ompi/mca/io/romio/romio/adio/ad_lustre/.state-cache +++ /dev/null @@ -1,22 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - diff --git a/ompi/mca/io/romio/romio/adio/ad_lustre/Makefile.am b/ompi/mca/io/romio/romio/adio/ad_lustre/Makefile.am index 67107e302f..6e20ce8919 100644 --- a/ompi/mca/io/romio/romio/adio/ad_lustre/Makefile.am +++ b/ompi/mca/io/romio/romio/adio/ad_lustre/Makefile.am @@ -24,8 +24,11 @@ EXTRA_DIST = README noinst_LTLIBRARIES = libadio_lustre.la libadio_lustre_la_SOURCES = \ ad_lustre.c \ + ad_lustre_aggregate.c \ ad_lustre_fcntl.c \ ad_lustre.h \ ad_lustre_hints.c \ ad_lustre_open.c \ - ad_lustre_rwcontig.c + ad_lustre_wrcoll.c \ + ad_lustre_rwcontig.c \ + ad_lustre_wrstr.c diff --git a/ompi/mca/io/romio/romio/adio/ad_lustre/README b/ompi/mca/io/romio/romio/adio/ad_lustre/README index 545ef3485f..a217c0f8fe 100644 --- a/ompi/mca/io/romio/romio/adio/ad_lustre/README +++ b/ompi/mca/io/romio/romio/adio/ad_lustre/README @@ -4,6 +4,21 @@ Upcoming soon: Further out: o To post the code for ParColl (Partitioned collective IO) +----------------------------------------------------- +V05: +----------------------------------------------------- +Improved data redistribution + o Improve I/O pattern identification. Besides checking interleaving, + if request I/O size is small, collective I/O will be performed. + The hint bigsize can be used to define the req size value. + o Provide hint CO for load balancing to control the number of + IO clients for each OST + o Produce stripe-contiguous I/O pattern that Lustre prefers + o Control read-modify-write in data sieving in collective IO + by hint ds_in_coll. 
+ o Reduce extent lock conflicts by having each OST accessed by one or + more constant clients. + ----------------------------------------------------- V04: ----------------------------------------------------- diff --git a/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre.c b/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre.c index 1a465f85aa..08809e5c65 100644 --- a/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre.c +++ b/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre.c @@ -1,24 +1,27 @@ /* -*- Mode: C; c-basic-offset:4 ; -*- */ -/* - * Copyright (C) 2001 University of Chicago. +/* + * Copyright (C) 2001 University of Chicago. * See COPYRIGHT notice in top-level directory. * * Copyright (C) 2007 Oak Ridge National Laboratory + * + * Copyright (C) 2008 Sun Microsystems, Lustre group */ #include "ad_lustre.h" struct ADIOI_Fns_struct ADIO_LUSTRE_operations = { ADIOI_LUSTRE_Open, /* Open */ + ADIOI_GEN_OpenColl, /* OpenColl */ ADIOI_LUSTRE_ReadContig, /* ReadContig */ ADIOI_LUSTRE_WriteContig, /* WriteContig */ ADIOI_GEN_ReadStridedColl, /* ReadStridedColl */ - ADIOI_GEN_WriteStridedColl, /* WriteStridedColl */ + ADIOI_LUSTRE_WriteStridedColl, /* WriteStridedColl */ ADIOI_GEN_SeekIndividual, /* SeekIndividual */ ADIOI_GEN_Fcntl, /* Fcntl */ ADIOI_LUSTRE_SetInfo, /* SetInfo */ ADIOI_GEN_ReadStrided, /* ReadStrided */ - ADIOI_GEN_WriteStrided, /* WriteStrided */ + ADIOI_LUSTRE_WriteStrided, /* WriteStrided */ ADIOI_GEN_Close, /* Close */ #if defined(ROMIO_HAVE_WORKING_AIO) && !defined(CRAY_XT_LUSTRE) ADIOI_GEN_IreadContig, /* IreadContig */ @@ -36,4 +39,5 @@ struct ADIOI_Fns_struct ADIO_LUSTRE_operations = { ADIOI_GEN_Flush, /* Flush */ ADIOI_GEN_Resize, /* Resize */ ADIOI_GEN_Delete, /* Delete */ + ADIOI_GEN_Feature, /* Features */ }; diff --git a/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre.h b/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre.h index a0fbdc40e2..3252907596 100644 --- a/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre.h +++
b/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre.h @@ -1,9 +1,11 @@ /* -*- Mode: C; c-basic-offset:4 ; -*- */ -/* - * Copyright (C) 1997 University of Chicago. +/* + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. * * Copyright (C) 2007 Oak Ridge National Laboratory + * + * Copyright (C) 2008 Sun Microsystems, Lustre group */ #ifndef AD_UNIX_INCLUDE @@ -17,6 +19,7 @@ #ifdef __linux__ # include /* necessary for: */ +# include # define __USE_GNU /* O_DIRECT and */ # include /* IO operations */ # undef __USE_GNU @@ -24,7 +27,7 @@ /*#include */ #include -#include "lustre/lustre_user.h" +#include #include "adio.h" /*#include "adioi.h"*/ @@ -41,24 +44,48 @@ void ADIOI_LUSTRE_Open(ADIO_File fd, int *error_code); void ADIOI_LUSTRE_Close(ADIO_File fd, int *error_code); -void ADIOI_LUSTRE_ReadContig(ADIO_File fd, void *buf, int count, - MPI_Datatype datatype, int file_ptr_type, - ADIO_Offset offset, ADIO_Status *status, int - *error_code); -void ADIOI_LUSTRE_WriteContig(ADIO_File fd, void *buf, int count, - MPI_Datatype datatype, int file_ptr_type, - ADIO_Offset offset, ADIO_Status *status, int - *error_code); +void ADIOI_LUSTRE_ReadContig(ADIO_File fd, void *buf, int count, + MPI_Datatype datatype, int file_ptr_type, + ADIO_Offset offset, ADIO_Status *status, + int *error_code); +void ADIOI_LUSTRE_WriteContig(ADIO_File fd, void *buf, int count, + MPI_Datatype datatype, int file_ptr_type, + ADIO_Offset offset, ADIO_Status *status, + int *error_code); +void ADIOI_LUSTRE_WriteStrided(ADIO_File fd, void *buf, int count, + MPI_Datatype datatype, int file_ptr_type, + ADIO_Offset offset, ADIO_Status *status, + int *error_code); void ADIOI_LUSTRE_WriteStridedColl(ADIO_File fd, void *buf, int count, - MPI_Datatype datatype, int file_ptr_type, - ADIO_Offset offset, ADIO_Status *status, int - *error_code); + MPI_Datatype datatype, int file_ptr_type, + ADIO_Offset offset, ADIO_Status *status, + int *error_code); void 
ADIOI_LUSTRE_ReadStridedColl(ADIO_File fd, void *buf, int count, - MPI_Datatype datatype, int file_ptr_type, - ADIO_Offset offset, ADIO_Status *status, int - *error_code); + MPI_Datatype datatype, int file_ptr_type, + ADIO_Offset offset, ADIO_Status *status, + int *error_code); +void ADIOI_LUSTRE_ReadStrided(ADIO_File fd, void *buf, int count, + MPI_Datatype datatype, int file_ptr_type, + ADIO_Offset offset, ADIO_Status *status, + int *error_code); void ADIOI_LUSTRE_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int *error_code); void ADIOI_LUSTRE_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code); +/* the lustre utilities: */ +int ADIOI_LUSTRE_Docollect(ADIO_File fd, int contig_access_count, + ADIO_Offset *len_list, int nprocs); + +void ADIOI_LUSTRE_Get_striping_info(ADIO_File fd, int **striping_info_ptr, + int mode); +void ADIOI_LUSTRE_Calc_my_req(ADIO_File fd, ADIO_Offset *offset_list, + ADIO_Offset *len_list, int contig_access_count, + int *striping_info, int nprocs, + int *count_my_req_procs_ptr, + int **count_my_req_per_proc_ptr, + ADIOI_Access **my_req_ptr, + int ***buf_idx_ptr); + +int ADIOI_LUSTRE_Calc_aggregator(ADIO_File fd, ADIO_Offset off, + ADIO_Offset *len, int *striping_info); #endif /* End of AD_UNIX_INCLUDE */ diff --git a/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_aggregate.c b/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_aggregate.c new file mode 100644 index 0000000000..203b080edb --- /dev/null +++ b/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_aggregate.c @@ -0,0 +1,322 @@ +/* -*- Mode: C; c-basic-offset:4 ; -*- */ +/* + * Copyright (C) 1997 University of Chicago. + * See COPYRIGHT notice in top-level directory. 
+ * + * Copyright (C) 2007 Oak Ridge National Laboratory + * + * Copyright (C) 2008 Sun Microsystems, Lustre group + */ + +#include "ad_lustre.h" +#include "adio_extern.h" + +#undef AGG_DEBUG + +void ADIOI_LUSTRE_Get_striping_info(ADIO_File fd, int **striping_info_ptr, + int mode) +{ + int *striping_info = NULL; + /* get striping information: + * striping_info[0]: stripe_size + * striping_info[1]: stripe_count + * striping_info[2]: avail_cb_nodes + */ + int stripe_size, stripe_count, CO = 1; + int avail_cb_nodes, divisor, nprocs_for_coll = fd->hints->cb_nodes; + + /* Get hints value */ + /* stripe size */ + stripe_size = fd->hints->striping_unit; + /* stripe count */ + /* stripe_size and stripe_count have been validated in ADIOI_LUSTRE_Open() */ + stripe_count = fd->hints->striping_factor; + + /* Calculate the available number of I/O clients */ + if (!mode) { + /* for collective read, + * if "CO" clients access the same OST simultaneously, + * the OST disk seek time would be much. So, to avoid this, + * it might be better if 1 client only accesses 1 OST. + * So, we set CO = 1 to meet the above requirement. + */ + CO = 1; + /*XXX: maybe there are other better way for collective read */ + } else { + /* CO also has been validated in ADIOI_LUSTRE_Open(), >0 */ + CO = fd->hints->fs_hints.lustre.co_ratio; + } + /* Calculate how many IO clients we need */ + /* Algorithm courtesy Pascal Deveze (pascal.deveze@bull.net) */ + /* To avoid extent lock conflicts, + * avail_cb_nodes should either + * - be a multiple of stripe_count, + * - or divide stripe_count exactly + * so that each OST is accessed by a maximum of CO constant clients. */ + if (nprocs_for_coll >= stripe_count) + /* avail_cb_nodes should be a multiple of stripe_count and the number + * of procs per OST should be limited to the minimum between + * nprocs_for_coll/stripe_count and CO + * + * e.g. 
if stripe_count=20, nprocs_for_coll=42 and CO=3 then + * avail_cb_nodes should be equal to 40 */ + avail_cb_nodes = + stripe_count * ADIOI_MIN(nprocs_for_coll/stripe_count, CO); + else { + /* nprocs_for_coll is less than stripe_count */ + /* avail_cb_nodes should divide stripe_count */ + /* e.g. if stripe_count=60 and nprocs_for_coll=8 then + * avail_cb_nodes should be egal to 6 */ + /* This could be done with : + while (stripe_count % avail_cb_nodes != 0) avail_cb_nodes--; + but this can be optimized for large values of nprocs_for_coll and + stripe_count */ + divisor = 2; + avail_cb_nodes = 1; + /* try to divise */ + while (stripe_count >= divisor*divisor) { + if ((stripe_count % divisor) == 0) { + if (stripe_count/divisor <= nprocs_for_coll) { + /* The value is found ! */ + avail_cb_nodes = stripe_count/divisor; + break; + } + /* if divisor is less than nprocs_for_coll, divisor is a + * solution, but it is not sure that it is the best one */ + else if (divisor <= nprocs_for_coll) + avail_cb_nodes = divisor; + } + divisor++; + } + } + + *striping_info_ptr = (int *) ADIOI_Malloc(3 * sizeof(int)); + striping_info = *striping_info_ptr; + striping_info[0] = stripe_size; + striping_info[1] = stripe_count; + striping_info[2] = avail_cb_nodes; +} + +int ADIOI_LUSTRE_Calc_aggregator(ADIO_File fd, ADIO_Offset off, + ADIO_Offset *len, int *striping_info) +{ + int rank_index, rank; + ADIO_Offset avail_bytes; + int stripe_size = striping_info[0]; + int avail_cb_nodes = striping_info[2]; + + /* Produce the stripe-contiguous pattern for Lustre */ + rank_index = (int)((off / stripe_size) % avail_cb_nodes); + + /* we index into fd_end with rank_index, and fd_end was allocated to be no + * bigger than fd->hins->cb_nodes. If we ever violate that, we're + * overrunning arrays. 
Obviously, we should never ever hit this abort + */ + if (rank_index >= fd->hints->cb_nodes) + MPI_Abort(MPI_COMM_WORLD, 1); + + avail_bytes = (off / (ADIO_Offset)stripe_size + 1) * + (ADIO_Offset)stripe_size - off; + if (avail_bytes < *len) { + /* this proc only has part of the requested contig. region */ + *len = avail_bytes; + } + /* map our index to a rank */ + /* NOTE: FOR NOW WE DON'T HAVE A MAPPING...JUST DO 0..NPROCS_FOR_COLL */ + rank = fd->hints->ranklist[rank_index]; + + return rank; +} + +/* ADIOI_LUSTRE_Calc_my_req() - calculate what portions of the access requests + * of this process are located in the file domains of various processes + * (including this one) + */ + + +void ADIOI_LUSTRE_Calc_my_req(ADIO_File fd, ADIO_Offset *offset_list, + ADIO_Offset *len_list, int contig_access_count, + int *striping_info, int nprocs, + int *count_my_req_procs_ptr, + int **count_my_req_per_proc_ptr, + ADIOI_Access **my_req_ptr, + int ***buf_idx_ptr) +{ + /* Nothing different from ADIOI_Calc_my_req(), except calling + * ADIOI_Lustre_Calc_aggregator() instead of the old one */ + int *count_my_req_per_proc, count_my_req_procs, **buf_idx; + int i, l, proc; + ADIO_Offset avail_len, rem_len, curr_idx, off; + ADIOI_Access *my_req; + + *count_my_req_per_proc_ptr = (int *) ADIOI_Calloc(nprocs, sizeof(int)); + count_my_req_per_proc = *count_my_req_per_proc_ptr; + /* count_my_req_per_proc[i] gives the no. of contig. requests of this + * process in process i's file domain. calloc initializes to zero. + * I'm allocating memory of size nprocs, so that I can do an + * MPI_Alltoall later on. 
+ */ + + buf_idx = (int **) ADIOI_Malloc(nprocs * sizeof(int*)); + + /* one pass just to calculate how much space to allocate for my_req; + * contig_access_count was calculated way back in ADIOI_Calc_my_off_len() + */ + for (i = 0; i < contig_access_count; i++) { + /* short circuit offset/len processing if len == 0 + * (zero-byte read/write + */ + if (len_list[i] == 0) + continue; + off = offset_list[i]; + avail_len = len_list[i]; + /* note: we set avail_len to be the total size of the access. + * then ADIOI_LUSTRE_Calc_aggregator() will modify the value to return + * the amount that was available. + */ + proc = ADIOI_LUSTRE_Calc_aggregator(fd, off, &avail_len, striping_info); + count_my_req_per_proc[proc]++; + + /* figure out how many data is remaining in the access + * we'll take care of this data (if there is any) + * in the while loop below. + */ + rem_len = len_list[i] - avail_len; + + while (rem_len != 0) { + off += avail_len; /* point to first remaining byte */ + avail_len = rem_len; /* save remaining size, pass to calc */ + proc = ADIOI_LUSTRE_Calc_aggregator(fd, off, &avail_len, striping_info); + count_my_req_per_proc[proc]++; + rem_len -= avail_len; /* reduce remaining length by amount from fd */ + } + } + + /* buf_idx is relevant only if buftype_is_contig. + * buf_idx[i] gives the index into user_buf where data received + * from proc 'i' should be placed. This allows receives to be done + * without extra buffer. This can't be done if buftype is not contig. 
+ */ + + /* initialize buf_idx vectors */ + for (i = 0; i < nprocs; i++) { + /* add one to count_my_req_per_proc[i] to avoid zero size malloc */ + buf_idx[i] = (int *) ADIOI_Malloc((count_my_req_per_proc[i] + 1) + * sizeof(int)); + } + + /* now allocate space for my_req, offset, and len */ + *my_req_ptr = (ADIOI_Access *) ADIOI_Malloc(nprocs * sizeof(ADIOI_Access)); + my_req = *my_req_ptr; + + count_my_req_procs = 0; + for (i = 0; i < nprocs; i++) { + if (count_my_req_per_proc[i]) { + my_req[i].offsets = (ADIO_Offset *) + ADIOI_Malloc(count_my_req_per_proc[i] * + sizeof(ADIO_Offset)); + my_req[i].lens = (int *) ADIOI_Malloc(count_my_req_per_proc[i] * + sizeof(int)); + count_my_req_procs++; + } + my_req[i].count = 0; /* will be incremented where needed later */ + } + + /* now fill in my_req */ + curr_idx = 0; + for (i = 0; i < contig_access_count; i++) { + /* short circuit offset/len processing if len == 0 + * (zero-byte read/write */ + if (len_list[i] == 0) + continue; + off = offset_list[i]; + avail_len = len_list[i]; + proc = ADIOI_LUSTRE_Calc_aggregator(fd, off, &avail_len, striping_info); + + l = my_req[proc].count; + + ADIOI_Assert(curr_idx == (int) curr_idx); + ADIOI_Assert(l < count_my_req_per_proc[proc]); + buf_idx[proc][l] = (int) curr_idx; + curr_idx += avail_len; + + rem_len = len_list[i] - avail_len; + + /* store the proc, offset, and len information in an array + * of structures, my_req. Each structure contains the + * offsets and lengths located in that process's FD, + * and the associated count. 
+ */ + my_req[proc].offsets[l] = off; + ADIOI_Assert(avail_len == (int) avail_len); + my_req[proc].lens[l] = (int) avail_len; + my_req[proc].count++; + + while (rem_len != 0) { + off += avail_len; + avail_len = rem_len; + proc = ADIOI_LUSTRE_Calc_aggregator(fd, off, &avail_len, + striping_info); + + l = my_req[proc].count; + ADIOI_Assert(curr_idx == (int) curr_idx); + ADIOI_Assert(l < count_my_req_per_proc[proc]); + buf_idx[proc][l] = (int) curr_idx; + + curr_idx += avail_len; + rem_len -= avail_len; + + my_req[proc].offsets[l] = off; + ADIOI_Assert(avail_len == (int) avail_len); + my_req[proc].lens[l] = (int) avail_len; + my_req[proc].count++; + } + } + +#ifdef AGG_DEBUG + for (i = 0; i < nprocs; i++) { + if (count_my_req_per_proc[i] > 0) { + FPRINTF(stdout, "data needed from %d (count = %d):\n", + i, my_req[i].count); + for (l = 0; l < my_req[i].count; l++) { + FPRINTF(stdout, " off[%d] = %lld, len[%d] = %d\n", + l, my_req[i].offsets[l], l, my_req[i].lens[l]); + } + } + } +#endif + + *count_my_req_procs_ptr = count_my_req_procs; + *buf_idx_ptr = buf_idx; +} + +int ADIOI_LUSTRE_Docollect(ADIO_File fd, int contig_access_count, + ADIO_Offset *len_list, int nprocs) +{ + /* If the processes are non-interleaved, we will check the req_size. 
+ * if (avg_req_size > big_req_size) { + * docollect = 0; + * } + */ + + int i, docollect = 1, big_req_size = 0; + ADIO_Offset req_size = 0, total_req_size; + int avg_req_size, total_access_count; + + /* calculate total_req_size and total_access_count */ + for (i = 0; i < contig_access_count; i++) + req_size += len_list[i]; + MPI_Allreduce(&req_size, &total_req_size, 1, MPI_LONG_LONG_INT, MPI_SUM, + fd->comm); + MPI_Allreduce(&contig_access_count, &total_access_count, 1, MPI_INT, MPI_SUM, + fd->comm); + /* estimate average req_size */ + avg_req_size = (int)(total_req_size / total_access_count); + /* get hint of big_req_size */ + big_req_size = fd->hints->fs_hints.lustre.coll_threshold; + /* Don't perform collective I/O if there are big requests */ + if ((big_req_size > 0) && (avg_req_size > big_req_size)) + docollect = 0; + + return docollect; +} diff --git a/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_fcntl.c b/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_fcntl.c index 739d3b00ce..3afd16a011 100644 --- a/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_fcntl.c +++ b/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_fcntl.c @@ -25,7 +25,7 @@ void ADIOI_LUSTRE_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int if (fd->fp_sys_posn != -1) lseek(fd->fd_sys, fd->fp_sys_posn, SEEK_SET); if (fcntl_struct->fsize == -1) { - *error_code = MPIR_Err_create_code(MPI_SUCCESS, + *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io", "**io %s", strerror(errno)); } @@ -56,7 +56,7 @@ void ADIOI_LUSTRE_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int ADIO_ReadContig(fd, buf, len, MPI_BYTE, ADIO_EXPLICIT_OFFSET, done, &status, error_code); if (*error_code != MPI_SUCCESS) { - *error_code = MPIR_Err_create_code(MPI_SUCCESS, + *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io", "**io %s", strerror(errno)); return; diff --git 
a/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_hints.c b/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_hints.c index 817b5bd802..d925779ef3 100644 --- a/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_hints.c +++ b/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_hints.c @@ -1,9 +1,11 @@ /* -*- Mode: C; c-basic-offset:4 ; -*- */ -/* - * Copyright (C) 1997 University of Chicago. +/* + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. * * Copyright (C) 2007 Oak Ridge National Laboratory + * + * Copyright (C) 2008 Sun Microsystems, Lustre group */ #include "ad_lustre.h" @@ -11,70 +13,81 @@ void ADIOI_LUSTRE_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) { - char *value, *value_in_fd; - int flag, tmp_val[3], str_factor=-1, str_unit=0, start_iodev=-1; + char *value; + int flag, stripe_val[3], str_factor = -1, str_unit=0, start_iodev=-1; struct lov_user_md lum = { 0 }; int err, myrank, fd_sys, perm, amode, old_mask; + int int_val, tmp_val; + static char myname[] = "ADIOI_LUSTRE_SETINFO"; value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char)); if ( (fd->info) == MPI_INFO_NULL) { - /* This must be part of the open call. can set striping parameters - if necessary. */ + /* This must be part of the open call. can set striping parameters + if necessary. 
*/ MPI_Info_create(&(fd->info)); - MPI_Info_set(fd->info, "direct_read", "false"); - MPI_Info_set(fd->info, "direct_write", "false"); + ADIOI_Info_set(fd->info, "direct_read", "false"); + ADIOI_Info_set(fd->info, "direct_write", "false"); fd->direct_read = fd->direct_write = 0; - - /* has user specified striping or server buffering parameters + /* initialize lustre hints */ + ADIOI_Info_set(fd->info, "romio_lustre_co_ratio", "1"); + fd->hints->fs_hints.lustre.co_ratio = 1; + ADIOI_Info_set(fd->info, "romio_lustre_coll_threshold", "0"); + fd->hints->fs_hints.lustre.coll_threshold = 0; + ADIOI_Info_set(fd->info, "romio_lustre_ds_in_coll", "enable"); + fd->hints->fs_hints.lustre.ds_in_coll = ADIOI_HINT_ENABLE; + + /* has user specified striping or server buffering parameters and do they have the same value on all processes? */ if (users_info != MPI_INFO_NULL) { - MPI_Info_get(users_info, "striping_unit", MPI_MAX_INFO_VAL, + /* striping information */ + ADIOI_Info_get(users_info, "striping_unit", MPI_MAX_INFO_VAL, value, &flag); - if (flag) + if (flag) str_unit=atoi(value); - MPI_Info_get(users_info, "striping_factor", MPI_MAX_INFO_VAL, + ADIOI_Info_get(users_info, "striping_factor", MPI_MAX_INFO_VAL, value, &flag); - if (flag) + if (flag) str_factor=atoi(value); - MPI_Info_get(users_info, "start_iodevice", MPI_MAX_INFO_VAL, - value, &flag); - if (flag) + ADIOI_Info_get(users_info, "romio_lustre_start_iodevice", + MPI_MAX_INFO_VAL, value, &flag); + if (flag) start_iodev=atoi(value); - MPI_Info_get(users_info, "direct_read", MPI_MAX_INFO_VAL, - value, &flag); + /* direct read and write */ + ADIOI_Info_get(users_info, "direct_read", MPI_MAX_INFO_VAL, + value, &flag); if (flag && (!strcmp(value, "true") || !strcmp(value, "TRUE"))) { - MPI_Info_set(fd->info, "direct_read", "true"); + ADIOI_Info_set(fd->info, "direct_read", "true"); fd->direct_read = 1; } - - MPI_Info_get(users_info, "direct_write", MPI_MAX_INFO_VAL, + ADIOI_Info_get(users_info, "direct_write", 
MPI_MAX_INFO_VAL, value, &flag); if (flag && (!strcmp(value, "true") || !strcmp(value, "TRUE"))) { - MPI_Info_set(fd->info, "direct_write", "true"); + ADIOI_Info_set(fd->info, "direct_write", "true"); fd->direct_write = 1; } } + /* set striping information with ioctl */ MPI_Comm_rank(fd->comm, &myrank); if (myrank == 0) { - tmp_val[0] = str_factor; - tmp_val[1] = str_unit; - tmp_val[2] = start_iodev; + stripe_val[0] = str_factor; + stripe_val[1] = str_unit; + stripe_val[2] = start_iodev; } - MPI_Bcast(tmp_val, 3, MPI_INT, 0, fd->comm); + MPI_Bcast(stripe_val, 3, MPI_INT, 0, fd->comm); - if (tmp_val[0] != str_factor - || tmp_val[1] != str_unit - || tmp_val[2] != start_iodev) { + if (stripe_val[0] != str_factor + || stripe_val[1] != str_unit + || stripe_val[2] != start_iodev) { FPRINTF(stderr, "ADIOI_LUSTRE_SetInfo: All keys" "-striping_factor:striping_unit:start_iodevice " "need to be identical across all processes\n"); MPI_Abort(MPI_COMM_WORLD, 1); - } else if ((str_factor > 0) || (str_unit > 0) || (start_iodev >= 0)) { + } else if ((str_factor > 0) || (str_unit > 0) || (start_iodev >= 0)) { /* if user has specified striping info, process 0 tries to set it */ if (!myrank) { if (fd->perm == ADIO_PERM_NULL) { @@ -100,9 +113,9 @@ void ADIOI_LUSTRE_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) amode = amode | O_LOV_DELAY_CREATE | O_CREAT; fd_sys = open(fd->filename, amode, perm); - if (fd_sys == -1) { - if (errno != EEXIST) - fprintf(stderr, + if (fd_sys == -1) { + if (errno != EEXIST) + fprintf(stderr, "Failure to open file %s %d %d\n",strerror(errno), amode, perm); } else { lum.lmm_magic = LOV_USER_MAGIC; @@ -112,25 +125,73 @@ void ADIOI_LUSTRE_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) lum.lmm_stripe_offset = start_iodev; err = ioctl(fd_sys, LL_IOC_LOV_SETSTRIPE, &lum); - if (err == -1 && errno != EEXIST) { + if (err == -1 && errno != EEXIST) { fprintf(stderr, "Failure to set stripe info %s \n", strerror(errno)); } close(fd_sys); } } 
/* End of striping parameters validation */ } - MPI_Barrier(fd->comm); - /* set the values for collective I/O and data sieving parameters */ - ADIOI_GEN_SetInfo(fd, users_info, error_code); - } else { - /* The file has been opened previously and fd->fd_sys is a valid - file descriptor. cannot set striping parameters now. */ - - /* set the values for collective I/O and data sieving parameters */ - ADIOI_GEN_SetInfo(fd, users_info, error_code); } - + /* get other hint */ + if (users_info != MPI_INFO_NULL) { + /* CO: IO Clients/OST, + * to keep the load balancing between clients and OSTs */ + ADIOI_Info_get(users_info, "romio_lustre_co_ratio", MPI_MAX_INFO_VAL, value, + &flag); + if (flag && (int_val = atoi(value)) > 0) { + tmp_val = int_val; + MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm); + if (tmp_val != int_val) { + MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname, + "romio_lustre_co_ratio", + error_code); + ADIOI_Free(value); + return; + } + ADIOI_Info_set(fd->info, "romio_lustre_co_ratio", value); + fd->hints->fs_hints.lustre.co_ratio = atoi(value); + } + /* coll_threshold: + * if the req size is bigger than this, collective IO may not be performed. 
+ */ + ADIOI_Info_get(users_info, "romio_lustre_coll_threshold", MPI_MAX_INFO_VAL, value, + &flag); + if (flag && (int_val = atoi(value)) > 0) { + tmp_val = int_val; + MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm); + if (tmp_val != int_val) { + MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname, + "romio_lustre_coll_threshold", + error_code); + ADIOI_Free(value); + return; + } + ADIOI_Info_set(fd->info, "romio_lustre_coll_threshold", value); + fd->hints->fs_hints.lustre.coll_threshold = atoi(value); + } + /* ds_in_coll: disable data sieving in collective IO */ + ADIOI_Info_get(users_info, "romio_lustre_ds_in_coll", MPI_MAX_INFO_VAL, + value, &flag); + if (flag && (!strcmp(value, "disable") || + !strcmp(value, "DISABLE"))) { + tmp_val = int_val = 2; + MPI_Bcast(&tmp_val, 2, MPI_INT, 0, fd->comm); + if (tmp_val != int_val) { + MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname, + "romio_lustre_ds_in_coll", + error_code); + ADIOI_Free(value); + return; + } + ADIOI_Info_set(fd->info, "romio_lustre_ds_in_coll", "disable"); + fd->hints->fs_hints.lustre.ds_in_coll = ADIOI_HINT_DISABLE; + } + } + /* set the values for collective I/O and data sieving parameters */ + ADIOI_GEN_SetInfo(fd, users_info, error_code); + if (ADIOI_Direct_read) fd->direct_read = 1; if (ADIOI_Direct_write) fd->direct_write = 1; diff --git a/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_open.c b/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_open.c index 17622f6562..1ee9870fcf 100644 --- a/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_open.c +++ b/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_open.c @@ -1,17 +1,25 @@ /* -*- Mode: C; c-basic-offset:4 ; -*- */ -/* - * Copyright (C) 1997 University of Chicago. +/* + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. * * Copyright (C) 2007 Oak Ridge National Laboratory + * + * Copyright (C) 2008 Sun Microsystems, Lustre group */ #include "ad_lustre.h" +/* what is the basis for this define? 
+ * what happens if there are more than 1k UUIDs? */ + +#define MAX_LOV_UUID_COUNT 1000 + void ADIOI_LUSTRE_Open(ADIO_File fd, int *error_code) { int perm, old_mask, amode, amode_direct; - struct lov_user_md lum = { 0 }; + int lumlen; + struct lov_user_md *lum = NULL; char *value; #if defined(MPICH2) || !defined(PRINT_ERR_MSG) @@ -44,23 +52,37 @@ void ADIOI_LUSTRE_Open(ADIO_File fd, int *error_code) if (fd->fd_sys != -1) { int err; - value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char)); - /* get file striping information and set it in info */ - lum.lmm_magic = LOV_USER_MAGIC; - err = ioctl(fd->fd_sys, LL_IOC_LOV_GETSTRIPE, (void *) &lum); - + /* odd malloc here because lov_user_md contains some fixed data and + * then a list of 'lmm_objects' representing stripe */ + lumlen = sizeof(struct lov_user_md) + + MAX_LOV_UUID_COUNT * sizeof(struct lov_user_ost_data); + /* furthermore, Pascal Deveze reports that, even though we pass a + * "GETSTRIPE" (read) flag to the ioctl, if some of the values of this + * struct are uninitialzed, the call can give an error. 
calloc in case + * there are other members that must be initialized and in case + * lov_user_md struct changes in future */ + lum = (struct lov_user_md *)ADIOI_Calloc(1,lumlen); + lum->lmm_magic = LOV_USER_MAGIC; + err = ioctl(fd->fd_sys, LL_IOC_LOV_GETSTRIPE, (void *)lum); if (!err) { - sprintf(value, "%d", lum.lmm_stripe_size); - MPI_Info_set(fd->info, "striping_unit", value); + value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char)); - sprintf(value, "%d", lum.lmm_stripe_count); - MPI_Info_set(fd->info, "striping_factor", value); + fd->hints->striping_unit = lum->lmm_stripe_size; + sprintf(value, "%d", lum->lmm_stripe_size); + ADIOI_Info_set(fd->info, "striping_unit", value); - sprintf(value, "%d", lum.lmm_stripe_offset); - MPI_Info_set(fd->info, "start_iodevice", value); + fd->hints->striping_factor = lum->lmm_stripe_count; + sprintf(value, "%d", lum->lmm_stripe_count); + ADIOI_Info_set(fd->info, "striping_factor", value); + + fd->hints->fs_hints.lustre.start_iodevice = lum->lmm_stripe_offset; + sprintf(value, "%d", lum->lmm_stripe_offset); + ADIOI_Info_set(fd->info, "romio_lustre_start_iodevice", value); + + ADIOI_Free(value); } - ADIOI_Free(value); + ADIOI_Free(lum); if (fd->access_mode & ADIO_APPEND) fd->fp_ind = fd->fp_sys_posn = lseek(fd->fd_sys, 0, SEEK_END); diff --git a/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_rwcontig.c b/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_rwcontig.c index 51f9357f65..dd8f5ec8a6 100644 --- a/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_rwcontig.c +++ b/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_rwcontig.c @@ -1,9 +1,11 @@ /* -*- Mode: C; c-basic-offset:4 ; -*- */ -/* - * Copyright (C) 1997 University of Chicago. +/* + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
* * Copyright (C) 2007 Oak Ridge National Laboratory + * + * Copyright (C) 2008 Sun Microsystems, Lustre group */ #define _XOPEN_SOURCE 600 @@ -18,7 +20,7 @@ static void ADIOI_LUSTRE_Aligned_Mem_File_Write(ADIO_File fd, void *buf, int len static void ADIOI_LUSTRE_Aligned_Mem_File_Write(ADIO_File fd, void *buf, int len, ADIO_Offset offset, int *err) { - int ntimes, rem, newrem, i, size, nbytes; + int rem, size, nbytes; if (!(len % fd->d_miniosz) && (len >= fd->d_miniosz)) { *err = pwrite(fd->fd_direct, buf, len, offset); } else if (len < fd->d_miniosz) { @@ -37,7 +39,7 @@ static void ADIOI_LUSTRE_Aligned_Mem_File_Read(ADIO_File fd, void *buf, int len, static void ADIOI_LUSTRE_Aligned_Mem_File_Read(ADIO_File fd, void *buf, int len, ADIO_Offset offset, int *err) { - int ntimes, rem, newrem, i, size, nbytes; + int rem, size, nbytes; if (!(len % fd->d_miniosz) && (len >= fd->d_miniosz)) *err = pread(fd->fd_direct, buf, len, offset); else if (len < fd->d_miniosz) @@ -59,7 +61,6 @@ static int ADIOI_LUSTRE_Directio(ADIO_File fd, void *buf, int len, { int err=-1, diff, size=len, nbytes = 0; void *newbuf; - static char myname[] = "ADIOI_LUSTRE_Directio"; if (offset % fd->d_miniosz) { diff = fd->d_miniosz - (offset % fd->d_miniosz); @@ -87,7 +88,7 @@ static int ADIOI_LUSTRE_Directio(ADIO_File fd, void *buf, int len, memcpy(newbuf, buf, size); ADIOI_LUSTRE_Aligned_Mem_File_Write(fd, newbuf, size, offset, &err); nbytes += err; - free(newbuf); + ADIOI_Free(newbuf); } else nbytes += pwrite(fd->fd_sys, buf, size, offset); } @@ -102,7 +103,7 @@ static int ADIOI_LUSTRE_Directio(ADIO_File fd, void *buf, int len, ADIOI_LUSTRE_Aligned_Mem_File_Read(fd, newbuf, size, offset, &err); if (err > 0) memcpy(buf, newbuf, err); nbytes += err; - free(newbuf); + ADIOI_Free(newbuf); } else nbytes += pread(fd->fd_sys, buf, size, offset); } @@ -136,10 +137,23 @@ static void ADIOI_LUSTRE_IOContig(ADIO_File fd, void *buf, int count, if (err == -1) goto ioerr; } - if (io_mode) + if (io_mode) { +#ifdef 
ADIOI_MPE_LOGGING + MPE_Log_event(ADIOI_MPE_write_a, 0, NULL); +#endif err = write(fd->fd_sys, buf, len); - else +#ifdef ADIOI_MPE_LOGGING + MPE_Log_event(ADIOI_MPE_write_b, 0, NULL); +#endif + } else { +#ifdef ADIOI_MPE_LOGGING + MPE_Log_event(ADIOI_MPE_read_a, 0, NULL); +#endif err = read(fd->fd_sys, buf, len); +#ifdef ADIOI_MPE_LOGGING + MPE_Log_event(ADIOI_MPE_read_b, 0, NULL); +#endif + } } else { err = ADIOI_LUSTRE_Directio(fd, buf, len, offset, io_mode); } diff --git a/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_wrcoll.c b/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_wrcoll.c new file mode 100644 index 0000000000..231465b481 --- /dev/null +++ b/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_wrcoll.c @@ -0,0 +1,954 @@ +/* -*- Mode: C; c-basic-offset:4 ; -*- */ +/* + * Copyright (C) 1997 University of Chicago. + * See COPYRIGHT notice in top-level directory. + * + * Copyright (C) 2007 Oak Ridge National Laboratory + * + * Copyright (C) 2008 Sun Microsystems, Lustre group + */ + +#include "ad_lustre.h" +#include "adio_extern.h" + +/* prototypes of functions used for collective writes only. 
*/ +static void ADIOI_LUSTRE_Exch_and_write(ADIO_File fd, void *buf, + MPI_Datatype datatype, int nprocs, + int myrank, + ADIOI_Access *others_req, + ADIOI_Access *my_req, + ADIO_Offset *offset_list, + ADIO_Offset *len_list, + int contig_access_count, + int *striping_info, + int **buf_idx, int *error_code); +static void ADIOI_LUSTRE_Fill_send_buffer(ADIO_File fd, void *buf, + ADIOI_Flatlist_node *flat_buf, + char **send_buf, + ADIO_Offset *offset_list, + ADIO_Offset *len_list, int *send_size, + MPI_Request *requests, + int *sent_to_proc, int nprocs, + int myrank, int contig_access_count, + int *striping_info, + int *send_buf_idx, + int *curr_to_proc, + int *done_to_proc, int iter, + MPI_Aint buftype_extent); +static void ADIOI_LUSTRE_W_Exchange_data(ADIO_File fd, void *buf, + char *write_buf, + ADIOI_Flatlist_node *flat_buf, + ADIO_Offset *offset_list, + ADIO_Offset *len_list, int *send_size, + int *recv_size, ADIO_Offset off, + int size, int *count, + int *start_pos, int *partial_recv, + int *sent_to_proc, int nprocs, + int myrank, int buftype_is_contig, + int contig_access_count, + int *striping_info, + ADIOI_Access *others_req, + int *send_buf_idx, + int *curr_to_proc, + int *done_to_proc, int *hole, + int iter, MPI_Aint buftype_extent, + int *buf_idx, int *error_code); +void ADIOI_Heap_merge(ADIOI_Access *others_req, int *count, + ADIO_Offset *srt_off, int *srt_len, int *start_pos, + int nprocs, int nprocs_recv, int total_elements); + +void ADIOI_LUSTRE_WriteStridedColl(ADIO_File fd, void *buf, int count, + MPI_Datatype datatype, + int file_ptr_type, ADIO_Offset offset, + ADIO_Status *status, int *error_code) +{ + /* Uses a generalized version of the extended two-phase method described + * in "An Extended Two-Phase Method for Accessing Sections of + * Out-of-Core Arrays", Rajeev Thakur and Alok Choudhary, + * Scientific Programming, (5)4:301--317, Winter 1996. 
+ * http://www.mcs.anl.gov/home/thakur/ext2ph.ps + */ + + ADIOI_Access *my_req; + /* array of nprocs access structures, one per process, describing the + part of this process's request located in that process */ + + ADIOI_Access *others_req; + /* array of nprocs access structures, one for each other process + whose request is written by this process. */ + + int i, filetype_is_contig, nprocs, myrank, do_collect = 0; + int contig_access_count = 0, buftype_is_contig, interleave_count = 0; + int *count_my_req_per_proc, count_my_req_procs, count_others_req_procs; + ADIO_Offset orig_fp, start_offset, end_offset, off; + ADIO_Offset *offset_list = NULL, *st_offsets = NULL, *end_offsets = NULL; + ADIO_Offset *len_list = NULL; + int **buf_idx = NULL, *striping_info = NULL; + int old_error, tmp_error; + + MPI_Comm_size(fd->comm, &nprocs); + MPI_Comm_rank(fd->comm, &myrank); + + orig_fp = fd->fp_ind; + + /* I/O pattern identification if cb_write isn't disabled */ + if (fd->hints->cb_write != ADIOI_HINT_DISABLE) { + /* For this process's request, calculate the list of offsets and + lengths in the file and determine the start and end offsets. */ + + /* Note: end_offset points to the last byte-offset that will be accessed. + * e.g., if start_offset=0 and 100 bytes are to be written, end_offset=99 + */ + + ADIOI_Calc_my_off_len(fd, count, datatype, file_ptr_type, offset, + &offset_list, &len_list, &start_offset, + &end_offset, &contig_access_count); + + /* each process communicates its start and end offsets to other + * processes. The result is two arrays, one of start and one of end + * offsets, stored in order of process rank. + */ + st_offsets = (ADIO_Offset *) ADIOI_Malloc(nprocs * sizeof(ADIO_Offset)); + end_offsets = (ADIO_Offset *) ADIOI_Malloc(nprocs * sizeof(ADIO_Offset)); + MPI_Allgather(&start_offset, 1, ADIO_OFFSET, st_offsets, 1, + ADIO_OFFSET, fd->comm); + MPI_Allgather(&end_offset, 1, ADIO_OFFSET, end_offsets, 1, + ADIO_OFFSET, fd->comm); + /* are the accesses of different processes interleaved?
*/ + for (i = 1; i < nprocs; i++) + if ((st_offsets[i] < end_offsets[i-1]) && + (st_offsets[i] <= end_offsets[i])) + interleave_count++; + /* This is a rudimentary check for interleaving, but should suffice + for the moment. */ + + /* Two typical access patterns can benefit from collective write. + * 1) the processes are interleaved, and + * 2) the req size is small. + */ + if (interleave_count > 0) { + do_collect = 1; + } else { + do_collect = ADIOI_LUSTRE_Docollect(fd, contig_access_count, + len_list, nprocs); + } + } + ADIOI_Datatype_iscontig(datatype, &buftype_is_contig); + + /* Decide if collective I/O should be done */ + if ((!do_collect && fd->hints->cb_write == ADIOI_HINT_AUTO) || + fd->hints->cb_write == ADIOI_HINT_DISABLE) { + + /* use independent accesses */ + if (fd->hints->cb_write != ADIOI_HINT_DISABLE) { + ADIOI_Free(offset_list); + ADIOI_Free(len_list); + ADIOI_Free(st_offsets); + ADIOI_Free(end_offsets); + } + + fd->fp_ind = orig_fp; + ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig); + if (buftype_is_contig && filetype_is_contig) { + if (file_ptr_type == ADIO_EXPLICIT_OFFSET) { + off = fd->disp + (ADIO_Offset)(fd->etype_size) * offset; + ADIO_WriteContig(fd, buf, count, datatype, + ADIO_EXPLICIT_OFFSET, + off, status, error_code); + } else + ADIO_WriteContig(fd, buf, count, datatype, ADIO_INDIVIDUAL, + 0, status, error_code); + } else { + ADIO_WriteStrided(fd, buf, count, datatype, file_ptr_type, + offset, status, error_code); + } + return; + } + + /* Get Lustre hints information */ + ADIOI_LUSTRE_Get_striping_info(fd, &striping_info, 1); + + /* calculate what portions of the access requests of this process are + * located in which process + */ + ADIOI_LUSTRE_Calc_my_req(fd, offset_list, len_list, contig_access_count, + striping_info, nprocs, &count_my_req_procs, + &count_my_req_per_proc, &my_req, + &buf_idx); + + /* based on everyone's my_req, calculate what requests of other processes + * will be accessed by this process. 
+ * count_others_req_procs = number of processes whose requests (including + * this process itself) will be accessed by this process + * count_others_req_per_proc[i] indicates how many separate contiguous + * requests of proc. i will be accessed by this process. + */ + + ADIOI_Calc_others_req(fd, count_my_req_procs, count_my_req_per_proc, + my_req, nprocs, myrank, &count_others_req_procs, + &others_req); + ADIOI_Free(count_my_req_per_proc); + + /* exchange data and write in sizes of no more than stripe_size. */ + ADIOI_LUSTRE_Exch_and_write(fd, buf, datatype, nprocs, myrank, + others_req, my_req, offset_list, len_list, + contig_access_count, striping_info, + buf_idx, error_code); + + /* If this collective write is followed by an independent write, + * it's possible to have those subsequent writes on other processes + * race ahead and sneak in before the read-modify-write completes. + * We carry out a collective communication at the end here so no one + * can start independent i/o before collective I/O completes. 
+ * + * need to do some gymnastics with the error codes so that if something + * went wrong, all processes report error, but if a process has a more + * specific error code, we can still have that process report the + * additional information */ + + old_error = *error_code; + if (*error_code != MPI_SUCCESS) + *error_code = MPI_ERR_IO; + + /* optimization: if only one process performing i/o, we can perform + * a less-expensive Bcast */ +#ifdef ADIOI_MPE_LOGGING + MPE_Log_event(ADIOI_MPE_postwrite_a, 0, NULL); +#endif + if (fd->hints->cb_nodes == 1) + MPI_Bcast(error_code, 1, MPI_INT, + fd->hints->ranklist[0], fd->comm); + else { + tmp_error = *error_code; + MPI_Allreduce(&tmp_error, error_code, 1, MPI_INT, + MPI_MAX, fd->comm); + } +#ifdef ADIOI_MPE_LOGGING + MPE_Log_event(ADIOI_MPE_postwrite_b, 0, NULL); +#endif + + if ((old_error != MPI_SUCCESS) && (old_error != MPI_ERR_IO)) + *error_code = old_error; + + + if (!buftype_is_contig) + ADIOI_Delete_flattened(datatype); + + /* free all memory allocated for collective I/O */ + /* free others_req */ + for (i = 0; i < nprocs; i++) { + if (others_req[i].count) { + ADIOI_Free(others_req[i].offsets); + ADIOI_Free(others_req[i].lens); + ADIOI_Free(others_req[i].mem_ptrs); + } + } + ADIOI_Free(others_req); + /* free my_req here */ + for (i = 0; i < nprocs; i++) { + if (my_req[i].count) { + ADIOI_Free(my_req[i].offsets); + ADIOI_Free(my_req[i].lens); + } + } + ADIOI_Free(my_req); + for (i = 0; i < nprocs; i++) { + ADIOI_Free(buf_idx[i]); + } + ADIOI_Free(buf_idx); + ADIOI_Free(offset_list); + ADIOI_Free(len_list); + ADIOI_Free(st_offsets); + ADIOI_Free(end_offsets); + ADIOI_Free(striping_info); + +#ifdef HAVE_STATUS_SET_BYTES + if (status) { + int bufsize, size; + /* Don't set status if it isn't needed */ + MPI_Type_size(datatype, &size); + bufsize = size * count; + MPIR_Status_set_bytes(status, datatype, bufsize); + } + /* This is a temporary way of filling in status. 
The right way is to + * keep track of how much data was actually written during collective I/O. + */ +#endif + + fd->fp_sys_posn = -1; /* set it to null. */ +} + +/* If successful, error_code is set to MPI_SUCCESS. Otherwise an error + * code is created and returned in error_code. + */ +static void ADIOI_LUSTRE_Exch_and_write(ADIO_File fd, void *buf, + MPI_Datatype datatype, int nprocs, + int myrank, ADIOI_Access *others_req, + ADIOI_Access *my_req, + ADIO_Offset *offset_list, + ADIO_Offset *len_list, + int contig_access_count, + int *striping_info, int **buf_idx, + int *error_code) +{ + /* Send data to appropriate processes and write in sizes of no more + * than lustre stripe_size. + * The idea is to reduce the amount of extra memory required for + * collective I/O. If all data were written all at once, which is much + * easier, it would require temp space more than the size of user_buf, + * which is often unacceptable. For example, to write a distributed + * array to a file, where each local array is 8Mbytes, requiring + * at least another 8Mbytes of temp space is unacceptable. + */ + + int hole, i, j, m, flag, ntimes = 1 , max_ntimes, buftype_is_contig; + ADIO_Offset st_loc = -1, end_loc = -1, min_st_loc, max_end_loc; + ADIO_Offset off, req_off, send_off, iter_st_off, *off_list; + ADIO_Offset max_size, step_size = 0; + int real_size, req_len, send_len; + int *recv_curr_offlen_ptr, *recv_count, *recv_size; + int *send_curr_offlen_ptr, *send_size; + int *partial_recv, *sent_to_proc, *recv_start_pos; + int *send_buf_idx, *curr_to_proc, *done_to_proc; + int *this_buf_idx; + char *write_buf = NULL; + MPI_Status status; + ADIOI_Flatlist_node *flat_buf = NULL; + MPI_Aint buftype_extent; + int stripe_size = striping_info[0], avail_cb_nodes = striping_info[2]; + int data_sieving = 0; + + *error_code = MPI_SUCCESS; /* changed below if error */ + /* only I/O errors are currently reported */ + + /* calculate the number of writes of stripe size to be done. 
+ * That gives the no. of communication phases as well. + * Note: + * Because we redistribute data in stripe-contiguous pattern for Lustre, + * each process has the same no. of communication phases. + */ + + for (i = 0; i < nprocs; i++) { + if (others_req[i].count) { + st_loc = others_req[i].offsets[0]; + end_loc = others_req[i].offsets[0]; + break; + } + } + for (i = 0; i < nprocs; i++) { + for (j = 0; j < others_req[i].count; j++) { + st_loc = ADIOI_MIN(st_loc, others_req[i].offsets[j]); + end_loc = ADIOI_MAX(end_loc, (others_req[i].offsets[j] + + others_req[i].lens[j] - 1)); + } + } + /* both still -1: no request was found above, so this process does no writing. */ + if ((st_loc == -1) && (end_loc == -1)) + ntimes = 0; + MPI_Allreduce(&end_loc, &max_end_loc, 1, MPI_LONG_LONG_INT, MPI_MAX, fd->comm); + /* keep a process with no requests (st_loc == -1) from forcing min_st_loc to -1 */ + if (st_loc == -1) + st_loc = max_end_loc; + MPI_Allreduce(&st_loc, &min_st_loc, 1, MPI_LONG_LONG_INT, MPI_MIN, fd->comm); + /* align min_st_loc downward to a stripe_size boundary */ + min_st_loc -= min_st_loc % (ADIO_Offset)stripe_size; + + /* In each iteration only avail_cb_nodes I/O clients perform I/O, + * so at most step_size = avail_cb_nodes * stripe_size bytes are written, + * and max_ntimes = ceil(whole_file_portion / step_size). + */ + step_size = (ADIO_Offset) avail_cb_nodes * stripe_size; + max_ntimes = (max_end_loc - min_st_loc + 1) / step_size + + (((max_end_loc - min_st_loc + 1) % step_size) ?
1 : 0); +/* max_ntimes = (int)((max_end_loc - min_st_loc) / step_size + 1); */ + if (ntimes) + write_buf = (char *) ADIOI_Malloc(stripe_size); + + /* off_list[m] = smallest request offset that falls in iteration m (max_end_loc if none) */ + off_list = (ADIO_Offset *) ADIOI_Malloc(max_ntimes * sizeof(ADIO_Offset)); + for (m = 0; m < max_ntimes; m ++) + off_list[m] = max_end_loc; + for (i = 0; i < nprocs; i++) { + for (j = 0; j < others_req[i].count; j ++) { + req_off = others_req[i].offsets[j]; + m = (int)((req_off - min_st_loc) / step_size); + off_list[m] = ADIOI_MIN(off_list[m], req_off); + } + } + + recv_curr_offlen_ptr = (int *) ADIOI_Calloc(nprocs, sizeof(int)); + send_curr_offlen_ptr = (int *) ADIOI_Calloc(nprocs, sizeof(int)); + /* per-process cursors into others_req[i] and my_req[i], advanced in every iteration of the loop below. calloc initializes to 0. */ + + recv_count = (int *) ADIOI_Malloc(nprocs * sizeof(int)); + /* to store count of how many off-len pairs per proc are satisfied + in an iteration. */ + + send_size = (int *) ADIOI_Malloc(nprocs * sizeof(int)); + /* total size of data to be sent to each proc. in an iteration. + Indexed by rank, hence of size nprocs. */ + + recv_size = (int *) ADIOI_Malloc(nprocs * sizeof(int)); + /* total size of data to be recd. from each proc. in an iteration. */ + + sent_to_proc = (int *) ADIOI_Calloc(nprocs, sizeof(int)); + /* amount of data sent to each proc so far. Used in + ADIOI_LUSTRE_Fill_send_buffer. initialized to 0 here.
*/ + + send_buf_idx = (int *) ADIOI_Malloc(nprocs * sizeof(int)); + curr_to_proc = (int *) ADIOI_Malloc(nprocs * sizeof(int)); + done_to_proc = (int *) ADIOI_Malloc(nprocs * sizeof(int)); + /* Above three are used in ADIOI_Fill_send_buffer */ + + this_buf_idx = (int *) ADIOI_Malloc(nprocs * sizeof(int)); + + recv_start_pos = (int *) ADIOI_Malloc(nprocs * sizeof(int)); + /* used to store the starting value of recv_curr_offlen_ptr[i] in + this iteration */ + + ADIOI_Datatype_iscontig(datatype, &buftype_is_contig); + if (!buftype_is_contig) { + ADIOI_Flatten_datatype(datatype); + flat_buf = ADIOI_Flatlist; + while (flat_buf->type != datatype) + flat_buf = flat_buf->next; + } + MPI_Type_extent(datatype, &buftype_extent); + /* I need to check if there are any outstanding nonblocking writes to + * the file, which could potentially interfere with the writes taking + * place in this collective write call. Since this is not likely to be + * common, let me do the simplest thing possible here: Each process + * completes all pending nonblocking operations before completing. + */ + /*ADIOI_Complete_async(error_code); + if (*error_code != MPI_SUCCESS) return; + MPI_Barrier(fd->comm); + */ + + iter_st_off = min_st_loc; + + /* Although we have recognized the data according to OST index, + * a read-modify-write will be done if there is a hole between the data. + * For example: if blocksize=60, xfersize=30 and stripe_size=100, + * then rank0 will collect data [0, 30] and [60, 90] then write. There + * is a hole in [30, 60], which will cause a read-modify-write in [0, 90]. + * + * To reduce its impact on the performance, we can disable data sieving + * by hint "ds_in_coll". + */ + /* check the hint for data sieving */ + data_sieving = fd->hints->fs_hints.lustre.ds_in_coll; + + for (m = 0; m < max_ntimes; m++) { + /* go through all others_req and my_req to check which will be received + * and sent in this iteration. 
+ */ + + /* Note that MPI guarantees that displacements in filetypes are in + monotonically nondecreasing order and that, for writes, the + filetypes cannot specify overlapping regions in the file. This + simplifies implementation a bit compared to reads. */ + + /* + off = start offset in the file for the data to be written in + this iteration + iter_st_off = start offset of this iteration + real_size = size of data written (bytes) corresponding to off + max_size = possible maximum size of data written in this iteration + req_off = offset in the file for a particular contiguous request minus + what was satisfied in previous iteration + send_off = offset the request needed by other processes in this iteration + req_len = size corresponding to req_off + send_len = size corresponding to send_off + */ + + /* first calculate what should be communicated */ + for (i = 0; i < nprocs; i++) + recv_count[i] = recv_size[i] = send_size[i] = 0; + + off = off_list[m]; + max_size = ADIOI_MIN(step_size, max_end_loc - iter_st_off + 1); + real_size = (int) ADIOI_MIN((off / stripe_size + 1) * stripe_size - + off, + end_loc - off + 1); + + for (i = 0; i < nprocs; i++) { + if (my_req[i].count) { + this_buf_idx[i] = buf_idx[i][send_curr_offlen_ptr[i]]; + for (j = send_curr_offlen_ptr[i]; j < my_req[i].count; j++) { + send_off = my_req[i].offsets[j]; + send_len = my_req[i].lens[j]; + if (send_off < iter_st_off + max_size) { + send_size[i] += send_len; + } else { + break; + } + } + send_curr_offlen_ptr[i] = j; + } + if (others_req[i].count) { + recv_start_pos[i] = recv_curr_offlen_ptr[i]; + for (j = recv_curr_offlen_ptr[i]; j < others_req[i].count; j++) { + req_off = others_req[i].offsets[j]; + req_len = others_req[i].lens[j]; + if (req_off < iter_st_off + max_size) { + recv_count[i]++; + ADIOI_Assert((((ADIO_Offset)(MPIR_Upint)write_buf)+req_off-off) == (ADIO_Offset)(MPIR_Upint)(write_buf+req_off-off)); + MPI_Address(write_buf + req_off - off, + &(others_req[i].mem_ptrs[j])); + 
recv_size[i] += req_len; + } else { + break; + } + } + recv_curr_offlen_ptr[i] = j; + } + } + /* use variable "hole" to pass data_sieving flag into W_Exchange_data */ + hole = data_sieving; + ADIOI_LUSTRE_W_Exchange_data(fd, buf, write_buf, flat_buf, offset_list, + len_list, send_size, recv_size, off, real_size, + recv_count, recv_start_pos, partial_recv, + sent_to_proc, nprocs, myrank, + buftype_is_contig, contig_access_count, + striping_info, others_req, send_buf_idx, + curr_to_proc, done_to_proc, &hole, m, + buftype_extent, this_buf_idx, error_code); + if (*error_code != MPI_SUCCESS) + goto over; + + flag = 0; + for (i = 0; i < nprocs; i++) + if (recv_count[i]) { + flag = 1; + break; + } + if (flag) { + /* check whether to do data sieving */ + if(data_sieving == ADIOI_HINT_ENABLE) { + ADIO_WriteContig(fd, write_buf, real_size, MPI_BYTE, + ADIO_EXPLICIT_OFFSET, off, &status, + error_code); + } else { + /* if there is no hole, write data in one time; + * otherwise, write data in several times */ + if (!hole) { + ADIO_WriteContig(fd, write_buf, real_size, MPI_BYTE, + ADIO_EXPLICIT_OFFSET, off, &status, + error_code); + } else { + for (i = 0; i < nprocs; i++) { + if (others_req[i].count) { + for (j = 0; j < others_req[i].count; j++) { + if (others_req[i].offsets[j] < off + real_size && + others_req[i].offsets[j] >= off) { + ADIO_WriteContig(fd, + write_buf + others_req[i].offsets[j] - off, + others_req[i].lens[j], + MPI_BYTE, ADIO_EXPLICIT_OFFSET, + others_req[i].offsets[j], &status, + error_code); + if (*error_code != MPI_SUCCESS) + goto over; + } + } + } + } + } + } + if (*error_code != MPI_SUCCESS) + goto over; + } + iter_st_off += max_size; + } +over: + if (ntimes) + ADIOI_Free(write_buf); + ADIOI_Free(recv_curr_offlen_ptr); + ADIOI_Free(send_curr_offlen_ptr); + ADIOI_Free(recv_count); + ADIOI_Free(send_size); + ADIOI_Free(recv_size); + ADIOI_Free(sent_to_proc); + ADIOI_Free(recv_start_pos); + ADIOI_Free(send_buf_idx); + ADIOI_Free(curr_to_proc); + 
ADIOI_Free(done_to_proc); + ADIOI_Free(this_buf_idx); + ADIOI_Free(off_list); +} + +/* Sets error_code to MPI_SUCCESS if successful, or creates an error code + * in the case of error. + */ +static void ADIOI_LUSTRE_W_Exchange_data(ADIO_File fd, void *buf, + char *write_buf, + ADIOI_Flatlist_node *flat_buf, + ADIO_Offset *offset_list, + ADIO_Offset *len_list, int *send_size, + int *recv_size, ADIO_Offset off, + int size, int *count, + int *start_pos, int *partial_recv, + int *sent_to_proc, int nprocs, + int myrank, int buftype_is_contig, + int contig_access_count, + int *striping_info, + ADIOI_Access *others_req, + int *send_buf_idx, + int *curr_to_proc, int *done_to_proc, + int *hole, int iter, + MPI_Aint buftype_extent, + int *buf_idx, int *error_code) +{ + int i, j, nprocs_recv, nprocs_send, err; + char **send_buf = NULL; + MPI_Request *requests, *send_req; + MPI_Datatype *recv_types; + MPI_Status *statuses, status; + int *srt_len, sum, sum_recv; + ADIO_Offset *srt_off; + int data_sieving = *hole; + static char myname[] = "ADIOI_W_EXCHANGE_DATA"; + + /* create derived datatypes for recv */ + nprocs_recv = 0; + for (i = 0; i < nprocs; i++) + if (recv_size[i]) + nprocs_recv++; + + recv_types = (MPI_Datatype *) ADIOI_Malloc((nprocs_recv + 1) * + sizeof(MPI_Datatype)); + /* +1 to avoid a 0-size malloc */ + + j = 0; + for (i = 0; i < nprocs; i++) { + if (recv_size[i]) { + MPI_Type_hindexed(count[i], + &(others_req[i].lens[start_pos[i]]), + &(others_req[i].mem_ptrs[start_pos[i]]), + MPI_BYTE, recv_types + j); + /* absolute displacements; use MPI_BOTTOM in recv */ + MPI_Type_commit(recv_types + j); + j++; + } + } + + /* To avoid a read-modify-write, + * check if there are holes in the data to be written. + * For this, merge the (sorted) offset lists others_req using a heap-merge. 
+ */ + + sum = 0; + for (i = 0; i < nprocs; i++) + sum += count[i]; + srt_off = (ADIO_Offset *) ADIOI_Malloc((sum + 1) * sizeof(ADIO_Offset)); + srt_len = (int *) ADIOI_Malloc((sum + 1) * sizeof(int)); + /* +1 to avoid a 0-size malloc */ + + ADIOI_Heap_merge(others_req, count, srt_off, srt_len, start_pos, + nprocs, nprocs_recv, sum); + + /* check if there are any holes */ + *hole = 0; + for (i = 0; i < sum - 1; i++) { + if (srt_off[i] + srt_len[i] < srt_off[i + 1]) { + *hole = 1; + break; + } + } + /* In some cases (see John Bent ROMIO REQ # 835), an odd interaction + * between aggregation, nominally contiguous regions, and cb_buffer_size + * should be handled with a read-modify-write (otherwise we will write out + * more data than we receive from everyone else (inclusive), so override + * hole detection + */ + if (*hole == 0) { + sum_recv = 0; + for (i = 0; i < nprocs; i++) + sum_recv += recv_size[i]; + if (size > sum_recv) + *hole = 1; + } + /* check the hint for data sieving */ + if (data_sieving == ADIOI_HINT_ENABLE && nprocs_recv && *hole) { + ADIO_ReadContig(fd, write_buf, size, MPI_BYTE, + ADIO_EXPLICIT_OFFSET, off, &status, &err); + // --BEGIN ERROR HANDLING-- + if (err != MPI_SUCCESS) { + *error_code = MPIO_Err_create_code(err, + MPIR_ERR_RECOVERABLE, + myname, __LINE__, + MPI_ERR_IO, + "**ioRMWrdwr", 0); + ADIOI_Free(recv_types); + ADIOI_Free(srt_off); + ADIOI_Free(srt_len); + return; + } + // --END ERROR HANDLING-- + } + ADIOI_Free(srt_off); + ADIOI_Free(srt_len); + + nprocs_send = 0; + for (i = 0; i < nprocs; i++) + if (send_size[i]) + nprocs_send++; + + if (fd->atomicity) { + /* bug fix from Wei-keng Liao and Kenin Coloma */ + requests = (MPI_Request *) ADIOI_Malloc((nprocs_send + 1) * + sizeof(MPI_Request)); + send_req = requests; + } else { + requests = (MPI_Request *) ADIOI_Malloc((nprocs_send + nprocs_recv + 1)* + sizeof(MPI_Request)); + /* +1 to avoid a 0-size malloc */ + + /* post receives */ + j = 0; + for (i = 0; i < nprocs; i++) { + if 
(recv_size[i]) { + MPI_Irecv(MPI_BOTTOM, 1, recv_types[j], i, + myrank + i + 100 * iter, fd->comm, requests + j); + j++; + } + } + send_req = requests + nprocs_recv; + } + + /* post sends. + * if buftype_is_contig, data can be directly sent from + * user buf at location given by buf_idx. else use send_buf. + */ + if (buftype_is_contig) { + j = 0; + for (i = 0; i < nprocs; i++) + if (send_size[i]) { + ADIOI_Assert(buf_idx[i] != -1); + MPI_Isend(((char *) buf) + buf_idx[i], send_size[i], + MPI_BYTE, i, myrank + i + 100 * iter, fd->comm, + send_req + j); + j++; + } + } else + if (nprocs_send) { + /* buftype is not contig */ + send_buf = (char **) ADIOI_Malloc(nprocs * sizeof(char *)); + for (i = 0; i < nprocs; i++) + if (send_size[i]) + send_buf[i] = (char *) ADIOI_Malloc(send_size[i]); + + ADIOI_LUSTRE_Fill_send_buffer(fd, buf, flat_buf, send_buf, offset_list, + len_list, send_size, send_req, + sent_to_proc, nprocs, myrank, + contig_access_count, striping_info, + send_buf_idx, curr_to_proc, done_to_proc, + iter, buftype_extent); + /* the send is done in ADIOI_Fill_send_buffer */ + } + + /* bug fix from Wei-keng Liao and Kenin Coloma */ + if (fd->atomicity) { + j = 0; + for (i = 0; i < nprocs; i++) { + MPI_Status wkl_status; + if (recv_size[i]) { + MPI_Recv(MPI_BOTTOM, 1, recv_types[j], i, + myrank + i + 100 * iter, fd->comm, &wkl_status); + j++; + } + } + } + + for (i = 0; i < nprocs_recv; i++) + MPI_Type_free(recv_types + i); + ADIOI_Free(recv_types); + + /* bug fix from Wei-keng Liao and Kenin Coloma */ + /* +1 to avoid a 0-size malloc */ + if (fd->atomicity) { + statuses = (MPI_Status *) ADIOI_Malloc((nprocs_send + 1) * + sizeof(MPI_Status)); + } else { + statuses = (MPI_Status *) ADIOI_Malloc((nprocs_send + nprocs_recv + 1) * + sizeof(MPI_Status)); + } + +#ifdef NEEDS_MPI_TEST + i = 0; + if (fd->atomicity) { + /* bug fix from Wei-keng Liao and Kenin Coloma */ + while (!i) + MPI_Testall(nprocs_send, send_req, &i, statuses); + } else { + while (!i) + 
MPI_Testall(nprocs_send + nprocs_recv, requests, &i, statuses); + } +#else + /* bug fix from Wei-keng Liao and Kenin Coloma */ + if (fd->atomicity) + MPI_Waitall(nprocs_send, send_req, statuses); + else + MPI_Waitall(nprocs_send + nprocs_recv, requests, statuses); +#endif + ADIOI_Free(statuses); + ADIOI_Free(requests); + if (!buftype_is_contig && nprocs_send) { + for (i = 0; i < nprocs; i++) + if (send_size[i]) + ADIOI_Free(send_buf[i]); + ADIOI_Free(send_buf); + } +} + +#define ADIOI_BUF_INCR \ +{ \ + while (buf_incr) { \ + size_in_buf = ADIOI_MIN(buf_incr, flat_buf_sz); \ + user_buf_idx += size_in_buf; \ + flat_buf_sz -= size_in_buf; \ + if (!flat_buf_sz) { \ + if (flat_buf_idx < (flat_buf->count - 1)) flat_buf_idx++; \ + else { \ + flat_buf_idx = 0; \ + n_buftypes++; \ + } \ + user_buf_idx = flat_buf->indices[flat_buf_idx] + \ + (ADIO_Offset)n_buftypes*(ADIO_Offset)buftype_extent; \ + flat_buf_sz = flat_buf->blocklens[flat_buf_idx]; \ + } \ + buf_incr -= size_in_buf; \ + } \ +} + + +#define ADIOI_BUF_COPY \ +{ \ + while (size) { \ + size_in_buf = ADIOI_MIN(size, flat_buf_sz); \ + ADIOI_Assert((((ADIO_Offset)(MPIR_Upint)buf) + user_buf_idx) == (ADIO_Offset)(MPIR_Upint)((MPIR_Upint)buf + user_buf_idx)); \ + ADIOI_Assert(size_in_buf == (size_t)size_in_buf); \ + memcpy(&(send_buf[p][send_buf_idx[p]]), \ + ((char *) buf) + user_buf_idx, size_in_buf); \ + send_buf_idx[p] += size_in_buf; \ + user_buf_idx += size_in_buf; \ + flat_buf_sz -= size_in_buf; \ + if (!flat_buf_sz) { \ + if (flat_buf_idx < (flat_buf->count - 1)) flat_buf_idx++; \ + else { \ + flat_buf_idx = 0; \ + n_buftypes++; \ + } \ + user_buf_idx = flat_buf->indices[flat_buf_idx] + \ + (ADIO_Offset)n_buftypes*(ADIO_Offset)buftype_extent; \ + flat_buf_sz = flat_buf->blocklens[flat_buf_idx]; \ + } \ + size -= size_in_buf; \ + buf_incr -= size_in_buf; \ + } \ + ADIOI_BUF_INCR \ +} + +static void ADIOI_LUSTRE_Fill_send_buffer(ADIO_File fd, void *buf, + ADIOI_Flatlist_node *flat_buf, + char **send_buf, + 
ADIO_Offset *offset_list, + ADIO_Offset *len_list, int *send_size, + MPI_Request *requests, + int *sent_to_proc, int nprocs, + int myrank, + int contig_access_count, + int *striping_info, + int *send_buf_idx, + int *curr_to_proc, + int *done_to_proc, int iter, + MPI_Aint buftype_extent) +{ + /* this function is only called if buftype is not contig */ + int i, p, flat_buf_idx, size; + int flat_buf_sz, buf_incr, size_in_buf, jj, n_buftypes; + ADIO_Offset off, len, rem_len, user_buf_idx; + + /* curr_to_proc[p] = amount of data sent to proc. p that has already + * been accounted for so far + * done_to_proc[p] = amount of data already sent to proc. p in + * previous iterations + * user_buf_idx = current location in user buffer + * send_buf_idx[p] = current location in send_buf of proc. p + */ + + for (i = 0; i < nprocs; i++) { + send_buf_idx[i] = curr_to_proc[i] = 0; + done_to_proc[i] = sent_to_proc[i]; + } + jj = 0; + + user_buf_idx = flat_buf->indices[0]; + flat_buf_idx = 0; + n_buftypes = 0; + flat_buf_sz = flat_buf->blocklens[0]; + + /* flat_buf_idx = current index into flattened buftype + * flat_buf_sz = size of current contiguous component in flattened buf + */ + for (i = 0; i < contig_access_count; i++) { + off = offset_list[i]; + rem_len = (ADIO_Offset) len_list[i]; + + /*this request may span to more than one process */ + while (rem_len != 0) { + len = rem_len; + /* NOTE: len value is modified by ADIOI_Calc_aggregator() to be no + * longer than the single region that processor "p" is responsible + * for. 
+ */ + p = ADIOI_LUSTRE_Calc_aggregator(fd, off, &len, striping_info); + + if (send_buf_idx[p] < send_size[p]) { + if (curr_to_proc[p] + len > done_to_proc[p]) { + if (done_to_proc[p] > curr_to_proc[p]) { + size = (int) ADIOI_MIN(curr_to_proc[p] + len - + done_to_proc[p], + send_size[p] - + send_buf_idx[p]); + buf_incr = done_to_proc[p] - curr_to_proc[p]; + ADIOI_BUF_INCR + ADIOI_Assert((curr_to_proc[p] + len - done_to_proc[p]) == (unsigned)(curr_to_proc[p] + len - done_to_proc[p])); + buf_incr = (int) (curr_to_proc[p] + len - + done_to_proc[p]); + ADIOI_Assert((done_to_proc[p] + size) == (unsigned)(done_to_proc[p] + size)); + curr_to_proc[p] = done_to_proc[p] + size; + ADIOI_BUF_COPY + } else { + size = (int) ADIOI_MIN(len, send_size[p] - + send_buf_idx[p]); + buf_incr = (int) len; + ADIOI_Assert((curr_to_proc[p] + size) == (unsigned)((ADIO_Offset)curr_to_proc[p] + size)); + curr_to_proc[p] += size; + ADIOI_BUF_COPY + } + if (send_buf_idx[p] == send_size[p]) { + MPI_Isend(send_buf[p], send_size[p], MPI_BYTE, p, + myrank + p + 100 * iter, fd->comm, + requests + jj); + jj++; + } + } else { + ADIOI_Assert((curr_to_proc[p] + len) == (unsigned)((ADIO_Offset)curr_to_proc[p] + len)); + curr_to_proc[p] += (int) len; + buf_incr = (int) len; + ADIOI_BUF_INCR + } + } else { + buf_incr = (int) len; + ADIOI_BUF_INCR + } + off += len; + rem_len -= len; + } + } + for (i = 0; i < nprocs; i++) + if (send_size[i]) + sent_to_proc[i] = curr_to_proc[i]; +} diff --git a/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_wrstr.c b/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_wrstr.c new file mode 100644 index 0000000000..2854a37c7e --- /dev/null +++ b/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_wrstr.c @@ -0,0 +1,530 @@ +/* -*- Mode: C; c-basic-offset:4 ; -*- */ +/* + * Copyright (C) 1997 University of Chicago. + * See COPYRIGHT notice in top-level directory. 
+ * + * Copyright (C) 2007 Oak Ridge National Laboratory + * + * Copyright (C) 2008 Sun Microsystems, Lustre group + */ + +#include "ad_lustre.h" +#include "adio_extern.h" + +#define ADIOI_BUFFERED_WRITE \ +{ \ + if (req_off >= writebuf_off + writebuf_len) { \ + if (writebuf_len) { \ + ADIO_WriteContig(fd, writebuf, writebuf_len, MPI_BYTE, \ + ADIO_EXPLICIT_OFFSET, writebuf_off, \ + &status1, error_code); \ + if (!(fd->atomicity)) \ + ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \ + if (*error_code != MPI_SUCCESS) { \ + *error_code = MPIO_Err_create_code(*error_code, \ + MPIR_ERR_RECOVERABLE, \ + myname, \ + __LINE__, MPI_ERR_IO, \ + "**iowswc", 0); \ + ADIOI_Free(writebuf); \ + return; \ + } \ + } \ + writebuf_off = req_off; \ + /* stripe_size alignment */ \ + writebuf_len = (unsigned) ADIOI_MIN(end_offset - writebuf_off + 1, \ + (writebuf_off / stripe_size + 1) * \ + stripe_size - writebuf_off); \ + if (!(fd->atomicity)) \ + ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \ + ADIO_ReadContig(fd, writebuf, writebuf_len, MPI_BYTE, \ + ADIO_EXPLICIT_OFFSET, \ + writebuf_off, &status1, error_code); \ + if (*error_code != MPI_SUCCESS) { \ + *error_code = MPIO_Err_create_code(*error_code, \ + MPIR_ERR_RECOVERABLE, \ + myname, \ + __LINE__, MPI_ERR_IO, \ + "**iowsrc", 0); \ + ADIOI_Free(writebuf); \ + return; \ + } \ + } \ + write_sz = (unsigned) (ADIOI_MIN(req_len, \ + writebuf_off + writebuf_len - req_off)); \ + ADIOI_Assert((ADIO_Offset)write_sz == \ + ADIOI_MIN(req_len, writebuf_off + writebuf_len - req_off)); \ + memcpy(writebuf + req_off - writebuf_off, (char *)buf +userbuf_off, write_sz); \ + while (write_sz != req_len) { \ + ADIO_WriteContig(fd, writebuf, writebuf_len, MPI_BYTE, \ + ADIO_EXPLICIT_OFFSET, writebuf_off, &status1, error_code); \ + if (!(fd->atomicity)) \ + ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \ + if (*error_code != MPI_SUCCESS) { \ + *error_code = MPIO_Err_create_code(*error_code, \ + MPIR_ERR_RECOVERABLE, 
myname, \ + __LINE__, MPI_ERR_IO, \ + "**iowswc", 0); \ + ADIOI_Free(writebuf); \ + return; \ + } \ + req_len -= write_sz; \ + userbuf_off += write_sz; \ + writebuf_off += writebuf_len; \ + /* stripe_size alignment */ \ + writebuf_len = (unsigned) ADIOI_MIN(end_offset - writebuf_off + 1, \ + (writebuf_off / stripe_size + 1) * \ + stripe_size - writebuf_off); \ + if (!(fd->atomicity)) \ + ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \ + ADIO_ReadContig(fd, writebuf, writebuf_len, MPI_BYTE, \ + ADIO_EXPLICIT_OFFSET, \ + writebuf_off, &status1, error_code); \ + if (*error_code != MPI_SUCCESS) { \ + *error_code = MPIO_Err_create_code(*error_code, \ + MPIR_ERR_RECOVERABLE, myname, \ + __LINE__, MPI_ERR_IO, \ + "**iowsrc", 0); \ + ADIOI_Free(writebuf); \ + return; \ + } \ + write_sz = ADIOI_MIN(req_len, writebuf_len); \ + memcpy(writebuf, (char *)buf + userbuf_off, write_sz); \ + } \ +} + + +/* this macro is used when filetype is contig and buftype is not contig. + it does not do a read-modify-write and does not lock*/ +#define ADIOI_BUFFERED_WRITE_WITHOUT_READ \ +{ \ + if (req_off >= writebuf_off + writebuf_len) { \ + ADIO_WriteContig(fd, writebuf, writebuf_len, MPI_BYTE, \ + ADIO_EXPLICIT_OFFSET, writebuf_off, &status1, \ + error_code); \ + if (*error_code != MPI_SUCCESS) { \ + *error_code = MPIO_Err_create_code(*error_code, \ + MPIR_ERR_RECOVERABLE, \ + myname, \ + __LINE__, MPI_ERR_IO, \ + "**iowswc", 0); \ + ADIOI_Free(writebuf); \ + return; \ + } \ + writebuf_off = req_off; \ + /* stripe_size alignment */ \ + writebuf_len = (unsigned) ADIOI_MIN(end_offset - writebuf_off + 1, \ + (writebuf_off / stripe_size + 1) * \ + stripe_size - writebuf_off); \ + } \ + write_sz = (unsigned) ADIOI_MIN(req_len, writebuf_off + writebuf_len - req_off); \ + ADIOI_Assert((ADIO_Offset)write_sz == ADIOI_MIN(req_len, writebuf_off + writebuf_len - req_off)); \ + memcpy(writebuf + req_off - writebuf_off, \ + (char *)buf + userbuf_off, write_sz); \ + while (write_sz != 
req_len) { \ + ADIO_WriteContig(fd, writebuf, writebuf_len, MPI_BYTE, \ + ADIO_EXPLICIT_OFFSET, writebuf_off, &status1, error_code); \ + if (*error_code != MPI_SUCCESS) { \ + *error_code = MPIO_Err_create_code(*error_code, \ + MPIR_ERR_RECOVERABLE, myname, \ + __LINE__, MPI_ERR_IO, \ + "**iowswc", 0); \ + ADIOI_Free(writebuf); \ + return; \ + } \ + req_len -= write_sz; \ + userbuf_off += write_sz; \ + writebuf_off += writebuf_len; \ + /* stripe_size alignment */ \ + writebuf_len = (unsigned) ADIOI_MIN(end_offset - writebuf_off + 1, \ + (writebuf_off / stripe_size + 1) * \ + stripe_size - writebuf_off); \ + write_sz = ADIOI_MIN(req_len, writebuf_len); \ + memcpy(writebuf, (char *)buf + userbuf_off, write_sz); \ + } \ +} + +void ADIOI_LUSTRE_WriteStrided(ADIO_File fd, void *buf, int count, + MPI_Datatype datatype, int file_ptr_type, + ADIO_Offset offset, ADIO_Status * status, + int *error_code) +{ + /* offset is in units of etype relative to the filetype. */ + ADIOI_Flatlist_node *flat_buf, *flat_file; + ADIO_Offset i_offset, sum, size_in_filetype; + int i, j, k, st_index=0; + int n_etypes_in_filetype; + ADIO_Offset num, size, n_filetypes, etype_in_filetype, st_n_filetypes; + ADIO_Offset abs_off_in_filetype=0; + int filetype_size, etype_size, buftype_size; + MPI_Aint filetype_extent, buftype_extent; + int buf_count, buftype_is_contig, filetype_is_contig; + ADIO_Offset userbuf_off; + ADIO_Offset off, req_off, disp, end_offset=0, writebuf_off, start_off; + char *writebuf; + unsigned bufsize, writebuf_len, write_sz; + ADIO_Status status1; + ADIO_Offset new_bwr_size, new_fwr_size, st_fwr_size, fwr_size=0, bwr_size, req_len; + int stripe_size; + static char myname[] = "ADIOI_LUSTRE_WriteStrided"; + + if (fd->hints->ds_write == ADIOI_HINT_DISABLE) { + /* if user has disabled data sieving on writes, use naive + * approach instead. 
+ */ + ADIOI_GEN_WriteStrided_naive(fd, + buf, + count, + datatype, + file_ptr_type, + offset, status, error_code); + return; + } + + *error_code = MPI_SUCCESS; /* changed below if error */ + + ADIOI_Datatype_iscontig(datatype, &buftype_is_contig); + ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig); + + MPI_Type_size(fd->filetype, &filetype_size); + if (!filetype_size) { + *error_code = MPI_SUCCESS; + return; + } + + MPI_Type_extent(fd->filetype, &filetype_extent); + MPI_Type_size(datatype, &buftype_size); + MPI_Type_extent(datatype, &buftype_extent); + etype_size = fd->etype_size; + + ADIOI_Assert((buftype_size * count) == ((ADIO_Offset)(unsigned)buftype_size * (ADIO_Offset)count)); + bufsize = buftype_size * count; + + /* get striping info */ + stripe_size = fd->hints->striping_unit; + + /* Different buftype to different filetype */ + if (!buftype_is_contig && filetype_is_contig) { + /* noncontiguous in memory, contiguous in file. */ + ADIOI_Flatten_datatype(datatype); + flat_buf = ADIOI_Flatlist; + while (flat_buf->type != datatype) + flat_buf = flat_buf->next; + + off = (file_ptr_type == ADIO_INDIVIDUAL) ? 
fd->fp_ind : + fd->disp + (ADIO_Offset)etype_size * offset; + + start_off = off; + end_offset = start_off + bufsize - 1; + /* write stripe size buffer each time */ + writebuf = (char *) ADIOI_Malloc(ADIOI_MIN(bufsize, stripe_size)); + writebuf_off = 0; + writebuf_len = 0; + + /* if atomicity is true, lock the region to be accessed */ + if (fd->atomicity) + ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, bufsize); + + for (j = 0; j < count; j++) { + for (i = 0; i < flat_buf->count; i++) { + userbuf_off = (ADIO_Offset)j * (ADIO_Offset)buftype_extent + + flat_buf->indices[i]; + req_off = off; + req_len = flat_buf->blocklens[i]; + ADIOI_BUFFERED_WRITE_WITHOUT_READ + off += flat_buf->blocklens[i]; + } + } + + /* write the buffer out finally */ + ADIO_WriteContig(fd, writebuf, writebuf_len, MPI_BYTE, + ADIO_EXPLICIT_OFFSET, writebuf_off, &status1, + error_code); + + if (fd->atomicity) + ADIOI_UNLOCK(fd, start_off, SEEK_SET, bufsize); + if (*error_code != MPI_SUCCESS) { + ADIOI_Free(writebuf); + return; + } + ADIOI_Free(writebuf); + if (file_ptr_type == ADIO_INDIVIDUAL) + fd->fp_ind = off; + } else { + /* noncontiguous in file */ + /* filetype already flattened in ADIO_Open */ + flat_file = ADIOI_Flatlist; + while (flat_file->type != fd->filetype) + flat_file = flat_file->next; + disp = fd->disp; + + if (file_ptr_type == ADIO_INDIVIDUAL) { + /* Wei-keng reworked type processing to be a bit more efficient */ + offset = fd->fp_ind - disp; + n_filetypes = (offset - flat_file->indices[0]) / filetype_extent; + offset -= (ADIO_Offset)n_filetypes * filetype_extent; + /* now offset is local to this extent */ + + /* find the block where offset is located, skip blocklens[i]==0 */ + for (i=0; icount; i++) { + ADIO_Offset dist; + if (flat_file->blocklens[i] == 0) continue; + dist = flat_file->indices[i] + flat_file->blocklens[i] - offset; + /* fwr_size is from offset to the end of block i */ + if (dist == 0) { + i++; + offset = flat_file->indices[i]; + fwr_size = flat_file->blocklens[i]; + 
break; + } + if (dist > 0) { + fwr_size = dist; + break; + } + } + st_index = i; /* starting index in flat_file->indices[] */ + offset += disp + (ADIO_Offset)n_filetypes*filetype_extent; + } + else { + n_etypes_in_filetype = filetype_size/etype_size; + n_filetypes = offset / n_etypes_in_filetype; + etype_in_filetype = offset % n_etypes_in_filetype; + size_in_filetype = etype_in_filetype * etype_size; + + sum = 0; + for (i = 0; i < flat_file->count; i++) { + sum += flat_file->blocklens[i]; + if (sum > size_in_filetype) { + st_index = i; + fwr_size = sum - size_in_filetype; + abs_off_in_filetype = flat_file->indices[i] + + size_in_filetype - (sum - flat_file->blocklens[i]); + break; + } + } + + /* abs. offset in bytes in the file */ + offset = disp + (ADIO_Offset) n_filetypes *filetype_extent + + abs_off_in_filetype; + } + + start_off = offset; + + /* Wei-keng Liao:write request is within single flat_file + * contig block*/ + /* this could happen, for example, with subarray types that are + * actually fairly contiguous */ + if (buftype_is_contig && bufsize <= fwr_size) { + req_off = start_off; + req_len = bufsize; + end_offset = start_off + bufsize - 1; + writebuf = (char *) ADIOI_Malloc(ADIOI_MIN(bufsize, stripe_size)); + memset(writebuf, -1, ADIOI_MIN(bufsize, stripe_size)); + writebuf_off = 0; + writebuf_len = 0; + userbuf_off = 0; + ADIOI_BUFFERED_WRITE_WITHOUT_READ + /* write the buffer out finally */ + ADIO_WriteContig(fd, writebuf, writebuf_len, MPI_BYTE, + ADIO_EXPLICIT_OFFSET, writebuf_off, &status1, + error_code); + + if (file_ptr_type == ADIO_INDIVIDUAL) { + /* update MPI-IO file pointer to point to the first byte + * that can be accessed in the fileview. 
*/ + fd->fp_ind = offset + bufsize; + if (bufsize == fwr_size) { + do { + st_index++; + if (st_index == flat_file->count) { + st_index = 0; + n_filetypes++; + } + } while (flat_file->blocklens[st_index] == 0); + fd->fp_ind = disp + flat_file->indices[st_index] + + (ADIO_Offset)n_filetypes*filetype_extent; + } + } + fd->fp_sys_posn = -1; /* set it to null. */ +#ifdef HAVE_STATUS_SET_BYTES + MPIR_Status_set_bytes(status, datatype, bufsize); +#endif + ADIOI_Free(writebuf); + return; + } + + /* Calculate end_offset, the last byte-offset that will be accessed. + e.g., if start_offset=0 and 100 bytes to be write, end_offset=99*/ + + st_fwr_size = fwr_size; + st_n_filetypes = n_filetypes; + i_offset = 0; + j = st_index; + off = offset; + fwr_size = ADIOI_MIN(st_fwr_size, bufsize); + while (i_offset < bufsize) { + i_offset += fwr_size; + end_offset = off + fwr_size - 1; + + j = (j+1) % flat_file->count; + n_filetypes += (j == 0) ? 1 : 0; + while (flat_file->blocklens[j]==0) { + j = (j+1) % flat_file->count; + n_filetypes += (j == 0) ? 1 : 0; + } + + off = disp + flat_file->indices[j] + + n_filetypes*(ADIO_Offset)filetype_extent; + fwr_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i_offset); + } + +/* if atomicity is true, lock the region to be accessed */ + if (fd->atomicity) + ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1); + + writebuf_off = 0; + writebuf_len = 0; + writebuf = (char *) ADIOI_Malloc(stripe_size); + memset(writebuf, -1, stripe_size); + + if (buftype_is_contig && !filetype_is_contig) { + +/* contiguous in memory, noncontiguous in file. should be the most + common case. */ + + i_offset = 0; + j = st_index; + off = offset; + n_filetypes = st_n_filetypes; + fwr_size = ADIOI_MIN(st_fwr_size, bufsize); + while (i_offset < bufsize) { + if (fwr_size) { + /* TYPE_UB and TYPE_LB can result in + fwr_size = 0. 
save system call in such cases */ + /* lseek(fd->fd_sys, off, SEEK_SET); + err = write(fd->fd_sys, ((char *) buf) + i_offset, fwr_size);*/ + + req_off = off; + req_len = fwr_size; + userbuf_off = i_offset; + ADIOI_BUFFERED_WRITE + } + i_offset += fwr_size; + + if (off + fwr_size < disp + flat_file->indices[j] + + flat_file->blocklens[j] + + n_filetypes*(ADIO_Offset)filetype_extent) + off += fwr_size; + /* did not reach end of contiguous block in filetype. + no more I/O needed. off is incremented by fwr_size. */ + else { + j = (j+1) % flat_file->count; + n_filetypes += (j == 0) ? 1 : 0; + while (flat_file->blocklens[j]==0) { + j = (j+1) % flat_file->count; + n_filetypes += (j == 0) ? 1 : 0; + } + off = disp + flat_file->indices[j] + + n_filetypes*(ADIO_Offset)filetype_extent; + fwr_size = ADIOI_MIN(flat_file->blocklens[j], + bufsize-i_offset); + } + } + } + else { +/* noncontiguous in memory as well as in file */ + + ADIOI_Flatten_datatype(datatype); + flat_buf = ADIOI_Flatlist; + while (flat_buf->type != datatype) flat_buf = flat_buf->next; + + k = num = buf_count = 0; + i_offset = flat_buf->indices[0]; + j = st_index; + off = offset; + n_filetypes = st_n_filetypes; + fwr_size = st_fwr_size; + bwr_size = flat_buf->blocklens[0]; + + while (num < bufsize) { + size = ADIOI_MIN(fwr_size, bwr_size); + if (size) { + /* lseek(fd->fd_sys, off, SEEK_SET); + err = write(fd->fd_sys, ((char *) buf) + i_offset, size); */ + + req_off = off; + req_len = size; + userbuf_off = i_offset; + ADIOI_BUFFERED_WRITE + } + + new_fwr_size = fwr_size; + new_bwr_size = bwr_size; + + if (size == fwr_size) { +/* reached end of contiguous block in file */ + j = (j+1) % flat_file->count; + n_filetypes += (j == 0) ? 1 : 0; + while (flat_file->blocklens[j]==0) { + j = (j+1) % flat_file->count; + n_filetypes += (j == 0) ? 
1 : 0; + } + + off = disp + flat_file->indices[j] + + n_filetypes*(ADIO_Offset)filetype_extent; + + new_fwr_size = flat_file->blocklens[j]; + if (size != bwr_size) { + i_offset += size; + new_bwr_size -= size; + } + } + + if (size == bwr_size) { +/* reached end of contiguous block in memory */ + + k = (k + 1)%flat_buf->count; + buf_count++; + i_offset = (ADIO_Offset)buftype_extent * + (ADIO_Offset)(buf_count/flat_buf->count) + + flat_buf->indices[k]; + new_bwr_size = flat_buf->blocklens[k]; + if (size != fwr_size) { + off += size; + new_fwr_size -= size; + } + } + num += size; + fwr_size = new_fwr_size; + bwr_size = new_bwr_size; + } + } + + /* write the buffer out finally */ + if (writebuf_len) { + ADIO_WriteContig(fd, writebuf, writebuf_len, MPI_BYTE, + ADIO_EXPLICIT_OFFSET, + writebuf_off, &status1, error_code); + if (!(fd->atomicity)) + ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len); + if (*error_code != MPI_SUCCESS) return; + } + if (fd->atomicity) + ADIOI_UNLOCK(fd, start_off, SEEK_SET, end_offset-start_off+1); + + ADIOI_Free(writebuf); + + if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind = off; + } + + fd->fp_sys_posn = -1; /* set it to null. */ + +#ifdef HAVE_STATUS_SET_BYTES + MPIR_Status_set_bytes(status, datatype, bufsize); +/* This is a temporary way of filling in status. The right way is to + keep track of how much data was actually written by ADIOI_BUFFERED_WRITE. 
*/ +#endif + + if (!buftype_is_contig) + ADIOI_Delete_flattened(datatype); +} diff --git a/ompi/mca/io/romio/romio/adio/ad_nfs/Makefile.am b/ompi/mca/io/romio/romio/adio/ad_nfs/Makefile.am index 21116d71dc..0caa867c3f 100644 --- a/ompi/mca/io/romio/romio/adio/ad_nfs/Makefile.am +++ b/ompi/mca/io/romio/romio/adio/ad_nfs/Makefile.am @@ -24,6 +24,7 @@ libadio_nfs_la_SOURCES = \ ad_nfs.h \ ad_nfs_done.c \ ad_nfs_fcntl.c \ + ad_nfs_features.c \ ad_nfs_getsh.c \ ad_nfs_hints.c \ ad_nfs_iread.c \ diff --git a/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs.c b/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs.c index cb505980db..c3f8b4ce4f 100644 --- a/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs.c +++ b/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs.c @@ -12,6 +12,7 @@ struct ADIOI_Fns_struct ADIO_NFS_operations = { ADIOI_NFS_Open, /* Open */ + ADIOI_FAILSAFE_OpenColl, /* OpenColl */ ADIOI_NFS_ReadContig, /* ReadContig */ ADIOI_NFS_WriteContig, /* WriteContig */ ADIOI_GEN_ReadStridedColl, /* ReadStridedColl */ @@ -35,4 +36,5 @@ struct ADIOI_Fns_struct ADIO_NFS_operations = { ADIOI_GEN_Flush, /* Flush */ ADIOI_NFS_Resize, /* Resize */ ADIOI_GEN_Delete, /* Delete */ + ADIOI_NFS_Feature, /* Features */ }; diff --git a/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs.h b/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs.h index e6fdda2903..5a1daa5f64 100644 --- a/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs.h +++ b/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs.h @@ -78,5 +78,6 @@ void ADIOI_NFS_Get_shared_fp(ADIO_File fd, int size, ADIO_Offset *shared_fp, int *error_code); void ADIOI_NFS_Set_shared_fp(ADIO_File fd, ADIO_Offset offset, int *error_code); void ADIOI_NFS_Resize(ADIO_File fd, ADIO_Offset size, int *error_code); +int ADIOI_NFS_Feature(ADIO_File fd, int feature_flag); #endif diff --git a/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs_features.c b/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs_features.c new file mode 100644 index 0000000000..01768f71f3 --- /dev/null +++ 
b/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs_features.c @@ -0,0 +1,16 @@ +#include "adio.h" +#include "ad_nfs.h" + +int ADIOI_NFS_Feature(ADIO_File fd, int flag) +{ + switch(flag) { + case ADIO_SHARED_FP: + case ADIO_LOCKS: + case ADIO_SEQUENTIAL: + case ADIO_DATA_SIEVING_WRITES: + return 1; + case ADIO_SCALABLE_OPEN: + default: + return 0; + } +} diff --git a/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs_iwrite.c b/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs_iwrite.c index 4fd4677eff..0cfd562170 100644 --- a/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs_iwrite.c +++ b/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs_iwrite.c @@ -59,6 +59,7 @@ int ADIOI_NFS_aio(ADIO_File fd, void *buf, int len, ADIO_Offset offset, struct aiocb *aiocbp; ADIOI_AIO_Request *aio_req; + MPI_Status status; fd_sys = fd->fd_sys; @@ -108,7 +109,7 @@ int ADIOI_NFS_aio(ADIO_File fd, void *buf, int len, ADIO_Offset offset, /* exceeded the max. no. of outstanding requests. complete all previous async. requests and try again. */ ADIO_WriteContig(fd, buf, len, MPI_BYTE, ADIO_EXPLICIT_OFFSET, - offset, NULL, &error_code); + offset, &status, &error_code); MPIO_Completed_request_create(&fd, len, &error_code, request); return 0; } else { diff --git a/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs_read.c b/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs_read.c index cf8f01895d..b28a57bb55 100644 --- a/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs_read.c +++ b/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs_read.c @@ -177,7 +177,7 @@ void ADIOI_NFS_ReadStrided(ADIO_File fd, void *buf, int count, ADIO_Offset userbuf_off; ADIO_Offset off, req_off, disp, end_offset=0, readbuf_off, start_off; char *readbuf, *tmp_buf, *value; - int flag, st_frd_size, st_n_filetypes, readbuf_len; + int st_frd_size, st_n_filetypes, readbuf_len; int new_brd_size, new_frd_size, err_flag=0, info_flag, max_bufsize; static char myname[] = "ADIOI_NFS_READSTRIDED"; @@ -201,7 +201,7 @@ void ADIOI_NFS_ReadStrided(ADIO_File fd, void *buf, int count, /* get 
max_bufsize from the info object. */ value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char)); - MPI_Info_get(fd->info, "ind_rd_buffer_size", MPI_MAX_INFO_VAL, value, + ADIOI_Info_get(fd->info, "ind_rd_buffer_size", MPI_MAX_INFO_VAL, value, &info_flag); max_bufsize = atoi(value); ADIOI_Free(value); @@ -278,25 +278,32 @@ void ADIOI_NFS_ReadStrided(ADIO_File fd, void *buf, int count, disp = fd->disp; if (file_ptr_type == ADIO_INDIVIDUAL) { - offset = fd->fp_ind; /* in bytes */ - n_filetypes = -1; - flag = 0; - while (!flag) { - n_filetypes++; - for (i=0; icount; i++) { - if (disp + flat_file->indices[i] + - (ADIO_Offset) n_filetypes*filetype_extent + flat_file->blocklens[i] - >= offset) { - st_index = i; - frd_size = (int) (disp + flat_file->indices[i] + - (ADIO_Offset) n_filetypes*filetype_extent - + flat_file->blocklens[i] - offset); - flag = 1; - break; - } - } - } - } + /* Wei-keng reworked type processing to be a bit more efficient */ + offset = fd->fp_ind - disp; + n_filetypes = (offset - flat_file->indices[0]) / filetype_extent; + offset -= (ADIO_Offset)n_filetypes * filetype_extent; + /* now offset is local to this extent */ + + /* find the block where offset is located, skip blocklens[i]==0 */ + for (i=0; icount; i++) { + ADIO_Offset dist; + if (flat_file->blocklens[i] == 0) continue; + dist = flat_file->indices[i] + flat_file->blocklens[i] - offset; + /* frd_size is from offset to the end of block i */ + if (dist == 0) { + i++; + offset = flat_file->indices[i]; + frd_size = flat_file->blocklens[i]; + break; + } + if (dist > 0 ) { + frd_size = dist; + break; + } + } + st_index = i; /* starting index in flat_file->indices[] */ + offset += disp + (ADIO_Offset)n_filetypes*filetype_extent; + } else { n_etypes_in_filetype = filetype_size/etype_size; n_filetypes = (int) (offset / n_etypes_in_filetype); @@ -316,11 +323,42 @@ void ADIOI_NFS_ReadStrided(ADIO_File fd, void *buf, int count, } /* abs. 
offset in bytes in the file */ - offset = disp + (ADIO_Offset) n_filetypes*filetype_extent + abs_off_in_filetype; + offset = disp + (ADIO_Offset) n_filetypes*filetype_extent + + abs_off_in_filetype; } start_off = offset; + /* Wei-keng Liao: read request is within a single flat_file contig + * block e.g. with subarray types that actually describe the whole + * array */ + if (buftype_is_contig && bufsize <= frd_size) { + ADIO_ReadContig(fd, buf, bufsize, MPI_BYTE, ADIO_EXPLICIT_OFFSET, + offset, status, error_code); + + if (file_ptr_type == ADIO_INDIVIDUAL) { + /* update MPI-IO file pointer to point to the first byte that + * can be accessed in the fileview. */ + fd->fp_ind = offset + bufsize; + if (bufsize == frd_size) { + do { + st_index++; + if (st_index == flat_file->count) { + st_index = 0; + n_filetypes++; + } + } while (flat_file->blocklens[st_index] == 0); + fd->fp_ind = disp + flat_file->indices[st_index] + + n_filetypes*filetype_extent; + } + } + fd->fp_sys_posn = -1; /* set it to null. */ +#ifdef HAVE_STATUS_SET_BYTES + MPIR_Status_set_bytes(status, datatype, bufsize); +#endif + return; + } + /* Calculate end_offset, the last byte-offset that will be accessed. e.g., if start_offset=0 and 100 bytes to be read, end_offset=99*/ @@ -333,11 +371,11 @@ void ADIOI_NFS_ReadStrided(ADIO_File fd, void *buf, int count, while (i < bufsize) { i += frd_size; end_offset = off + frd_size - 1; - - if (j < (flat_file->count - 1)) j++; - else { - j = 0; - n_filetypes++; + j = (j+1) % flat_file->count; + n_filetypes += (j == 0) ? 1 : 0; + while (flat_file->blocklens[j]==0) { + j = (j+1) % flat_file->count; + n_filetypes += (j == 0) ? 1 : 0; } off = disp + flat_file->indices[j] + (ADIO_Offset) n_filetypes*filetype_extent; @@ -402,11 +440,12 @@ void ADIOI_NFS_ReadStrided(ADIO_File fd, void *buf, int count, /* did not reach end of contiguous block in filetype. no more I/O needed. off is incremented by frd_size. 
*/ else { - if (j < (flat_file->count - 1)) j++; - else { - j = 0; - n_filetypes++; - } + j = (j+1) % flat_file->count; + n_filetypes += (j == 0) ? 1 : 0; + while (flat_file->blocklens[j]==0) { + j = (j+1) % flat_file->count; + n_filetypes += (j == 0) ? 1 : 0; + } off = disp + flat_file->indices[j] + (ADIO_Offset) n_filetypes*filetype_extent; frd_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i); @@ -445,12 +484,12 @@ void ADIOI_NFS_ReadStrided(ADIO_File fd, void *buf, int count, if (size == frd_size) { /* reached end of contiguous block in file */ - if (j < (flat_file->count - 1)) j++; - else { - j = 0; - n_filetypes++; - } - + j = (j+1) % flat_file->count; + n_filetypes += (j == 0) ? 1 : 0; + while (flat_file->blocklens[j]==0) { + j = (j+1) % flat_file->count; + n_filetypes += (j == 0) ? 1 : 0; + } off = disp + flat_file->indices[j] + (ADIO_Offset) n_filetypes*filetype_extent; diff --git a/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs_wait.c b/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs_wait.c index ad8da83d78..0148f63973 100644 --- a/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs_wait.c +++ b/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs_wait.c @@ -10,120 +10,6 @@ void ADIOI_NFS_ReadComplete(ADIO_Request *request, ADIO_Status *status, int *error_code) { return; -#if 0 -#ifdef ROMIO_HAVE_WORKING_AIO - int err; - static char myname[] = "ADIOI_NFS_READCOMPLETE"; -#ifdef ROMIO_HAVE_STRUCT_AIOCB_WITH_AIO_HANDLE - struct aiocb *tmp1; -#endif -#endif - - if (*request == ADIO_REQUEST_NULL) { - *error_code = MPI_SUCCESS; - return; - } - -#ifdef ROMIO_HAVE_AIO_SUSPEND_TWO_ARGS -/* old IBM */ - if ((*request)->queued) { - do { -#if !defined(_AIO_AIX_SOURCE) && !defined(_NO_PROTO) - err = aio_suspend((*request)->handle,1,NULL); -#else - err = aio_suspend(1, (struct aiocb **) &((*request)->handle)); -#endif - } while ((err == -1) && (errno == EINTR)); - - tmp1 = (struct aiocb *) (*request)->handle; - if (err != -1) { - err = aio_return(tmp1->aio_handle); - (*request)->nbytes = err; 
- errno = aio_error(tmp1->aio_handle); - } - else (*request)->nbytes = -1; - -/* on DEC, it is required to call aio_return to dequeue the request. - IBM man pages don't indicate what function to use for dequeue. - I'm assuming it is aio_return! */ - - if (err == -1) { - *error_code = MPIO_Err_create_code(MPI_SUCCESS, - MPIR_ERR_RECOVERABLE, myname, - __LINE__, MPI_ERR_IO, "**io", - "**io %s", strerror(errno)); - } - else *error_code = MPI_SUCCESS; - } - else *error_code = MPI_SUCCESS; /* if ( (*request)->queued ) */ - -#ifdef HAVE_STATUS_SET_BYTES - if ((*request)->nbytes != -1) - MPIR_Status_set_bytes(status, (*request)->datatype, (*request)->nbytes); -#endif - -#elif defined(ROMIO_HAVE_WORKING_AIO) -/* all other aio types */ - if ((*request)->queued) { - do { - err = aio_suspend((const struct aiocb **) &((*request)->handle), 1, 0); - } while ((err == -1) && (errno == EINTR)); - - if (err != -1) { - err = aio_return((struct aiocb *) (*request)->handle); - (*request)->nbytes = err; - errno = aio_error((struct aiocb *) (*request)->handle); - } - else (*request)->nbytes = -1; - - if (err == -1) { - *error_code = MPIO_Err_create_code(MPI_SUCCESS, - MPIR_ERR_RECOVERABLE, myname, - __LINE__, MPI_ERR_IO, "**io", - "**io %s", strerror(errno)); - } - else *error_code = MPI_SUCCESS; - } - else *error_code = MPI_SUCCESS; /* if ((*request)->queued) ... */ -#ifdef HAVE_STATUS_SET_BYTES - if ((*request)->nbytes != -1) - MPIR_Status_set_bytes(status, (*request)->datatype, (*request)->nbytes); -#endif -#endif - -#ifdef ROMIO_HAVE_WORKING_AIO - if ((*request)->queued != -1) { - - /* queued = -1 is an internal hack used when the request must - be completed, but the request object should not be - freed. This is used in ADIOI_Complete_async, because the user - will call MPI_Wait later, which would require status to - be filled. Ugly but works. queued = -1 should be used only - in ADIOI_Complete_async. - This should not affect the user in any way. 
*/ - - /* if request is still queued in the system, it is also there - on ADIOI_Async_list. Delete it from there. */ - if ((*request)->queued) ADIOI_Del_req_from_list(request); - - (*request)->fd->async_count--; - if ((*request)->handle) ADIOI_Free((*request)->handle); - ADIOI_Free_request((ADIOI_Req_node *) (*request)); - *request = ADIO_REQUEST_NULL; - } - -#else -/* no aio */ - -#ifdef HAVE_STATUS_SET_BYTES - MPIR_Status_set_bytes(status, (*request)->datatype, (*request)->nbytes); -#endif - (*request)->fd->async_count--; - ADIOI_Free_request((ADIOI_Req_node *) (*request)); - *request = ADIO_REQUEST_NULL; - *error_code = MPI_SUCCESS; -#endif -#endif } diff --git a/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs_write.c b/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs_write.c index f856685e59..7c82e314e1 100644 --- a/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs_write.c +++ b/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs_write.c @@ -281,7 +281,7 @@ void ADIOI_NFS_WriteStrided(ADIO_File fd, void *buf, int count, ADIO_Offset userbuf_off; ADIO_Offset off, req_off, disp, end_offset=0, writebuf_off, start_off; char *writebuf, *value; - int flag, st_fwr_size, st_n_filetypes, writebuf_len, write_sz; + int st_fwr_size, st_n_filetypes, writebuf_len, write_sz; int new_bwr_size, new_fwr_size, err_flag=0, info_flag, max_bufsize; static char myname[] = "ADIOI_NFS_WRITESTRIDED"; @@ -304,7 +304,7 @@ void ADIOI_NFS_WriteStrided(ADIO_File fd, void *buf, int count, /* get max_bufsize from the info object. 
*/ value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char)); - MPI_Info_get(fd->info, "ind_wr_buffer_size", MPI_MAX_INFO_VAL, value, + ADIOI_Info_get(fd->info, "ind_wr_buffer_size", MPI_MAX_INFO_VAL, value, &info_flag); max_bufsize = atoi(value); ADIOI_Free(value); @@ -381,25 +381,32 @@ void ADIOI_NFS_WriteStrided(ADIO_File fd, void *buf, int count, disp = fd->disp; if (file_ptr_type == ADIO_INDIVIDUAL) { - offset = fd->fp_ind; /* in bytes */ - n_filetypes = -1; - flag = 0; - while (!flag) { - n_filetypes++; - for (i=0; icount; i++) { - if (disp + flat_file->indices[i] + - (ADIO_Offset) n_filetypes*filetype_extent + flat_file->blocklens[i] - >= offset) { - st_index = i; - fwr_size = (int) (disp + flat_file->indices[i] + - (ADIO_Offset) n_filetypes*filetype_extent - + flat_file->blocklens[i] - offset); - flag = 1; - break; - } - } - } - } + /* Wei-keng reworked type processing to be a bit more efficient */ + offset = fd->fp_ind - disp; + n_filetypes = (offset - flat_file->indices[0]) / filetype_extent; + offset -= (ADIO_Offset)n_filetypes * filetype_extent; + /* now offset is local to this extent */ + + /* find the block where offset is located, skip blocklens[i]==0 */ + for (i=0; icount; i++) { + ADIO_Offset dist; + if (flat_file->blocklens[i] == 0) continue; + dist = flat_file->indices[i] + flat_file->blocklens[i] - offset; + /* fwr_size is from offset to the end of block i */ + if (dist == 0) { + i++; + offset = flat_file->indices[i]; + fwr_size = flat_file->blocklens[i]; + break; + } + if (dist > 0) { + fwr_size = dist; + break; + } + } + st_index = i; /* starting index in flat_file->indices[] */ + offset += disp + (ADIO_Offset)n_filetypes*filetype_extent; + } else { n_etypes_in_filetype = filetype_size/etype_size; n_filetypes = (int) (offset / n_etypes_in_filetype); @@ -419,10 +426,40 @@ void ADIOI_NFS_WriteStrided(ADIO_File fd, void *buf, int count, } /* abs. 
offset in bytes in the file */ - offset = disp + (ADIO_Offset) n_filetypes*filetype_extent + abs_off_in_filetype; + offset = disp + (ADIO_Offset) n_filetypes*filetype_extent + + abs_off_in_filetype; } start_off = offset; + /* Wei-keng Liao:write request is within single flat_file contig block*/ + /* this could happen, for example, with subarray types that are + * actually fairly contiguous */ + if (buftype_is_contig && bufsize <= fwr_size) { + ADIO_WriteContig(fd, buf, bufsize, MPI_BYTE, ADIO_EXPLICIT_OFFSET, + offset, status, error_code); + + if (file_ptr_type == ADIO_INDIVIDUAL) { + /* update MPI-IO file pointer to point to the first byte + * that can be accessed in the fileview. */ + fd->fp_ind = offset + bufsize; + if (bufsize == fwr_size) { + do { + st_index++; + if (st_index == flat_file->count) { + st_index = 0; + n_filetypes++; + } + } while (flat_file->blocklens[st_index] == 0); + fd->fp_ind = disp + flat_file->indices[st_index] + + (ADIO_Offset)n_filetypes*filetype_extent; + } + } + fd->fp_sys_posn = -1; /* set it to null. */ +#ifdef HAVE_STATUS_SET_BYTES + MPIR_Status_set_bytes(status, datatype, bufsize); +#endif + return; + } /* Calculate end_offset, the last byte-offset that will be accessed. e.g., if start_offset=0 and 100 bytes to be write, end_offset=99*/ @@ -436,14 +473,15 @@ void ADIOI_NFS_WriteStrided(ADIO_File fd, void *buf, int count, while (i < bufsize) { i += fwr_size; end_offset = off + fwr_size - 1; - - if (j < (flat_file->count - 1)) j++; - else { - j = 0; - n_filetypes++; + j = (j+1) % flat_file->count; + n_filetypes += (j == 0) ? 1 : 0; + while (flat_file->blocklens[j]==0) { + j = (j+1) % flat_file->count; + n_filetypes += (j == 0) ? 
1 : 0; } - off = disp + flat_file->indices[j] + (ADIO_Offset) n_filetypes*filetype_extent; + off = disp + flat_file->indices[j] + + (ADIO_Offset) n_filetypes*filetype_extent; fwr_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i); } @@ -509,13 +547,14 @@ void ADIOI_NFS_WriteStrided(ADIO_File fd, void *buf, int count, /* did not reach end of contiguous block in filetype. no more I/O needed. off is incremented by fwr_size. */ else { - if (j < (flat_file->count - 1)) j++; - else { - j = 0; - n_filetypes++; - } + j = (j+1) % flat_file->count; + n_filetypes += (j == 0) ? 1 : 0; + while (flat_file->blocklens[j]==0) { + j = (j+1) % flat_file->count; + n_filetypes += (j == 0) ? 1 : 0; + } off = disp + flat_file->indices[j] + - (ADIO_Offset) n_filetypes*filetype_extent; + (ADIO_Offset) n_filetypes*filetype_extent; fwr_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i); } } @@ -552,10 +591,11 @@ void ADIOI_NFS_WriteStrided(ADIO_File fd, void *buf, int count, if (size == fwr_size) { /* reached end of contiguous block in file */ - if (j < (flat_file->count - 1)) j++; - else { - j = 0; - n_filetypes++; + j = (j+1) % flat_file->count; + n_filetypes += (j == 0) ? 1 : 0; + while (flat_file->blocklens[j]==0) { + j = (j+1) % flat_file->count; + n_filetypes += (j == 0) ? 
1 : 0; } off = disp + flat_file->indices[j] + diff --git a/ompi/mca/io/romio/romio/adio/ad_ntfs/ad_ntfs.c b/ompi/mca/io/romio/romio/adio/ad_ntfs/ad_ntfs.c index 23c482875d..c49f2e86eb 100644 --- a/ompi/mca/io/romio/romio/adio/ad_ntfs/ad_ntfs.c +++ b/ompi/mca/io/romio/romio/adio/ad_ntfs/ad_ntfs.c @@ -12,6 +12,7 @@ struct ADIOI_Fns_struct ADIO_NTFS_operations = { ADIOI_NTFS_Open, /* Open */ + ADIOI_GEN_OpenColl, /* OpenColl */ ADIOI_NTFS_ReadContig, /* ReadContig */ ADIOI_NTFS_WriteContig, /* WriteContig */ ADIOI_GEN_ReadStridedColl, /* ReadStridedColl */ @@ -33,4 +34,5 @@ struct ADIOI_Fns_struct ADIO_NTFS_operations = { ADIOI_NTFS_Flush, /* Flush */ ADIOI_NTFS_Resize, /* Resize */ ADIOI_GEN_Delete, /* Delete */ + ADIOI_GEN_Feature /* Features */ }; diff --git a/ompi/mca/io/romio/romio/adio/ad_ntfs/ad_ntfs_iwrite.c b/ompi/mca/io/romio/romio/adio/ad_ntfs/ad_ntfs_iwrite.c index 33ce0f1acd..b43e1a9a75 100644 --- a/ompi/mca/io/romio/romio/adio/ad_ntfs/ad_ntfs_iwrite.c +++ b/ompi/mca/io/romio/romio/adio/ad_ntfs/ad_ntfs_iwrite.c @@ -60,7 +60,6 @@ int ADIOI_NTFS_aio_poll_fn(void *extra_state, MPI_Status *status) /* TODO: unsure how to handle this */ } }else{ - MPIR_Nest_incr(); mpi_errno = MPI_Grequest_complete(aio_req->req); if (mpi_errno != MPI_SUCCESS) { mpi_errno = MPIO_Err_create_code(MPI_SUCCESS, @@ -69,7 +68,6 @@ int ADIOI_NTFS_aio_poll_fn(void *extra_state, MPI_Status *status) MPI_ERR_IO, "**mpi_grequest_complete", 0); } - MPIR_Nest_decr(); } return mpi_errno; } @@ -111,16 +109,14 @@ int ADIOI_NTFS_aio_wait_fn(int count, void **array_of_states, aio_reqlist[retObject]->lpOvl, &(aio_reqlist[retObject]->nbytes), FALSE)){ /* XXX: mark completed requests as 'done'*/ - MPIR_Nest_incr(); - mpi_errno = MPI_Grequest_complete(aio_reqlist[retObject]->req); + mpi_errno = MPI_Grequest_complete(aio_reqlist[retObject]->req); if (mpi_errno != MPI_SUCCESS) { mpi_errno = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, "ADIOI_NTFS_aio_wait_fn", __LINE__, MPI_ERR_IO, 
"**mpi_grequest_complete", 0); - } - MPIR_Nest_decr(); + } }else{ if(GetLastError() == ERROR_IO_INCOMPLETE){ /* IO in progress */ @@ -146,7 +142,6 @@ int ADIOI_NTFS_aio_query_fn(void *extra_state, MPI_Status *status) MPI_Status_set_elements(status, MPI_BYTE, aio_req->nbytes); - /* do i need to nest_incr/nest_decr here? */ /* can never cancel so always true */ MPI_Status_set_cancelled(status, 0); diff --git a/ompi/mca/io/romio/romio/adio/ad_panfs/ad_panfs.c b/ompi/mca/io/romio/romio/adio/ad_panfs/ad_panfs.c index 7af1045014..f2e96bbba8 100644 --- a/ompi/mca/io/romio/romio/adio/ad_panfs/ad_panfs.c +++ b/ompi/mca/io/romio/romio/adio/ad_panfs/ad_panfs.c @@ -13,6 +13,7 @@ struct ADIOI_Fns_struct ADIO_PANFS_operations = { ADIOI_PANFS_Open, /* Open */ + ADIOI_GEN_OpenColl, ADIOI_PANFS_ReadContig, /* ReadContig */ ADIOI_PANFS_WriteContig, /* WriteContig */ ADIOI_GEN_ReadStridedColl, /* ReadStridedColl */ @@ -39,4 +40,5 @@ struct ADIOI_Fns_struct ADIO_PANFS_operations = { ADIOI_GEN_Flush, /* Flush */ ADIOI_PANFS_Resize, /* Resize */ ADIOI_GEN_Delete, /* Delete */ + ADIOI_GEN_Feature, }; diff --git a/ompi/mca/io/romio/romio/adio/ad_panfs/ad_panfs_hints.c b/ompi/mca/io/romio/romio/adio/ad_panfs/ad_panfs_hints.c index 9b02a414d7..4755a0aa74 100644 --- a/ompi/mca/io/romio/romio/adio/ad_panfs/ad_panfs_hints.c +++ b/ompi/mca/io/romio/romio/adio/ad_panfs/ad_panfs_hints.c @@ -36,7 +36,7 @@ void ADIOI_PANFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) if (users_info != MPI_INFO_NULL) { value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char)); - MPI_Info_get(users_info, "panfs_concurrent_write", MPI_MAX_INFO_VAL, + ADIOI_Info_get(users_info, "panfs_concurrent_write", MPI_MAX_INFO_VAL, value, &flag); if (flag) { concurrent_write = strtoul(value,NULL,10); @@ -46,10 +46,10 @@ void ADIOI_PANFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) FPRINTF(stderr, "ADIOI_PANFS_SetInfo: the value for key \"panfs_concurrent_write\" must be the same on all 
processes\n"); MPI_Abort(MPI_COMM_WORLD, 1); } - MPI_Info_set(fd->info, "panfs_concurrent_write", value); + ADIOI_Info_set(fd->info, "panfs_concurrent_write", value); } - MPI_Info_get(users_info, "panfs_layout_type", MPI_MAX_INFO_VAL, + ADIOI_Info_get(users_info, "panfs_layout_type", MPI_MAX_INFO_VAL, value, &flag); if (flag) { layout_type = strtoul(value,NULL,10); @@ -59,10 +59,10 @@ void ADIOI_PANFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) FPRINTF(stderr, "ADIOI_PANFS_SetInfo: the value for key \"panfs_layout_type\" must be the same on all processes\n"); MPI_Abort(MPI_COMM_WORLD, 1); } - MPI_Info_set(fd->info, "panfs_layout_type", value); + ADIOI_Info_set(fd->info, "panfs_layout_type", value); } - MPI_Info_get(users_info, "panfs_layout_stripe_unit", MPI_MAX_INFO_VAL, + ADIOI_Info_get(users_info, "panfs_layout_stripe_unit", MPI_MAX_INFO_VAL, value, &flag); if (flag) { layout_stripe_unit = strtoul(value,NULL,10); @@ -72,10 +72,10 @@ void ADIOI_PANFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) FPRINTF(stderr, "ADIOI_PANFS_SetInfo: the value for key \"panfs_layout_stripe_unit\" must be the same on all processes\n"); MPI_Abort(MPI_COMM_WORLD, 1); } - MPI_Info_set(fd->info, "panfs_layout_stripe_unit", value); + ADIOI_Info_set(fd->info, "panfs_layout_stripe_unit", value); } - MPI_Info_get(users_info, "panfs_layout_parity_stripe_width", MPI_MAX_INFO_VAL, + ADIOI_Info_get(users_info, "panfs_layout_parity_stripe_width", MPI_MAX_INFO_VAL, value, &flag); if (flag && (layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID1_5_PARITY_STRIPE)) { layout_parity_stripe_width = strtoul(value,NULL,10); @@ -85,10 +85,10 @@ void ADIOI_PANFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) FPRINTF(stderr, "ADIOI_PANFS_SetInfo: the value for key \"panfs_layout_parity_stripe_width\" must be the same on all processes\n"); MPI_Abort(MPI_COMM_WORLD, 1); } - MPI_Info_set(fd->info, "panfs_layout_parity_stripe_width", value); + ADIOI_Info_set(fd->info, 
"panfs_layout_parity_stripe_width", value); } - MPI_Info_get(users_info, "panfs_layout_parity_stripe_depth", MPI_MAX_INFO_VAL, + ADIOI_Info_get(users_info, "panfs_layout_parity_stripe_depth", MPI_MAX_INFO_VAL, value, &flag); if (flag && (layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID1_5_PARITY_STRIPE)) { layout_parity_stripe_depth = strtoul(value,NULL,10); @@ -98,10 +98,10 @@ void ADIOI_PANFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) FPRINTF(stderr, "ADIOI_PANFS_SetInfo: the value for key \"panfs_layout_parity_stripe_depth\" must be the same on all processes\n"); MPI_Abort(MPI_COMM_WORLD, 1); } - MPI_Info_set(fd->info, "panfs_layout_parity_stripe_depth", value); + ADIOI_Info_set(fd->info, "panfs_layout_parity_stripe_depth", value); } - MPI_Info_get(users_info, "panfs_layout_total_num_comps", MPI_MAX_INFO_VAL, + ADIOI_Info_get(users_info, "panfs_layout_total_num_comps", MPI_MAX_INFO_VAL, value, &flag); if (flag) { layout_total_num_comps = strtoul(value,NULL,10); @@ -111,10 +111,10 @@ void ADIOI_PANFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) FPRINTF(stderr, "ADIOI_PANFS_SetInfo: the value for key \"panfs_layout_total_num_comps\" must be the same on all processes\n"); MPI_Abort(MPI_COMM_WORLD, 1); } - MPI_Info_set(fd->info, "panfs_layout_total_num_comps", value); + ADIOI_Info_set(fd->info, "panfs_layout_total_num_comps", value); } - MPI_Info_get(users_info, "panfs_layout_visit_policy", MPI_MAX_INFO_VAL, + ADIOI_Info_get(users_info, "panfs_layout_visit_policy", MPI_MAX_INFO_VAL, value, &flag); if (flag && (layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID1_5_PARITY_STRIPE || layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID10)) { layout_visit_policy = strtoul(value,NULL,10); @@ -124,7 +124,7 @@ void ADIOI_PANFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) FPRINTF(stderr, "ADIOI_PANFS_SetInfo: the value for key \"panfs_layout_visit_policy\" must be the same on all processes\n"); MPI_Abort(MPI_COMM_WORLD, 1); } - 
MPI_Info_set(fd->info, "panfs_layout_visit_policy", value); + ADIOI_Info_set(fd->info, "panfs_layout_visit_policy", value); } ADIOI_Free(value); diff --git a/ompi/mca/io/romio/romio/adio/ad_panfs/ad_panfs_open.c b/ompi/mca/io/romio/romio/adio/ad_panfs/ad_panfs_open.c index b5f2a124b9..d7cdf19454 100644 --- a/ompi/mca/io/romio/romio/adio/ad_panfs/ad_panfs_open.c +++ b/ompi/mca/io/romio/romio/adio/ad_panfs/ad_panfs_open.c @@ -39,32 +39,32 @@ void ADIOI_PANFS_Open(ADIO_File fd, int *error_code) *error_code = MPI_SUCCESS; value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char)); - MPI_Info_get(fd->info, "panfs_layout_type", MPI_MAX_INFO_VAL, + ADIOI_Info_get(fd->info, "panfs_layout_type", MPI_MAX_INFO_VAL, value, &flag); if (flag) { layout_type = strtoul(value,NULL,10); } - MPI_Info_get(fd->info, "panfs_layout_stripe_unit", MPI_MAX_INFO_VAL, + ADIOI_Info_get(fd->info, "panfs_layout_stripe_unit", MPI_MAX_INFO_VAL, value, &flag); if (flag) { layout_stripe_unit = strtoul(value,NULL,10); } - MPI_Info_get(fd->info, "panfs_layout_total_num_comps", MPI_MAX_INFO_VAL, + ADIOI_Info_get(fd->info, "panfs_layout_total_num_comps", MPI_MAX_INFO_VAL, value, &flag); if (flag) { layout_total_num_comps = strtoul(value,NULL,10); } - MPI_Info_get(fd->info, "panfs_layout_parity_stripe_width", MPI_MAX_INFO_VAL, + ADIOI_Info_get(fd->info, "panfs_layout_parity_stripe_width", MPI_MAX_INFO_VAL, value, &flag); if (flag) { layout_parity_stripe_width = strtoul(value,NULL,10); } - MPI_Info_get(fd->info, "panfs_layout_parity_stripe_depth", MPI_MAX_INFO_VAL, + ADIOI_Info_get(fd->info, "panfs_layout_parity_stripe_depth", MPI_MAX_INFO_VAL, value, &flag); if (flag) { layout_parity_stripe_depth = strtoul(value,NULL,10); } - MPI_Info_get(fd->info, "panfs_layout_visit_policy", MPI_MAX_INFO_VAL, + ADIOI_Info_get(fd->info, "panfs_layout_visit_policy", MPI_MAX_INFO_VAL, value, &flag); if (flag) { layout_visit_policy = strtoul(value,NULL,10); @@ -266,7 +266,7 @@ void ADIOI_PANFS_Open(ADIO_File fd, int 
*error_code) amode = amode | O_EXCL; value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char)); - MPI_Info_get(fd->info, "panfs_concurrent_write", MPI_MAX_INFO_VAL, + ADIOI_Info_get(fd->info, "panfs_concurrent_write", MPI_MAX_INFO_VAL, value, &flag); if (flag) { unsigned long int concurrent_write = strtoul(value,NULL,10); @@ -291,41 +291,41 @@ void ADIOI_PANFS_Open(ADIO_File fd, int *error_code) if (rc < 0) { /* Error - set layout type to unknown */ - MPI_Info_set(fd->info, "panfs_layout_type", "PAN_FS_CLIENT_LAYOUT_TYPE__INVALID"); + ADIOI_Info_set(fd->info, "panfs_layout_type", "PAN_FS_CLIENT_LAYOUT_TYPE__INVALID"); } else { ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.agg_type); - MPI_Info_set(fd->info, "panfs_layout_type", temp_buffer); + ADIOI_Info_set(fd->info, "panfs_layout_type", temp_buffer); if (file_query_args.layout.layout_is_valid == 1) { switch (file_query_args.layout.agg_type) { case PAN_FS_CLIENT_LAYOUT_TYPE__RAID0: ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid0.stripe_unit); - MPI_Info_set(fd->info, "panfs_layout_stripe_unit", temp_buffer); + ADIOI_Info_set(fd->info, "panfs_layout_stripe_unit", temp_buffer); ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid0.total_num_comps); - MPI_Info_set(fd->info, "panfs_layout_total_num_comps", temp_buffer); + ADIOI_Info_set(fd->info, "panfs_layout_total_num_comps", temp_buffer); break; case PAN_FS_CLIENT_LAYOUT_TYPE__RAID1_5_PARITY_STRIPE: ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid1_5_parity_stripe.stripe_unit); - MPI_Info_set(fd->info, "panfs_layout_stripe_unit", temp_buffer); + ADIOI_Info_set(fd->info, "panfs_layout_stripe_unit", temp_buffer); ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid1_5_parity_stripe.parity_stripe_width); - MPI_Info_set(fd->info, "panfs_layout_parity_stripe_width", temp_buffer); + ADIOI_Info_set(fd->info, 
"panfs_layout_parity_stripe_width", temp_buffer); ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid1_5_parity_stripe.parity_stripe_depth); - MPI_Info_set(fd->info, "panfs_layout_parity_stripe_depth", temp_buffer); + ADIOI_Info_set(fd->info, "panfs_layout_parity_stripe_depth", temp_buffer); ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid1_5_parity_stripe.total_num_comps); - MPI_Info_set(fd->info, "panfs_layout_total_num_comps", temp_buffer); + ADIOI_Info_set(fd->info, "panfs_layout_total_num_comps", temp_buffer); ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid1_5_parity_stripe.layout_visit_policy); - MPI_Info_set(fd->info, "panfs_layout_visit_policy", temp_buffer); + ADIOI_Info_set(fd->info, "panfs_layout_visit_policy", temp_buffer); break; case PAN_FS_CLIENT_LAYOUT_TYPE__RAID10: ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid10.stripe_unit); - MPI_Info_set(fd->info, "panfs_layout_stripe_unit", temp_buffer); + ADIOI_Info_set(fd->info, "panfs_layout_stripe_unit", temp_buffer); ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid10.total_num_comps); - MPI_Info_set(fd->info, "panfs_layout_total_num_comps", temp_buffer); + ADIOI_Info_set(fd->info, "panfs_layout_total_num_comps", temp_buffer); ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid10.layout_visit_policy); - MPI_Info_set(fd->info, "panfs_layout_visit_policy", temp_buffer); + ADIOI_Info_set(fd->info, "panfs_layout_visit_policy", temp_buffer); break; } } diff --git a/ompi/mca/io/romio/romio/adio/ad_pfs/ad_pfs_hints.c b/ompi/mca/io/romio/romio/adio/ad_pfs/ad_pfs_hints.c index d4a8575a66..5815ca0925 100644 --- a/ompi/mca/io/romio/romio/adio/ad_pfs/ad_pfs_hints.c +++ b/ompi/mca/io/romio/romio/adio/ad_pfs/ad_pfs_hints.c @@ -24,7 +24,7 @@ void ADIOI_PFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) if (users_info != MPI_INFO_NULL) 
{ value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char)); - MPI_Info_get(users_info, "striping_factor", MPI_MAX_INFO_VAL, + ADIOI_Info_get(users_info, "striping_factor", MPI_MAX_INFO_VAL, value, &flag); if (flag) { str_factor=atoi(value); @@ -40,7 +40,7 @@ void ADIOI_PFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) /* --END ERROR HANDLING-- */ } - MPI_Info_get(users_info, "striping_unit", MPI_MAX_INFO_VAL, + ADIOI_Info_get(users_info, "striping_unit", MPI_MAX_INFO_VAL, value, &flag); if (flag) { str_unit=atoi(value); @@ -56,7 +56,7 @@ void ADIOI_PFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) /* --END ERROR HANDLING-- */ } - MPI_Info_get(users_info, "start_iodevice", MPI_MAX_INFO_VAL, + ADIOI_Info_get(users_info, "start_iodevice", MPI_MAX_INFO_VAL, value, &flag); if (flag) { start_iodev=atoi(value); @@ -119,15 +119,15 @@ void ADIOI_PFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) If so, mark it as true in fd->info and turn it on in ADIOI_PFS_Open after the file is opened */ - MPI_Info_get(users_info, "pfs_svr_buf", MPI_MAX_INFO_VAL, + ADIOI_Info_get(users_info, "pfs_svr_buf", MPI_MAX_INFO_VAL, value, &flag); if (flag && (!strcmp(value, "true"))) - MPI_Info_set(fd->info, "pfs_svr_buf", "true"); - else MPI_Info_set(fd->info, "pfs_svr_buf", "false"); + ADIOI_Info_set(fd->info, "pfs_svr_buf", "true"); + else ADIOI_Info_set(fd->info, "pfs_svr_buf", "false"); ADIOI_Free(value); } - else MPI_Info_set(fd->info, "pfs_svr_buf", "false"); + else ADIOI_Info_set(fd->info, "pfs_svr_buf", "false"); /* set the values for collective I/O and data sieving parameters */ ADIOI_GEN_SetInfo(fd, users_info, error_code); @@ -144,23 +144,23 @@ void ADIOI_PFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) if (users_info != MPI_INFO_NULL) { value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char)); - MPI_Info_get(users_info, "pfs_svr_buf", MPI_MAX_INFO_VAL, + ADIOI_Info_get(users_info, "pfs_svr_buf", MPI_MAX_INFO_VAL, 
value, &flag); if (flag && (!strcmp(value, "true") || !strcmp(value, "false"))) { value_in_fd = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char)); - MPI_Info_get(fd->info, "pfs_svr_buf", MPI_MAX_INFO_VAL, + ADIOI_Info_get(fd->info, "pfs_svr_buf", MPI_MAX_INFO_VAL, value_in_fd, &flag); if (strcmp(value, value_in_fd)) { if (!strcmp(value, "true")) { err = fcntl(fd->fd_sys, F_PFS_SVR_BUF, TRUE); if (!err) - MPI_Info_set(fd->info, "pfs_svr_buf", "true"); + ADIOI_Info_set(fd->info, "pfs_svr_buf", "true"); } else { err = fcntl(fd->fd_sys, F_PFS_SVR_BUF, FALSE); if (!err) - MPI_Info_set(fd->info, "pfs_svr_buf", "false"); + ADIOI_Info_set(fd->info, "pfs_svr_buf", "false"); } } ADIOI_Free(value_in_fd); diff --git a/ompi/mca/io/romio/romio/adio/ad_pfs/ad_pfs_open.c b/ompi/mca/io/romio/romio/adio/ad_pfs/ad_pfs_open.c index c56c0f991f..c424af07fa 100644 --- a/ompi/mca/io/romio/romio/adio/ad_pfs/ad_pfs_open.c +++ b/ompi/mca/io/romio/romio/adio/ad_pfs/ad_pfs_open.c @@ -49,11 +49,11 @@ void ADIOI_PFS_Open(ADIO_File fd, int *error_code) to ADIOI_PFS_SetInfo. Turn it on now, since we now have a valid file descriptor. 
*/ - MPI_Info_get(fd->info, "pfs_svr_buf", MPI_MAX_INFO_VAL, + ADIOI_Info_get(fd->info, "pfs_svr_buf", MPI_MAX_INFO_VAL, value, &flag); if (flag && (!strcmp(value, "true"))) { err = fcntl(fd->fd_sys, F_PFS_SVR_BUF, TRUE); - if (err) MPI_Info_set(fd->info, "pfs_svr_buf", "false"); + if (err) ADIOI_Info_set(fd->info, "pfs_svr_buf", "false"); } /* get file striping information and set it in info */ @@ -61,13 +61,13 @@ void ADIOI_PFS_Open(ADIO_File fd, int *error_code) if (!err) { ADIOI_Snprintf(value, MPI_MAX_INFO_VAL+1, "%d", attr.s_sunitsize); - MPI_Info_set(fd->info, "striping_unit", value); + ADIOI_Info_set(fd->info, "striping_unit", value); ADIOI_Snprintf(value, MPI_MAX_INFO_VAL+1, "%d", attr.s_sfactor); - MPI_Info_set(fd->info, "striping_factor", value); + ADIOI_Info_set(fd->info, "striping_factor", value); ADIOI_Snprintf(value, MPI_MAX_INFO_VAL+1, "%d", attr.s_start_sdir); - MPI_Info_set(fd->info, "start_iodevice", value); + ADIOI_Info_set(fd->info, "start_iodevice", value); } ADIOI_Free(value); diff --git a/ompi/mca/io/romio/romio/adio/ad_piofs/Makefile.am b/ompi/mca/io/romio/romio/adio/ad_piofs/Makefile.am index 894ffde106..6fc828a43f 100644 --- a/ompi/mca/io/romio/romio/adio/ad_piofs/Makefile.am +++ b/ompi/mca/io/romio/romio/adio/ad_piofs/Makefile.am @@ -25,6 +25,7 @@ libadio_piofs_la_SOURCES = \ ad_piofs.c \ ad_piofs.h \ ad_piofs_fcntl.c \ + ad_piofs_features.c \ ad_piofs_hints.c \ ad_piofs_open.c \ ad_piofs_read.c \ diff --git a/ompi/mca/io/romio/romio/adio/ad_piofs/ad_piofs.c b/ompi/mca/io/romio/romio/adio/ad_piofs/ad_piofs.c index 9c2bd59ef1..b602c789bd 100644 --- a/ompi/mca/io/romio/romio/adio/ad_piofs/ad_piofs.c +++ b/ompi/mca/io/romio/romio/adio/ad_piofs/ad_piofs.c @@ -33,4 +33,5 @@ struct ADIOI_Fns_struct ADIO_PIOFS_operations = { ADIOI_GEN_Flush, /* Flush */ ADIOI_GEN_Resize, /* Resize */ ADIOI_GEN_Delete, /* Delete */ + ADIOI_PIOFS_Feature, }; diff --git a/ompi/mca/io/romio/romio/adio/ad_piofs/ad_piofs.h 
b/ompi/mca/io/romio/romio/adio/ad_piofs/ad_piofs.h index f39149de25..919cdb4cab 100644 --- a/ompi/mca/io/romio/romio/adio/ad_piofs/ad_piofs.h +++ b/ompi/mca/io/romio/romio/adio/ad_piofs/ad_piofs.h @@ -35,4 +35,6 @@ void ADIOI_PIOFS_WriteStrided(ADIO_File fd, void *buf, int count, *error_code); void ADIOI_PIOFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code); +int ADIOI_PIOFS_Feature(ADIO_File fd, int flag); + #endif diff --git a/ompi/mca/io/romio/romio/adio/ad_piofs/ad_piofs_features.c b/ompi/mca/io/romio/romio/adio/ad_piofs/ad_piofs_features.c new file mode 100644 index 0000000000..11b17bf7be --- /dev/null +++ b/ompi/mca/io/romio/romio/adio/ad_piofs/ad_piofs_features.c @@ -0,0 +1,15 @@ +#include "ad_piofs.h" +/* PIOFS feature query: every flag, listed or not, yields 0 (presumably "unsupported" -- confirm against ADIOI_GEN_Feature); fd is unused. */ +int ADIOI_PIOFS_Feature(ADIO_File fd, int flag) +{ + switch(flag) { + case ADIO_LOCKS: + case ADIO_SHARED_FP: + case ADIO_ATOMIC_MODE: + case ADIO_DATA_SIEVING_WRITES: + case ADIO_SCALABLE_OPEN: + default: + return 0; + break; + } +} diff --git a/ompi/mca/io/romio/romio/adio/ad_piofs/ad_piofs_hints.c b/ompi/mca/io/romio/romio/adio/ad_piofs/ad_piofs_hints.c index 57d53edc8f..295310512d 100644 --- a/ompi/mca/io/romio/romio/adio/ad_piofs/ad_piofs_hints.c +++ b/ompi/mca/io/romio/romio/adio/ad_piofs/ad_piofs_hints.c @@ -25,7 +25,7 @@ void ADIOI_PIOFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) if (users_info != MPI_INFO_NULL) { value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char)); - MPI_Info_get(users_info, "striping_factor", MPI_MAX_INFO_VAL, + ADIOI_Info_get(users_info, "striping_factor", MPI_MAX_INFO_VAL, value, &flag); if (flag) { str_factor=atoi(value); @@ -37,7 +37,7 @@ void ADIOI_PIOFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) } } - MPI_Info_get(users_info, "striping_unit", MPI_MAX_INFO_VAL, + ADIOI_Info_get(users_info, "striping_unit", MPI_MAX_INFO_VAL, value, &flag); if (flag) { str_unit=atoi(value); @@ -49,7 +49,7 @@ void ADIOI_PIOFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) } } - MPI_Info_get(users_info,
"start_iodevice", MPI_MAX_INFO_VAL, + ADIOI_Info_get(users_info, "start_iodevice", MPI_MAX_INFO_VAL, value, &flag); if (flag) { start_iodev=atoi(value); diff --git a/ompi/mca/io/romio/romio/adio/ad_piofs/ad_piofs_open.c b/ompi/mca/io/romio/romio/adio/ad_piofs/ad_piofs_open.c index 6572234632..20c3644c9b 100644 --- a/ompi/mca/io/romio/romio/adio/ad_piofs/ad_piofs_open.c +++ b/ompi/mca/io/romio/romio/adio/ad_piofs/ad_piofs_open.c @@ -49,13 +49,13 @@ void ADIOI_PIOFS_Open(ADIO_File fd, int *error_code) if (!err) { ADIOI_Snprintf(value, MPI_MAX_INFO_VAL+1, "%d", piofs_fstat.st_bsu); - MPI_Info_set(fd->info, "striping_unit", value); + ADIOI_Info_set(fd->info, "striping_unit", value); ADIOI_Snprintf(value, MPI_MAX_INFO_VAL+1, "%d", piofs_fstat.st_cells); - MPI_Info_set(fd->info, "striping_factor", value); + ADIOI_Info_set(fd->info, "striping_factor", value); ADIOI_Snprintf(value, MPI_MAX_INFO_VAL+1, "%d", piofs_fstat.st_base_node); - MPI_Info_set(fd->info, "start_iodevice", value); + ADIOI_Info_set(fd->info, "start_iodevice", value); } ADIOI_Free(value); diff --git a/ompi/mca/io/romio/romio/adio/ad_pvfs/ad_pvfs.c b/ompi/mca/io/romio/romio/adio/ad_pvfs/ad_pvfs.c index 967eadd787..198cc9fb65 100644 --- a/ompi/mca/io/romio/romio/adio/ad_pvfs/ad_pvfs.c +++ b/ompi/mca/io/romio/romio/adio/ad_pvfs/ad_pvfs.c @@ -33,4 +33,5 @@ struct ADIOI_Fns_struct ADIO_PVFS_operations = { ADIOI_PVFS_Flush, /* Flush */ ADIOI_PVFS_Resize, /* Resize */ ADIOI_PVFS_Delete, /* Delete */ + ADIOI_PVFS_Feature, /* Features */ }; diff --git a/ompi/mca/io/romio/romio/adio/ad_pvfs/ad_pvfs_hints.c b/ompi/mca/io/romio/romio/adio/ad_pvfs/ad_pvfs_hints.c index 40c5434e34..710aea7708 100644 --- a/ompi/mca/io/romio/romio/adio/ad_pvfs/ad_pvfs_hints.c +++ b/ompi/mca/io/romio/romio/adio/ad_pvfs/ad_pvfs_hints.c @@ -17,8 +17,8 @@ void ADIOI_PVFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) /* This must be part of the open call. can set striping parameters if necessary. 
*/ MPI_Info_create(&(fd->info)); - MPI_Info_set(fd->info, "romio_pvfs_listio_read", "disable"); - MPI_Info_set(fd->info, "romio_pvfs_listio_write", "disable"); + ADIOI_Info_set(fd->info, "romio_pvfs_listio_read", "disable"); + ADIOI_Info_set(fd->info, "romio_pvfs_listio_write", "disable"); fd->hints->fs_hints.pvfs.listio_read = ADIOI_HINT_DISABLE; fd->hints->fs_hints.pvfs.listio_write = ADIOI_HINT_DISABLE; @@ -27,7 +27,7 @@ void ADIOI_PVFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) if (users_info != MPI_INFO_NULL) { value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char)); - MPI_Info_get(users_info, "striping_factor", MPI_MAX_INFO_VAL, + ADIOI_Info_get(users_info, "striping_factor", MPI_MAX_INFO_VAL, value, &flag); if (flag) { str_factor=atoi(value); @@ -41,10 +41,10 @@ void ADIOI_PVFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) return; /* --END ERROR HANDLING-- */ } - else MPI_Info_set(fd->info, "striping_factor", value); + else ADIOI_Info_set(fd->info, "striping_factor", value); } - MPI_Info_get(users_info, "striping_unit", MPI_MAX_INFO_VAL, + ADIOI_Info_get(users_info, "striping_unit", MPI_MAX_INFO_VAL, value, &flag); if (flag) { str_unit=atoi(value); @@ -58,10 +58,10 @@ void ADIOI_PVFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) return; /* --END ERROR HANDLING-- */ } - else MPI_Info_set(fd->info, "striping_unit", value); + else ADIOI_Info_set(fd->info, "striping_unit", value); } - MPI_Info_get(users_info, "start_iodevice", MPI_MAX_INFO_VAL, + ADIOI_Info_get(users_info, "start_iodevice", MPI_MAX_INFO_VAL, value, &flag); if (flag) { start_iodev=atoi(value); @@ -75,25 +75,25 @@ void ADIOI_PVFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) return; /* --END ERROR HANDLING-- */ } - else MPI_Info_set(fd->info, "start_iodevice", value); + else ADIOI_Info_set(fd->info, "start_iodevice", value); } - MPI_Info_get(users_info, "romio_pvfs_listio_read", + ADIOI_Info_get(users_info, 
"romio_pvfs_listio_read", MPI_MAX_INFO_VAL, value, &flag); if (flag) { if ( !strcmp(value, "enable") || !strcmp(value, "ENABLE")) { - MPI_Info_set(fd->info, "romio_pvfs_listio_read", value); + ADIOI_Info_set(fd->info, "romio_pvfs_listio_read", value); fd->hints->fs_hints.pvfs.listio_read = ADIOI_HINT_ENABLE; } else if ( !strcmp(value, "disable") || !strcmp(value, "DISABLE")) { - MPI_Info_set(fd->info , "romio_pvfs_listio_read", value); + ADIOI_Info_set(fd->info , "romio_pvfs_listio_read", value); fd->hints->fs_hints.pvfs.listio_read = ADIOI_HINT_DISABLE; } else if ( !strcmp(value, "automatic") || !strcmp(value, "AUTOMATIC")) { - MPI_Info_set(fd->info, "romio_pvfs_listio_read", value); + ADIOI_Info_set(fd->info, "romio_pvfs_listio_read", value); fd->hints->fs_hints.pvfs.listio_read = ADIOI_HINT_AUTO; } tmp_val = fd->hints->fs_hints.pvfs.listio_read; @@ -107,21 +107,21 @@ void ADIOI_PVFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) /* --END ERROR HANDLING-- */ } } - MPI_Info_get(users_info, "romio_pvfs_listio_write", MPI_MAX_INFO_VAL, + ADIOI_Info_get(users_info, "romio_pvfs_listio_write", MPI_MAX_INFO_VAL, value, &flag); if (flag) { if ( !strcmp(value, "enable") || !strcmp(value, "ENABLE")) { - MPI_Info_set(fd->info, "romio_pvfs_listio_write", value); + ADIOI_Info_set(fd->info, "romio_pvfs_listio_write", value); fd->hints->fs_hints.pvfs.listio_write = ADIOI_HINT_ENABLE; } else if ( !strcmp(value, "disable") || !strcmp(value, "DISABLE")) { - MPI_Info_set(fd->info, "romio_pvfs_listio_write", value); + ADIOI_Info_set(fd->info, "romio_pvfs_listio_write", value); fd->hints->fs_hints.pvfs.listio_write = ADIOI_HINT_DISABLE; } else if ( !strcmp(value, "automatic") || !strcmp(value, "AUTOMATIC")) { - MPI_Info_set(fd->info, "romio_pvfs_listio_write", value); + ADIOI_Info_set(fd->info, "romio_pvfs_listio_write", value); fd->hints->fs_hints.pvfs.listio_write = ADIOI_HINT_AUTO; } tmp_val = fd->hints->fs_hints.pvfs.listio_write; diff --git 
a/ompi/mca/io/romio/romio/adio/ad_pvfs/ad_pvfs_open.c b/ompi/mca/io/romio/romio/adio/ad_pvfs/ad_pvfs_open.c index e65a7f3d74..535ed04b5b 100644 --- a/ompi/mca/io/romio/romio/adio/ad_pvfs/ad_pvfs_open.c +++ b/ompi/mca/io/romio/romio/adio/ad_pvfs/ad_pvfs_open.c @@ -37,15 +37,15 @@ void ADIOI_PVFS_Open(ADIO_File fd, int *error_code) value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char)); - MPI_Info_get(fd->info, "striping_factor", MPI_MAX_INFO_VAL, + ADIOI_Info_get(fd->info, "striping_factor", MPI_MAX_INFO_VAL, value, &flag); if (flag && (atoi(value) > 0)) pstat.pcount = atoi(value); - MPI_Info_get(fd->info, "striping_unit", MPI_MAX_INFO_VAL, + ADIOI_Info_get(fd->info, "striping_unit", MPI_MAX_INFO_VAL, value, &flag); if (flag && (atoi(value) > 0)) pstat.ssize = atoi(value); - MPI_Info_get(fd->info, "start_iodevice", MPI_MAX_INFO_VAL, + ADIOI_Info_get(fd->info, "start_iodevice", MPI_MAX_INFO_VAL, value, &flag); if (flag && (atoi(value) >= 0)) pstat.base = atoi(value); @@ -71,11 +71,11 @@ void ADIOI_PVFS_Open(ADIO_File fd, int *error_code) if (fd->fd_sys != -1) { pvfs_ioctl(fd->fd_sys, GETMETA, &pstat); ADIOI_Snprintf(value, MPI_MAX_INFO_VAL+1, "%d", pstat.pcount); - MPI_Info_set(fd->info, "striping_factor", value); + ADIOI_Info_set(fd->info, "striping_factor", value); ADIOI_Snprintf(value, MPI_MAX_INFO_VAL+1, "%d", pstat.ssize); - MPI_Info_set(fd->info, "striping_unit", value); + ADIOI_Info_set(fd->info, "striping_unit", value); ADIOI_Snprintf(value, MPI_MAX_INFO_VAL+1, "%d", pstat.base); - MPI_Info_set(fd->info, "start_iodevice", value); + ADIOI_Info_set(fd->info, "start_iodevice", value); } ADIOI_Free(value); diff --git a/ompi/mca/io/romio/romio/adio/ad_pvfs/ad_pvfs_read.c b/ompi/mca/io/romio/romio/adio/ad_pvfs/ad_pvfs_read.c index a0e3fb8456..f6035218c3 100644 --- a/ompi/mca/io/romio/romio/adio/ad_pvfs/ad_pvfs_read.c +++ b/ompi/mca/io/romio/romio/adio/ad_pvfs/ad_pvfs_read.c @@ -43,7 +43,8 @@ void ADIOI_PVFS_ReadContig(ADIO_File fd, void *buf, int count, 
#ifdef ADIOI_MPE_LOGGING MPE_Log_event( ADIOI_MPE_read_b, 0, NULL ); #endif - fd->fp_sys_posn = offset + err; + if (err>0) + fd->fp_sys_posn = offset + err; /* individual file pointer not updated */ } else { /* read from curr. location of ind. file pointer */ @@ -63,7 +64,8 @@ void ADIOI_PVFS_ReadContig(ADIO_File fd, void *buf, int count, #ifdef ADIOI_MPE_LOGGING MPE_Log_event( ADIOI_MPE_read_b, 0, NULL ); #endif - fd->fp_ind += err; + if (err > 0) + fd->fp_ind += err; fd->fp_sys_posn = fd->fp_ind; } diff --git a/ompi/mca/io/romio/romio/adio/ad_pvfs/ad_pvfs_write.c b/ompi/mca/io/romio/romio/adio/ad_pvfs/ad_pvfs_write.c index df261593ef..50175f3e51 100644 --- a/ompi/mca/io/romio/romio/adio/ad_pvfs/ad_pvfs_write.c +++ b/ompi/mca/io/romio/romio/adio/ad_pvfs/ad_pvfs_write.c @@ -43,7 +43,8 @@ void ADIOI_PVFS_WriteContig(ADIO_File fd, void *buf, int count, #ifdef ADIOI_MPE_LOGGING MPE_Log_event( ADIOI_MPE_write_b, 0, NULL ); #endif - fd->fp_sys_posn = offset + err; + if (err > 0) + fd->fp_sys_posn = offset + err; /* individual file pointer not updated */ } else { /* write from curr. location of ind. 
file pointer */ @@ -63,7 +64,8 @@ void ADIOI_PVFS_WriteContig(ADIO_File fd, void *buf, int count, #ifdef ADIOI_MPE_LOGGING MPE_Log_event( ADIOI_MPE_write_b, 0, NULL ); #endif - fd->fp_ind += err; + if (err > 0) + fd->fp_ind += err; fd->fp_sys_posn = fd->fp_ind; } diff --git a/ompi/mca/io/romio/romio/adio/ad_pvfs2/Makefile.am b/ompi/mca/io/romio/romio/adio/ad_pvfs2/Makefile.am index ea21dda326..3670d07691 100644 --- a/ompi/mca/io/romio/romio/adio/ad_pvfs2/Makefile.am +++ b/ompi/mca/io/romio/romio/adio/ad_pvfs2/Makefile.am @@ -28,9 +28,15 @@ libadio_pvfs2_la_SOURCES = \ ad_pvfs2_common.c \ ad_pvfs2_delete.c \ ad_pvfs2_fcntl.c \ + ad_pvfs2_features.c \ ad_pvfs2_flush.c \ ad_pvfs2_hints.c \ + ad_pvfs2_io.h \ + ad_pvfs2_io_dtype.c \ + ad_pvfs2_io_list.c \ ad_pvfs2_open.c \ ad_pvfs2_read.c \ + ad_pvfs2_read_list_classic.c ad_pvfs2_resize.c \ - ad_pvfs2_write.c + ad_pvfs2_write.c \ + ad_pvfs2_write_list_classic.c diff --git a/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2.c b/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2.c index 915be8e0b4..75ab87d854 100644 --- a/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2.c +++ b/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2.c @@ -12,6 +12,7 @@ struct ADIOI_Fns_struct ADIO_PVFS2_operations = { ADIOI_PVFS2_Open, /* Open */ + ADIOI_SCALEABLE_OpenColl, /* OpenColl */ ADIOI_PVFS2_ReadContig, /* ReadContig */ ADIOI_PVFS2_WriteContig, /* WriteContig */ ADIOI_GEN_ReadStridedColl, /* ReadStridedColl */ @@ -22,13 +23,8 @@ struct ADIOI_Fns_struct ADIO_PVFS2_operations = { ADIOI_PVFS2_ReadStrided, /* ReadStrided */ ADIOI_PVFS2_WriteStrided, /* WriteStrided */ ADIOI_PVFS2_Close, /* Close */ -#ifdef ROMIO_HAVE_WORKING_AIO ADIOI_PVFS2_IReadContig, /* IreadContig */ ADIOI_PVFS2_IWriteContig, /* IwriteContig */ -#else - ADIOI_FAKE_IreadContig, /* IreadContig */ - ADIOI_FAKE_IwriteContig, /* IwriteContig */ -#endif ADIOI_FAKE_IODone, /* ReadDone */ ADIOI_FAKE_IODone, /* WriteDone */ ADIOI_FAKE_IOComplete, /* ReadComplete */ @@ -38,6 +34,7 @@ struct 
ADIOI_Fns_struct ADIO_PVFS2_operations = { ADIOI_PVFS2_Flush, /* Flush */ ADIOI_PVFS2_Resize, /* Resize */ ADIOI_PVFS2_Delete, /* Delete */ + ADIOI_PVFS2_Feature, }; /* diff --git a/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2.h b/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2.h index 026e400d0c..e3ff045233 100644 --- a/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2.h +++ b/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2.h @@ -17,7 +17,6 @@ #include "pvfs2-compat.h" #endif - void ADIOI_PVFS2_Open(ADIO_File fd, int *error_code); void ADIOI_PVFS2_Close(ADIO_File fd, int *error_code); void ADIOI_PVFS2_ReadContig(ADIO_File fd, void *buf, int count, @@ -42,6 +41,8 @@ void ADIOI_PVFS2_Flush(ADIO_File fd, int *error_code); void ADIOI_PVFS2_Delete(char *filename, int *error_code); void ADIOI_PVFS2_Resize(ADIO_File fd, ADIO_Offset size, int *error_code); void ADIOI_PVFS2_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code); +int ADIOI_PVFS2_Feature(ADIO_File fd, int flag); + void ADIOI_PVFS2_IReadContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, MPI_Request *request, @@ -54,4 +55,12 @@ void ADIOI_PVFS2_AIO_contig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, MPI_Request *request, int flag, int *error_code); +void ADIOI_PVFS2_OldWriteStrided(ADIO_File fd, void *buf, int count, + MPI_Datatype datatype, int file_ptr_type, + ADIO_Offset offset, ADIO_Status *status, int + *error_code); +void ADIOI_PVFS2_OldReadStrided(ADIO_File fd, void *buf, int count, + MPI_Datatype datatype, int file_ptr_type, + ADIO_Offset offset, ADIO_Status *status, int + *error_code); #endif diff --git a/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_aio.c b/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_aio.c index 880d1e28f6..89ca5c2f6b 100644 --- a/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_aio.c +++ b/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_aio.c @@ -17,7 +17,6 @@ #define READ 
0 #define WRITE 1 -#ifdef ROMIO_HAVE_WORKING_AIO static int ADIOI_PVFS2_greq_class = 0; int ADIOI_PVFS2_aio_free_fn(void *extra_state); int ADIOI_PVFS2_aio_poll_fn(void *extra_state, MPI_Status *status); @@ -168,12 +167,10 @@ int ADIOI_PVFS2_aio_poll_fn(void *extra_state, MPI_Status *status) aio_req = (ADIOI_AIO_Request *)extra_state; /* BUG: cannot PVFS_sys_testsome: does not work for a specific request */ - ret = PVFS_sys_wait(aio_req->op_id, __FUNCTION__, &error); + ret = PVFS_sys_wait(aio_req->op_id, "ADIOI_PVFS2_aio_poll_fn", &error); if (ret == 0) { aio_req->nbytes = aio_req->resp_io.total_completed; - MPIR_Nest_incr(); MPI_Grequest_complete(aio_req->req); - MPIR_Nest_decr(); return MPI_SUCCESS; } else return MPI_UNDEFINED; /* TODO: what's this error? */ @@ -186,7 +183,7 @@ int ADIOI_PVFS2_aio_wait_fn(int count, void ** array_of_states, ADIOI_AIO_Request **aio_reqlist; PVFS_sys_op_id *op_id_array; - int i,j, greq_count; + int i,j, greq_count, completed_count=0; int *error_array; aio_reqlist = (ADIOI_AIO_Request **)array_of_states; @@ -195,25 +192,27 @@ int ADIOI_PVFS2_aio_wait_fn(int count, void ** array_of_states, error_array = (int *)ADIOI_Calloc(count, sizeof(int)); greq_count = count; + /* PVFS-2.6: testsome actually tests all requests and fills in op_id_array * with the ones that have completed. count is an in/out parameter. * returns with the number of completed operations. what a mess! 
*/ - PVFS_sys_testsome(op_id_array, &count, NULL, error_array, INT_MAX); - for (i=0; i< count; i++) { - for (j=0; j<greq_count; j++) { - if (op_id_array[i] == aio_reqlist[j]->op_id) { - aio_reqlist[j]->nbytes = - aio_reqlist[j]->resp_io.total_completed; - MPIR_Nest_incr(); - MPI_Grequest_complete(aio_reqlist[j]->req); - MPIR_Nest_decr(); + while (completed_count < greq_count ) { + count = greq_count; + PVFS_sys_testsome(op_id_array, &count, NULL, error_array, INT_MAX); + completed_count += count; + for (i=0; i< count; i++) { + for (j=0; j<greq_count; j++) { + if (op_id_array[i] == aio_reqlist[j]->op_id) { + aio_reqlist[j]->nbytes = + aio_reqlist[j]->resp_io.total_completed; + MPI_Grequest_complete(aio_reqlist[j]->req); + } } } } return MPI_SUCCESS; /* TODO: no idea how to deal with errors */ } -#endif /* * vim: ts=8 sts=4 sw=4 noexpandtab diff --git a/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_common.c b/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_common.c index adbd104520..4da9eac02a 100644 --- a/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_common.c +++ b/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_common.c @@ -42,6 +42,7 @@ int ADIOI_PVFS2_End_call(MPI_Comm comm, int keyval, { int error_code; ADIOI_PVFS2_End(&error_code); + MPI_Keyval_free(&keyval); return error_code; } @@ -81,7 +82,7 @@ void ADIOI_PVFS2_Init(int *error_code ) &ADIOI_PVFS2_Initialized, (void *)0); /* just like romio does, we make a dummy attribute so we * get cleaned up */ - MPI_Attr_put(MPI_COMM_WORLD, ADIOI_PVFS2_Initialized, (void *)0); + MPI_Attr_put(MPI_COMM_SELF, ADIOI_PVFS2_Initialized, (void *)0); } void ADIOI_PVFS2_makeattribs(PVFS_sys_attr * attribs) @@ -107,7 +108,41 @@ void ADIOI_PVFS2_makecredentials(PVFS_credentials * credentials) int ADIOI_PVFS2_error_convert(int pvfs_error) { - return MPI_UNDEFINED; + switch(pvfs_error) + { + case PVFS_EPERM: + case PVFS_EACCES: + return MPI_ERR_ACCESS; + case PVFS_ENOENT: + case PVFS_ENXIO: + case PVFS_ENODEV: + return MPI_ERR_NO_SUCH_FILE; + case PVFS_EIO: + return MPI_ERR_IO; + case PVFS_EEXIST: + return MPI_ERR_FILE_EXISTS; + case PVFS_ENOTDIR: /* 
??? */ + case PVFS_EISDIR: /* ??? */ + case PVFS_ENAMETOOLONG: + return MPI_ERR_BAD_FILE; + case PVFS_EINVAL: + return MPI_ERR_FILE; + case PVFS_EFBIG: /* ??? */ + case PVFS_ENOSPC: + return MPI_ERR_NO_SPACE; + case PVFS_EROFS: + return MPI_ERR_READ_ONLY; + case PVFS_ENOSYS: + return MPI_ERR_UNSUPPORTED_OPERATION; + /* PVFS does not support quotas */ + case EDQUOT: + return MPI_ERR_QUOTA; + case PVFS_ENOMEM: + return MPI_ERR_INTERN; + default: + return MPI_UNDEFINED; + } + } /* diff --git a/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_features.c b/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_features.c new file mode 100644 index 0000000000..71d99e67e8 --- /dev/null +++ b/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_features.c @@ -0,0 +1,16 @@ +#include "adio.h" +#include "ad_pvfs2.h" + +int ADIOI_PVFS2_Feature(ADIO_File fd, int flag) +{ + switch(flag) { + case ADIO_SCALABLE_OPEN: + return 1; + case ADIO_SHARED_FP: + case ADIO_LOCKS: + case ADIO_SEQUENTIAL: + case ADIO_DATA_SIEVING_WRITES: + default: + return 0; + } +} diff --git a/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_hints.c b/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_hints.c index a752dbeade..5170e8afb7 100644 --- a/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_hints.c +++ b/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_hints.c @@ -17,20 +17,37 @@ void ADIOI_PVFS2_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) if ((fd->info) == MPI_INFO_NULL) { /* part of the open call */ MPI_Info_create(&(fd->info)); - MPI_Info_set(fd->info, "romio_pvfs2_debugmask", "0"); + ADIOI_Info_set(fd->info, "romio_pvfs2_debugmask", "0"); fd->hints->fs_hints.pvfs2.debugmask = 0; - MPI_Info_set(fd->info, "striping_factor", "0"); + ADIOI_Info_set(fd->info, "striping_factor", "0"); fd->hints->striping_factor = 0; - MPI_Info_set(fd->info, "striping_unit", "0"); + ADIOI_Info_set(fd->info, "striping_unit", "0"); fd->hints->striping_unit = 0; + + /* disable the aggressive strided optimizations by default */ + 
ADIOI_Info_set(fd->info, "romio_pvfs2_posix_read", "disable"); + ADIOI_Info_set(fd->info, "romio_pvfs2_posix_write", "disable"); + fd->hints->fs_hints.pvfs2.posix_read = ADIOI_HINT_DISABLE; + fd->hints->fs_hints.pvfs2.posix_write = ADIOI_HINT_DISABLE; + + ADIOI_Info_set(fd->info, "romio_pvfs2_dtype_read", "disable"); + ADIOI_Info_set(fd->info, "romio_pvfs2_dtype_write", "disable"); + fd->hints->fs_hints.pvfs2.dtype_read = ADIOI_HINT_DISABLE; + fd->hints->fs_hints.pvfs2.dtype_write = ADIOI_HINT_DISABLE; + + ADIOI_Info_set(fd->info, "romio_pvfs2_listio_read", "disable"); + ADIOI_Info_set(fd->info, "romio_pvfs2_listio_write", "disable"); + fd->hints->fs_hints.pvfs2.listio_read = ADIOI_HINT_DISABLE; + fd->hints->fs_hints.pvfs2.listio_write = ADIOI_HINT_DISABLE; + /* any user-provided hints? */ if (users_info != MPI_INFO_NULL) { /* pvfs2 debugging */ value = (char *) ADIOI_Malloc( (MPI_MAX_INFO_VAL+1)*sizeof(char)); - MPI_Info_get(users_info, "romio_pvfs2_debugmask", + ADIOI_Info_get(users_info, "romio_pvfs2_debugmask", MPI_MAX_INFO_VAL, value, &flag); if (flag) { tmp_value = fd->hints->fs_hints.pvfs2.debugmask = @@ -46,11 +63,11 @@ void ADIOI_PVFS2_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) } /* --END ERROR HANDLING-- */ - MPI_Info_set(fd->info, "romio_pvfs2_debugmask", value); + ADIOI_Info_set(fd->info, "romio_pvfs2_debugmask", value); } /* the striping factor */ - MPI_Info_get(users_info, "striping_factor", + ADIOI_Info_get(users_info, "striping_factor", MPI_MAX_INFO_VAL, value, &flag); if (flag) { tmp_value = fd->hints->striping_factor = atoi(value); @@ -65,11 +82,11 @@ void ADIOI_PVFS2_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) } /* --END ERROR HANDLING-- */ - MPI_Info_set(fd->info, "striping_factor", value); + ADIOI_Info_set(fd->info, "striping_factor", value); } /* the striping unit */ - MPI_Info_get(users_info, "striping_unit", + ADIOI_Info_get(users_info, "striping_unit", MPI_MAX_INFO_VAL, value, &flag); if (flag) { tmp_value = 
fd->hints->striping_unit = atoi(value); @@ -83,16 +100,167 @@ void ADIOI_PVFS2_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) } /* --END ERROR HANDLING-- */ - MPI_Info_set(fd->info, "striping_unit", value); + ADIOI_Info_set(fd->info, "striping_unit", value); } /* distribution name */ - MPI_Info_get(users_info, "romio_pvfs2_distribution_name", + ADIOI_Info_get(users_info, "romio_pvfs2_distribution_name", MPI_MAX_INFO_VAL, value, &flag); if (flag) { } + + /* POSIX read */ + ADIOI_Info_get(users_info, "romio_pvfs2_posix_read", + MPI_MAX_INFO_VAL, value, &flag); + if (flag) { + if ( !strcmp(value, "enable") || !strcmp(value, "ENABLE")) + { + ADIOI_Info_set(fd->info, "romio_pvfs2_posix_read", value); + fd->hints->fs_hints.pvfs2.posix_read = ADIOI_HINT_ENABLE; + } + else if ( !strcmp(value, "disable") || + !strcmp(value, "DISABLE")) + { + ADIOI_Info_set(fd->info , "romio_pvfs2_posix_read", value); + fd->hints->fs_hints.pvfs2.posix_read = ADIOI_HINT_DISABLE; + } + tmp_value = fd->hints->fs_hints.pvfs2.posix_read; + MPI_Bcast(&tmp_value, 1, MPI_INT, 0, fd->comm); + if (tmp_value != fd->hints->fs_hints.pvfs2.posix_read) { + MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname, + "posix_read", + error_code); + return; + } + } + + /* POSIX write */ + ADIOI_Info_get(users_info, "romio_pvfs2_posix_write", + MPI_MAX_INFO_VAL, value, &flag); + if (flag) { + if ( !strcmp(value, "enable") || !strcmp(value, "ENABLE")) + { + ADIOI_Info_set(fd->info, "romio_pvfs2_posix_write", value); + fd->hints->fs_hints.pvfs2.posix_write = ADIOI_HINT_ENABLE; + } + else if ( !strcmp(value, "disable") || + !strcmp(value, "DISABLE")) + { + ADIOI_Info_set(fd->info , "romio_pvfs2_posix_write", value); + fd->hints->fs_hints.pvfs2.posix_write = ADIOI_HINT_DISABLE; + } + tmp_value = fd->hints->fs_hints.pvfs2.posix_write; + MPI_Bcast(&tmp_value, 1, MPI_INT, 0, fd->comm); + if (tmp_value != fd->hints->fs_hints.pvfs2.posix_write) { + MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname, + "posix_write", + error_code); 
+ return; + } + } + + /* Datatype read */ + ADIOI_Info_get(users_info, "romio_pvfs2_dtype_read", + MPI_MAX_INFO_VAL, value, &flag); + if (flag) { + if ( !strcmp(value, "enable") || !strcmp(value, "ENABLE")) + { + ADIOI_Info_set(fd->info, "romio_pvfs2_dtype_read", value); + fd->hints->fs_hints.pvfs2.dtype_read = ADIOI_HINT_ENABLE; + } + else if ( !strcmp(value, "disable") || + !strcmp(value, "DISABLE")) + { + ADIOI_Info_set(fd->info , "romio_pvfs2_dtype_read", value); + fd->hints->fs_hints.pvfs2.dtype_read = ADIOI_HINT_DISABLE; + } + tmp_value = fd->hints->fs_hints.pvfs2.dtype_read; + MPI_Bcast(&tmp_value, 1, MPI_INT, 0, fd->comm); + if (tmp_value != fd->hints->fs_hints.pvfs2.dtype_read) { + MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname, + "dtype_read", + error_code); + return; + } + } + + /* Datatype write */ + ADIOI_Info_get(users_info, "romio_pvfs2_dtype_write", + MPI_MAX_INFO_VAL, value, &flag); + if (flag) { + if ( !strcmp(value, "enable") || !strcmp(value, "ENABLE")) + { + ADIOI_Info_set(fd->info, "romio_pvfs2_dtype_write", value); + fd->hints->fs_hints.pvfs2.dtype_write = ADIOI_HINT_ENABLE; + } + else if ( !strcmp(value, "disable") || + !strcmp(value, "DISABLE")) + { + ADIOI_Info_set(fd->info , "romio_pvfs2_dtype_write", value); + fd->hints->fs_hints.pvfs2.dtype_write = ADIOI_HINT_DISABLE; + } + tmp_value = fd->hints->fs_hints.pvfs2.dtype_write; + MPI_Bcast(&tmp_value, 1, MPI_INT, 0, fd->comm); + if (tmp_value != fd->hints->fs_hints.pvfs2.dtype_write) { + MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname, + "dtype_write", + error_code); + return; + } + } + + /* Listio read */ + ADIOI_Info_get(users_info, "romio_pvfs2_listio_read", + MPI_MAX_INFO_VAL, value, &flag); + if (flag) { + if ( !strcmp(value, "enable") || !strcmp(value, "ENABLE")) + { + ADIOI_Info_set(fd->info, "romio_pvfs2_listio_read", value); + fd->hints->fs_hints.pvfs2.listio_read = ADIOI_HINT_ENABLE; + } + else if ( !strcmp(value, "disable") || + !strcmp(value, "DISABLE")) + { + ADIOI_Info_set(fd->info , 
"romio_pvfs2_listio_read", value); + fd->hints->fs_hints.pvfs2.listio_read = ADIOI_HINT_DISABLE; + } + tmp_value = fd->hints->fs_hints.pvfs2.listio_read; + MPI_Bcast(&tmp_value, 1, MPI_INT, 0, fd->comm); + if (tmp_value != fd->hints->fs_hints.pvfs2.listio_read) { + MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname, + "listio_read", + error_code); + return; + } + } + + /* Datatype write */ + ADIOI_Info_get(users_info, "romio_pvfs2_listio_write", + MPI_MAX_INFO_VAL, value, &flag); + if (flag) { + if ( !strcmp(value, "enable") || !strcmp(value, "ENABLE")) + { + ADIOI_Info_set(fd->info, "romio_pvfs2_listio_write", value); + fd->hints->fs_hints.pvfs2.listio_write = ADIOI_HINT_ENABLE; + } + else if ( !strcmp(value, "disable") || + !strcmp(value, "DISABLE")) + { + ADIOI_Info_set(fd->info , "romio_pvfs2_listio_write", value); + fd->hints->fs_hints.pvfs2.listio_write = ADIOI_HINT_DISABLE; + } + tmp_value = fd->hints->fs_hints.pvfs2.listio_write; + MPI_Bcast(&tmp_value, 1, MPI_INT, 0, fd->comm); + if (tmp_value != fd->hints->fs_hints.pvfs2.listio_write) { + MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname, + "listio_write", + error_code); + return; + } + } ADIOI_Free(value); + } } /* set the values for collective I/O and data sieving parameters */ diff --git a/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_io.h b/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_io.h new file mode 100644 index 0000000000..dc1641ee1b --- /dev/null +++ b/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_io.h @@ -0,0 +1,79 @@ +/* -*- Mode: C; c-basic-offset:4 ; -*- + * vim: ts=8 sts=4 sw=4 noexpandtab + * + * Copyright (C) 2006 University of Chicago. + * See COPYRIGHT notice in top-level directory. 
+ */ + +/* Contig I/O helper prototypes */ + +#define READ 0 +#define WRITE 1 + +/* #define DEBUG_CONTIG */ +/* #define DEBUG_LIST */ +/* #define DEBUG_DTYPE */ + +/* Contig I/O helper prototypes */ +int ADIOI_PVFS2_Contig(ADIO_File fd, void *buf, int count, + MPI_Datatype datatype, int file_ptr_type, + ADIO_Offset offset, ADIO_Status *status, + int *error_code, int rw_type); + +/* List I/O helper prototypes */ +int ADIOI_PVFS2_StridedListIO(ADIO_File fd, void *buf, int count, + MPI_Datatype datatype, int file_ptr_type, + ADIO_Offset offset, ADIO_Status *status, + int *error_code, int rw_type); + +int gen_listio_arr(ADIOI_Flatlist_node *flat_buf, + int *flat_buf_index_p, + int64_t *cur_flat_buf_reg_off_p, + int flat_buf_size, + int flat_buf_extent, + ADIOI_Flatlist_node *flat_file, + int *flat_file_index_p, + int64_t *cur_flat_file_reg_off_p, + int flat_file_size, + int flat_file_extent, + int max_ol_count, + ADIO_Offset disp, + int bytes_into_filetype, + int64_t *bytes_completed, + int64_t total_io_size, + int64_t buf_off_arr[], + int32_t buf_len_arr[], + int32_t *buf_ol_count_p, + int64_t file_off_arr[], + int32_t file_len_arr[], + int32_t *file_ol_count_p); + +void print_buf_file_ol_pairs(int64_t buf_off_arr[], + int32_t buf_len_arr[], + int32_t buf_ol_count, + int64_t file_off_arr[], + int32_t file_len_arr[], + int32_t file_ol_count, + void *buf, + int rw_type); + +/* Datatype I/O helper prototypes */ +int ADIOI_PVFS2_StridedDtypeIO(ADIO_File fd, void *buf, int count, + MPI_Datatype datatype, int file_ptr_type, + ADIO_Offset offset, ADIO_Status *status, + int *error_code, int rw_type); + +int convert_named(MPI_Datatype *mpi_dtype, + PVFS_Request *pvfs_dtype, int combiner); + +void print_dtype_info(int combiner, + int num_int, + int num_addr, + int num_dtype, + int *arr_int, + MPI_Aint *arr_addr, + MPI_Datatype *arr_dtype); + +int convert_mpi_pvfs2_dtype(MPI_Datatype *mpi_dtype, + PVFS_Request *pvfs_dtype); + diff --git 
a/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_io_dtype.c b/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_io_dtype.c new file mode 100644 index 0000000000..ff625b53f8 --- /dev/null +++ b/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_io_dtype.c @@ -0,0 +1,720 @@ +/* -*- Mode: C; c-basic-offset:4 ; -*- + * vim: ts=8 sts=4 sw=4 noexpandtab + * + * Copyright (C) 2006 University of Chicago. + * See COPYRIGHT notice in top-level directory. + */ + +#include <assert.h> +#include "adio.h" +#include "adio_extern.h" +#include "ad_pvfs2.h" +#include "ad_pvfs2_io.h" +#include "ad_pvfs2_common.h" + +int ADIOI_PVFS2_StridedDtypeIO(ADIO_File fd, void *buf, int count, + MPI_Datatype datatype, int file_ptr_type, + ADIO_Offset offset, ADIO_Status *status, int + *error_code, + int rw_type) +{ + int filetype_size = -1, ret = -1, filetype_is_contig = -1; + int num_filetypes = 0, cur_flat_file_reg_off = 0; + PVFS_Request tmp_mem_req, mem_req, tmp_file_req, file_req; + PVFS_sysresp_io resp_io; + ADIO_Offset off = -1, bytes_into_filetype = 0; + MPI_Aint filetype_extent = -1; + int etype_size = -1, i = -1; + PVFS_size pvfs_disp = -1; + ADIOI_Flatlist_node *flat_file_p = ADIOI_Flatlist; + + /* Used for offsetting the PVFS2 filetype */ + int pvfs_blk = 1; + ADIOI_PVFS2_fs *pvfs_fs; + static char myname[] = "ADIOI_PVFS2_STRIDED_DTYPE"; + + memset(&tmp_mem_req, 0, sizeof(PVFS_Request)); + memset(&mem_req, 0, sizeof(PVFS_Request)); + memset(&tmp_file_req, 0, sizeof(PVFS_Request)); + memset(&file_req, 0, sizeof(PVFS_Request)); + + pvfs_fs = (ADIOI_PVFS2_fs*)fd->fs_ptr; + + ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig); + + /* changed below if error */ + *error_code = MPI_SUCCESS; + + /* datatype is the memory type + * fd->filetype is the file type */ + MPI_Type_size(fd->filetype, &filetype_size); + if (filetype_size == 0) { + *error_code = MPI_SUCCESS; + return -1; + } + MPI_Type_extent(fd->filetype, &filetype_extent); + MPI_Type_size(fd->etype, &etype_size); + if (filetype_size == 0) { + 
*error_code = MPI_SUCCESS; + return -1; + } + + /* offset is in units of etype relative to the filetype. We + * convert this to off in terms of actual data bytes (the offset + * minus the number of bytes that are not used). We are allowed + * to do this since PVFS2 handles offsets with respect to a + * file_req in bytes, otherwise we would have to convert into a + * pure byte offset as is done in other methods. Explicit offset + * case is handled by using fd->disp and byte-converted off. */ + + pvfs_disp = fd->disp; + if (file_ptr_type == ADIO_INDIVIDUAL) + { + if (filetype_is_contig) + { + off = fd->fp_ind - fd->disp; + } + else + { + int flag = 0; + /* Should have already been flattened in ADIO_Open*/ + while (flat_file_p->type != fd->filetype) + { + flat_file_p = flat_file_p->next; + } + num_filetypes = -1; + while (!flag) + { + num_filetypes++; + for (i = 0; i < flat_file_p->count; i++) + { + /* Start on a non zero-length region */ + if (flat_file_p->blocklens[i]) + { + if (fd->disp + flat_file_p->indices[i] + + (num_filetypes * filetype_extent) + + flat_file_p->blocklens[i] > fd->fp_ind && + fd->disp + flat_file_p->indices[i] <= + fd->fp_ind) + { + cur_flat_file_reg_off = fd->fp_ind - + (fd->disp + flat_file_p->indices[i] + + (num_filetypes * filetype_extent)); + flag = 1; + break; + } + else + bytes_into_filetype += flat_file_p->blocklens[i]; + } + } + } + /* Impossible that we don't find it in this datatype */ + assert(i != flat_file_p->count); + off = bytes_into_filetype + cur_flat_file_reg_off; + } + } + else /* ADIO_EXPLICIT */ + { + off = etype_size * offset; + } + +#ifdef DEBUG_DTYPE + fprintf(stderr, "ADIOI_PVFS2_StridedDtypeIO: (fd->fp_ind=%Ld,fd->disp=%Ld," + " offset=%Ld),(pvfs_disp=%Ld,off=%Ld)\n", + fd->fp_ind, fd->disp, offset, pvfs_disp, off); +#endif + + + /* Convert the MPI memory and file datatypes into + * PVFS2 datatypes */ + ret = convert_mpi_pvfs2_dtype(&datatype, &tmp_mem_req); + if (ret < 0) + { + goto error_state; + } + ret = 
convert_mpi_pvfs2_dtype(&(fd->filetype), &tmp_file_req); + if (ret < 0) + { + goto error_state; + } + + ret = PVFS_Request_contiguous(count, tmp_mem_req, &mem_req); + if (ret != 0) /* TODO: convert this to MPIO error handling */ + fprintf(stderr, "ADIOI_PVFS2_stridedDtypeIO: error in final" + " CONTIG memory type\n"); + PVFS_Request_free(&tmp_mem_req); + + /* pvfs_disp is used to offset the filetype */ + ret = PVFS_Request_hindexed(1, &pvfs_blk, &pvfs_disp, + tmp_file_req, &file_req); + if (ret != 0) + fprintf(stderr, "ADIOI_PVFS2_StridedDtypeIO: error in final" + " HINDEXED file type\n"); + PVFS_Request_free(&tmp_file_req); + + if (rw_type == READ) + ret = PVFS_sys_read(pvfs_fs->object_ref, file_req, off, buf, + mem_req, &(pvfs_fs->credentials), &resp_io); + else + ret = PVFS_sys_write(pvfs_fs->object_ref, file_req, off, buf, + mem_req, &(pvfs_fs->credentials), &resp_io); + + if (ret != 0) { + fprintf(stderr, "ADIOI_PVFS2_StridedDtypeIO: Warning - PVFS_sys_" + "read/write returned %d and completed %Ld bytes.\n", + ret, resp_io.total_completed); + *error_code = MPIO_Err_create_code(MPI_SUCCESS, + MPIR_ERR_RECOVERABLE, + myname, __LINE__, + ADIOI_PVFS2_error_convert(ret), + "Error in PVFS_sys_io \n", 0); + goto error_state; + } + + if (file_ptr_type == ADIO_INDIVIDUAL) + { + fd->fp_ind = off += resp_io.total_completed; + } + + error_state: + fd->fp_sys_posn = -1; /* set it to null. */ + + PVFS_Request_free(&mem_req); + PVFS_Request_free(&file_req); + +#ifdef DEBUG_DTYPE + fprintf(stderr, "ADIOI_PVFS2_StridedDtypeIO: " + "resp_io.total_completed=%Ld,ret=%d\n", + resp_io.total_completed, ret); +#endif + +#ifdef HAVE_STATUS_SET_BYTES + MPIR_Status_set_bytes(status, datatype, (int)resp_io.total_completed); + /* This is a temporary way of filling in status. 
The right way is to + * keep track of how much data was actually accessed by + * ADIOI_BUFFERED operations */ +#endif + return ret; +} + +/* convert_mpi_pvfs2_dtype - Convert a MPI datatype into + * a PVFS2 datatype so that we can natively use the PVFS2 + * datatypes in the PVFS2 I/O calls instead of converting + * all datatypes to the hindexed method + * return 1 - a leaf node + * return 0 - normal return + * return -1 - problems */ + +int convert_mpi_pvfs2_dtype(MPI_Datatype *mpi_dtype, + PVFS_Request *pvfs_dtype) +{ + int num_int = -1, num_addr = -1, num_dtype = -1, + combiner = -1, i = -1, ret = -1, leaf = -1; + int *arr_int = NULL, *arr_addr = NULL; + MPI_Datatype *arr_dtype = NULL; + PVFS_Request *old_pvfs_dtype = NULL; + PVFS_Request *old_pvfs_dtype_arr = NULL; + int arr_count = -1; + PVFS_size *pvfs_arr_disp = NULL; + int *pvfs_arr_len = NULL; + + MPI_Type_get_envelope(*mpi_dtype, + &num_int, + &num_addr, + &num_dtype, + &combiner); + + /* Depending on type of datatype do the following + * operations */ + + if (combiner == MPI_COMBINER_NAMED) + { + convert_named(mpi_dtype, pvfs_dtype, combiner); + return 1; + } + + /* Allocate space for the arrays necessary for + * MPI_Type_get_contents */ + + if ((arr_int = ADIOI_Malloc(sizeof(int)*num_int)) == NULL) + { + fprintf(stderr, "Failed to allocate array_int\n"); + return -1; + } + if ((arr_addr = ADIOI_Malloc(sizeof(int)*num_addr)) == NULL) + { + ADIOI_Free(arr_int); + fprintf(stderr, "Failed to allocate array_addr\n"); + return -1; + } + if ((arr_dtype = ADIOI_Malloc(sizeof(MPI_Datatype)*num_dtype)) == NULL) + { + ADIOI_Free(arr_int); + ADIOI_Free(arr_addr); + fprintf(stderr, "Failed to allocate array_dtypes\n"); + return -1; + } + + MPI_Type_get_contents(*mpi_dtype, + num_int, + num_addr, + num_dtype, + arr_int, + arr_addr, + arr_dtype); + + /* If it's not a predefined datatype, it is either a + * derived datatype or a structured datatype */ + + if (combiner != MPI_COMBINER_STRUCT) + { + if ((old_pvfs_dtype = 
ADIOI_Malloc(sizeof(PVFS_Request))) == NULL) + fprintf(stderr, "convert_mpi_pvfs2_dtype: " + "Failed to allocate PVFS_Request\n"); + switch (combiner) + { + case MPI_COMBINER_CONTIGUOUS: + leaf = convert_mpi_pvfs2_dtype(&arr_dtype[0], old_pvfs_dtype); + ret = PVFS_Request_contiguous(arr_int[0], + *old_pvfs_dtype, pvfs_dtype); + break; + case MPI_COMBINER_VECTOR: + leaf = convert_mpi_pvfs2_dtype(&arr_dtype[0], old_pvfs_dtype); + ret = PVFS_Request_vector(arr_int[0], arr_int[1], + arr_int[2], *old_pvfs_dtype, + pvfs_dtype); + break; + case MPI_COMBINER_HVECTOR: + leaf = convert_mpi_pvfs2_dtype(&arr_dtype[0], old_pvfs_dtype); + ret = PVFS_Request_hvector(arr_int[0], arr_int[1], + arr_addr[0], *old_pvfs_dtype, + pvfs_dtype); + break; + /* Both INDEXED and HINDEXED types require PVFS_size + * address arrays. Therefore, we need to copy and + * convert the data from MPI_get_contents() into + * a PVFS_size buffer */ + case MPI_COMBINER_INDEXED: + leaf = convert_mpi_pvfs2_dtype(&arr_dtype[0], old_pvfs_dtype); + if ((pvfs_arr_disp = + ADIOI_Malloc(arr_int[0]*sizeof(PVFS_size))) == 0) + { + fprintf(stderr, "convert_mpi_pvfs2_dtype: " + "Failed to allocate pvfs_arr_disp\n"); + } + for (i = 0; i < arr_int[0]; i++) + { + pvfs_arr_disp[i] = + (PVFS_size) arr_int[arr_int[0]+1+i]; + } + ret = PVFS_Request_indexed(arr_int[0], &arr_int[1], + pvfs_arr_disp, + *old_pvfs_dtype, pvfs_dtype); + ADIOI_Free(pvfs_arr_disp); + break; + case MPI_COMBINER_HINDEXED: + leaf = convert_mpi_pvfs2_dtype(&arr_dtype[0], old_pvfs_dtype); + if ((pvfs_arr_disp = + ADIOI_Malloc(arr_int[0]*sizeof(PVFS_size))) == 0) + { + fprintf(stderr, "convert_mpi_pvfs2_dtype: " + "Failed to allocate pvfs_arr_disp\n"); + } + for (i = 0; i < arr_int[0]; i++) + { + pvfs_arr_disp[i] = + (PVFS_size) arr_addr[i]; + } + ret = PVFS_Request_hindexed(arr_int[0], &arr_int[1], + (int64_t *)&arr_addr[0], + *old_pvfs_dtype, pvfs_dtype); + ADIOI_Free(pvfs_arr_disp); + break; + case MPI_COMBINER_DUP: + leaf = 
convert_mpi_pvfs2_dtype(&arr_dtype[0], old_pvfs_dtype); + ret = PVFS_Request_contiguous(1, + *old_pvfs_dtype, pvfs_dtype); + + break; + case MPI_COMBINER_INDEXED_BLOCK: + /* No native PVFS2 support for this operation currently */ + ADIOI_Free(old_pvfs_dtype); + fprintf(stderr, "convert_mpi_pvfs2_dtype: " + "INDEXED_BLOCK is unsupported\n"); + break; + case MPI_COMBINER_HINDEXED_INTEGER: + ADIOI_Free(old_pvfs_dtype); + fprintf(stderr, "convert_mpi_pvfs2_dtype: " + "HINDEXED_INTEGER is unsupported\n"); + break; + case MPI_COMBINER_STRUCT_INTEGER: + ADIOI_Free(old_pvfs_dtype); + fprintf(stderr, "convert_mpi_pvfs2_dtype: " + "STRUCT_INTEGER is unsupported\n"); + break; + case MPI_COMBINER_SUBARRAY: + ADIOI_Free(old_pvfs_dtype); + fprintf(stderr, "convert_mpi_pvfs2_dtype: " + "SUBARRAY is unsupported\n"); + break; + case MPI_COMBINER_DARRAY: + ADIOI_Free(old_pvfs_dtype); + fprintf(stderr, "convert_mpi_pvfs2_dtype: " + "DARRAY is unsupported\n"); + break; + case MPI_COMBINER_F90_REAL: + ADIOI_Free(old_pvfs_dtype); + fprintf(stderr, "convert_mpi_pvfs2_dtype: " + "F90_REAL is unsupported\n"); + break; + case MPI_COMBINER_F90_COMPLEX: + ADIOI_Free(old_pvfs_dtype); + fprintf(stderr, "convert_mpi_pvfs2_dtype: " + "F90_COMPLEX is unsupported\n"); + break; + case MPI_COMBINER_F90_INTEGER: + ADIOI_Free(old_pvfs_dtype); + fprintf(stderr, "convert_mpi_pvfs2_dtype: " + "F90_INTEGER is unsupported\n"); + break; + case MPI_COMBINER_RESIZED: + ADIOI_Free(old_pvfs_dtype); + fprintf(stderr, "convert_mpi_pvfs2_dtype: " + "RESIZED is unsupported\n"); + break; + default: + break; + } + + if (ret != 0) + fprintf(stderr, "Error in PVFS_Request_* " + "for a derived datatype\n"); + +#ifdef DEBUG_DTYPE + print_dtype_info(combiner, + num_int, + num_addr, + num_dtype, + arr_int, + arr_addr, + arr_dtype); +#endif + + if (leaf != 1 && combiner != MPI_COMBINER_DUP) + MPI_Type_free(&arr_dtype[0]); + + ADIOI_Free(arr_int); + ADIOI_Free(arr_addr); + ADIOI_Free(arr_dtype); + + 
PVFS_Request_free(old_pvfs_dtype); + ADIOI_Free(old_pvfs_dtype); + + return ret; + } + else /* MPI_COMBINER_STRUCT */ + { + MPI_Aint mpi_lb = -1, mpi_extent = -1; + PVFS_offset pvfs_lb = -1; + PVFS_size pvfs_extent = -1; + int has_lb_ub = 0; + + /* When converting into a PVFS_Request_struct, we no longer + * can use MPI_LB and MPI_UB. Therefore, we have to do the + * following. + * We simply ignore all the MPI_LB and MPI_UB types and + * get the lb and extent and pass it on through a + * PVFS resized_req */ + + arr_count = 0; + for (i = 0; i < arr_int[0]; i++) + { + if (arr_dtype[i] != MPI_LB && + arr_dtype[i] != MPI_UB) + { + arr_count++; + } + } + + if (arr_int[0] != arr_count) + { + MPI_Type_get_extent(*mpi_dtype, &mpi_lb, &mpi_extent); + pvfs_lb = mpi_lb; + pvfs_extent = mpi_extent; + if ((pvfs_arr_len = ADIOI_Malloc(arr_count*sizeof(int))) + == NULL) + { + fprintf(stderr, "convert_mpi_pvfs2_dtype: " + "Failed to allocate pvfs_arr_len\n"); + } + has_lb_ub = 1; + } + + if ((old_pvfs_dtype_arr + = ADIOI_Malloc(arr_count*sizeof(PVFS_Request))) == NULL) + fprintf(stderr, "convert_mpi_pvfs2_dtype: " + "Failed to allocate PVFS_Requests\n"); + + if ((pvfs_arr_disp = ADIOI_Malloc(arr_count*sizeof(PVFS_size))) + == NULL) + { + fprintf(stderr, "convert_mpi_pvfs2_dtype: " + "Failed to allocate pvfs_arr_disp\n"); + } + + arr_count = 0; + for (i = 0; i < arr_int[0]; i++) + { + if (arr_dtype[i] != MPI_LB && + arr_dtype[i] != MPI_UB) + { + leaf = convert_mpi_pvfs2_dtype( + &arr_dtype[i], &old_pvfs_dtype_arr[arr_count]); + if (leaf != 1) + MPI_Type_free(&arr_dtype[i]); + pvfs_arr_disp[arr_count] = + (PVFS_size) arr_addr[i]; + if (has_lb_ub) + { + pvfs_arr_len[arr_count] = + arr_int[i+1]; + } + arr_count++; + } + } + + /* If a MPI_UB or MPI_LB did exist, we have to + * resize the datatype */ + if (has_lb_ub) + { + PVFS_Request *tmp_pvfs_dtype = NULL; + if ((tmp_pvfs_dtype = ADIOI_Malloc(sizeof(PVFS_Request))) == NULL) + fprintf(stderr, "convert_mpi_pvfs2_dtype: " + "Failed to 
allocate PVFS_Request\n"); + + ret = PVFS_Request_struct(arr_count, pvfs_arr_len, + pvfs_arr_disp, + old_pvfs_dtype_arr, tmp_pvfs_dtype); + if (ret != 0) + fprintf(stderr, "Error in PVFS_Request_struct\n"); + + arr_count = 0; + for (i = 0; i < arr_int[0]; i++) + { + if (arr_dtype[i] != MPI_LB && + arr_dtype[i] != MPI_UB) + { + PVFS_Request_free(&old_pvfs_dtype_arr[arr_count]); + arr_count++; + } + } + +#ifdef DEBUG_DTYPE + fprintf(stderr, "STRUCT(WITHOUT %d LB or UB)(%d,[", + arr_int[0] - arr_count, arr_count); + for (i = 0; i < arr_count; i++) + fprintf(stderr, "(%d,%Ld) ", + pvfs_arr_len[i], + pvfs_arr_disp[i]); + fprintf(stderr, "]\n"); + fprintf(stderr, "RESIZED(LB = %Ld, EXTENT = %Ld)\n", + pvfs_lb, pvfs_extent); +#endif + ret = PVFS_Request_resized(*tmp_pvfs_dtype, + pvfs_lb, pvfs_extent, pvfs_dtype); + if (ret != 0) + fprintf(stderr, "Error in PVFS_Request_resize\n"); + + PVFS_Request_free(tmp_pvfs_dtype); + ADIOI_Free(tmp_pvfs_dtype); + } + else /* No MPI_LB or MPI_UB datatypes */ + { + ret = PVFS_Request_struct(arr_int[0], &arr_int[1], + pvfs_arr_disp, + old_pvfs_dtype_arr, pvfs_dtype); + if (ret != 0) + fprintf(stderr, "Error in PVFS_Request_struct\n"); + + for (i = 0; i < arr_int[0]; i++) + { + if (arr_dtype[i] != MPI_LB && + arr_dtype[i] != MPI_UB) + PVFS_Request_free(&old_pvfs_dtype_arr[i]); + } + +#ifdef DEBUG_DTYPE + print_dtype_info(combiner, + num_int, + num_addr, + num_dtype, + arr_int, + arr_addr, + arr_dtype); +#endif + } + + ADIOI_Free(arr_int); + ADIOI_Free(arr_addr); + ADIOI_Free(arr_dtype); + + ADIOI_Free(old_pvfs_dtype_arr); + ADIOI_Free(pvfs_arr_disp); + ADIOI_Free(pvfs_arr_len); + + return ret; + } + + /* Shouldn't have gotten here */ + fprintf(stderr, "convert_mpi_pvfs2_dtype: SERIOUS ERROR\n"); + return -1; +} + +int convert_named(MPI_Datatype *mpi_dtype, + PVFS_Request *pvfs_dtype, int combiner) +{ + int ret = -1; +#ifdef DEBUG_DTYPE + fprintf(stderr, "NAMED"); +#endif + + switch (*mpi_dtype) + { + case MPI_CHAR: + ret = 
PVFS_Request_contiguous(1, PVFS_CHAR, pvfs_dtype); +#ifdef DEBUG_DTYPE + fprintf(stderr, "-MPI_CHAR\n"); +#endif + break; + case MPI_BYTE: + ret = PVFS_Request_contiguous(1, PVFS_BYTE, pvfs_dtype); +#ifdef DEBUG_DTYPE + fprintf(stderr, "-MPI_BYTE\n"); +#endif + break; + case MPI_SHORT: + ret = PVFS_Request_contiguous(1, PVFS_SHORT, pvfs_dtype); +#ifdef DEBUG_DTYPE + fprintf(stderr, "-MPI_SHORT\n"); +#endif + break; + case MPI_INT: + ret = PVFS_Request_contiguous(1, PVFS_INT, pvfs_dtype); +#ifdef DEBUG_DTYPE + fprintf(stderr, "-MPI_INT\n"); +#endif + break; + case MPI_LONG: + ret = PVFS_Request_contiguous(1, PVFS_LONG, pvfs_dtype); +#ifdef DEBUG_DTYPE + fprintf(stderr, "-MPI_LONG\n"); +#endif + break; + case MPI_FLOAT: + ret = PVFS_Request_contiguous(1, PVFS_FLOAT, pvfs_dtype); +#ifdef DEBUG_DTYPE + fprintf(stderr, "-MPI_FLOAT\n"); +#endif + break; + case MPI_DOUBLE: + ret = PVFS_Request_contiguous(1, PVFS_DOUBLE, pvfs_dtype); +#ifdef DEBUG_DTYPE + fprintf(stderr, "-MPI_DOUBLE\n"); +#endif + break; + case MPI_UNSIGNED_CHAR: + ret = PVFS_Request_contiguous(1, PVFS_UNSIGNED_CHAR, pvfs_dtype); +#ifdef DEBUG_DTYPE + fprintf(stderr, "-MPI_UNSIGNED_CHAR\n"); +#endif + break; + case MPI_UNSIGNED_SHORT: + ret = PVFS_Request_contiguous(1, PVFS_UNSIGNED, pvfs_dtype); +#ifdef DEBUG_DTYPE + fprintf(stderr, "-MPI_UNSIGNED_SHORT\n"); +#endif + break; + case MPI_UNSIGNED: + ret = PVFS_Request_contiguous(1, PVFS_UNSIGNED, pvfs_dtype); +#ifdef DEBUG_DTYPE + fprintf(stderr, "-MPI_SHORT\n"); +#endif + break; + case MPI_UNSIGNED_LONG: + ret = PVFS_Request_contiguous(1, PVFS_UNSIGNED_LONG, pvfs_dtype); +#ifdef DEBUG_DTYPE + fprintf(stderr, "-MPI_UNSIGNED_LONG\n"); +#endif + break; + case MPI_LONG_DOUBLE: + ret = PVFS_Request_contiguous(1, PVFS_LONG_DOUBLE, pvfs_dtype); +#ifdef DEBUG_DTYPE + fprintf(stderr, "-MPI_LONG_DOUBLE\n"); +#endif + break; + default: + fprintf(stderr, "convert_named: predefined type not found"); + return -1; + break; + } + if (ret != 0) + fprintf(stderr, 
"convert_named: Datatype creation failed\n"); + return ret; +} + +void print_dtype_info(int combiner, + int num_int, + int num_addr, + int num_dtype, + int *arr_int, + MPI_Aint *arr_addr, + MPI_Datatype *arr_dtype) +{ + int i = -1; + switch (combiner) + { + case MPI_COMBINER_CONTIGUOUS: + fprintf(stderr, "CONTIG(%d)\n", arr_int[0]); + break; + case MPI_COMBINER_VECTOR: + fprintf(stderr, "VECTOR(%d,%d,%d)\n", + arr_int[0], arr_int[1], arr_int[2]); + break; + case MPI_COMBINER_HVECTOR: + fprintf(stderr, "HVECTOR(%d,%d,%d)\n", + arr_int[0], arr_int[1],arr_addr[0]); + break; + case MPI_COMBINER_INDEXED: + fprintf(stderr, "INDEXED(%d,[", + arr_int[0]); + for (i = 0; i < arr_int[0]; i++) + fprintf(stderr, "(%d,%Ld) ", + arr_int[1+i], + (int64_t) arr_int[arr_int[0]+1+i]); + fprintf(stderr, "]\n"); + break; + case MPI_COMBINER_HINDEXED: + fprintf(stderr, "HINDEXED(%d,[", + arr_int[0]); + for (i = 0; i < arr_int[0]; i++) + fprintf(stderr, "(%d,%Ld) ", + arr_int[1+i], + (int64_t) arr_addr[i]); + fprintf(stderr, "]\n"); + break; + case MPI_COMBINER_STRUCT: + fprintf(stderr, "STRUCT(%d,[", + arr_int[0]); + for (i = 0; i < arr_int[0]; i++) + fprintf(stderr, "(%d,%Ld) ", + arr_int[1+i], + (int64_t) arr_addr[i]); + fprintf(stderr, "]\n"); + break; + case MPI_COMBINER_DUP: + fprintf(stderr, "DUP\n"); + break; + default: + fprintf(stderr, "no available information on this datatype"); + } +} diff --git a/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_io_list.c b/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_io_list.c new file mode 100644 index 0000000000..38cc63e3e1 --- /dev/null +++ b/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_io_list.c @@ -0,0 +1,665 @@ +/* -*- Mode: C; c-basic-offset:4 ; -*- + * vim: ts=8 sts=4 sw=4 noexpandtab + * + * Copyright (C) 2006 Unknown (TODO: fix this) + */ + +#include +#include "adio.h" +#include "adio_extern.h" +#include "ad_pvfs2.h" +#include "ad_pvfs2_io.h" +#include "ad_pvfs2_common.h" + +#define COALESCE_REGIONS /* TODO: would we ever want to 
*not* coalesce? */ +#define MAX_OL_COUNT 64 +int ADIOI_PVFS2_StridedListIO(ADIO_File fd, void *buf, int count, + MPI_Datatype datatype, int file_ptr_type, + ADIO_Offset offset, ADIO_Status *status, + int *error_code, int rw_type) +{ + /* list I/O parameters */ + int i = -1, ret = -1; + int tmp_filetype_size = -1; + int64_t cur_io_size = 0, io_size = 0; + int etype_size = -1; + int num_etypes_in_filetype = -1, num_filetypes = -1; + int etypes_in_filetype = -1, size_in_filetype = -1; + int bytes_into_filetype = 0; + MPI_Offset total_bytes_accessed = 0; + + /* parameters for offset-length pairs arrays */ + int64_t buf_off_arr[MAX_OL_COUNT]; + int32_t buf_len_arr[MAX_OL_COUNT]; + int64_t file_off_arr[MAX_OL_COUNT]; + int32_t file_len_arr[MAX_OL_COUNT]; + int32_t buf_ol_count = 0; + int32_t file_ol_count = 0; + + /* parameters for flattened memory and file datatypes*/ + int flat_buf_index = 0; + int flat_file_index = 0; + int64_t cur_flat_buf_reg_off = 0; + int64_t cur_flat_file_reg_off = 0; + ADIOI_Flatlist_node *flat_buf_p, *flat_file_p; + int buftype_size = -1, buftype_extent = -1, + filetype_size = -1, filetype_extent = -1; + int buftype_is_contig = -1, filetype_is_contig = -1; + + /* PVFS2 specific parameters */ + PVFS_Request mem_req, file_req; + ADIOI_PVFS2_fs * pvfs_fs; + PVFS_sysresp_io resp_io; + static char myname[] = "ADIOI_PVFS2_STRIDED_LISTIO"; + + if (fd->atomicity) { + *error_code = MPIO_Err_create_code(MPI_SUCCESS, + MPIR_ERR_RECOVERABLE, + myname, __LINE__, + MPI_ERR_ARG, + "Atomic noncontiguous writes" + " are not supported by PVFS2", 0); + return -1; + } + + MPI_Type_size(fd->filetype, &filetype_size); + if (filetype_size == 0) { + *error_code = MPI_SUCCESS; + return -1; + } + MPI_Type_extent(fd->filetype, &filetype_extent); + MPI_Type_size(datatype, &buftype_size); + MPI_Type_extent(datatype, &buftype_extent); + io_size = buftype_size*count; + + pvfs_fs = (ADIOI_PVFS2_fs*)fd->fs_ptr; + + /* Flatten the memory datatype + * (file datatype has already 
been flattened in ADIO open + * unless it is contibuous, then we need to flatten it manually) + * and set the correct buffers for flat_buf and flat_file */ + ADIOI_Datatype_iscontig(datatype, &buftype_is_contig); + ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig); + if (buftype_is_contig == 0) + { + ADIOI_Flatten_datatype(datatype); + flat_buf_p = ADIOI_Flatlist; + while (flat_buf_p->type != datatype) + flat_buf_p = flat_buf_p->next; + } + else + { + /* flatten and add to the list */ + flat_buf_p = (ADIOI_Flatlist_node *) ADIOI_Malloc + (sizeof(ADIOI_Flatlist_node)); + flat_buf_p->blocklens = (ADIO_Offset*)ADIOI_Malloc(sizeof(ADIO_Offset)); + flat_buf_p->indices = + (ADIO_Offset *) ADIOI_Malloc(sizeof(ADIO_Offset)); + /* For the buffer, we can optimize the buftype, this is not + * possible with the filetype since it is tiled */ + buftype_size = buftype_size*count; + buftype_extent = buftype_size*count; + flat_buf_p->blocklens[0] = buftype_size; + flat_buf_p->indices[0] = 0; + flat_buf_p->count = 1; + } + if (filetype_is_contig == 0) + { + /* TODO: why does avery say this should already have been + * flattened in Open, but also says contig types don't get + * flattened */ + ADIOI_Flatten_datatype(fd->filetype); + flat_file_p = ADIOI_Flatlist; + while (flat_file_p->type != fd->filetype) + flat_file_p = flat_file_p->next; + } + else + { + /* flatten and add to the list */ + flat_file_p = (ADIOI_Flatlist_node *) ADIOI_Malloc + (sizeof(ADIOI_Flatlist_node)); + flat_file_p->blocklens =(ADIO_Offset*)ADIOI_Malloc(sizeof(ADIO_Offset)); + flat_file_p->indices = + (ADIO_Offset *) ADIOI_Malloc(sizeof(ADIO_Offset)); + flat_file_p->blocklens[0] = filetype_size; + flat_file_p->indices[0] = 0; + flat_file_p->count = 1; + } + + /* Find out where we are in the flattened filetype (the block index, + * how far into the block, and how many bytes_into_filetype) + * If the file_ptr_type == ADIO_INDIVIDUAL we will use disp, fp_ind + * to figure this out (offset should always be 
zero) + * If file_ptr_type == ADIO_EXPLICIT, we will use disp and offset + * to figure this out. */ + + etype_size = fd->etype_size; + num_etypes_in_filetype = filetype_size / etype_size; + if (file_ptr_type == ADIO_INDIVIDUAL) + { + int flag = 0; + /* Should have already been flattened in ADIO_Open*/ + num_filetypes = -1; + while (!flag) + { + num_filetypes++; + for (i = 0; i < flat_file_p->count; i++) + { + /* Start on a non zero-length region */ + if (flat_file_p->blocklens[i]) + { + if (fd->disp + flat_file_p->indices[i] + + (num_filetypes * filetype_extent) + + flat_file_p->blocklens[i] > fd->fp_ind && + fd->disp + flat_file_p->indices[i] <= + fd->fp_ind) + { + flat_file_index = i; + cur_flat_file_reg_off = fd->fp_ind - + (fd->disp + flat_file_p->indices[i] + + (num_filetypes * filetype_extent)); + flag = 1; + break; + } + else + bytes_into_filetype += flat_file_p->blocklens[i]; + } + } + } + /* Impossible that we don't find it in this datatype */ + assert(i != flat_file_p->count); + } + else + { + num_filetypes = (int) (offset / num_etypes_in_filetype); + etypes_in_filetype = (int) (offset % num_etypes_in_filetype); + size_in_filetype = etypes_in_filetype * etype_size; + + tmp_filetype_size = 0; + for (i=0; icount; i++) { + tmp_filetype_size += flat_file_p->blocklens[i]; + if (tmp_filetype_size > size_in_filetype) + { + flat_file_index = i; + cur_flat_file_reg_off = flat_file_p->blocklens[i] - + (tmp_filetype_size - size_in_filetype); + bytes_into_filetype = offset * filetype_size - + flat_file_p->blocklens[i]; + break; + } + } + } +#ifdef DEBUG_LIST + fprintf(stderr, "ADIOI_PVFS2_StridedListIO: (fd->fp_ind=%Ld,fd->disp=%Ld," + " offset=%Ld)\n(flat_file_index=%d,cur_flat_file_reg_off=%Ld," + "bytes_into_filetype=%d)\n", + fd->fp_ind, fd->disp, offset, flat_file_index, + cur_flat_file_reg_off, bytes_into_filetype); +#endif +#ifdef DEBUG_LIST2 + fprintf(stderr, "flat_buf:\n"); + for (i = 0; i < flat_buf_p->count; i++) + fprintf(stderr, "(offset, length) = (%Ld, 
%d)\n", + flat_buf_p->indices[i], + flat_buf_p->blocklens[i]); + fprintf(stderr, "flat_file:\n"); + for (i = 0; i < flat_file_p->count; i++) + fprintf(stderr, "(offset, length) = (%Ld, %d)\n", + flat_file_p->indices[i], + flat_file_p->blocklens[i]); +#endif + + /* total data written */ + cur_io_size = 0; + while (cur_io_size != io_size) + { + /* Initialize the temporarily unrolling lists and + * and associated variables */ + buf_ol_count = 0; + file_ol_count = 0; + for (i = 0; i < MAX_OL_COUNT; i++) + { + buf_off_arr[i] = 0; + buf_len_arr[i] = 0; + file_off_arr[i] = 0; + file_len_arr[i] = 0; + } + + /* Generate the offset-length pairs for a + * list I/O operation */ + gen_listio_arr(flat_buf_p, + &flat_buf_index, + &cur_flat_buf_reg_off, + buftype_size, + buftype_extent, + flat_file_p, + &flat_file_index, + &cur_flat_file_reg_off, + filetype_size, + filetype_extent, + MAX_OL_COUNT, + fd->disp, + bytes_into_filetype, + &cur_io_size, + io_size, + buf_off_arr, + buf_len_arr, + &buf_ol_count, + file_off_arr, + file_len_arr, + &file_ol_count); + + assert(buf_ol_count <= MAX_OL_COUNT); + assert(file_ol_count <= MAX_OL_COUNT); +#ifdef DEBUG_LIST2 + print_buf_file_ol_pairs(buf_off_arr, + buf_len_arr, + buf_ol_count, + file_off_arr, + file_len_arr, + file_ol_count, + buf, + rw_type); +#endif +#ifdef DEBUG_LIST2 + do { + int y, z; + fprintf(stderr, "ad_pvfs2_io_list.c::\n"); + for (y = 0; y < buf_ol_count; y++) + { + for (z = 0; z < buf_len_arr[y]; z++) + { + fprintf(stderr, "buf[%d][%d]=%c\n", + y, z, ((char *) buf + buf_off_arr[y])[z]); + } + } + } while (0); +#endif + + /* Run list I/O operation */ + ret = PVFS_Request_hindexed(buf_ol_count, buf_len_arr, + buf_off_arr, PVFS_BYTE, &mem_req); + + ret = PVFS_Request_hindexed(file_ol_count, file_len_arr, + file_off_arr, PVFS_BYTE, &file_req); + if (rw_type == READ) + { + ret = PVFS_sys_read(pvfs_fs->object_ref, file_req, 0, + buf, mem_req, + &(pvfs_fs->credentials), &resp_io); + } + else + { + ret = 
PVFS_sys_write(pvfs_fs->object_ref, file_req, 0, + buf, mem_req, + &(pvfs_fs->credentials), &resp_io); + } + if (ret != 0) + { + fprintf(stderr, "ADIOI_PVFS2_StridedListIO: Warning - PVFS_sys_" + "read/write returned %d and completed %Ld bytes.\n", + ret, resp_io.total_completed); + *error_code = MPIO_Err_create_code(MPI_SUCCESS, + MPIR_ERR_RECOVERABLE, + myname, __LINE__, + ADIOI_PVFS2_error_convert(ret), + "Error in PVFS_sys_io \n", 0); + PVFS_Request_free(&mem_req); + PVFS_Request_free(&file_req); + goto error_state; + } + total_bytes_accessed += resp_io.total_completed; + + PVFS_Request_free(&mem_req); + PVFS_Request_free(&file_req); + } + +#ifdef DEBUG_LIST + fprintf(stderr, "ADIOI_PVFS2_StridedListIO: " + "total_bytes_accessed=%Ld,ret=%d\n", + total_bytes_accessed, ret); +#endif + + if (file_ptr_type == ADIO_INDIVIDUAL) + fd->fp_ind += total_bytes_accessed; + *error_code = MPI_SUCCESS; + +error_state: +#ifdef HAVE_STATUS_SET_BYTES + /* TODO: why the cast? */ + MPIR_Status_set_bytes(status, datatype, (int)total_bytes_accessed); +/* This is a temporary way of filling in status. The right way is to + keep track of how much data was actually written by ADIOI_BUFFERED_WRITE. */ +#endif + if (buftype_is_contig == 0) + ADIOI_Delete_flattened(datatype); + else + { + ADIOI_Free(flat_buf_p->blocklens); + ADIOI_Free(flat_buf_p->indices); + ADIOI_Free(flat_buf_p); + } + + if (filetype_is_contig == 0) + ADIOI_Delete_flattened(fd->filetype); + else + { + ADIOI_Free(flat_file_p->blocklens); + ADIOI_Free(flat_file_p->indices); + ADIOI_Free(flat_file_p); + } + + return 0; +} + +/* To do: Fix the code to coalesce the offset-length pairs for memory + * and file. 
*/ + +/* gen_listio_arr - fills in offset-length pairs for memory and file + * for list I/O */ +int gen_listio_arr(ADIOI_Flatlist_node *flat_buf_p, + int *flat_buf_index_p, + int64_t *cur_flat_buf_reg_off_p, + int flat_buf_size, + int flat_buf_extent, + ADIOI_Flatlist_node *flat_file_p, + int *flat_file_index_p, + int64_t *cur_flat_file_reg_off_p, + int flat_file_size, + int flat_file_extent, + int max_ol_count, + ADIO_Offset disp, + int bytes_into_filetype, + int64_t *bytes_completed, + int64_t total_io_size, + int64_t buf_off_arr[], + int32_t buf_len_arr[], + int32_t *buf_ol_count_p, + int64_t file_off_arr[], + int32_t file_len_arr[], + int32_t *file_ol_count_p) +{ + int region_size = -1; + + /* parameters for flattened memory and file datatypes*/ + int64_t cur_flat_buf_reg_left = 0; + int64_t cur_flat_file_reg_left = 0; + +#ifdef DEBUG_LIST2 + fprintf(stderr, "gen_list_arr:\n"); +#endif + + if ((*buf_ol_count_p) != 0 ||(*file_ol_count_p) != 0) + { + fprintf(stderr, "buf_ol_count != 0 || file_ol_count != 0\n"); + return -1; + } + + /* Start on a non-zero memory and file region + * Note this does not affect the bytes_completed + * since no data is in these regions. Initialize the + * first memory and file offsets. 
*/ + while (flat_buf_p->blocklens[(*flat_buf_index_p)] == 0) + { + (*flat_buf_index_p) = ((*flat_buf_index_p) + 1) % + flat_buf_p->count; + } + buf_off_arr[*buf_ol_count_p] = + (*bytes_completed / flat_buf_size) * + flat_buf_extent + + flat_buf_p->indices[*flat_buf_index_p] + + *cur_flat_buf_reg_off_p; + buf_len_arr[*buf_ol_count_p] = 0; + + while (flat_file_p->blocklens[(*flat_file_index_p)] == 0) + { + (*flat_file_index_p) = ((*flat_file_index_p) + 1) % + flat_file_p->count; + } + file_off_arr[*file_ol_count_p] = disp + + (((bytes_into_filetype + *bytes_completed) / flat_file_size) * + flat_file_extent) + + flat_file_p->indices[*flat_file_index_p] + + *cur_flat_file_reg_off_p; + file_len_arr[*file_ol_count_p] = 0; + +#ifdef DEBUG_LIST2 + fprintf(stderr, "initial buf_off_arr[%d] = %Ld\n", *buf_ol_count_p, + buf_off_arr[*buf_ol_count_p]); + fprintf(stderr, "initial file_off_arr[%d] = %Ld\n", *file_ol_count_p, + file_off_arr[*file_ol_count_p]); +#endif + + while (*bytes_completed != total_io_size + && (*buf_ol_count_p) < max_ol_count + && (*file_ol_count_p) < max_ol_count) + { + /* How much data is left in the current piece in + * the flattened datatypes */ + cur_flat_buf_reg_left = flat_buf_p->blocklens[*flat_buf_index_p] + - *cur_flat_buf_reg_off_p; + cur_flat_file_reg_left = flat_file_p->blocklens[*flat_file_index_p] + - *cur_flat_file_reg_off_p; + +#ifdef DEBUG_LIST2 + fprintf(stderr, + "flat_buf_index=%d flat_buf->blocklens[%d]=%d\n" + "cur_flat_buf_reg_left=%Ld " + "*cur_flat_buf_reg_off_p=%Ld\n" + "flat_file_index=%d flat_file->blocklens[%d]=%d\n" + "cur_flat_file_reg_left=%Ld " + "*cur_flat_file_reg_off_p=%Ld\n" + "bytes_completed=%Ld\n" + "buf_ol_count=%d file_ol_count=%d\n" + "buf_len_arr[%d]=%d file_len_arr[%d]=%d\n\n", + *flat_buf_index_p, *flat_buf_index_p, + flat_buf_p->blocklens[*flat_buf_index_p], + cur_flat_buf_reg_left, + *cur_flat_buf_reg_off_p, + *flat_file_index_p, *flat_file_index_p, + flat_file_p->blocklens[*flat_file_index_p], + 
cur_flat_file_reg_left, + *cur_flat_file_reg_off_p, + *bytes_completed, + *buf_ol_count_p, *file_ol_count_p, + *buf_ol_count_p, + buf_len_arr[*buf_ol_count_p], + *file_ol_count_p, + file_len_arr[*file_ol_count_p]); +#endif + + /* What is the size of the next contiguous region agreed + * upon by both memory and file regions that does not + * surpass the file size */ + if (cur_flat_buf_reg_left > cur_flat_file_reg_left) + region_size = cur_flat_file_reg_left; + else + region_size = cur_flat_buf_reg_left; + + if (region_size > total_io_size - *bytes_completed) + region_size = total_io_size - *bytes_completed; + + /* Add this piece to both the mem and file arrays + * coalescing offset-length pairs if possible and advance + * the pointers through the flatten mem and file datatypes + * as well Note: no more than a single piece can be done + * since we take the smallest one possible */ + + if (cur_flat_buf_reg_left == region_size) + { +#ifdef DEBUG_LIST2 + fprintf(stderr, "reached end of memory block...\n"); +#endif + (*flat_buf_index_p) = ((*flat_buf_index_p) + 1) % + flat_buf_p->count; + while (flat_buf_p->blocklens[(*flat_buf_index_p)] == 0) + { + (*flat_buf_index_p) = ((*flat_buf_index_p) + 1) % + flat_buf_p->count; + } + *cur_flat_buf_reg_off_p = 0; + +#ifdef COALESCE_REGIONS + if (*buf_ol_count_p != 0) + { + if (buf_off_arr[(*buf_ol_count_p) - 1] + + buf_len_arr[(*buf_ol_count_p) - 1] == + buf_off_arr[*buf_ol_count_p]) + { + buf_len_arr[(*buf_ol_count_p) - 1] += + region_size; + } + else + { + buf_len_arr[*buf_ol_count_p] += region_size; + (*buf_ol_count_p)++; + } + } + else + { +#endif + buf_len_arr[*buf_ol_count_p] += region_size; + (*buf_ol_count_p)++; +#ifdef COALESCE_REGIONS + } +#endif + + /* Don't prepare for the next piece if we have reached + * the limit or else it will segment fault. 
*/ + if ((*buf_ol_count_p) != max_ol_count) + { + buf_off_arr[*buf_ol_count_p] = + ((*bytes_completed + region_size) / flat_buf_size) * + flat_buf_extent + + flat_buf_p->indices[*flat_buf_index_p] + + (*cur_flat_buf_reg_off_p); + buf_len_arr[*buf_ol_count_p] = 0; + } + } + else if (cur_flat_buf_reg_left > region_size) + { +#ifdef DEBUG_LIST2 + fprintf(stderr, "advanced %d in memory block...\n", + region_size); +#endif + (*cur_flat_buf_reg_off_p) += region_size; + buf_len_arr[*buf_ol_count_p] += region_size; + } + else + { + fprintf(stderr, "gen_listio_arr: Error\n"); + } + + /* To calculate the absolute file offset we need to + * add the disp, how many filetypes we have gone through, + * the relative block offset in the filetype and how far + * into the block we have gone. */ + if (cur_flat_file_reg_left == region_size) + { +#ifdef DEBUG_LIST2 + fprintf(stderr, "reached end of file block...\n"); +#endif + (*flat_file_index_p) = ((*flat_file_index_p) + 1) % + flat_file_p->count; + while (flat_file_p->blocklens[(*flat_file_index_p)] == 0) + { + (*flat_file_index_p) = ((*flat_file_index_p) + 1) % + flat_file_p->count; + } + (*cur_flat_file_reg_off_p) = 0; + +#ifdef COALESCE_REGIONS + if (*file_ol_count_p != 0) + { + if (file_off_arr[(*file_ol_count_p) - 1] + + file_len_arr[(*file_ol_count_p) - 1] == + file_off_arr[*file_ol_count_p]) + { + file_len_arr[(*file_ol_count_p) - 1] += + region_size; + } + else + { + file_len_arr[*file_ol_count_p] += region_size; + (*file_ol_count_p)++; + } + } + else + { +#endif + file_len_arr[*file_ol_count_p] += region_size; + (*file_ol_count_p)++; +#ifdef COALESCE_REGIONS + } +#endif + + /* Don't prepare for the next piece if we have reached + * the limit or else it will segment fault. 
*/ + if ((*file_ol_count_p) != max_ol_count) + { + file_off_arr[*file_ol_count_p] = disp + + (((bytes_into_filetype + *bytes_completed + region_size) + / flat_file_size) * + flat_file_extent) + + flat_file_p->indices[*flat_file_index_p] + + (*cur_flat_file_reg_off_p); + file_len_arr[*file_ol_count_p] = 0; + } + } + else if (cur_flat_file_reg_left > region_size) + { +#ifdef DEBUG_LIST2 + fprintf(stderr, "advanced %d in file block...\n", + region_size); +#endif + (*cur_flat_file_reg_off_p) += region_size; + file_len_arr[*file_ol_count_p] += region_size; + } + else + { + fprintf(stderr, "gen_listio_arr: Error\n"); + } +#ifdef DEBUG_LIST2 + fprintf(stderr, + "------------------------------\n\n"); +#endif + *bytes_completed += region_size; + } + /* Increment the count if we stopped in the middle of a + * memory or file region */ + if (*cur_flat_buf_reg_off_p != 0) + (*buf_ol_count_p)++; + if (*cur_flat_file_reg_off_p != 0) + (*file_ol_count_p)++; + + return 0; +} + +void print_buf_file_ol_pairs(int64_t buf_off_arr[], + int32_t buf_len_arr[], + int32_t buf_ol_count, + int64_t file_off_arr[], + int32_t file_len_arr[], + int32_t file_ol_count, + void *buf, + int rw_type) +{ + int i = -1; + + fprintf(stderr, "buf_ol_pairs(offset,length) count = %d\n", + buf_ol_count); + for (i = 0; i < buf_ol_count; i++) + { + fprintf(stderr, "(%Ld, %d) ", buf_off_arr[i], buf_len_arr[i]); + } + fprintf(stderr, "\n"); + + fprintf(stderr, "file_ol_pairs(offset,length) count = %d\n", + file_ol_count); + for (i = 0; i < file_ol_count; i++) + { + fprintf(stderr, "(%Ld, %d) ", file_off_arr[i], file_len_arr[i]); + } + fprintf(stderr, "\n\n"); + +} diff --git a/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_read.c b/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_read.c index 2dd7e55ca1..48009f2537 100644 --- a/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_read.c +++ b/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_read.c @@ -8,7 +8,7 @@ #include "adio.h" #include "adio_extern.h" #include 
"ad_pvfs2.h" - +#include "ad_pvfs2_io.h" #include "ad_pvfs2_common.h" void ADIOI_PVFS2_ReadContig(ADIO_File fd, void *buf, int count, @@ -92,899 +92,77 @@ fn_exit: return; } +static int ADIOI_PVFS2_ReadStridedListIO(ADIO_File fd, void *buf, int count, + MPI_Datatype datatype, int file_ptr_type, + ADIO_Offset offset, ADIO_Status *status, + int *error_code) +{ + return ADIOI_PVFS2_StridedListIO(fd, buf, count, + datatype, file_ptr_type, + offset, status, + error_code, READ); +} + +static int ADIOI_PVFS2_ReadStridedDtypeIO(ADIO_File fd, void *buf, int count, + MPI_Datatype datatype, int file_ptr_type, + ADIO_Offset offset, ADIO_Status *status, + int *error_code) +{ + return ADIOI_PVFS2_StridedDtypeIO(fd, buf, count, + datatype, file_ptr_type, + offset, status, error_code, + READ); +} void ADIOI_PVFS2_ReadStrided(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int *error_code) { - /* offset is in units of etype relative to the filetype. 
*/ - ADIOI_Flatlist_node *flat_buf, *flat_file; - int i, j, k, brd_size, frd_size=0, st_index=0; - int bufsize, sum, n_etypes_in_filetype, size_in_filetype; - int n_filetypes, etype_in_filetype; - ADIO_Offset abs_off_in_filetype=0; - int filetype_size, etype_size, buftype_size; - MPI_Aint filetype_extent, buftype_extent; - int buf_count, buftype_is_contig, filetype_is_contig; - ADIO_Offset off, disp, start_off, initial_off; - int flag, st_frd_size, st_n_filetypes; + /* four ways (to date) that we can carry out strided i/o accesses: + * - naive posix + * - 'true' Datatype (from avery) + * - new List I/O (from avery) + * - classic List I/O (the one that's always been in ROMIO) + * I imagine we'll keep Datatype as an optional optimization, and afer a + * release or two promote it to the default + */ + int ret = -1; - int mem_list_count, file_list_count; - PVFS_size *mem_offsets; - int64_t *file_offsets; - int *mem_lengths; - int32_t *file_lengths; - int total_blks_to_read; - - int max_mem_list, max_file_list; - - int b_blks_read; - int f_data_read; - int size_read=0, n_read_lists, extra_blks; - - int end_brd_size, end_frd_size; - int start_k, start_j, new_file_read, new_buffer_read; - int start_mem_offset; - PVFS_Request mem_req, file_req; - ADIOI_PVFS2_fs * pvfs_fs; - PVFS_sysresp_io resp_io; - int err_flag=0; - MPI_Offset total_bytes_read = 0; - static char myname[] = "ADIOI_PVFS2_ReadStrided"; - -#define MAX_ARRAY_SIZE 64 - - *error_code = MPI_SUCCESS; /* changed below if error */ - - ADIOI_Datatype_iscontig(datatype, &buftype_is_contig); - ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig); - - /* the HDF5 tests showed a bug in this list processing code (see many many - * lines down below). 
We added a workaround, but common HDF5 file types - * are actually contiguous and do not need the expensive workarond */ - if (!filetype_is_contig) { - flat_file = ADIOI_Flatlist; - while (flat_file->type != fd->filetype) flat_file = flat_file->next; - if (flat_file->count == 1 && !buftype_is_contig) - filetype_is_contig = 1; - } - - MPI_Type_size(fd->filetype, &filetype_size); - if ( ! filetype_size ) { - *error_code = MPI_SUCCESS; + if (fd->hints->fs_hints.pvfs2.posix_read == ADIOI_HINT_ENABLE) { + ADIOI_GEN_ReadStrided(fd, buf, count, datatype, + file_ptr_type, offset, status, error_code); return; } + if (fd->hints->fs_hints.pvfs2.dtype_read == ADIOI_HINT_ENABLE) { + ret = ADIOI_PVFS2_ReadStridedDtypeIO(fd, buf, count, + datatype, file_ptr_type, + offset, status, error_code); - MPI_Type_extent(fd->filetype, &filetype_extent); - MPI_Type_size(datatype, &buftype_size); - MPI_Type_extent(datatype, &buftype_extent); - etype_size = fd->etype_size; - - bufsize = buftype_size * count; - - pvfs_fs = (ADIOI_PVFS2_fs*)fd->fs_ptr; - - if (!buftype_is_contig && filetype_is_contig) { - -/* noncontiguous in memory, contiguous in file. */ - int64_t file_offsets; - int32_t file_lengths; - - ADIOI_Flatten_datatype(datatype); - flat_buf = ADIOI_Flatlist; - while (flat_buf->type != datatype) flat_buf = flat_buf->next; - - off = (file_ptr_type == ADIO_INDIVIDUAL) ? 
fd->fp_ind : - fd->disp + etype_size * offset; - - file_list_count = 1; - file_offsets = off; - file_lengths = 0; - total_blks_to_read = count*flat_buf->count; - b_blks_read = 0; - - /* allocate arrays according to max usage */ - if (total_blks_to_read > MAX_ARRAY_SIZE) - mem_list_count = MAX_ARRAY_SIZE; - else mem_list_count = total_blks_to_read; - mem_offsets = (PVFS_size*)ADIOI_Malloc(mem_list_count*sizeof(PVFS_size)); - mem_lengths = (int*)ADIOI_Malloc(mem_list_count*sizeof(int)); - - /* TODO: CHECK RESULTS OF MEMORY ALLOCATION */ - - j = 0; - /* step through each block in memory, filling memory arrays */ - while (b_blks_read < total_blks_to_read) { - for (i=0; icount; i++) { - mem_offsets[b_blks_read % MAX_ARRAY_SIZE] = - /* TODO: fix this compiler warning */ - ((PVFS_size)buf + j*buftype_extent + flat_buf->indices[i]); - mem_lengths[b_blks_read % MAX_ARRAY_SIZE] = - flat_buf->blocklens[i]; - file_lengths += flat_buf->blocklens[i]; - b_blks_read++; - if (!(b_blks_read % MAX_ARRAY_SIZE) || - (b_blks_read == total_blks_to_read)) { - - /* in the case of the last read list call, - adjust mem_list_count */ - if (b_blks_read == total_blks_to_read) { - mem_list_count = total_blks_to_read % MAX_ARRAY_SIZE; - /* in case last read list call fills max arrays */ - if (!mem_list_count) mem_list_count = MAX_ARRAY_SIZE; - } - err_flag = PVFS_Request_hindexed(mem_list_count, - mem_lengths, mem_offsets, PVFS_BYTE, &mem_req); - if (err_flag < 0) break; - err_flag = PVFS_Request_contiguous(file_lengths, - PVFS_BYTE, &file_req); - if (err_flag < 0) break; -#ifdef ADIOI_MPE_LOGGING - MPE_Log_event( ADIOI_MPE_read_a, 0, NULL ); -#endif - err_flag = PVFS_sys_read(pvfs_fs->object_ref, file_req, - file_offsets, PVFS_BOTTOM, mem_req, - &(pvfs_fs->credentials), &resp_io); -#ifdef ADIOI_MPE_LOGGING - MPE_Log_event( ADIOI_MPE_read_b, 0, NULL ); -#endif - /* --BEGIN ERROR HANDLING-- */ - if (err_flag != 0) { - *error_code = MPIO_Err_create_code(MPI_SUCCESS, - MPIR_ERR_RECOVERABLE, - 
myname, __LINE__, - ADIOI_PVFS2_error_convert(err_flag), - "Error in PVFS_sys_read", 0); - goto error_state; - } - PVFS_Request_free(&mem_req); - PVFS_Request_free(&file_req); - total_bytes_read += resp_io.total_completed; - /* --END ERROR HANDLING-- */ - - /* in the case of error or the last read list call, - * leave here */ - if (err_flag || b_blks_read == total_blks_to_read) break; - - file_offsets += file_lengths; - file_lengths = 0; - } - } /* for (i=0; icount; i++) */ - j++; - } /* while (b_blks_read < total_blks_to_read) */ - ADIOI_Free(mem_offsets); - ADIOI_Free(mem_lengths); - - if (file_ptr_type == ADIO_INDIVIDUAL) - fd->fp_ind += total_bytes_read; - - fd->fp_sys_posn = -1; /* set it to null. */ - -#ifdef HAVE_STATUS_SET_BYTES - MPIR_Status_set_bytes(status, datatype, bufsize); - /* This isa temporary way of filling in status. The right way is to - keep tracke of how much data was actually read adn placed in buf - by ADIOI_BUFFERED_READ. */ -#endif - ADIOI_Delete_flattened(datatype); - + /* Fall back to list I/O if datatype I/O didn't work */ + if (ret != 0) + { + fprintf(stderr, + "Falling back to list I/O since datatype I/O failed\n"); + ret = ADIOI_PVFS2_ReadStridedListIO(fd, buf, count, + datatype, file_ptr_type, + offset, status, error_code); + } + return; + } + if (fd->hints->fs_hints.pvfs2.listio_read == ADIOI_HINT_ENABLE) { + ret = ADIOI_PVFS2_ReadStridedListIO(fd, buf, count, datatype, + file_ptr_type, offset, status, error_code); return; - } /* if (!buftype_is_contig && filetype_is_contig) */ - - /* know file is noncontiguous from above */ - /* noncontiguous in file */ - - /* filetype already flattened in ADIO_Open */ - flat_file = ADIOI_Flatlist; - while (flat_file->type != fd->filetype) flat_file = flat_file->next; - - disp = fd->disp; - initial_off = offset; - - - /* for each case - ADIO_Individual pointer or explicit, find the file - offset in bytes (offset), n_filetypes (how many filetypes into - file to start), frd_size (remaining amount 
of data in present - file block), and st_index (start point in terms of blocks in - starting filetype) */ - if (file_ptr_type == ADIO_INDIVIDUAL) { - offset = fd->fp_ind; /* in bytes */ - n_filetypes = -1; - flag = 0; - while (!flag) { - n_filetypes++; - for (i=0; icount; i++) { - if (disp + flat_file->indices[i] + - ((ADIO_Offset) n_filetypes)*filetype_extent + - flat_file->blocklens[i] >= offset) { - st_index = i; - frd_size = (int) (disp + flat_file->indices[i] + - ((ADIO_Offset) n_filetypes)*filetype_extent - + flat_file->blocklens[i] - offset); - flag = 1; - break; - } - } - } /* while (!flag) */ - } /* if (file_ptr_type == ADIO_INDIVIDUAL) */ - else { - n_etypes_in_filetype = filetype_size/etype_size; - n_filetypes = (int) (offset / n_etypes_in_filetype); - etype_in_filetype = (int) (offset % n_etypes_in_filetype); - size_in_filetype = etype_in_filetype * etype_size; - - sum = 0; - for (i=0; icount; i++) { - sum += flat_file->blocklens[i]; - if (sum > size_in_filetype) { - st_index = i; - frd_size = sum - size_in_filetype; - abs_off_in_filetype = flat_file->indices[i] + - size_in_filetype - (sum - flat_file->blocklens[i]); - break; - } - } - - /* abs. offset in bytes in the file */ - offset = disp + ((ADIO_Offset) n_filetypes)*filetype_extent + - abs_off_in_filetype; - } /* else [file_ptr_type != ADIO_INDIVIDUAL] */ - - start_off = offset; - st_frd_size = frd_size; - st_n_filetypes = n_filetypes; - - if (buftype_is_contig && !filetype_is_contig) { - -/* contiguous in memory, noncontiguous in file. should be the most - common case. 
*/ - - int mem_lengths; - char *mem_offsets; - - i = 0; - j = st_index; - n_filetypes = st_n_filetypes; - - mem_list_count = 1; - - /* determine how many blocks in file to read */ - f_data_read = ADIOI_MIN(st_frd_size, bufsize); - total_blks_to_read = 1; - if (j < (flat_file->count-1)) j++; - else { - j = 0; - n_filetypes++; - } - while (f_data_read < bufsize) { - f_data_read += flat_file->blocklens[j]; - total_blks_to_read++; - if (j<(flat_file->count-1)) j++; - else j = 0; - } - - j = st_index; - n_filetypes = st_n_filetypes; - n_read_lists = total_blks_to_read/MAX_ARRAY_SIZE; - extra_blks = total_blks_to_read%MAX_ARRAY_SIZE; - - mem_offsets = buf; - mem_lengths = 0; - - /* if at least one full readlist, allocate file arrays - at max array size and don't free until very end */ - if (n_read_lists) { - file_offsets = (int64_t*)ADIOI_Malloc(MAX_ARRAY_SIZE* - sizeof(int64_t)); - file_lengths = (int32_t*)ADIOI_Malloc(MAX_ARRAY_SIZE* - sizeof(int32_t)); - } - /* if there's no full readlist allocate file arrays according - to needed size (extra_blks) */ - else { - file_offsets = (int64_t*)ADIOI_Malloc(extra_blks* - sizeof(int64_t)); - file_lengths = (int32_t*)ADIOI_Malloc(extra_blks* - sizeof(int32_t)); - } - - /* for file arrays that are of MAX_ARRAY_SIZE, build arrays */ - for (i=0; iindices[j]; - file_lengths[k] = flat_file->blocklens[j]; - mem_lengths += file_lengths[k]; - } - if (j<(flat_file->count - 1)) j++; - else { - j = 0; - n_filetypes++; - } - } /* for (k=0; kobject_ref, file_req, 0, - mem_offsets, mem_req, - &(pvfs_fs->credentials), &resp_io); -#ifdef ADIOI_MPE_LOGGING - MPE_Log_event( ADIOI_MPE_read_b, 0, NULL ); -#endif - /* --BEGIN ERROR HANDLING-- */ - if (err_flag != 0) { - *error_code = MPIO_Err_create_code(MPI_SUCCESS, - MPIR_ERR_RECOVERABLE, - myname, __LINE__, - ADIOI_PVFS2_error_convert(err_flag), - "Error in PVFS_sys_read", 0); - goto error_state; - } - /* --END ERROR HANDING-- */ - PVFS_Request_free(&mem_req); - PVFS_Request_free(&file_req); - - 
total_bytes_read += resp_io.total_completed; - - mem_offsets += mem_lengths; - mem_lengths = 0; - } /* for (i=0; iindices[j]; - if (k == (extra_blks - 1)) { - file_lengths[k] = bufsize - (int32_t) mem_lengths - - (int32_t) mem_offsets + (int32_t) buf; - } - else file_lengths[k] = flat_file->blocklens[j]; - } /* if(i || k) */ - mem_lengths += file_lengths[k]; - if (j<(flat_file->count - 1)) j++; - else { - j = 0; - n_filetypes++; - } - } /* for (k=0; kobject_ref, file_req, 0, - mem_offsets, mem_req, &(pvfs_fs->credentials), &resp_io); -#ifdef ADIOI_MPE_LOGGING - MPE_Log_event( ADIOI_MPE_read_b, 0, NULL ); -#endif - /* --BEGIN ERROR HANDLING-- */ - if (err_flag != 0) { - *error_code = MPIO_Err_create_code(MPI_SUCCESS, - MPIR_ERR_RECOVERABLE, - myname, __LINE__, - ADIOI_PVFS2_error_convert(err_flag), - "Error in PVFS_sys_read", 0); - goto error_state; - } - /* --END ERROR HANDLING-- */ - PVFS_Request_free(&mem_req); - PVFS_Request_free(&file_req); - total_bytes_read += resp_io.total_completed; - } } - else { -/* noncontiguous in memory as well as in file */ - - ADIOI_Flatten_datatype(datatype); - flat_buf = ADIOI_Flatlist; - while (flat_buf->type != datatype) flat_buf = flat_buf->next; + /* Use classic list I/O if no hints given base case */ - size_read = 0; - n_filetypes = st_n_filetypes; - frd_size = st_frd_size; - brd_size = flat_buf->blocklens[0]; - buf_count = 0; - start_mem_offset = 0; - start_k = k = 0; - start_j = st_index; - max_mem_list = 0; - max_file_list = 0; - - /* run through and file max_file_list and max_mem_list so that you - can allocate the file and memory arrays less than MAX_ARRAY_SIZE - if possible */ - - while (size_read < bufsize) { - k = start_k; - new_buffer_read = 0; - mem_list_count = 0; - while ((mem_list_count < MAX_ARRAY_SIZE) && - (new_buffer_read < bufsize-size_read)) { - /* find mem_list_count and file_list_count such that both are - less than MAX_ARRAY_SIZE, the sum of their lengths are - equal, and the sum of all the data read and 
data to be - read in the next immediate read list is less than - bufsize */ - if(mem_list_count) { - if((new_buffer_read + flat_buf->blocklens[k] + - size_read) > bufsize) { - end_brd_size = new_buffer_read + - flat_buf->blocklens[k] - (bufsize - size_read); - new_buffer_read = bufsize - size_read; - } - else { - new_buffer_read += flat_buf->blocklens[k]; - end_brd_size = flat_buf->blocklens[k]; - } - } - else { - if (brd_size > (bufsize - size_read)) { - new_buffer_read = bufsize - size_read; - brd_size = new_buffer_read; - } - else new_buffer_read = brd_size; - } - mem_list_count++; - k = (k + 1)%flat_buf->count; - } /* while ((mem_list_count < MAX_ARRAY_SIZE) && - (new_buffer_read < bufsize-size_read)) */ - j = start_j; - new_file_read = 0; - file_list_count = 0; - while ((file_list_count < MAX_ARRAY_SIZE) && - (new_file_read < new_buffer_read)) { - if(file_list_count) { - if((new_file_read + flat_file->blocklens[j]) > - new_buffer_read) { - end_frd_size = new_buffer_read - new_file_read; - new_file_read = new_buffer_read; - j--; - } - else { - new_file_read += flat_file->blocklens[j]; - end_frd_size = flat_file->blocklens[j]; - } - } - else { - if (frd_size > new_buffer_read) { - new_file_read = new_buffer_read; - frd_size = new_file_read; - } - else new_file_read = frd_size; - } - file_list_count++; - if (j < (flat_file->count - 1)) j++; - else j = 0; - - k = start_k; - if ((new_file_read < new_buffer_read) && - (file_list_count == MAX_ARRAY_SIZE)) { - new_buffer_read = 0; - mem_list_count = 0; - while (new_buffer_read < new_file_read) { - if(mem_list_count) { - if((new_buffer_read + flat_buf->blocklens[k]) > - new_file_read) { - end_brd_size = new_file_read - new_buffer_read; - new_buffer_read = new_file_read; - k--; - } - else { - new_buffer_read += flat_buf->blocklens[k]; - end_brd_size = flat_buf->blocklens[k]; - } - } - else { - new_buffer_read = brd_size; - if (brd_size > (bufsize - size_read)) { - new_buffer_read = bufsize - size_read; - brd_size = 
new_buffer_read; - } - } - mem_list_count++; - k = (k + 1)%flat_buf->count; - } /* while (new_buffer_read < new_file_read) */ - } /* if ((new_file_read < new_buffer_read) && (file_list_count - == MAX_ARRAY_SIZE)) */ - } /* while ((mem_list_count < MAX_ARRAY_SIZE) && - (new_buffer_read < bufsize-size_read)) */ - - /* fakes filling the readlist arrays of lengths found above */ - k = start_k; - j = start_j; - for (i=0; iblocklens[k] == end_brd_size) - brd_size = flat_buf->blocklens[(k+1)% - flat_buf->count]; - else { - brd_size = flat_buf->blocklens[k] - end_brd_size; - k--; - buf_count--; - } - } - } - buf_count++; - k = (k + 1)%flat_buf->count; - } /* for (i=0; iblocklens[j] == end_frd_size) - frd_size = flat_file->blocklens[(j+1)% - flat_file->count]; - else { - frd_size = flat_file->blocklens[j] - end_frd_size; - j--; - } - } - } - if (j < flat_file->count - 1) j++; - else { - j = 0; - n_filetypes++; - } - } /* for (i=0; iblocklens[0] ) ) || - ((mem_list_count == 1) && - (new_buffer_read < flat_buf->blocklens[0]) ) || - ((file_list_count == MAX_ARRAY_SIZE) && - (new_file_read < flat_buf->blocklens[0]) ) || - ( (mem_list_count == MAX_ARRAY_SIZE) && - (new_buffer_read < flat_file->blocklens[0])) ) - { - - ADIOI_Delete_flattened(datatype); - ADIOI_GEN_ReadStrided_naive(fd, buf, count, datatype, - file_ptr_type, initial_off, status, error_code); - return; - } - - mem_offsets = (PVFS_size*)ADIOI_Malloc(max_mem_list*sizeof(PVFS_size)); - mem_lengths = (int *)ADIOI_Malloc(max_mem_list*sizeof(int)); - file_offsets = (int64_t *)ADIOI_Malloc(max_file_list*sizeof(int64_t)); - file_lengths = (int32_t *)ADIOI_Malloc(max_file_list*sizeof(int32_t)); - - size_read = 0; - n_filetypes = st_n_filetypes; - frd_size = st_frd_size; - brd_size = flat_buf->blocklens[0]; - buf_count = 0; - start_mem_offset = 0; - start_k = k = 0; - start_j = st_index; - - /* this section calculates mem_list_count and file_list_count - and also finds the possibly odd sized last array elements - in 
new_frd_size and new_brd_size */ - - while (size_read < bufsize) { - k = start_k; - new_buffer_read = 0; - mem_list_count = 0; - while ((mem_list_count < MAX_ARRAY_SIZE) && - (new_buffer_read < bufsize-size_read)) { - /* find mem_list_count and file_list_count such that both are - less than MAX_ARRAY_SIZE, the sum of their lengths are - equal, and the sum of all the data read and data to be - read in the next immediate read list is less than - bufsize */ - if(mem_list_count) { - if((new_buffer_read + flat_buf->blocklens[k] + - size_read) > bufsize) { - end_brd_size = new_buffer_read + - flat_buf->blocklens[k] - (bufsize - size_read); - new_buffer_read = bufsize - size_read; - } - else { - new_buffer_read += flat_buf->blocklens[k]; - end_brd_size = flat_buf->blocklens[k]; - } - } - else { - if (brd_size > (bufsize - size_read)) { - new_buffer_read = bufsize - size_read; - brd_size = new_buffer_read; - } - else new_buffer_read = brd_size; - } - mem_list_count++; - k = (k + 1)%flat_buf->count; - } /* while ((mem_list_count < MAX_ARRAY_SIZE) && - (new_buffer_read < bufsize-size_read)) */ - j = start_j; - new_file_read = 0; - file_list_count = 0; - while ((file_list_count < MAX_ARRAY_SIZE) && - (new_file_read < new_buffer_read)) { - if(file_list_count) { - if((new_file_read + flat_file->blocklens[j]) > - new_buffer_read) { - end_frd_size = new_buffer_read - new_file_read; - new_file_read = new_buffer_read; - j--; - } - else { - new_file_read += flat_file->blocklens[j]; - end_frd_size = flat_file->blocklens[j]; - } - } - else { - if (frd_size > new_buffer_read) { - new_file_read = new_buffer_read; - frd_size = new_file_read; - } - else new_file_read = frd_size; - } - file_list_count++; - if (j < (flat_file->count - 1)) j++; - else j = 0; - - k = start_k; - if ((new_file_read < new_buffer_read) && - (file_list_count == MAX_ARRAY_SIZE)) { - new_buffer_read = 0; - mem_list_count = 0; - while (new_buffer_read < new_file_read) { - if(mem_list_count) { - if((new_buffer_read + 
flat_buf->blocklens[k]) > - new_file_read) { - end_brd_size = new_file_read - new_buffer_read; - new_buffer_read = new_file_read; - k--; - } - else { - new_buffer_read += flat_buf->blocklens[k]; - end_brd_size = flat_buf->blocklens[k]; - } - } - else { - new_buffer_read = brd_size; - if (brd_size > (bufsize - size_read)) { - new_buffer_read = bufsize - size_read; - brd_size = new_buffer_read; - } - } - mem_list_count++; - k = (k + 1)%flat_buf->count; - } /* while (new_buffer_read < new_file_read) */ - } /* if ((new_file_read < new_buffer_read) && (file_list_count - == MAX_ARRAY_SIZE)) */ - } /* while ((mem_list_count < MAX_ARRAY_SIZE) && - (new_buffer_read < bufsize-size_read)) */ - - /* fills the allocated readlist arrays */ - k = start_k; - j = start_j; - for (i=0; icount) + - (int)flat_buf->indices[k]); - if(!i) { - mem_lengths[0] = brd_size; - mem_offsets[0] += flat_buf->blocklens[k] - brd_size; - } - else { - if (i == (mem_list_count - 1)) { - mem_lengths[i] = end_brd_size; - if (flat_buf->blocklens[k] == end_brd_size) - brd_size = flat_buf->blocklens[(k+1)% - flat_buf->count]; - else { - brd_size = flat_buf->blocklens[k] - end_brd_size; - k--; - buf_count--; - } - } - else { - mem_lengths[i] = flat_buf->blocklens[k]; - } - } - buf_count++; - k = (k + 1)%flat_buf->count; - } /* for (i=0; iindices[j] + - ((ADIO_Offset)n_filetypes) * filetype_extent; - if (!i) { - file_lengths[0] = frd_size; - file_offsets[0] += flat_file->blocklens[j] - frd_size; - } - else { - if (i == (file_list_count - 1)) { - file_lengths[i] = end_frd_size; - if (flat_file->blocklens[j] == end_frd_size) - frd_size = flat_file->blocklens[(j+1)% - flat_file->count]; - else { - frd_size = flat_file->blocklens[j] - end_frd_size; - j--; - } - } - else file_lengths[i] = flat_file->blocklens[j]; - } - if (j < flat_file->count - 1) j++; - else { - j = 0; - n_filetypes++; - } - } /* for (i=0; iobject_ref, file_req, 0, - PVFS_BOTTOM, mem_req, &(pvfs_fs->credentials), &resp_io); -#ifdef 
ADIOI_MPE_LOGGING - MPE_Log_event( ADIOI_MPE_read_b, 0, NULL ); -#endif - /* --BEGIN ERROR HANDLING-- */ - if (err_flag != 0) { - *error_code = MPIO_Err_create_code(MPI_SUCCESS, - MPIR_ERR_RECOVERABLE, - myname, __LINE__, - ADIOI_PVFS2_error_convert(err_flag), - "Error in PVFS_sys_read", 0); - } - /* --END ERROR HANDLING-- */ - PVFS_Request_free(&mem_req); - PVFS_Request_free(&file_req); - total_bytes_read += resp_io.total_completed; - size_read += new_buffer_read; - start_k = k; - start_j = j; - } /* while (size_read < bufsize) */ - ADIOI_Free(mem_offsets); - ADIOI_Free(mem_lengths); - } - ADIOI_Free(file_offsets); - ADIOI_Free(file_lengths); - - /* Other ADIO routines will convert absolute bytes into counts of datatypes */ - /* when incrementing fp_ind, need to also take into account the file type: - * consider an N-element 1-d subarray with a lb and ub: ( |---xxxxx-----| - * if we wrote N elements, offset needs to point at beginning of type, not - * at empty region at offset N+1) */ - if (file_ptr_type == ADIO_INDIVIDUAL) { - /* this is closer, but still incorrect for the cases where a small - * amount of a file type is "leftover" after a write */ - fd->fp_ind = disp + flat_file->indices[j] + - ((ADIO_Offset)n_filetypes)*filetype_extent; - } - if (err_flag == 0) *error_code = MPI_SUCCESS; - -error_state: - fd->fp_sys_posn = -1; /* set it to null. */ - -#ifdef HAVE_STATUS_SET_BYTES - MPIR_Status_set_bytes(status, datatype, bufsize); - /* This is a temporary way of filling in status. The right way is to - keep track of how much data was actually read and placed in buf - by ADIOI_BUFFERED_READ. 
*/ -#endif - - if (!buftype_is_contig) ADIOI_Delete_flattened(datatype); + ADIOI_PVFS2_OldReadStrided(fd, buf, count, datatype, + file_ptr_type, offset, status, error_code); + return; } + /* * vim: ts=8 sts=4 sw=4 noexpandtab */ diff --git a/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_read_list_classic.c b/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_read_list_classic.c new file mode 100644 index 0000000000..d5ceefa464 --- /dev/null +++ b/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_read_list_classic.c @@ -0,0 +1,909 @@ +/* -*- Mode: C; c-basic-offset:4 ; -*- + * vim: ts=8 sts=4 sw=4 noexpandtab + * + * Copyright (C) 2008 University of Chicago. + * See COPYRIGHT notice in top-level directory. + */ + +#include "adio.h" +#include "adio_extern.h" +#include "ad_pvfs2.h" + +#include "ad_pvfs2_common.h" + +void ADIOI_PVFS2_OldReadStrided(ADIO_File fd, void *buf, int count, + MPI_Datatype datatype, int file_ptr_type, + ADIO_Offset offset, ADIO_Status *status, int + *error_code) +{ + /* offset is in units of etype relative to the filetype. 
*/ + ADIOI_Flatlist_node *flat_buf, *flat_file; + int i, j, k, brd_size, frd_size=0, st_index=0; + int bufsize, sum, n_etypes_in_filetype, size_in_filetype; + int n_filetypes, etype_in_filetype; + ADIO_Offset abs_off_in_filetype=0; + int filetype_size, etype_size, buftype_size; + MPI_Aint filetype_extent, buftype_extent; + int buf_count, buftype_is_contig, filetype_is_contig; + ADIO_Offset off, disp, start_off, initial_off; + int flag, st_frd_size, st_n_filetypes; + + int mem_list_count, file_list_count; + PVFS_size *mem_offsets; + int64_t *file_offsets; + int *mem_lengths; + int32_t *file_lengths; + int total_blks_to_read; + + int max_mem_list, max_file_list; + + int b_blks_read; + int f_data_read; + int size_read=0, n_read_lists, extra_blks; + + int end_brd_size, end_frd_size; + int start_k, start_j, new_file_read, new_buffer_read; + int start_mem_offset; + PVFS_Request mem_req, file_req; + ADIOI_PVFS2_fs * pvfs_fs; + PVFS_sysresp_io resp_io; + int err_flag=0; + MPI_Offset total_bytes_read = 0; + static char myname[] = "ADIOI_PVFS2_ReadStrided"; + +#define MAX_ARRAY_SIZE 64 + + *error_code = MPI_SUCCESS; /* changed below if error */ + + ADIOI_Datatype_iscontig(datatype, &buftype_is_contig); + ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig); + + /* the HDF5 tests showed a bug in this list processing code (see many many + * lines down below). We added a workaround, but common HDF5 file types + * are actually contiguous and do not need the expensive workarond */ + if (!filetype_is_contig) { + flat_file = ADIOI_Flatlist; + while (flat_file->type != fd->filetype) flat_file = flat_file->next; + if (flat_file->count == 1 && !buftype_is_contig) + filetype_is_contig = 1; + } + + MPI_Type_size(fd->filetype, &filetype_size); + if ( ! 
filetype_size ) { + *error_code = MPI_SUCCESS; + return; + } + + MPI_Type_extent(fd->filetype, &filetype_extent); + MPI_Type_size(datatype, &buftype_size); + MPI_Type_extent(datatype, &buftype_extent); + etype_size = fd->etype_size; + + bufsize = buftype_size * count; + + pvfs_fs = (ADIOI_PVFS2_fs*)fd->fs_ptr; + + if (!buftype_is_contig && filetype_is_contig) { + +/* noncontiguous in memory, contiguous in file. */ + int64_t file_offsets; + int32_t file_lengths; + + ADIOI_Flatten_datatype(datatype); + flat_buf = ADIOI_Flatlist; + while (flat_buf->type != datatype) flat_buf = flat_buf->next; + + off = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind : + fd->disp + etype_size * offset; + + file_list_count = 1; + file_offsets = off; + file_lengths = 0; + total_blks_to_read = count*flat_buf->count; + b_blks_read = 0; + + /* allocate arrays according to max usage */ + if (total_blks_to_read > MAX_ARRAY_SIZE) + mem_list_count = MAX_ARRAY_SIZE; + else mem_list_count = total_blks_to_read; + mem_offsets = (PVFS_size*)ADIOI_Malloc(mem_list_count*sizeof(PVFS_size)); + mem_lengths = (int*)ADIOI_Malloc(mem_list_count*sizeof(int)); + + /* TODO: CHECK RESULTS OF MEMORY ALLOCATION */ + + j = 0; + /* step through each block in memory, filling memory arrays */ + while (b_blks_read < total_blks_to_read) { + for (i=0; icount; i++) { + mem_offsets[b_blks_read % MAX_ARRAY_SIZE] = + /* TODO: fix this compiler warning */ + ((PVFS_size)buf + j*buftype_extent + flat_buf->indices[i]); + mem_lengths[b_blks_read % MAX_ARRAY_SIZE] = + flat_buf->blocklens[i]; + file_lengths += flat_buf->blocklens[i]; + b_blks_read++; + if (!(b_blks_read % MAX_ARRAY_SIZE) || + (b_blks_read == total_blks_to_read)) { + + /* in the case of the last read list call, + adjust mem_list_count */ + if (b_blks_read == total_blks_to_read) { + mem_list_count = total_blks_to_read % MAX_ARRAY_SIZE; + /* in case last read list call fills max arrays */ + if (!mem_list_count) mem_list_count = MAX_ARRAY_SIZE; + } + err_flag = 
PVFS_Request_hindexed(mem_list_count, + mem_lengths, mem_offsets, PVFS_BYTE, &mem_req); + if (err_flag < 0) break; + err_flag = PVFS_Request_contiguous(file_lengths, + PVFS_BYTE, &file_req); + if (err_flag < 0) break; +#ifdef ADIOI_MPE_LOGGING + MPE_Log_event( ADIOI_MPE_read_a, 0, NULL ); +#endif + err_flag = PVFS_sys_read(pvfs_fs->object_ref, file_req, + file_offsets, PVFS_BOTTOM, mem_req, + &(pvfs_fs->credentials), &resp_io); +#ifdef ADIOI_MPE_LOGGING + MPE_Log_event( ADIOI_MPE_read_b, 0, NULL ); +#endif + /* --BEGIN ERROR HANDLING-- */ + if (err_flag != 0) { + *error_code = MPIO_Err_create_code(MPI_SUCCESS, + MPIR_ERR_RECOVERABLE, + myname, __LINE__, + ADIOI_PVFS2_error_convert(err_flag), + "Error in PVFS_sys_read", 0); + goto error_state; + } + PVFS_Request_free(&mem_req); + PVFS_Request_free(&file_req); + total_bytes_read += resp_io.total_completed; + /* --END ERROR HANDLING-- */ + + /* in the case of error or the last read list call, + * leave here */ + if (err_flag || b_blks_read == total_blks_to_read) break; + + file_offsets += file_lengths; + file_lengths = 0; + } + } /* for (i=0; icount; i++) */ + j++; + } /* while (b_blks_read < total_blks_to_read) */ + ADIOI_Free(mem_offsets); + ADIOI_Free(mem_lengths); + + if (file_ptr_type == ADIO_INDIVIDUAL) + fd->fp_ind += total_bytes_read; + + fd->fp_sys_posn = -1; /* set it to null. */ + +#ifdef HAVE_STATUS_SET_BYTES + MPIR_Status_set_bytes(status, datatype, bufsize); + /* This isa temporary way of filling in status. The right way is to + keep tracke of how much data was actually read adn placed in buf + by ADIOI_BUFFERED_READ. 
*/ +#endif + ADIOI_Delete_flattened(datatype); + + return; + } /* if (!buftype_is_contig && filetype_is_contig) */ + + /* know file is noncontiguous from above */ + /* noncontiguous in file */ + + /* filetype already flattened in ADIO_Open */ + flat_file = ADIOI_Flatlist; + while (flat_file->type != fd->filetype) flat_file = flat_file->next; + + disp = fd->disp; + initial_off = offset; + + + /* for each case - ADIO_Individual pointer or explicit, find the file + offset in bytes (offset), n_filetypes (how many filetypes into + file to start), frd_size (remaining amount of data in present + file block), and st_index (start point in terms of blocks in + starting filetype) */ + if (file_ptr_type == ADIO_INDIVIDUAL) { + offset = fd->fp_ind; /* in bytes */ + n_filetypes = -1; + flag = 0; + while (!flag) { + n_filetypes++; + for (i=0; icount; i++) { + if (disp + flat_file->indices[i] + + ((ADIO_Offset) n_filetypes)*filetype_extent + + flat_file->blocklens[i] >= offset) { + st_index = i; + frd_size = (int) (disp + flat_file->indices[i] + + ((ADIO_Offset) n_filetypes)*filetype_extent + + flat_file->blocklens[i] - offset); + flag = 1; + break; + } + } + } /* while (!flag) */ + } /* if (file_ptr_type == ADIO_INDIVIDUAL) */ + else { + n_etypes_in_filetype = filetype_size/etype_size; + n_filetypes = (int) (offset / n_etypes_in_filetype); + etype_in_filetype = (int) (offset % n_etypes_in_filetype); + size_in_filetype = etype_in_filetype * etype_size; + + sum = 0; + for (i=0; icount; i++) { + sum += flat_file->blocklens[i]; + if (sum > size_in_filetype) { + st_index = i; + frd_size = sum - size_in_filetype; + abs_off_in_filetype = flat_file->indices[i] + + size_in_filetype - (sum - flat_file->blocklens[i]); + break; + } + } + + /* abs. 
offset in bytes in the file */ + offset = disp + ((ADIO_Offset) n_filetypes)*filetype_extent + + abs_off_in_filetype; + } /* else [file_ptr_type != ADIO_INDIVIDUAL] */ + + start_off = offset; + st_frd_size = frd_size; + st_n_filetypes = n_filetypes; + + if (buftype_is_contig && !filetype_is_contig) { + +/* contiguous in memory, noncontiguous in file. should be the most + common case. */ + + int mem_lengths; + char *mem_offsets; + + i = 0; + j = st_index; + n_filetypes = st_n_filetypes; + + mem_list_count = 1; + + /* determine how many blocks in file to read */ + f_data_read = ADIOI_MIN(st_frd_size, bufsize); + total_blks_to_read = 1; + if (j < (flat_file->count-1)) j++; + else { + j = 0; + n_filetypes++; + } + while (f_data_read < bufsize) { + f_data_read += flat_file->blocklens[j]; + total_blks_to_read++; + if (j<(flat_file->count-1)) j++; + else j = 0; + } + + j = st_index; + n_filetypes = st_n_filetypes; + n_read_lists = total_blks_to_read/MAX_ARRAY_SIZE; + extra_blks = total_blks_to_read%MAX_ARRAY_SIZE; + + mem_offsets = buf; + mem_lengths = 0; + + /* if at least one full readlist, allocate file arrays + at max array size and don't free until very end */ + if (n_read_lists) { + file_offsets = (int64_t*)ADIOI_Malloc(MAX_ARRAY_SIZE* + sizeof(int64_t)); + file_lengths = (int32_t*)ADIOI_Malloc(MAX_ARRAY_SIZE* + sizeof(int32_t)); + } + /* if there's no full readlist allocate file arrays according + to needed size (extra_blks) */ + else { + file_offsets = (int64_t*)ADIOI_Malloc(extra_blks* + sizeof(int64_t)); + file_lengths = (int32_t*)ADIOI_Malloc(extra_blks* + sizeof(int32_t)); + } + + /* for file arrays that are of MAX_ARRAY_SIZE, build arrays */ + for (i=0; iindices[j]; + file_lengths[k] = flat_file->blocklens[j]; + mem_lengths += file_lengths[k]; + } + if (j<(flat_file->count - 1)) j++; + else { + j = 0; + n_filetypes++; + } + } /* for (k=0; kobject_ref, file_req, 0, + mem_offsets, mem_req, + &(pvfs_fs->credentials), &resp_io); +#ifdef ADIOI_MPE_LOGGING + 
MPE_Log_event( ADIOI_MPE_read_b, 0, NULL ); +#endif + /* --BEGIN ERROR HANDLING-- */ + if (err_flag != 0) { + *error_code = MPIO_Err_create_code(MPI_SUCCESS, + MPIR_ERR_RECOVERABLE, + myname, __LINE__, + ADIOI_PVFS2_error_convert(err_flag), + "Error in PVFS_sys_read", 0); + goto error_state; + } + /* --END ERROR HANDING-- */ + PVFS_Request_free(&mem_req); + PVFS_Request_free(&file_req); + + total_bytes_read += resp_io.total_completed; + + mem_offsets += mem_lengths; + mem_lengths = 0; + } /* for (i=0; iindices[j]; + if (k == (extra_blks - 1)) { + file_lengths[k] = bufsize - (int32_t) mem_lengths + - (int32_t) mem_offsets + (int32_t) buf; + } + else file_lengths[k] = flat_file->blocklens[j]; + } /* if(i || k) */ + mem_lengths += file_lengths[k]; + if (j<(flat_file->count - 1)) j++; + else { + j = 0; + n_filetypes++; + } + } /* for (k=0; kobject_ref, file_req, 0, + mem_offsets, mem_req, &(pvfs_fs->credentials), &resp_io); +#ifdef ADIOI_MPE_LOGGING + MPE_Log_event( ADIOI_MPE_read_b, 0, NULL ); +#endif + /* --BEGIN ERROR HANDLING-- */ + if (err_flag != 0) { + *error_code = MPIO_Err_create_code(MPI_SUCCESS, + MPIR_ERR_RECOVERABLE, + myname, __LINE__, + ADIOI_PVFS2_error_convert(err_flag), + "Error in PVFS_sys_read", 0); + goto error_state; + } + /* --END ERROR HANDLING-- */ + PVFS_Request_free(&mem_req); + PVFS_Request_free(&file_req); + total_bytes_read += resp_io.total_completed; + } + } + else { +/* noncontiguous in memory as well as in file */ + + ADIOI_Flatten_datatype(datatype); + flat_buf = ADIOI_Flatlist; + while (flat_buf->type != datatype) flat_buf = flat_buf->next; + + size_read = 0; + n_filetypes = st_n_filetypes; + frd_size = st_frd_size; + brd_size = flat_buf->blocklens[0]; + buf_count = 0; + start_mem_offset = 0; + start_k = k = 0; + start_j = st_index; + max_mem_list = 0; + max_file_list = 0; + + /* run through and file max_file_list and max_mem_list so that you + can allocate the file and memory arrays less than MAX_ARRAY_SIZE + if possible */ + + while 
(size_read < bufsize) { + k = start_k; + new_buffer_read = 0; + mem_list_count = 0; + while ((mem_list_count < MAX_ARRAY_SIZE) && + (new_buffer_read < bufsize-size_read)) { + /* find mem_list_count and file_list_count such that both are + less than MAX_ARRAY_SIZE, the sum of their lengths are + equal, and the sum of all the data read and data to be + read in the next immediate read list is less than + bufsize */ + if(mem_list_count) { + if((new_buffer_read + flat_buf->blocklens[k] + + size_read) > bufsize) { + end_brd_size = new_buffer_read + + flat_buf->blocklens[k] - (bufsize - size_read); + new_buffer_read = bufsize - size_read; + } + else { + new_buffer_read += flat_buf->blocklens[k]; + end_brd_size = flat_buf->blocklens[k]; + } + } + else { + if (brd_size > (bufsize - size_read)) { + new_buffer_read = bufsize - size_read; + brd_size = new_buffer_read; + } + else new_buffer_read = brd_size; + } + mem_list_count++; + k = (k + 1)%flat_buf->count; + } /* while ((mem_list_count < MAX_ARRAY_SIZE) && + (new_buffer_read < bufsize-size_read)) */ + j = start_j; + new_file_read = 0; + file_list_count = 0; + while ((file_list_count < MAX_ARRAY_SIZE) && + (new_file_read < new_buffer_read)) { + if(file_list_count) { + if((new_file_read + flat_file->blocklens[j]) > + new_buffer_read) { + end_frd_size = new_buffer_read - new_file_read; + new_file_read = new_buffer_read; + j--; + } + else { + new_file_read += flat_file->blocklens[j]; + end_frd_size = flat_file->blocklens[j]; + } + } + else { + if (frd_size > new_buffer_read) { + new_file_read = new_buffer_read; + frd_size = new_file_read; + } + else new_file_read = frd_size; + } + file_list_count++; + if (j < (flat_file->count - 1)) j++; + else j = 0; + + k = start_k; + if ((new_file_read < new_buffer_read) && + (file_list_count == MAX_ARRAY_SIZE)) { + new_buffer_read = 0; + mem_list_count = 0; + while (new_buffer_read < new_file_read) { + if(mem_list_count) { + if((new_buffer_read + flat_buf->blocklens[k]) > + new_file_read) 
{ + end_brd_size = new_file_read - new_buffer_read; + new_buffer_read = new_file_read; + k--; + } + else { + new_buffer_read += flat_buf->blocklens[k]; + end_brd_size = flat_buf->blocklens[k]; + } + } + else { + new_buffer_read = brd_size; + if (brd_size > (bufsize - size_read)) { + new_buffer_read = bufsize - size_read; + brd_size = new_buffer_read; + } + } + mem_list_count++; + k = (k + 1)%flat_buf->count; + } /* while (new_buffer_read < new_file_read) */ + } /* if ((new_file_read < new_buffer_read) && (file_list_count + == MAX_ARRAY_SIZE)) */ + } /* while ((mem_list_count < MAX_ARRAY_SIZE) && + (new_buffer_read < bufsize-size_read)) */ + + /* fakes filling the readlist arrays of lengths found above */ + k = start_k; + j = start_j; + for (i=0; iblocklens[k] == end_brd_size) + brd_size = flat_buf->blocklens[(k+1)% + flat_buf->count]; + else { + brd_size = flat_buf->blocklens[k] - end_brd_size; + k--; + buf_count--; + } + } + } + buf_count++; + k = (k + 1)%flat_buf->count; + } /* for (i=0; iblocklens[j] == end_frd_size) + frd_size = flat_file->blocklens[(j+1)% + flat_file->count]; + else { + frd_size = flat_file->blocklens[j] - end_frd_size; + j--; + } + } + } + if (j < flat_file->count - 1) j++; + else { + j = 0; + n_filetypes++; + } + } /* for (i=0; iblocklens[0] ) ) || + ((mem_list_count == 1) && + (new_buffer_read < flat_buf->blocklens[0]) ) || + ((file_list_count == MAX_ARRAY_SIZE) && + (new_file_read < flat_buf->blocklens[0]) ) || + ( (mem_list_count == MAX_ARRAY_SIZE) && + (new_buffer_read < flat_file->blocklens[0])) ) + { + + ADIOI_Delete_flattened(datatype); + ADIOI_GEN_ReadStrided_naive(fd, buf, count, datatype, + file_ptr_type, initial_off, status, error_code); + return; + } + + mem_offsets = (PVFS_size*)ADIOI_Malloc(max_mem_list*sizeof(PVFS_size)); + mem_lengths = (int *)ADIOI_Malloc(max_mem_list*sizeof(int)); + file_offsets = (int64_t *)ADIOI_Malloc(max_file_list*sizeof(int64_t)); + file_lengths = (int32_t *)ADIOI_Malloc(max_file_list*sizeof(int32_t)); 
+ + size_read = 0; + n_filetypes = st_n_filetypes; + frd_size = st_frd_size; + brd_size = flat_buf->blocklens[0]; + buf_count = 0; + start_mem_offset = 0; + start_k = k = 0; + start_j = st_index; + + /* this section calculates mem_list_count and file_list_count + and also finds the possibly odd sized last array elements + in new_frd_size and new_brd_size */ + + while (size_read < bufsize) { + k = start_k; + new_buffer_read = 0; + mem_list_count = 0; + while ((mem_list_count < MAX_ARRAY_SIZE) && + (new_buffer_read < bufsize-size_read)) { + /* find mem_list_count and file_list_count such that both are + less than MAX_ARRAY_SIZE, the sum of their lengths are + equal, and the sum of all the data read and data to be + read in the next immediate read list is less than + bufsize */ + if(mem_list_count) { + if((new_buffer_read + flat_buf->blocklens[k] + + size_read) > bufsize) { + end_brd_size = new_buffer_read + + flat_buf->blocklens[k] - (bufsize - size_read); + new_buffer_read = bufsize - size_read; + } + else { + new_buffer_read += flat_buf->blocklens[k]; + end_brd_size = flat_buf->blocklens[k]; + } + } + else { + if (brd_size > (bufsize - size_read)) { + new_buffer_read = bufsize - size_read; + brd_size = new_buffer_read; + } + else new_buffer_read = brd_size; + } + mem_list_count++; + k = (k + 1)%flat_buf->count; + } /* while ((mem_list_count < MAX_ARRAY_SIZE) && + (new_buffer_read < bufsize-size_read)) */ + j = start_j; + new_file_read = 0; + file_list_count = 0; + while ((file_list_count < MAX_ARRAY_SIZE) && + (new_file_read < new_buffer_read)) { + if(file_list_count) { + if((new_file_read + flat_file->blocklens[j]) > + new_buffer_read) { + end_frd_size = new_buffer_read - new_file_read; + new_file_read = new_buffer_read; + j--; + } + else { + new_file_read += flat_file->blocklens[j]; + end_frd_size = flat_file->blocklens[j]; + } + } + else { + if (frd_size > new_buffer_read) { + new_file_read = new_buffer_read; + frd_size = new_file_read; + } + else new_file_read 
= frd_size; + } + file_list_count++; + if (j < (flat_file->count - 1)) j++; + else j = 0; + + k = start_k; + if ((new_file_read < new_buffer_read) && + (file_list_count == MAX_ARRAY_SIZE)) { + new_buffer_read = 0; + mem_list_count = 0; + while (new_buffer_read < new_file_read) { + if(mem_list_count) { + if((new_buffer_read + flat_buf->blocklens[k]) > + new_file_read) { + end_brd_size = new_file_read - new_buffer_read; + new_buffer_read = new_file_read; + k--; + } + else { + new_buffer_read += flat_buf->blocklens[k]; + end_brd_size = flat_buf->blocklens[k]; + } + } + else { + new_buffer_read = brd_size; + if (brd_size > (bufsize - size_read)) { + new_buffer_read = bufsize - size_read; + brd_size = new_buffer_read; + } + } + mem_list_count++; + k = (k + 1)%flat_buf->count; + } /* while (new_buffer_read < new_file_read) */ + } /* if ((new_file_read < new_buffer_read) && (file_list_count + == MAX_ARRAY_SIZE)) */ + } /* while ((mem_list_count < MAX_ARRAY_SIZE) && + (new_buffer_read < bufsize-size_read)) */ + + /* fills the allocated readlist arrays */ + k = start_k; + j = start_j; + for (i=0; icount) + + (int)flat_buf->indices[k]); + if(!i) { + mem_lengths[0] = brd_size; + mem_offsets[0] += flat_buf->blocklens[k] - brd_size; + } + else { + if (i == (mem_list_count - 1)) { + mem_lengths[i] = end_brd_size; + if (flat_buf->blocklens[k] == end_brd_size) + brd_size = flat_buf->blocklens[(k+1)% + flat_buf->count]; + else { + brd_size = flat_buf->blocklens[k] - end_brd_size; + k--; + buf_count--; + } + } + else { + mem_lengths[i] = flat_buf->blocklens[k]; + } + } + buf_count++; + k = (k + 1)%flat_buf->count; + } /* for (i=0; iindices[j] + + ((ADIO_Offset)n_filetypes) * filetype_extent; + if (!i) { + file_lengths[0] = frd_size; + file_offsets[0] += flat_file->blocklens[j] - frd_size; + } + else { + if (i == (file_list_count - 1)) { + file_lengths[i] = end_frd_size; + if (flat_file->blocklens[j] == end_frd_size) + frd_size = flat_file->blocklens[(j+1)% + flat_file->count]; + 
else { + frd_size = flat_file->blocklens[j] - end_frd_size; + j--; + } + } + else file_lengths[i] = flat_file->blocklens[j]; + } + if (j < flat_file->count - 1) j++; + else { + j = 0; + n_filetypes++; + } + } /* for (i=0; iobject_ref, file_req, 0, + PVFS_BOTTOM, mem_req, &(pvfs_fs->credentials), &resp_io); +#ifdef ADIOI_MPE_LOGGING + MPE_Log_event( ADIOI_MPE_read_b, 0, NULL ); +#endif + /* --BEGIN ERROR HANDLING-- */ + if (err_flag != 0) { + *error_code = MPIO_Err_create_code(MPI_SUCCESS, + MPIR_ERR_RECOVERABLE, + myname, __LINE__, + ADIOI_PVFS2_error_convert(err_flag), + "Error in PVFS_sys_read", 0); + } + /* --END ERROR HANDLING-- */ + PVFS_Request_free(&mem_req); + PVFS_Request_free(&file_req); + total_bytes_read += resp_io.total_completed; + size_read += new_buffer_read; + start_k = k; + start_j = j; + } /* while (size_read < bufsize) */ + ADIOI_Free(mem_offsets); + ADIOI_Free(mem_lengths); + } + /* Other ADIO routines will convert absolute bytes into counts of datatypes */ + /* when incrementing fp_ind, need to also take into account the file type: + * consider an N-element 1-d subarray with a lb and ub: ( |---xxxxx-----| + * if we wrote N elements, offset needs to point at beginning of type, not + * at empty region at offset N+1) + * + * As we discussed on mpich-discuss in may/june 2009, the code below might + * look wierd, but by putting fp_ind at the last byte written, the next + * time we run through the strided code we'll update the fp_ind to the + * right location. */ + if (file_ptr_type == ADIO_INDIVIDUAL) { + fd->fp_ind = file_offsets[file_list_count-1]+ + file_lengths[file_list_count-1]; + } + + ADIOI_Free(file_offsets); + ADIOI_Free(file_lengths); + + if (err_flag == 0) *error_code = MPI_SUCCESS; + +error_state: + fd->fp_sys_posn = -1; /* set it to null. */ + +#ifdef HAVE_STATUS_SET_BYTES + MPIR_Status_set_bytes(status, datatype, bufsize); + /* This is a temporary way of filling in status. 
The right way is to + keep track of how much data was actually read and placed in buf + by ADIOI_BUFFERED_READ. */ +#endif + + if (!buftype_is_contig) ADIOI_Delete_flattened(datatype); +} + diff --git a/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_write.c b/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_write.c index aaa4c75a41..47f64ad2fd 100644 --- a/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_write.c +++ b/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_write.c @@ -7,7 +7,7 @@ #include "ad_pvfs2.h" #include "adio_extern.h" - +#include "ad_pvfs2_io.h" #include "ad_pvfs2_common.h" void ADIOI_PVFS2_WriteContig(ADIO_File fd, void *buf, int count, @@ -104,950 +104,78 @@ fn_exit: return; } +int ADIOI_PVFS2_WriteStridedListIO(ADIO_File fd, void *buf, int count, + MPI_Datatype datatype, int file_ptr_type, + ADIO_Offset offset, ADIO_Status *status, + int *error_code) +{ + return ADIOI_PVFS2_StridedListIO(fd, buf, count, + datatype, file_ptr_type, + offset, status, + error_code, WRITE); +} + +int ADIOI_PVFS2_WriteStridedDtypeIO(ADIO_File fd, void *buf, int count, + MPI_Datatype datatype, int file_ptr_type, + ADIO_Offset offset, ADIO_Status *status, + int *error_code) +{ + return ADIOI_PVFS2_StridedDtypeIO(fd, buf, count, + datatype, file_ptr_type, + offset, status, error_code, + WRITE); +} + + void ADIOI_PVFS2_WriteStrided(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int *error_code) { - /* as with all the other WriteStrided functions, offset is in units of - * etype relative to the filetype */ + /* four ways (to date) that we can carry out strided i/o accesses: + * - naive posix + * - 'true' Datatype (from avery) + * - new List I/O (from avery) + * - classic List I/O (the one that's always been in ROMIO) + * I imagine we'll keep Datatype as an optional optimization, and afer a + * release or two promote it to the default + */ - /* Since PVFS2 does not support file locking, can't do buffered 
writes - as on Unix */ - - ADIOI_Flatlist_node *flat_buf, *flat_file; - int i, j, k, bwr_size, fwr_size=0, st_index=0; - int bufsize, sum, n_etypes_in_filetype, size_in_filetype; - int n_filetypes, etype_in_filetype; - ADIO_Offset abs_off_in_filetype=0; - int filetype_size, etype_size, buftype_size; - MPI_Aint filetype_extent, buftype_extent; - int buf_count, buftype_is_contig, filetype_is_contig; - ADIO_Offset off, disp, start_off, initial_off; - int flag, st_fwr_size, st_n_filetypes; - int err_flag=0; - - int mem_list_count, file_list_count; - PVFS_size * mem_offsets; - int64_t *file_offsets; - int *mem_lengths; - int32_t *file_lengths; - int total_blks_to_write; - - int max_mem_list, max_file_list; - - int b_blks_wrote; - int f_data_wrote; - int size_wrote=0, n_write_lists, extra_blks; - - int end_bwr_size, end_fwr_size; - int start_k, start_j, new_file_write, new_buffer_write; - int start_mem_offset; - PVFS_Request mem_req, file_req; - ADIOI_PVFS2_fs * pvfs_fs; - PVFS_sysresp_io resp_io; - MPI_Offset total_bytes_written=0; - static char myname[] = "ADIOI_PVFS2_WRITESTRIDED"; - - /* note: don't increase this: several parts of PVFS2 now - * assume this limit*/ -#define MAX_ARRAY_SIZE 64 - - /* --BEGIN ERROR HANDLING-- */ - if (fd->atomicity) { - *error_code = MPIO_Err_create_code(MPI_SUCCESS, - MPIR_ERR_RECOVERABLE, - myname, __LINE__, - MPI_ERR_ARG, - "Atomic noncontiguous writes are not supported by PVFS2", 0); - return; + /* a lot of near-duplication from ADIOI_PVFS2_ReadStrided: for + * debugging/testing it's helpful to be able to turn on and off these + * optimizations separately for the read and write cases */ + int ret = -1; + if ( fd->hints->fs_hints.pvfs2.posix_write == ADIOI_HINT_ENABLE) { + ADIOI_GEN_WriteStrided_naive(fd, buf, count, + datatype, file_ptr_type, + offset, status, error_code); + return; } - /* --END ERROR HANDLING-- */ + if ( fd->hints->fs_hints.pvfs2.dtype_write == ADIOI_HINT_ENABLE) { + ret = ADIOI_PVFS2_WriteStridedDtypeIO(fd, buf, 
count, + datatype, file_ptr_type, + offset, status, error_code); - ADIOI_Datatype_iscontig(datatype, &buftype_is_contig); - ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig); - - /* the HDF5 tests showed a bug in this list processing code (see many many - * lines down below). We added a workaround, but common HDF5 file types - * are actually contiguous and do not need the expensive workarond */ - if (!filetype_is_contig) { - flat_file = ADIOI_Flatlist; - while (flat_file->type != fd->filetype) flat_file = flat_file->next; - if (flat_file->count == 1 && !buftype_is_contig) - filetype_is_contig = 1; - } - - MPI_Type_size(fd->filetype, &filetype_size); - if ( ! filetype_size ) { - *error_code = MPI_SUCCESS; - return; - } - - MPI_Type_extent(fd->filetype, &filetype_extent); - MPI_Type_size(datatype, &buftype_size); - MPI_Type_extent(datatype, &buftype_extent); - etype_size = fd->etype_size; - - bufsize = buftype_size * count; - - pvfs_fs = (ADIOI_PVFS2_fs*)fd->fs_ptr; - - if (!buftype_is_contig && filetype_is_contig) { - -/* noncontiguous in memory, contiguous in file. 
*/ - int64_t file_offsets; - int32_t file_lengths; - - ADIOI_Flatten_datatype(datatype); - flat_buf = ADIOI_Flatlist; - while (flat_buf->type != datatype) flat_buf = flat_buf->next; - - if (file_ptr_type == ADIO_EXPLICIT_OFFSET) { - off = fd->disp + etype_size * offset; - } - else off = fd->fp_ind; - - file_list_count = 1; - file_offsets = off; - file_lengths = 0; - total_blks_to_write = count*flat_buf->count; - b_blks_wrote = 0; - - /* allocate arrays according to max usage */ - if (total_blks_to_write > MAX_ARRAY_SIZE) - mem_list_count = MAX_ARRAY_SIZE; - else mem_list_count = total_blks_to_write; - mem_offsets = (PVFS_size*)ADIOI_Malloc(mem_list_count*sizeof(PVFS_size)); - mem_lengths = (int*)ADIOI_Malloc(mem_list_count*sizeof(int)); - - j = 0; - /* step through each block in memory, filling memory arrays */ - while (b_blks_wrote < total_blks_to_write) { - for (i=0; icount; i++) { - mem_offsets[b_blks_wrote % MAX_ARRAY_SIZE] = - /* TODO: fix this warning by casting to an integer that's - * the same size as a char * and /then/ casting to - * PVFS_size */ - ((PVFS_size)buf + j*buftype_extent + flat_buf->indices[i]); - mem_lengths[b_blks_wrote % MAX_ARRAY_SIZE] = - flat_buf->blocklens[i]; - file_lengths += flat_buf->blocklens[i]; - b_blks_wrote++; - if (!(b_blks_wrote % MAX_ARRAY_SIZE) || - (b_blks_wrote == total_blks_to_write)) { - - /* in the case of the last write list call, - adjust mem_list_count */ - if (b_blks_wrote == total_blks_to_write) { - mem_list_count = total_blks_to_write % MAX_ARRAY_SIZE; - /* in case last write list call fills max arrays */ - if (!mem_list_count) mem_list_count = MAX_ARRAY_SIZE; - } - err_flag = PVFS_Request_hindexed(mem_list_count, - mem_lengths, mem_offsets, - PVFS_BYTE, &mem_req); - /* --BEGIN ERROR HANDLING-- */ - if (err_flag != 0) { - *error_code = MPIO_Err_create_code(MPI_SUCCESS, - MPIR_ERR_RECOVERABLE, - myname, __LINE__, - ADIOI_PVFS2_error_convert(err_flag), - "Error in PVFS_Request_hindexed (memory)", 0); - break; - } - 
/* --END ERROR HANDLING-- */ - - err_flag = PVFS_Request_contiguous(file_lengths, - PVFS_BYTE, &file_req); - /* --BEGIN ERROR HANDLING-- */ - if (err_flag != 0) { - *error_code = MPIO_Err_create_code(MPI_SUCCESS, - MPIR_ERR_RECOVERABLE, - myname, __LINE__, - ADIOI_PVFS2_error_convert(err_flag), - "Error in PVFS_Request_contiguous (file)", 0); - break; - } - /* --END ERROR HANDLING-- */ - -#ifdef ADIOI_MPE_LOGGING - MPE_Log_event( ADIOI_MPE_write_a, 0, NULL ); -#endif - err_flag = PVFS_sys_write(pvfs_fs->object_ref, file_req, - file_offsets, PVFS_BOTTOM, - mem_req, - &(pvfs_fs->credentials), - &resp_io); -#ifdef ADIOI_MPE_LOGGING - MPE_Log_event( ADIOI_MPE_write_b, 0, NULL ); -#endif - total_bytes_written += resp_io.total_completed; - - /* in the case of error or the last write list call, - * leave here */ - /* --BEGIN ERROR HANDLING-- */ - if (err_flag) { - *error_code = MPIO_Err_create_code(MPI_SUCCESS, - MPIR_ERR_RECOVERABLE, - myname, __LINE__, - ADIOI_PVFS2_error_convert(err_flag), - "Error in PVFS_sys_write", 0); - break; - } - /* --END ERROR HANDLING-- */ - if (b_blks_wrote == total_blks_to_write) break; - - file_offsets += file_lengths; - file_lengths = 0; - PVFS_Request_free(&mem_req); - PVFS_Request_free(&file_req); - } - } /* for (i=0; icount; i++) */ - j++; - } /* while (b_blks_wrote < total_blks_to_write) */ - ADIOI_Free(mem_offsets); - ADIOI_Free(mem_lengths); - - if (file_ptr_type == ADIO_INDIVIDUAL) - fd->fp_ind += total_bytes_written; - - if (!err_flag) *error_code = MPI_SUCCESS; - - fd->fp_sys_posn = -1; /* clear this. */ - -#ifdef HAVE_STATUS_SET_BYTES - MPIR_Status_set_bytes(status, datatype, bufsize); -/* This is a temporary way of filling in status. The right way is to - keep track of how much data was actually written by ADIOI_BUFFERED_WRITE. 
*/ -#endif - - ADIOI_Delete_flattened(datatype); - return; - } /* if (!buftype_is_contig && filetype_is_contig) */ - - /* already know that file is noncontiguous from above */ - /* noncontiguous in file */ - -/* filetype already flattened in ADIO_Open */ - flat_file = ADIOI_Flatlist; - while (flat_file->type != fd->filetype) flat_file = flat_file->next; - - disp = fd->disp; - initial_off = offset; - - /* for each case - ADIO_Individual pointer or explicit, find offset - (file offset in bytes), n_filetypes (how many filetypes into file - to start), fwr_size (remaining amount of data in present file - block), and st_index (start point in terms of blocks in starting - filetype) */ - if (file_ptr_type == ADIO_INDIVIDUAL) { - offset = fd->fp_ind; /* in bytes */ - n_filetypes = -1; - flag = 0; - while (!flag) { - n_filetypes++; - for (i=0; icount; i++) { - if (disp + flat_file->indices[i] + - ((ADIO_Offset) n_filetypes)*filetype_extent + - flat_file->blocklens[i] >= offset) { - st_index = i; - fwr_size = disp + flat_file->indices[i] + - ((ADIO_Offset) n_filetypes)*filetype_extent - + flat_file->blocklens[i] - offset; - flag = 1; - break; - } - } - } /* while (!flag) */ - } /* if (file_ptr_type == ADIO_INDIVIDUAL) */ - else { - n_etypes_in_filetype = filetype_size/etype_size; - n_filetypes = (int) (offset / n_etypes_in_filetype); - etype_in_filetype = (int) (offset % n_etypes_in_filetype); - size_in_filetype = etype_in_filetype * etype_size; - - sum = 0; - for (i=0; icount; i++) { - sum += flat_file->blocklens[i]; - if (sum > size_in_filetype) { - st_index = i; - fwr_size = sum - size_in_filetype; - abs_off_in_filetype = flat_file->indices[i] + - size_in_filetype - (sum - flat_file->blocklens[i]); - break; - } - } - - /* abs. 
offset in bytes in the file */ - offset = disp + ((ADIO_Offset) n_filetypes)*filetype_extent + - abs_off_in_filetype; - } /* else [file_ptr_type != ADIO_INDIVIDUAL] */ - - start_off = offset; - st_fwr_size = fwr_size; - st_n_filetypes = n_filetypes; - - if (buftype_is_contig && !filetype_is_contig) { - -/* contiguous in memory, noncontiguous in file. should be the most - common case. */ - - int mem_lengths; - char *mem_offsets; - - i = 0; - j = st_index; - off = offset; - n_filetypes = st_n_filetypes; - - mem_list_count = 1; - - /* determine how many blocks in file to write */ - f_data_wrote = ADIOI_MIN(st_fwr_size, bufsize); - total_blks_to_write = 1; - if (j < (flat_file->count -1)) j++; - else { - j = 0; - n_filetypes++; - } - while (f_data_wrote < bufsize) { - f_data_wrote += flat_file->blocklens[j]; - total_blks_to_write++; - if (j<(flat_file->count-1)) j++; - else j = 0; - } - - j = st_index; - n_filetypes = st_n_filetypes; - n_write_lists = total_blks_to_write/MAX_ARRAY_SIZE; - extra_blks = total_blks_to_write%MAX_ARRAY_SIZE; - - mem_offsets = buf; - mem_lengths = 0; - - /* if at least one full writelist, allocate file arrays - at max array size and don't free until very end */ - if (n_write_lists) { - file_offsets = (int64_t*)ADIOI_Malloc(MAX_ARRAY_SIZE* - sizeof(int64_t)); - file_lengths = (int32_t*)ADIOI_Malloc(MAX_ARRAY_SIZE* - sizeof(int32_t)); - } - /* if there's no full writelist allocate file arrays according - to needed size (extra_blks) */ - else { - file_offsets = (int64_t*)ADIOI_Malloc(extra_blks* - sizeof(int64_t)); - file_lengths = (int32_t*)ADIOI_Malloc(extra_blks* - sizeof(int32_t)); + /* Fall back to list I/O if datatype I/O didn't work */ + if (ret != 0) + { + fprintf(stderr, + "Falling back to list I/O since datatype I/O failed\n"); + ret = ADIOI_PVFS2_WriteStridedListIO(fd, buf, count, + datatype, file_ptr_type, + offset, status, error_code); } - - /* for file arrays that are of MAX_ARRAY_SIZE, build arrays */ - for (i=0; iindices[j]; - 
file_lengths[k] = flat_file->blocklens[j]; - mem_lengths += file_lengths[k]; - } - if (j<(flat_file->count - 1)) j++; - else { - j = 0; - n_filetypes++; - } - } /* for (k=0; kobject_ref, file_req, 0, - mem_offsets, mem_req, - &(pvfs_fs->credentials), &resp_io); -#ifdef ADIOI_MPE_LOGGING - MPE_Log_event( ADIOI_MPE_write_b, 0, NULL ); -#endif - /* --BEGIN ERROR HANDLING-- */ - if (err_flag != 0) { - *error_code = MPIO_Err_create_code(MPI_SUCCESS, - MPIR_ERR_RECOVERABLE, - myname, __LINE__, - ADIOI_PVFS2_error_convert(err_flag), - "Error in PVFS_sys_write", 0); - goto error_state; - } - /* --END ERROR HANDLING-- */ - total_bytes_written += resp_io.total_completed; - - mem_offsets += mem_lengths; - mem_lengths = 0; - PVFS_Request_free(&file_req); - PVFS_Request_free(&mem_req); - - } /* for (i=0; iindices[j]; - if (k == (extra_blks - 1)) { - file_lengths[k] = bufsize - (int32_t) mem_lengths - - (int32_t) mem_offsets + (int32_t) buf; - } - else file_lengths[k] = flat_file->blocklens[j]; - } /* if(i || k) */ - mem_lengths += file_lengths[k]; - if (j<(flat_file->count - 1)) j++; - else { - j = 0; - n_filetypes++; - } - } /* for (k=0; kobject_ref, file_req, 0, - mem_offsets, mem_req, - &(pvfs_fs->credentials), &resp_io); -#ifdef ADIOI_MPE_LOGGING - MPE_Log_event( ADIOI_MPE_write_b, 0, NULL ); -#endif - /* --BEGIN ERROR HANDLING-- */ - if (err_flag != 0) { - *error_code = MPIO_Err_create_code(MPI_SUCCESS, - MPIR_ERR_RECOVERABLE, - myname, __LINE__, - ADIOI_PVFS2_error_convert(err_flag), - "Error in PVFS_sys_write", 0); - goto error_state; - } - /* --END ERROR HANDLING-- */ - total_bytes_written += resp_io.total_completed; - PVFS_Request_free(&mem_req); - PVFS_Request_free(&file_req); - } - } - else { - /* noncontiguous in memory as well as in file */ - - ADIOI_Flatten_datatype(datatype); - flat_buf = ADIOI_Flatlist; - while (flat_buf->type != datatype) flat_buf = flat_buf->next; - - size_wrote = 0; - n_filetypes = st_n_filetypes; - fwr_size = st_fwr_size; - bwr_size = 
flat_buf->blocklens[0]; - buf_count = 0; - start_mem_offset = 0; - start_k = k = 0; - start_j = st_index; - max_mem_list = 0; - max_file_list = 0; - - /* run through and file max_file_list and max_mem_list so that you - can allocate the file and memory arrays less than MAX_ARRAY_SIZE - if possible */ - - while (size_wrote < bufsize) { - k = start_k; - new_buffer_write = 0; - mem_list_count = 0; - while ((mem_list_count < MAX_ARRAY_SIZE) && - (new_buffer_write < bufsize-size_wrote)) { - /* find mem_list_count and file_list_count such that both are - less than MAX_ARRAY_SIZE, the sum of their lengths are - equal, and the sum of all the data written and data to be - written in the next immediate write list is less than - bufsize */ - if(mem_list_count) { - if((new_buffer_write + flat_buf->blocklens[k] + - size_wrote) > bufsize) { - end_bwr_size = new_buffer_write + - flat_buf->blocklens[k] - (bufsize - size_wrote); - new_buffer_write = bufsize - size_wrote; - } - else { - new_buffer_write += flat_buf->blocklens[k]; - end_bwr_size = flat_buf->blocklens[k]; - } - } - else { - if (bwr_size > (bufsize - size_wrote)) { - new_buffer_write = bufsize - size_wrote; - bwr_size = new_buffer_write; - } - else new_buffer_write = bwr_size; - } - mem_list_count++; - k = (k + 1)%flat_buf->count; - } /* while ((mem_list_count < MAX_ARRAY_SIZE) && - (new_buffer_write < bufsize-size_wrote)) */ - j = start_j; - new_file_write = 0; - file_list_count = 0; - while ((file_list_count < MAX_ARRAY_SIZE) && - (new_file_write < new_buffer_write)) { - if(file_list_count) { - if((new_file_write + flat_file->blocklens[j]) > - new_buffer_write) { - end_fwr_size = new_buffer_write - new_file_write; - new_file_write = new_buffer_write; - j--; - } - else { - new_file_write += flat_file->blocklens[j]; - end_fwr_size = flat_file->blocklens[j]; - } - } - else { - if (fwr_size > new_buffer_write) { - new_file_write = new_buffer_write; - fwr_size = new_file_write; - } - else new_file_write = fwr_size; - } - 
file_list_count++; - if (j < (flat_file->count - 1)) j++; - else j = 0; - - k = start_k; - if ((new_file_write < new_buffer_write) && - (file_list_count == MAX_ARRAY_SIZE)) { - new_buffer_write = 0; - mem_list_count = 0; - while (new_buffer_write < new_file_write) { - if(mem_list_count) { - if((new_buffer_write + flat_buf->blocklens[k]) > - new_file_write) { - end_bwr_size = new_file_write - - new_buffer_write; - new_buffer_write = new_file_write; - k--; - } - else { - new_buffer_write += flat_buf->blocklens[k]; - end_bwr_size = flat_buf->blocklens[k]; - } - } - else { - new_buffer_write = bwr_size; - if (bwr_size > (bufsize - size_wrote)) { - new_buffer_write = bufsize - size_wrote; - bwr_size = new_buffer_write; - } - } - mem_list_count++; - k = (k + 1)%flat_buf->count; - } /* while (new_buffer_write < new_file_write) */ - } /* if ((new_file_write < new_buffer_write) && - (file_list_count == MAX_ARRAY_SIZE)) */ - } /* while ((mem_list_count < MAX_ARRAY_SIZE) && - (new_buffer_write < bufsize-size_wrote)) */ - - /* fakes filling the writelist arrays of lengths found above */ - k = start_k; - j = start_j; - for (i=0; iblocklens[k] == end_bwr_size) - bwr_size = flat_buf->blocklens[(k+1)% - flat_buf->count]; - else { - bwr_size = flat_buf->blocklens[k] - end_bwr_size; - k--; - buf_count--; - } - } - } - buf_count++; - k = (k + 1)%flat_buf->count; - } /* for (i=0; iblocklens[j] == end_fwr_size) - fwr_size = flat_file->blocklens[(j+1)% - flat_file->count]; - else { - fwr_size = flat_file->blocklens[j] - end_fwr_size; - j--; - } - } - } - if (j < flat_file->count - 1) j++; - else { - j = 0; - n_filetypes++; - } - } /* for (i=0; iblocklens[0] ) ) || - ((mem_list_count == 1) && - (new_buffer_write < flat_buf->blocklens[0]) ) || - ((file_list_count == MAX_ARRAY_SIZE) && - (new_file_write < flat_buf->blocklens[0]) ) || - ( (mem_list_count == MAX_ARRAY_SIZE) && - (new_buffer_write < flat_file->blocklens[0])) ) - { - ADIOI_Delete_flattened(datatype); - 
ADIOI_GEN_WriteStrided_naive(fd, buf, count, datatype, - file_ptr_type, initial_off, status, error_code); - return; - } - - - mem_offsets = (PVFS_size*)ADIOI_Malloc(max_mem_list*sizeof(PVFS_size)); - mem_lengths = (int *)ADIOI_Malloc(max_mem_list*sizeof(int)); - file_offsets = (int64_t *)ADIOI_Malloc(max_file_list*sizeof(int64_t)); - file_lengths = (int32_t *)ADIOI_Malloc(max_file_list*sizeof(int32_t)); - - size_wrote = 0; - n_filetypes = st_n_filetypes; - fwr_size = st_fwr_size; - bwr_size = flat_buf->blocklens[0]; - buf_count = 0; - start_mem_offset = 0; - start_k = k = 0; - start_j = st_index; - - /* this section calculates mem_list_count and file_list_count - and also finds the possibly odd sized last array elements - in new_fwr_size and new_bwr_size */ - - while (size_wrote < bufsize) { - k = start_k; - new_buffer_write = 0; - mem_list_count = 0; - while ((mem_list_count < MAX_ARRAY_SIZE) && - (new_buffer_write < bufsize-size_wrote)) { - /* find mem_list_count and file_list_count such that both are - less than MAX_ARRAY_SIZE, the sum of their lengths are - equal, and the sum of all the data written and data to be - written in the next immediate write list is less than - bufsize */ - if(mem_list_count) { - if((new_buffer_write + flat_buf->blocklens[k] + - size_wrote) > bufsize) { - end_bwr_size = new_buffer_write + - flat_buf->blocklens[k] - (bufsize - size_wrote); - new_buffer_write = bufsize - size_wrote; - } - else { - new_buffer_write += flat_buf->blocklens[k]; - end_bwr_size = flat_buf->blocklens[k]; - } - } - else { - if (bwr_size > (bufsize - size_wrote)) { - new_buffer_write = bufsize - size_wrote; - bwr_size = new_buffer_write; - } - else new_buffer_write = bwr_size; - } - mem_list_count++; - k = (k + 1)%flat_buf->count; - } /* while ((mem_list_count < MAX_ARRAY_SIZE) && - (new_buffer_write < bufsize-size_wrote)) */ - j = start_j; - new_file_write = 0; - file_list_count = 0; - while ((file_list_count < MAX_ARRAY_SIZE) && - (new_file_write < 
new_buffer_write)) { - if(file_list_count) { - if((new_file_write + flat_file->blocklens[j]) > - new_buffer_write) { - end_fwr_size = new_buffer_write - new_file_write; - new_file_write = new_buffer_write; - j--; - } - else { - new_file_write += flat_file->blocklens[j]; - end_fwr_size = flat_file->blocklens[j]; - } - } - else { - if (fwr_size > new_buffer_write) { - new_file_write = new_buffer_write; - fwr_size = new_file_write; - } - else new_file_write = fwr_size; - } - file_list_count++; - if (j < (flat_file->count - 1)) j++; - else j = 0; - - k = start_k; - if ((new_file_write < new_buffer_write) && - (file_list_count == MAX_ARRAY_SIZE)) { - new_buffer_write = 0; - mem_list_count = 0; - while (new_buffer_write < new_file_write) { - if(mem_list_count) { - if((new_buffer_write + flat_buf->blocklens[k]) > - new_file_write) { - end_bwr_size = new_file_write - - new_buffer_write; - new_buffer_write = new_file_write; - k--; - } - else { - new_buffer_write += flat_buf->blocklens[k]; - end_bwr_size = flat_buf->blocklens[k]; - } - } - else { - new_buffer_write = bwr_size; - if (bwr_size > (bufsize - size_wrote)) { - new_buffer_write = bufsize - size_wrote; - bwr_size = new_buffer_write; - } - } - mem_list_count++; - k = (k + 1)%flat_buf->count; - } /* while (new_buffer_write < new_file_write) */ - } /* if ((new_file_write < new_buffer_write) && - (file_list_count == MAX_ARRAY_SIZE)) */ - } /* while ((mem_list_count < MAX_ARRAY_SIZE) && - (new_buffer_write < bufsize-size_wrote)) */ - - /* fills the allocated writelist arrays */ - k = start_k; - j = start_j; - for (i=0; icount) + - (int)flat_buf->indices[k]); - - if(!i) { - mem_lengths[0] = bwr_size; - mem_offsets[0] += flat_buf->blocklens[k] - bwr_size; - } - else { - if (i == (mem_list_count - 1)) { - mem_lengths[i] = end_bwr_size; - if (flat_buf->blocklens[k] == end_bwr_size) - bwr_size = flat_buf->blocklens[(k+1)% - flat_buf->count]; - else { - bwr_size = flat_buf->blocklens[k] - end_bwr_size; - k--; - buf_count--; - 
} - } - else { - mem_lengths[i] = flat_buf->blocklens[k]; - } - } - buf_count++; - k = (k + 1)%flat_buf->count; - } /* for (i=0; iindices[j] + - ((ADIO_Offset)n_filetypes) * filetype_extent; - if (!i) { - file_lengths[0] = fwr_size; - file_offsets[0] += flat_file->blocklens[j] - fwr_size; - } - else { - if (i == (file_list_count - 1)) { - file_lengths[i] = end_fwr_size; - if (flat_file->blocklens[j] == end_fwr_size) - fwr_size = flat_file->blocklens[(j+1)% - flat_file->count]; - else { - fwr_size = flat_file->blocklens[j] - end_fwr_size; - j--; - } - } - else file_lengths[i] = flat_file->blocklens[j]; - } - if (j < flat_file->count - 1) j++; - else { - j = 0; - n_filetypes++; - } - } /* for (i=0; iobject_ref, file_req, 0, - PVFS_BOTTOM, mem_req, - &(pvfs_fs->credentials), &resp_io); -#ifdef ADIOI_MPE_LOGGING - MPE_Log_event( ADIOI_MPE_write_b, 0, NULL ); -#endif - /* --BEGIN ERROR HANDLING-- */ - if (err_flag != 0) { - *error_code = MPIO_Err_create_code(MPI_SUCCESS, - MPIR_ERR_RECOVERABLE, - myname, __LINE__, - ADIOI_PVFS2_error_convert(err_flag), - "Error in PVFS_sys_write", 0); - goto error_state; - } - /* --END ERROR HANDLING-- */ - - size_wrote += new_buffer_write; - total_bytes_written += resp_io.total_completed; - start_k = k; - start_j = j; - PVFS_Request_free(&mem_req); - PVFS_Request_free(&file_req); - } /* while (size_wrote < bufsize) */ - ADIOI_Free(mem_offsets); - ADIOI_Free(mem_lengths); + return; } - ADIOI_Free(file_offsets); - ADIOI_Free(file_lengths); - - /* when incrementing fp_ind, need to also take into account the file type: - * consider an N-element 1-d subarray with a lb and ub: ( |---xxxxx-----| - * if we wrote N elements, offset needs to point at beginning of type, not - * at empty region at offset N+1) */ - if (file_ptr_type == ADIO_INDIVIDUAL) { - /* this is closer, but still incorrect for the cases where a small - * amount of a file type is "leftover" after a write */ - fd->fp_ind = disp + flat_file->indices[j] + - 
((ADIO_Offset)n_filetypes)*filetype_extent; + /* Use list I/O in the base case */ + if (fd->hints->fs_hints.pvfs2.listio_write == ADIOI_HINT_ENABLE) { + ret = ADIOI_PVFS2_WriteStridedListIO(fd, buf, count, datatype, + file_ptr_type, offset, status, error_code); + return; } - *error_code = MPI_SUCCESS; -error_state: - fd->fp_sys_posn = -1; /* set it to null. */ - -#ifdef HAVE_STATUS_SET_BYTES - MPIR_Status_set_bytes(status, datatype, bufsize); -/* This is a temporary way of filling in status. The right way is to - keep track of how much data was actually written by ADIOI_BUFFERED_WRITE. */ -#endif - - if (!buftype_is_contig) ADIOI_Delete_flattened(datatype); + /* Use classic list I/O if no hints given base case */ + ADIOI_PVFS2_OldWriteStrided(fd, buf, count, datatype, + file_ptr_type, offset, status, error_code); + return; } diff --git a/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_write_list_classic.c b/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_write_list_classic.c new file mode 100644 index 0000000000..413977eef3 --- /dev/null +++ b/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_write_list_classic.c @@ -0,0 +1,963 @@ +/* -*- Mode: C; c-basic-offset:4 ; -*- + * vim: ts=8 sts=4 sw=4 noexpandtab + * + * Copyright (C) 2008 University of Chicago. + * See COPYRIGHT notice in top-level directory. 
+ */ + +#include "adio.h" +#include "adio_extern.h" +#include "ad_pvfs2.h" + +#include "ad_pvfs2_common.h" + +void ADIOI_PVFS2_OldWriteStrided(ADIO_File fd, void *buf, int count, + MPI_Datatype datatype, int file_ptr_type, + ADIO_Offset offset, ADIO_Status *status, + int *error_code) +{ + /* as with all the other WriteStrided functions, offset is in units of + * etype relative to the filetype */ + + /* Since PVFS2 does not support file locking, can't do buffered writes + as on Unix */ + + ADIOI_Flatlist_node *flat_buf, *flat_file; + int i, j, k, bwr_size, fwr_size=0, st_index=0; + int bufsize, sum, n_etypes_in_filetype, size_in_filetype; + int n_filetypes, etype_in_filetype; + ADIO_Offset abs_off_in_filetype=0; + int filetype_size, etype_size, buftype_size; + MPI_Aint filetype_extent, buftype_extent; + int buf_count, buftype_is_contig, filetype_is_contig; + ADIO_Offset off, disp, start_off, initial_off; + int flag, st_fwr_size, st_n_filetypes; + int err_flag=0; + + int mem_list_count, file_list_count; + PVFS_size * mem_offsets; + int64_t *file_offsets; + int *mem_lengths; + int32_t *file_lengths; + int total_blks_to_write; + + int max_mem_list, max_file_list; + + int b_blks_wrote; + int f_data_wrote; + int size_wrote=0, n_write_lists, extra_blks; + + int end_bwr_size, end_fwr_size; + int start_k, start_j, new_file_write, new_buffer_write; + int start_mem_offset; + PVFS_Request mem_req, file_req; + ADIOI_PVFS2_fs * pvfs_fs; + PVFS_sysresp_io resp_io; + MPI_Offset total_bytes_written=0; + static char myname[] = "ADIOI_PVFS2_WRITESTRIDED"; + + /* note: don't increase this: several parts of PVFS2 now + * assume this limit*/ +#define MAX_ARRAY_SIZE 64 + + /* --BEGIN ERROR HANDLING-- */ + if (fd->atomicity) { + *error_code = MPIO_Err_create_code(MPI_SUCCESS, + MPIR_ERR_RECOVERABLE, + myname, __LINE__, + MPI_ERR_ARG, + "Atomic noncontiguous writes are not supported by PVFS2", 0); + return; + } + /* --END ERROR HANDLING-- */ + + ADIOI_Datatype_iscontig(datatype, 
&buftype_is_contig); + ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig); + + /* the HDF5 tests showed a bug in this list processing code (see many many + * lines down below). We added a workaround, but common HDF5 file types + * are actually contiguous and do not need the expensive workarond */ + if (!filetype_is_contig) { + flat_file = ADIOI_Flatlist; + while (flat_file->type != fd->filetype) flat_file = flat_file->next; + if (flat_file->count == 1 && !buftype_is_contig) + filetype_is_contig = 1; + } + + MPI_Type_size(fd->filetype, &filetype_size); + if ( ! filetype_size ) { + *error_code = MPI_SUCCESS; + return; + } + + MPI_Type_extent(fd->filetype, &filetype_extent); + MPI_Type_size(datatype, &buftype_size); + MPI_Type_extent(datatype, &buftype_extent); + etype_size = fd->etype_size; + + bufsize = buftype_size * count; + + pvfs_fs = (ADIOI_PVFS2_fs*)fd->fs_ptr; + + if (!buftype_is_contig && filetype_is_contig) { + +/* noncontiguous in memory, contiguous in file. */ + int64_t file_offsets; + int32_t file_lengths; + + ADIOI_Flatten_datatype(datatype); + flat_buf = ADIOI_Flatlist; + while (flat_buf->type != datatype) flat_buf = flat_buf->next; + + if (file_ptr_type == ADIO_EXPLICIT_OFFSET) { + off = fd->disp + etype_size * offset; + } + else off = fd->fp_ind; + + file_list_count = 1; + file_offsets = off; + file_lengths = 0; + total_blks_to_write = count*flat_buf->count; + b_blks_wrote = 0; + + /* allocate arrays according to max usage */ + if (total_blks_to_write > MAX_ARRAY_SIZE) + mem_list_count = MAX_ARRAY_SIZE; + else mem_list_count = total_blks_to_write; + mem_offsets = (PVFS_size*)ADIOI_Malloc(mem_list_count*sizeof(PVFS_size)); + mem_lengths = (int*)ADIOI_Malloc(mem_list_count*sizeof(int)); + + j = 0; + /* step through each block in memory, filling memory arrays */ + while (b_blks_wrote < total_blks_to_write) { + for (i=0; icount; i++) { + mem_offsets[b_blks_wrote % MAX_ARRAY_SIZE] = + /* TODO: fix this warning by casting to an integer that's + * 
the same size as a char * and /then/ casting to + * PVFS_size */ + ((PVFS_size)buf + j*buftype_extent + flat_buf->indices[i]); + mem_lengths[b_blks_wrote % MAX_ARRAY_SIZE] = + flat_buf->blocklens[i]; + file_lengths += flat_buf->blocklens[i]; + b_blks_wrote++; + if (!(b_blks_wrote % MAX_ARRAY_SIZE) || + (b_blks_wrote == total_blks_to_write)) { + + /* in the case of the last write list call, + adjust mem_list_count */ + if (b_blks_wrote == total_blks_to_write) { + mem_list_count = total_blks_to_write % MAX_ARRAY_SIZE; + /* in case last write list call fills max arrays */ + if (!mem_list_count) mem_list_count = MAX_ARRAY_SIZE; + } + err_flag = PVFS_Request_hindexed(mem_list_count, + mem_lengths, mem_offsets, + PVFS_BYTE, &mem_req); + /* --BEGIN ERROR HANDLING-- */ + if (err_flag != 0) { + *error_code = MPIO_Err_create_code(MPI_SUCCESS, + MPIR_ERR_RECOVERABLE, + myname, __LINE__, + ADIOI_PVFS2_error_convert(err_flag), + "Error in PVFS_Request_hindexed (memory)", 0); + break; + } + /* --END ERROR HANDLING-- */ + + err_flag = PVFS_Request_contiguous(file_lengths, + PVFS_BYTE, &file_req); + /* --BEGIN ERROR HANDLING-- */ + if (err_flag != 0) { + *error_code = MPIO_Err_create_code(MPI_SUCCESS, + MPIR_ERR_RECOVERABLE, + myname, __LINE__, + ADIOI_PVFS2_error_convert(err_flag), + "Error in PVFS_Request_contiguous (file)", 0); + break; + } + /* --END ERROR HANDLING-- */ + +#ifdef ADIOI_MPE_LOGGING + MPE_Log_event( ADIOI_MPE_write_a, 0, NULL ); +#endif + err_flag = PVFS_sys_write(pvfs_fs->object_ref, file_req, + file_offsets, PVFS_BOTTOM, + mem_req, + &(pvfs_fs->credentials), + &resp_io); +#ifdef ADIOI_MPE_LOGGING + MPE_Log_event( ADIOI_MPE_write_b, 0, NULL ); +#endif + total_bytes_written += resp_io.total_completed; + + /* in the case of error or the last write list call, + * leave here */ + /* --BEGIN ERROR HANDLING-- */ + if (err_flag) { + *error_code = MPIO_Err_create_code(MPI_SUCCESS, + MPIR_ERR_RECOVERABLE, + myname, __LINE__, + ADIOI_PVFS2_error_convert(err_flag), + 
"Error in PVFS_sys_write", 0); + break; + } + /* --END ERROR HANDLING-- */ + if (b_blks_wrote == total_blks_to_write) break; + + file_offsets += file_lengths; + file_lengths = 0; + PVFS_Request_free(&mem_req); + PVFS_Request_free(&file_req); + } + } /* for (i=0; icount; i++) */ + j++; + } /* while (b_blks_wrote < total_blks_to_write) */ + ADIOI_Free(mem_offsets); + ADIOI_Free(mem_lengths); + + if (file_ptr_type == ADIO_INDIVIDUAL) + fd->fp_ind += total_bytes_written; + + if (!err_flag) *error_code = MPI_SUCCESS; + + fd->fp_sys_posn = -1; /* clear this. */ + +#ifdef HAVE_STATUS_SET_BYTES + MPIR_Status_set_bytes(status, datatype, bufsize); +/* This is a temporary way of filling in status. The right way is to + keep track of how much data was actually written by ADIOI_BUFFERED_WRITE. */ +#endif + + ADIOI_Delete_flattened(datatype); + return; + } /* if (!buftype_is_contig && filetype_is_contig) */ + + /* already know that file is noncontiguous from above */ + /* noncontiguous in file */ + +/* filetype already flattened in ADIO_Open */ + flat_file = ADIOI_Flatlist; + while (flat_file->type != fd->filetype) flat_file = flat_file->next; + + disp = fd->disp; + initial_off = offset; + + /* for each case - ADIO_Individual pointer or explicit, find offset + (file offset in bytes), n_filetypes (how many filetypes into file + to start), fwr_size (remaining amount of data in present file + block), and st_index (start point in terms of blocks in starting + filetype) */ + if (file_ptr_type == ADIO_INDIVIDUAL) { + offset = fd->fp_ind; /* in bytes */ + n_filetypes = -1; + flag = 0; + while (!flag) { + n_filetypes++; + for (i=0; icount; i++) { + if (disp + flat_file->indices[i] + + ((ADIO_Offset) n_filetypes)*filetype_extent + + flat_file->blocklens[i] >= offset) { + st_index = i; + fwr_size = disp + flat_file->indices[i] + + ((ADIO_Offset) n_filetypes)*filetype_extent + + flat_file->blocklens[i] - offset; + flag = 1; + break; + } + } + } /* while (!flag) */ + } /* if (file_ptr_type 
== ADIO_INDIVIDUAL) */ + else { + n_etypes_in_filetype = filetype_size/etype_size; + n_filetypes = (int) (offset / n_etypes_in_filetype); + etype_in_filetype = (int) (offset % n_etypes_in_filetype); + size_in_filetype = etype_in_filetype * etype_size; + + sum = 0; + for (i=0; icount; i++) { + sum += flat_file->blocklens[i]; + if (sum > size_in_filetype) { + st_index = i; + fwr_size = sum - size_in_filetype; + abs_off_in_filetype = flat_file->indices[i] + + size_in_filetype - (sum - flat_file->blocklens[i]); + break; + } + } + + /* abs. offset in bytes in the file */ + offset = disp + ((ADIO_Offset) n_filetypes)*filetype_extent + + abs_off_in_filetype; + } /* else [file_ptr_type != ADIO_INDIVIDUAL] */ + + start_off = offset; + st_fwr_size = fwr_size; + st_n_filetypes = n_filetypes; + + if (buftype_is_contig && !filetype_is_contig) { + +/* contiguous in memory, noncontiguous in file. should be the most + common case. */ + + int mem_lengths; + char *mem_offsets; + + i = 0; + j = st_index; + off = offset; + n_filetypes = st_n_filetypes; + + mem_list_count = 1; + + /* determine how many blocks in file to write */ + f_data_wrote = ADIOI_MIN(st_fwr_size, bufsize); + total_blks_to_write = 1; + if (j < (flat_file->count -1)) j++; + else { + j = 0; + n_filetypes++; + } + while (f_data_wrote < bufsize) { + f_data_wrote += flat_file->blocklens[j]; + total_blks_to_write++; + if (j<(flat_file->count-1)) j++; + else j = 0; + } + + j = st_index; + n_filetypes = st_n_filetypes; + n_write_lists = total_blks_to_write/MAX_ARRAY_SIZE; + extra_blks = total_blks_to_write%MAX_ARRAY_SIZE; + + mem_offsets = buf; + mem_lengths = 0; + + /* if at least one full writelist, allocate file arrays + at max array size and don't free until very end */ + if (n_write_lists) { + file_offsets = (int64_t*)ADIOI_Malloc(MAX_ARRAY_SIZE* + sizeof(int64_t)); + file_lengths = (int32_t*)ADIOI_Malloc(MAX_ARRAY_SIZE* + sizeof(int32_t)); + } + /* if there's no full writelist allocate file arrays according + to 
needed size (extra_blks) */ + else { + file_offsets = (int64_t*)ADIOI_Malloc(extra_blks* + sizeof(int64_t)); + file_lengths = (int32_t*)ADIOI_Malloc(extra_blks* + sizeof(int32_t)); + } + + /* for file arrays that are of MAX_ARRAY_SIZE, build arrays */ + for (i=0; iindices[j]; + file_lengths[k] = flat_file->blocklens[j]; + mem_lengths += file_lengths[k]; + } + if (j<(flat_file->count - 1)) j++; + else { + j = 0; + n_filetypes++; + } + } /* for (k=0; kobject_ref, file_req, 0, + mem_offsets, mem_req, + &(pvfs_fs->credentials), &resp_io); +#ifdef ADIOI_MPE_LOGGING + MPE_Log_event( ADIOI_MPE_write_b, 0, NULL ); +#endif + /* --BEGIN ERROR HANDLING-- */ + if (err_flag != 0) { + *error_code = MPIO_Err_create_code(MPI_SUCCESS, + MPIR_ERR_RECOVERABLE, + myname, __LINE__, + ADIOI_PVFS2_error_convert(err_flag), + "Error in PVFS_sys_write", 0); + goto error_state; + } + /* --END ERROR HANDLING-- */ + total_bytes_written += resp_io.total_completed; + + mem_offsets += mem_lengths; + mem_lengths = 0; + PVFS_Request_free(&file_req); + PVFS_Request_free(&mem_req); + + } /* for (i=0; iindices[j]; + if (k == (extra_blks - 1)) { + file_lengths[k] = bufsize - (int32_t) mem_lengths + - (int32_t) mem_offsets + (int32_t) buf; + } + else file_lengths[k] = flat_file->blocklens[j]; + } /* if(i || k) */ + mem_lengths += file_lengths[k]; + if (j<(flat_file->count - 1)) j++; + else { + j = 0; + n_filetypes++; + } + } /* for (k=0; kobject_ref, file_req, 0, + mem_offsets, mem_req, + &(pvfs_fs->credentials), &resp_io); +#ifdef ADIOI_MPE_LOGGING + MPE_Log_event( ADIOI_MPE_write_b, 0, NULL ); +#endif + /* --BEGIN ERROR HANDLING-- */ + if (err_flag != 0) { + *error_code = MPIO_Err_create_code(MPI_SUCCESS, + MPIR_ERR_RECOVERABLE, + myname, __LINE__, + ADIOI_PVFS2_error_convert(err_flag), + "Error in PVFS_sys_write", 0); + goto error_state; + } + /* --END ERROR HANDLING-- */ + total_bytes_written += resp_io.total_completed; + PVFS_Request_free(&mem_req); + PVFS_Request_free(&file_req); + } + } + else { 
+ /* noncontiguous in memory as well as in file */ + + ADIOI_Flatten_datatype(datatype); + flat_buf = ADIOI_Flatlist; + while (flat_buf->type != datatype) flat_buf = flat_buf->next; + + size_wrote = 0; + n_filetypes = st_n_filetypes; + fwr_size = st_fwr_size; + bwr_size = flat_buf->blocklens[0]; + buf_count = 0; + start_mem_offset = 0; + start_k = k = 0; + start_j = st_index; + max_mem_list = 0; + max_file_list = 0; + + /* run through and file max_file_list and max_mem_list so that you + can allocate the file and memory arrays less than MAX_ARRAY_SIZE + if possible */ + + while (size_wrote < bufsize) { + k = start_k; + new_buffer_write = 0; + mem_list_count = 0; + while ((mem_list_count < MAX_ARRAY_SIZE) && + (new_buffer_write < bufsize-size_wrote)) { + /* find mem_list_count and file_list_count such that both are + less than MAX_ARRAY_SIZE, the sum of their lengths are + equal, and the sum of all the data written and data to be + written in the next immediate write list is less than + bufsize */ + if(mem_list_count) { + if((new_buffer_write + flat_buf->blocklens[k] + + size_wrote) > bufsize) { + end_bwr_size = new_buffer_write + + flat_buf->blocklens[k] - (bufsize - size_wrote); + new_buffer_write = bufsize - size_wrote; + } + else { + new_buffer_write += flat_buf->blocklens[k]; + end_bwr_size = flat_buf->blocklens[k]; + } + } + else { + if (bwr_size > (bufsize - size_wrote)) { + new_buffer_write = bufsize - size_wrote; + bwr_size = new_buffer_write; + } + else new_buffer_write = bwr_size; + } + mem_list_count++; + k = (k + 1)%flat_buf->count; + } /* while ((mem_list_count < MAX_ARRAY_SIZE) && + (new_buffer_write < bufsize-size_wrote)) */ + j = start_j; + new_file_write = 0; + file_list_count = 0; + while ((file_list_count < MAX_ARRAY_SIZE) && + (new_file_write < new_buffer_write)) { + if(file_list_count) { + if((new_file_write + flat_file->blocklens[j]) > + new_buffer_write) { + end_fwr_size = new_buffer_write - new_file_write; + new_file_write = 
new_buffer_write; + j--; + } + else { + new_file_write += flat_file->blocklens[j]; + end_fwr_size = flat_file->blocklens[j]; + } + } + else { + if (fwr_size > new_buffer_write) { + new_file_write = new_buffer_write; + fwr_size = new_file_write; + } + else new_file_write = fwr_size; + } + file_list_count++; + if (j < (flat_file->count - 1)) j++; + else j = 0; + + k = start_k; + if ((new_file_write < new_buffer_write) && + (file_list_count == MAX_ARRAY_SIZE)) { + new_buffer_write = 0; + mem_list_count = 0; + while (new_buffer_write < new_file_write) { + if(mem_list_count) { + if((new_buffer_write + flat_buf->blocklens[k]) > + new_file_write) { + end_bwr_size = new_file_write - + new_buffer_write; + new_buffer_write = new_file_write; + k--; + } + else { + new_buffer_write += flat_buf->blocklens[k]; + end_bwr_size = flat_buf->blocklens[k]; + } + } + else { + new_buffer_write = bwr_size; + if (bwr_size > (bufsize - size_wrote)) { + new_buffer_write = bufsize - size_wrote; + bwr_size = new_buffer_write; + } + } + mem_list_count++; + k = (k + 1)%flat_buf->count; + } /* while (new_buffer_write < new_file_write) */ + } /* if ((new_file_write < new_buffer_write) && + (file_list_count == MAX_ARRAY_SIZE)) */ + } /* while ((mem_list_count < MAX_ARRAY_SIZE) && + (new_buffer_write < bufsize-size_wrote)) */ + + /* fakes filling the writelist arrays of lengths found above */ + k = start_k; + j = start_j; + for (i=0; iblocklens[k] == end_bwr_size) + bwr_size = flat_buf->blocklens[(k+1)% + flat_buf->count]; + else { + bwr_size = flat_buf->blocklens[k] - end_bwr_size; + k--; + buf_count--; + } + } + } + buf_count++; + k = (k + 1)%flat_buf->count; + } /* for (i=0; iblocklens[j] == end_fwr_size) + fwr_size = flat_file->blocklens[(j+1)% + flat_file->count]; + else { + fwr_size = flat_file->blocklens[j] - end_fwr_size; + j--; + } + } + } + if (j < flat_file->count - 1) j++; + else { + j = 0; + n_filetypes++; + } + } /* for (i=0; iblocklens[0] ) ) || + ((mem_list_count == 1) && + 
(new_buffer_write < flat_buf->blocklens[0]) ) || + ((file_list_count == MAX_ARRAY_SIZE) && + (new_file_write < flat_buf->blocklens[0]) ) || + ( (mem_list_count == MAX_ARRAY_SIZE) && + (new_buffer_write < flat_file->blocklens[0])) ) + { + ADIOI_Delete_flattened(datatype); + ADIOI_GEN_WriteStrided_naive(fd, buf, count, datatype, + file_ptr_type, initial_off, status, error_code); + return; + } + + + mem_offsets = (PVFS_size*)ADIOI_Malloc(max_mem_list*sizeof(PVFS_size)); + mem_lengths = (int *)ADIOI_Malloc(max_mem_list*sizeof(int)); + file_offsets = (int64_t *)ADIOI_Malloc(max_file_list*sizeof(int64_t)); + file_lengths = (int32_t *)ADIOI_Malloc(max_file_list*sizeof(int32_t)); + + size_wrote = 0; + n_filetypes = st_n_filetypes; + fwr_size = st_fwr_size; + bwr_size = flat_buf->blocklens[0]; + buf_count = 0; + start_mem_offset = 0; + start_k = k = 0; + start_j = st_index; + + /* this section calculates mem_list_count and file_list_count + and also finds the possibly odd sized last array elements + in new_fwr_size and new_bwr_size */ + + while (size_wrote < bufsize) { + k = start_k; + new_buffer_write = 0; + mem_list_count = 0; + while ((mem_list_count < MAX_ARRAY_SIZE) && + (new_buffer_write < bufsize-size_wrote)) { + /* find mem_list_count and file_list_count such that both are + less than MAX_ARRAY_SIZE, the sum of their lengths are + equal, and the sum of all the data written and data to be + written in the next immediate write list is less than + bufsize */ + if(mem_list_count) { + if((new_buffer_write + flat_buf->blocklens[k] + + size_wrote) > bufsize) { + end_bwr_size = new_buffer_write + + flat_buf->blocklens[k] - (bufsize - size_wrote); + new_buffer_write = bufsize - size_wrote; + } + else { + new_buffer_write += flat_buf->blocklens[k]; + end_bwr_size = flat_buf->blocklens[k]; + } + } + else { + if (bwr_size > (bufsize - size_wrote)) { + new_buffer_write = bufsize - size_wrote; + bwr_size = new_buffer_write; + } + else new_buffer_write = bwr_size; + } + 
mem_list_count++; + k = (k + 1)%flat_buf->count; + } /* while ((mem_list_count < MAX_ARRAY_SIZE) && + (new_buffer_write < bufsize-size_wrote)) */ + j = start_j; + new_file_write = 0; + file_list_count = 0; + while ((file_list_count < MAX_ARRAY_SIZE) && + (new_file_write < new_buffer_write)) { + if(file_list_count) { + if((new_file_write + flat_file->blocklens[j]) > + new_buffer_write) { + end_fwr_size = new_buffer_write - new_file_write; + new_file_write = new_buffer_write; + j--; + } + else { + new_file_write += flat_file->blocklens[j]; + end_fwr_size = flat_file->blocklens[j]; + } + } + else { + if (fwr_size > new_buffer_write) { + new_file_write = new_buffer_write; + fwr_size = new_file_write; + } + else new_file_write = fwr_size; + } + file_list_count++; + if (j < (flat_file->count - 1)) j++; + else j = 0; + + k = start_k; + if ((new_file_write < new_buffer_write) && + (file_list_count == MAX_ARRAY_SIZE)) { + new_buffer_write = 0; + mem_list_count = 0; + while (new_buffer_write < new_file_write) { + if(mem_list_count) { + if((new_buffer_write + flat_buf->blocklens[k]) > + new_file_write) { + end_bwr_size = new_file_write - + new_buffer_write; + new_buffer_write = new_file_write; + k--; + } + else { + new_buffer_write += flat_buf->blocklens[k]; + end_bwr_size = flat_buf->blocklens[k]; + } + } + else { + new_buffer_write = bwr_size; + if (bwr_size > (bufsize - size_wrote)) { + new_buffer_write = bufsize - size_wrote; + bwr_size = new_buffer_write; + } + } + mem_list_count++; + k = (k + 1)%flat_buf->count; + } /* while (new_buffer_write < new_file_write) */ + } /* if ((new_file_write < new_buffer_write) && + (file_list_count == MAX_ARRAY_SIZE)) */ + } /* while ((mem_list_count < MAX_ARRAY_SIZE) && + (new_buffer_write < bufsize-size_wrote)) */ + + /* fills the allocated writelist arrays */ + k = start_k; + j = start_j; + for (i=0; icount) + + (int)flat_buf->indices[k]); + + if(!i) { + mem_lengths[0] = bwr_size; + mem_offsets[0] += flat_buf->blocklens[k] - bwr_size; 
+ } + else { + if (i == (mem_list_count - 1)) { + mem_lengths[i] = end_bwr_size; + if (flat_buf->blocklens[k] == end_bwr_size) + bwr_size = flat_buf->blocklens[(k+1)% + flat_buf->count]; + else { + bwr_size = flat_buf->blocklens[k] - end_bwr_size; + k--; + buf_count--; + } + } + else { + mem_lengths[i] = flat_buf->blocklens[k]; + } + } + buf_count++; + k = (k + 1)%flat_buf->count; + } /* for (i=0; iindices[j] + + ((ADIO_Offset)n_filetypes) * filetype_extent; + if (!i) { + file_lengths[0] = fwr_size; + file_offsets[0] += flat_file->blocklens[j] - fwr_size; + } + else { + if (i == (file_list_count - 1)) { + file_lengths[i] = end_fwr_size; + if (flat_file->blocklens[j] == end_fwr_size) + fwr_size = flat_file->blocklens[(j+1)% + flat_file->count]; + else { + fwr_size = flat_file->blocklens[j] - end_fwr_size; + j--; + } + } + else file_lengths[i] = flat_file->blocklens[j]; + } + if (j < flat_file->count - 1) j++; + else { + j = 0; + n_filetypes++; + } + } /* for (i=0; iobject_ref, file_req, 0, + PVFS_BOTTOM, mem_req, + &(pvfs_fs->credentials), &resp_io); +#ifdef ADIOI_MPE_LOGGING + MPE_Log_event( ADIOI_MPE_write_b, 0, NULL ); +#endif + /* --BEGIN ERROR HANDLING-- */ + if (err_flag != 0) { + *error_code = MPIO_Err_create_code(MPI_SUCCESS, + MPIR_ERR_RECOVERABLE, + myname, __LINE__, + ADIOI_PVFS2_error_convert(err_flag), + "Error in PVFS_sys_write", 0); + goto error_state; + } + /* --END ERROR HANDLING-- */ + + size_wrote += new_buffer_write; + total_bytes_written += resp_io.total_completed; + start_k = k; + start_j = j; + PVFS_Request_free(&mem_req); + PVFS_Request_free(&file_req); + } /* while (size_wrote < bufsize) */ + ADIOI_Free(mem_offsets); + ADIOI_Free(mem_lengths); + } + /* when incrementing fp_ind, need to also take into account the file type: + * consider an N-element 1-d subarray with a lb and ub: ( |---xxxxx-----| + * if we wrote N elements, offset needs to point at beginning of type, not + * at empty region at offset N+1). 
+ * + * As we discussed on mpich-discuss in may/june 2009, the code below might + * look weird, but by putting fp_ind at the last byte written, the next + * time we run through the strided code we'll update the fp_ind to the + * right location. */ + if (file_ptr_type == ADIO_INDIVIDUAL) { + fd->fp_ind = file_offsets[file_list_count-1]+ + file_lengths[file_list_count-1]; + } + ADIOI_Free(file_offsets); + ADIOI_Free(file_lengths); + + *error_code = MPI_SUCCESS; + +error_state: + fd->fp_sys_posn = -1; /* set it to null. */ + +#ifdef HAVE_STATUS_SET_BYTES + MPIR_Status_set_bytes(status, datatype, bufsize); +/* This is a temporary way of filling in status. The right way is to + keep track of how much data was actually written by ADIOI_BUFFERED_WRITE. */ +#endif + + if (!buftype_is_contig) ADIOI_Delete_flattened(datatype); +} diff --git a/ompi/mca/io/romio/romio/adio/ad_testfs/ad_testfs.c b/ompi/mca/io/romio/romio/adio/ad_testfs/ad_testfs.c index 29c6835345..36286c7a6e 100644 --- a/ompi/mca/io/romio/romio/adio/ad_testfs/ad_testfs.c +++ b/ompi/mca/io/romio/romio/adio/ad_testfs/ad_testfs.c @@ -12,6 +12,7 @@ struct ADIOI_Fns_struct ADIO_TESTFS_operations = { ADIOI_TESTFS_Open, /* Open */ + ADIOI_GEN_OpenColl, /* OpenColl */ ADIOI_TESTFS_ReadContig, /* ReadContig */ ADIOI_TESTFS_WriteContig, /* WriteContig */ ADIOI_TESTFS_ReadStridedColl, /* ReadStridedColl */ @@ -33,4 +34,5 @@ struct ADIOI_Fns_struct ADIO_TESTFS_operations = { ADIOI_TESTFS_Flush, /* Flush */ ADIOI_TESTFS_Resize, /* Resize */ ADIOI_TESTFS_Delete, /* Delete */ + ADIOI_GEN_Feature, /* Features */ }; diff --git a/ompi/mca/io/romio/romio/adio/ad_testfs/ad_testfs_hints.c b/ompi/mca/io/romio/romio/adio/ad_testfs/ad_testfs_hints.c index 9c72bcda7d..a7b1b7540f 100644 --- a/ompi/mca/io/romio/romio/adio/ad_testfs/ad_testfs_hints.c +++ b/ompi/mca/io/romio/romio/adio/ad_testfs/ad_testfs_hints.c @@ -7,7 +7,9 @@ #include "ad_testfs.h" #include "adioi.h" - +#ifdef ROMIO_BGL +#include "../ad_bgl/ad_bgl.h" +#endif void 
ADIOI_TESTFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) { int myrank, nprocs; @@ -21,5 +23,10 @@ void ADIOI_TESTFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) FPRINTF(stdout, "[%d/%d] calling ADIOI_GEN_SetInfo\n", myrank, nprocs); +#ifdef ROMIO_BGL /* BlueGene support for pvfs through ufs */ + /* BlueGene hack: force testfs to mimic BlueGene hints */ + ADIOI_BGL_SetInfo(fd, users_info, error_code); +#else ADIOI_GEN_SetInfo(fd, users_info, error_code); +#endif } diff --git a/ompi/mca/io/romio/romio/adio/ad_testfs/ad_testfs_read.c b/ompi/mca/io/romio/romio/adio/ad_testfs/ad_testfs_read.c index 5966f81dfe..bf911d460c 100644 --- a/ompi/mca/io/romio/romio/adio/ad_testfs/ad_testfs_read.c +++ b/ompi/mca/io/romio/romio/adio/ad_testfs/ad_testfs_read.c @@ -26,10 +26,6 @@ void ADIOI_TESTFS_ReadContig(ADIO_File fd, void *buf, int count, offset = fd->fp_ind; fd->fp_ind += datatype_size * count; fd->fp_sys_posn = fd->fp_ind; -#if 0 - FPRINTF(stdout, "[%d/%d] new file position is %lld\n", myrank, - nprocs, (long long) fd->fp_ind); -#endif } else { fd->fp_sys_posn = offset + datatype_size * count; diff --git a/ompi/mca/io/romio/romio/adio/ad_testfs/ad_testfs_seek.c b/ompi/mca/io/romio/romio/adio/ad_testfs/ad_testfs_seek.c index 93bf3757f4..4b23bc7940 100644 --- a/ompi/mca/io/romio/romio/adio/ad_testfs/ad_testfs_seek.c +++ b/ompi/mca/io/romio/romio/adio/ad_testfs/ad_testfs_seek.c @@ -26,8 +26,8 @@ ADIO_Offset ADIOI_TESTFS_SeekIndividual(ADIO_File fd, ADIO_Offset offset, ADIO_Offset off; ADIOI_Flatlist_node *flat_file; int i, n_etypes_in_filetype, n_filetypes, etype_in_filetype; - ADIO_Offset abs_off_in_filetype=0; - int size_in_filetype, sum; + ADIO_Offset abs_off_in_filetype=0, sum; + int size_in_filetype; int filetype_size, etype_size, filetype_is_contig; MPI_Aint filetype_extent; @@ -54,6 +54,7 @@ ADIO_Offset ADIOI_TESTFS_SeekIndividual(ADIO_File fd, ADIO_Offset offset, } n_etypes_in_filetype = filetype_size/etype_size; + ADIOI_Assert((offset / 
n_etypes_in_filetype) == (int) (offset / n_etypes_in_filetype)); n_filetypes = (int) (offset / n_etypes_in_filetype); etype_in_filetype = (int) (offset % n_etypes_in_filetype); size_in_filetype = etype_in_filetype * etype_size; @@ -70,7 +71,7 @@ ADIO_Offset ADIOI_TESTFS_SeekIndividual(ADIO_File fd, ADIO_Offset offset, } /* abs. offset in bytes in the file */ - off = fd->disp + (ADIO_Offset) n_filetypes * filetype_extent + + off = fd->disp + (ADIO_Offset)n_filetypes * (ADIO_Offset)filetype_extent + abs_off_in_filetype; } diff --git a/ompi/mca/io/romio/romio/adio/ad_testfs/ad_testfs_write.c b/ompi/mca/io/romio/romio/adio/ad_testfs/ad_testfs_write.c index 3306b6edcf..c3eea50f60 100644 --- a/ompi/mca/io/romio/romio/adio/ad_testfs/ad_testfs_write.c +++ b/ompi/mca/io/romio/romio/adio/ad_testfs/ad_testfs_write.c @@ -23,7 +23,7 @@ void ADIOI_TESTFS_WriteContig(ADIO_File fd, void *buf, int count, nprocs, fd->filename); FPRINTF(stdout, "[%d/%d] writing (buf = %p, loc = %lld, sz = %lld)\n", myrank, nprocs, buf, (long long) offset, - (long long) datatype_size * count); + (long long)datatype_size * (long long)count); if (file_ptr_type != ADIO_EXPLICIT_OFFSET) { diff --git a/ompi/mca/io/romio/romio/adio/ad_ufs/ad_ufs.c b/ompi/mca/io/romio/romio/adio/ad_ufs/ad_ufs.c index ce7bd03094..014222a508 100644 --- a/ompi/mca/io/romio/romio/adio/ad_ufs/ad_ufs.c +++ b/ompi/mca/io/romio/romio/adio/ad_ufs/ad_ufs.c @@ -12,6 +12,7 @@ struct ADIOI_Fns_struct ADIO_UFS_operations = { ADIOI_UFS_Open, /* Open */ + ADIOI_GEN_OpenColl, /* OpenColl */ ADIOI_GEN_ReadContig, /* ReadContig */ ADIOI_GEN_WriteContig, /* WriteContig */ ADIOI_GEN_ReadStridedColl, /* ReadStridedColl */ @@ -38,4 +39,5 @@ struct ADIOI_Fns_struct ADIO_UFS_operations = { ADIOI_GEN_Flush, /* Flush */ ADIOI_GEN_Resize, /* Resize */ ADIOI_GEN_Delete, /* Delete */ + ADIOI_GEN_Feature, /* Features */ }; diff --git a/ompi/mca/io/romio/romio/adio/ad_xfs/Makefile.am b/ompi/mca/io/romio/romio/adio/ad_xfs/Makefile.am index 
8541ee271d..b077408789 100644 --- a/ompi/mca/io/romio/romio/adio/ad_xfs/Makefile.am +++ b/ompi/mca/io/romio/romio/adio/ad_xfs/Makefile.am @@ -22,13 +22,9 @@ noinst_LTLIBRARIES = libadio_xfs.la libadio_xfs_la_SOURCES = \ ad_xfs.c \ ad_xfs.h \ - ad_xfs_done.c \ ad_xfs_fcntl.c \ ad_xfs_hints.c \ - ad_xfs_iread.c \ - ad_xfs_iwrite.c \ ad_xfs_open.c \ ad_xfs_read.c \ ad_xfs_resize.c \ - ad_xfs_wait.c \ ad_xfs_write.c diff --git a/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs.c b/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs.c index c04ef8fd4f..07730aa2d7 100644 --- a/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs.c +++ b/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs.c @@ -12,6 +12,7 @@ struct ADIOI_Fns_struct ADIO_XFS_operations = { ADIOI_XFS_Open, /* Open */ + ADIOI_GEN_OpenColl, /* OpenColl */ ADIOI_XFS_ReadContig, /* ReadContig */ ADIOI_XFS_WriteContig, /* WriteContig */ ADIOI_GEN_ReadStridedColl, /* ReadStridedColl */ @@ -22,15 +23,21 @@ struct ADIOI_Fns_struct ADIO_XFS_operations = { ADIOI_GEN_ReadStrided, /* ReadStrided */ ADIOI_GEN_WriteStrided, /* WriteStrided */ ADIOI_GEN_Close, /* Close */ - ADIOI_XFS_IreadContig, /* IreadContig */ - ADIOI_XFS_IwriteContig, /* IwriteContig */ - ADIOI_XFS_ReadDone, /* ReadDone */ - ADIOI_XFS_WriteDone, /* WriteDone */ - ADIOI_XFS_ReadComplete, /* ReadComplete */ - ADIOI_XFS_WriteComplete, /* WriteComplete */ +#if defined(ROMIO_HAVE_WORKING_AIO) + ADIOI_GEN_IreadContig, /* IreadContig */ + ADIOI_GEN_IwriteContig, /* IwriteContig */ +#else + ADIOI_FAKE_IreadContig, /* IreadContig */ + ADIOI_FAKE_IwriteContig, /* IwriteContig */ +#endif /* ROMIO_HAVE_WORKING_AIO */ + ADIOI_GEN_IODone, /* ReadDone */ + ADIOI_GEN_IODone, /* WriteDone */ + ADIOI_GEN_IOComplete, /* ReadComplete */ + ADIOI_GEN_IOComplete, /* WriteComplete */ ADIOI_GEN_IreadStrided, /* IreadStrided */ ADIOI_GEN_IwriteStrided, /* IwriteStrided */ ADIOI_GEN_Flush, /* Flush */ ADIOI_XFS_Resize, /* Resize */ ADIOI_GEN_Delete, /* Delete */ + ADIOI_GEN_Feature, /* Features */ }; diff 
--git a/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs.h b/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs.h index 2d81688ee2..c529abcd91 100644 --- a/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs.h +++ b/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs.h @@ -8,20 +8,19 @@ #ifndef AD_XFS_INCLUDE #define AD_XFS_INCLUDE +#define _XOPEN_SOURCE 500 #include #include #include #include "adio.h" -#include -int ADIOI_XFS_aio(ADIO_File fd, void *buf, int len, ADIO_Offset offset, - int wr, void *handle); - -#if (defined(HAVE_PREAD64) && (_ABIO32 == 1)) -# define pread pread64 -# define pwrite pwrite64 +#if defined(MPISGI) +#include "xfs/xfs_fs.h" +#ifndef __USE_LARGEFILE64 +#define __USE_LARGEFILE64 +#endif +typedef struct aiocb64 aiocb64_t; #endif -/* above needed for IRIX 6.5 */ void ADIOI_XFS_Open(ADIO_File fd, int *error_code); void ADIOI_XFS_Close(ADIO_File fd, int *error_code); @@ -33,22 +32,6 @@ void ADIOI_XFS_WriteContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int *error_code); -void ADIOI_XFS_IwriteContig(ADIO_File fd, void *buf, int count, - MPI_Datatype datatype, int file_ptr_type, - ADIO_Offset offset, ADIO_Request *request, int - *error_code); -void ADIOI_XFS_IreadContig(ADIO_File fd, void *buf, int count, - MPI_Datatype datatype, int file_ptr_type, - ADIO_Offset offset, ADIO_Request *request, int - *error_code); -int ADIOI_XFS_ReadDone(ADIO_Request *request, ADIO_Status *status, int - *error_code); -int ADIOI_XFS_WriteDone(ADIO_Request *request, ADIO_Status *status, int - *error_code); -void ADIOI_XFS_ReadComplete(ADIO_Request *request, ADIO_Status *status, int - *error_code); -void ADIOI_XFS_WriteComplete(ADIO_Request *request, ADIO_Status *status, - int *error_code); void ADIOI_XFS_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int *error_code); void ADIOI_XFS_Resize(ADIO_File fd, ADIO_Offset size, int *error_code); diff --git a/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_done.c 
b/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_done.c deleted file mode 100644 index ebdca50eed..0000000000 --- a/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_done.c +++ /dev/null @@ -1,69 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; -*- */ -/* - * - * Copyright (C) 1997 University of Chicago. - * See COPYRIGHT notice in top-level directory. - */ - -#include "ad_xfs.h" - -int ADIOI_XFS_ReadDone(ADIO_Request *request, ADIO_Status *status, - int *error_code) -{ - int err, done=0; - static char myname[] = "ADIOI_XFS_READDONE"; - - if (*request == ADIO_REQUEST_NULL) { - *error_code = MPI_SUCCESS; - return 1; - } - - if ((*request)->queued) { - errno = aio_error64((const aiocb64_t *) (*request)->handle); - if (errno == EINPROGRESS) { - done = 0; - *error_code = MPI_SUCCESS; - } - else { - err = aio_return64((aiocb64_t *) (*request)->handle); - (*request)->nbytes = err; - errno = aio_error64((const aiocb64_t *) (*request)->handle); - - done = 1; - if (err == -1) { - *error_code = MPIO_Err_create_code(MPI_SUCCESS, - MPIR_ERR_RECOVERABLE, myname, - __LINE__, MPI_ERR_IO, "**io", - "**io %s", strerror(errno)); - } - else *error_code = MPI_SUCCESS; - } - } /* if ((*request)->queued) */ - else { - done = 1; - *error_code = MPI_SUCCESS; - } -#ifdef HAVE_STATUS_SET_BYTES - if (done && ((*request)->nbytes != -1)) - MPIR_Status_set_bytes(status, (*request)->datatype, (*request)->nbytes); -#endif - - if (done) { - /* if request is still queued in the system, it is also there - on ADIOI_Async_list. Delete it from there. 
*/ - if ((*request)->queued) ADIOI_Del_req_from_list(request); - - (*request)->fd->async_count--; - if ((*request)->handle) ADIOI_Free((*request)->handle); - ADIOI_Free_request((ADIOI_Req_node *) (*request)); - *request = ADIO_REQUEST_NULL; - /* status to be filled */ - } - return done; -} - - -int ADIOI_XFS_WriteDone(ADIO_Request *request, ADIO_Status *status, int *error_code) -{ - return ADIOI_XFS_ReadDone(request, status, error_code); -} diff --git a/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_fcntl.c b/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_fcntl.c index e9194f4bbd..7c49da24da 100644 --- a/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_fcntl.c +++ b/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_fcntl.c @@ -7,6 +7,11 @@ #include "ad_xfs.h" #include "adio_extern.h" +#include + +#ifndef HAVE_LSEEK64 +#define lseek64 lseek +#endif void ADIOI_XFS_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int *error_code) { @@ -37,7 +42,7 @@ void ADIOI_XFS_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int *er fl.l_len = fcntl_struct->diskspace; #if defined(LINUX) && defined(MPISGI) - err = fcntl(fd->fd_sys, XFS_IOC_RESVSP64, &fl); + err = ioctl(fd->fd_sys, XFS_IOC_RESVSP64, &fl); #else err = fcntl(fd->fd_sys, F_RESVSP64, &fl); #endif diff --git a/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_hints.c b/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_hints.c index d6bff11337..97909b3819 100644 --- a/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_hints.c +++ b/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_hints.c @@ -8,36 +8,76 @@ #include "ad_xfs.h" #include "adio_extern.h" +static unsigned xfs_direct_read_chunk_size; +static unsigned xfs_direct_write_chunk_size; + void ADIOI_XFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) { - char *value; + char *value, * c; int flag; + static char xfs_initialized = 0; if (fd->info == MPI_INFO_NULL) MPI_Info_create(&(fd->info)); - /* the nightly builds say somthing is calling MPI_Info_set w/ a null info, - * so protect the 
calls to MPI_Info_set */ - if (fd->info != MPI_INFO_NULL ) { - MPI_Info_set(fd->info, "direct_read", "false"); - MPI_Info_set(fd->info, "direct_write", "false"); - fd->direct_read = fd->direct_write = 0; - } - - /* has user specified values for keys "direct_read" and "direct wirte"? */ + ADIOI_Info_set(fd->info, "direct_read", "false"); + ADIOI_Info_set(fd->info, "direct_write", "false"); + fd->direct_read = fd->direct_write = 0; + + if (!xfs_initialized) { + xfs_initialized = 1; + c = getenv("MPIO_DIRECT_READ_CHUNK_SIZE"); + if (c) { + int io; + io = atoi(c); + if (io <= 0) { + fprintf(stderr, +"MPI: Ignoring an invalid setting for MPIO_DIRECT_READ_CHUNK_SIZE.\n" +" It must be set to a positive integer value.\n"); + } else { + xfs_direct_read_chunk_size = io; + } + } else { + xfs_direct_read_chunk_size = 0; + } + + c = getenv("MPIO_DIRECT_WRITE_CHUNK_SIZE"); + if (c) { + int io; + io = atoi(c); + if (io <= 0) { + fprintf(stderr, +"MPI: Ignoring an invalid setting for MPIO_DIRECT_WRITE_CHUNK_SIZE.\n" +" It must be set to a positive integer value.\n"); + } else { + xfs_direct_write_chunk_size = io; + } + } else { + xfs_direct_write_chunk_size = 0; + } + } + + if (!fd->hints->initialized) { + fd->hints->fs_hints.xfs.read_chunk_sz = + xfs_direct_read_chunk_size; + fd->hints->fs_hints.xfs.write_chunk_sz = + xfs_direct_write_chunk_size; + } + + /* has user specified values for keys "direct_read" and "direct write"? 
*/ if (users_info != MPI_INFO_NULL) { value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char)); - MPI_Info_get(users_info, "direct_read", MPI_MAX_INFO_VAL, + ADIOI_Info_get(users_info, "direct_read", MPI_MAX_INFO_VAL, value, &flag); if (flag && !strcmp(value, "true")) { - MPI_Info_set(fd->info, "direct_read", "true"); + ADIOI_Info_set(fd->info, "direct_read", "true"); fd->direct_read = 1; } - MPI_Info_get(users_info, "direct_write", MPI_MAX_INFO_VAL, + ADIOI_Info_get(users_info, "direct_write", MPI_MAX_INFO_VAL, value, &flag); if (flag && !strcmp(value, "true")) { - MPI_Info_set(fd->info, "direct_write", "true"); + ADIOI_Info_set(fd->info, "direct_write", "true"); fd->direct_write = 1; } @@ -47,8 +87,10 @@ void ADIOI_XFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) /* set the values for collective I/O and data sieving parameters */ ADIOI_GEN_SetInfo(fd, users_info, error_code); + /* Environment variables override MPI_Info hints */ if (ADIOI_Direct_read) fd->direct_read = 1; if (ADIOI_Direct_write) fd->direct_write = 1; + /* environment variables checked in ADIO_Init */ *error_code = MPI_SUCCESS; diff --git a/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_iread.c b/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_iread.c deleted file mode 100644 index a85062ba72..0000000000 --- a/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_iread.c +++ /dev/null @@ -1,42 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; -*- */ -/* - * Copyright (C) 1997 University of Chicago. - * See COPYRIGHT notice in top-level directory. 
- */ - -#include "ad_xfs.h" - -void ADIOI_XFS_IreadContig(ADIO_File fd, void *buf, int count, - MPI_Datatype datatype, int file_ptr_type, - ADIO_Offset offset, ADIO_Request *request, int *error_code) -{ - int len, typesize, aio_errno = 0; - static char myname[] = "ADIOI_XFS_IREADCONTIG"; - - (*request) = ADIOI_Malloc_request(); - (*request)->optype = ADIOI_READ; - (*request)->fd = fd; - (*request)->datatype = datatype; - - MPI_Type_size(datatype, &typesize); - len = count * typesize; - - if (file_ptr_type == ADIO_INDIVIDUAL) offset = fd->fp_ind; - aio_errno = ADIOI_XFS_aio(fd, buf, len, offset, 0, &((*request)->handle)); - if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind += len; - - (*request)->queued = 1; - ADIOI_Add_req_to_list(request); - - fd->fp_sys_posn = -1; - - /* --BEGIN ERROR HANDLING-- */ - if (aio_errno != 0) { - MPIO_ERR_CREATE_CODE_ERRNO(myname, aio_errno, error_code); - return; - } - /* --END ERROR HANDLING-- */ - - *error_code = MPI_SUCCESS; - fd->async_count++; -} diff --git a/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_iwrite.c b/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_iwrite.c deleted file mode 100644 index 61980621f7..0000000000 --- a/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_iwrite.c +++ /dev/null @@ -1,145 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; -*- */ -/* - * - * Copyright (C) 1997 University of Chicago. - * See COPYRIGHT notice in top-level directory. 
- */ - -#include "ad_xfs.h" - -void ADIOI_XFS_IwriteContig(ADIO_File fd, void *buf, int count, - MPI_Datatype datatype, int file_ptr_type, - ADIO_Offset offset, ADIO_Request *request, - int *error_code) -{ - int len, typesize, aio_errno = 0; - static char myname[] = "ADIOI_XFS_IWRITECONTIG"; - - *request = ADIOI_Malloc_request(); - (*request)->optype = ADIOI_WRITE; - (*request)->fd = fd; - (*request)->datatype = datatype; - - MPI_Type_size(datatype, &typesize); - len = count * typesize; - - if (file_ptr_type == ADIO_INDIVIDUAL) offset = fd->fp_ind; - aio_errno = ADIOI_XFS_aio(fd, buf, len, offset, 1, &((*request)->handle)); - if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind += len; - - (*request)->queued = 1; - ADIOI_Add_req_to_list(request); - - fd->fp_sys_posn = -1; - - /* --BEGIN ERROR HANDLING-- */ - if (aio_errno != 0) { - MPIO_ERR_CREATE_CODE_ERRNO(myname, aio_errno, error_code); - return; - } - /* --END ERROR HANDLING-- */ - - *error_code = MPI_SUCCESS; - fd->async_count++; -} - - -void ADIOI_XFS_IwriteStrided(ADIO_File fd, void *buf, int count, - MPI_Datatype datatype, int file_ptr_type, - ADIO_Offset offset, ADIO_Request *request, int - *error_code) -{ - ADIO_Status status; -#ifdef HAVE_STATUS_SET_BYTES - int typesize; -#endif - - *request = ADIOI_Malloc_request(); - (*request)->optype = ADIOI_WRITE; - (*request)->fd = fd; - (*request)->datatype = datatype; - (*request)->queued = 0; - (*request)->handle = 0; - -/* call the blocking version. It is faster because it does data sieving. */ - ADIO_WriteStrided(fd, buf, count, datatype, file_ptr_type, - offset, &status, error_code); - - fd->async_count++; - -#ifdef HAVE_STATUS_SET_BYTES - if (*error_code == MPI_SUCCESS) { - MPI_Type_size(datatype, &typesize); - (*request)->nbytes = count * typesize; - } -#endif -} - - -/* This function is for implementation convenience. It is not user-visible. - * It takes care of the differences in the interface for nonblocking I/O - * on various Unix machines! 
If wr==1 write, wr==0 read. - * - * Returns 0 on success, -errno on failure. - */ -int ADIOI_XFS_aio(ADIO_File fd, void *buf, int len, ADIO_Offset offset, - int wr, void *handle) -{ - int err, error_code; - aiocb64_t *aiocbp; - - aiocbp = (aiocb64_t *) ADIOI_Calloc(sizeof(aiocb64_t), 1); - - if (((wr && fd->direct_write) || (!wr && fd->direct_read)) - && !(((long) buf) % fd->d_mem) && !(offset % fd->d_miniosz) && - !(len % fd->d_miniosz) && (len >= fd->d_miniosz) && - (len <= fd->d_maxiosz)) - aiocbp->aio_fildes = fd->fd_direct; - else aiocbp->aio_fildes = fd->fd_sys; - - aiocbp->aio_offset = offset; - aiocbp->aio_buf = buf; - aiocbp->aio_nbytes = len; - aiocbp->aio_reqprio = 0; - -#ifdef AIO_SIGNOTIFY_NONE -/* SGI IRIX 6 */ - aiocbp->aio_sigevent.sigev_notify = SIGEV_NONE; -#else - aiocbp->aio_sigevent.sigev_signo = 0; -#endif - - if (wr) err = aio_write64(aiocbp); - else err = aio_read64(aiocbp); - - if (err != 0) { - if (errno == EAGAIN) { - /* exceeded the max. no. of outstanding requests. - complete all previous async. requests and try again. */ - - /* ADIOI_Complete_async(&error_code); */ - if (error_code != MPI_SUCCESS) return -EIO; - - if (wr) err = aio_write64(aiocbp); - else err = aio_read64(aiocbp); - - while (err != 0) { - if (errno == EAGAIN) { - /* sleep and try again */ - sleep(1); - if (wr) err = aio_write64(aiocbp); - else err = aio_read64(aiocbp); - } - else { - return -errno; - } - } - } - else { - return -errno; - } - } - - *((aiocb64_t **) handle) = aiocbp; - return 0; -} diff --git a/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_open.c b/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_open.c index b993644f46..fa073fb316 100644 --- a/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_open.c +++ b/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_open.c @@ -5,22 +5,26 @@ * See COPYRIGHT notice in top-level directory. 
*/ +#define _GNU_SOURCE // for O_DIRECT + #include "ad_xfs.h" +#include #ifdef HAVE_STDDEF_H #include #endif -#if defined(MPISGI) -#include -#include +#ifndef HAVE_LSEEK64 +#define lseek64 lseek #endif void ADIOI_XFS_Open(ADIO_File fd, int *error_code) { - int perm, amode, amode_direct; + int perm, amode, amode_direct, factor; unsigned int old_mask; struct dioattr st; static char myname[] = "ADIOI_XFS_OPEN"; + unsigned read_chunk_sz = fd->hints->fs_hints.xfs.read_chunk_sz; + unsigned write_chunk_sz = fd->hints->fs_hints.xfs.write_chunk_sz; if (fd->perm == ADIO_PERM_NULL) { old_mask = umask(022); @@ -49,7 +53,7 @@ void ADIOI_XFS_Open(ADIO_File fd, int *error_code) fd->fd_direct = open(fd->filename, amode_direct, perm); if (fd->fd_direct != -1) { -#if defined(LINUX) && defined(MPISGI) +#if defined(MPISGI) ioctl(fd->fd_direct, XFS_IOC_DIOINFO, &st); #else fcntl(fd->fd_direct, F_DIOINFO, &st); @@ -57,7 +61,34 @@ void ADIOI_XFS_Open(ADIO_File fd, int *error_code) fd->d_mem = st.d_mem; fd->d_miniosz = st.d_miniosz; - fd->d_maxiosz = st.d_maxiosz; + + if (read_chunk_sz == 0) { + fd->hints->fs_hints.xfs.read_chunk_sz = st.d_maxiosz; + } else { + /* + * MPIO_DIRECT_READ_CHUNK_SIZE was set. + * Make read_chunk_sz a multiple of d_miniosz. + */ + factor = read_chunk_sz / fd->d_miniosz; + if (factor == 0 || read_chunk_sz != fd->d_miniosz * factor) { + fd->hints->fs_hints.xfs.read_chunk_sz = + fd->d_miniosz * (factor + 1); + } + } + + if (write_chunk_sz == 0) { + fd->hints->fs_hints.xfs.write_chunk_sz = st.d_maxiosz; + } else { + /* + * MPIO_DIRECT_WRITE_CHUNK_SIZE was set. + * Make write_chunk_sz a multiple of d_miniosz. 
+ */ + factor = write_chunk_sz / fd->d_miniosz; + if (factor == 0 || write_chunk_sz != fd->d_miniosz * factor) { + fd->hints->fs_hints.xfs.write_chunk_sz = + fd->d_miniosz * (factor + 1); + } + } if (fd->d_mem > XFS_MEMALIGN) { FPRINTF(stderr, "MPI: Run-time Direct-IO memory alignment, %d, does not match compile-time value, %d.\n", diff --git a/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_read.c b/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_read.c index 8396dd87f5..38b28f0e3b 100644 --- a/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_read.c +++ b/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_read.c @@ -63,7 +63,7 @@ void ADIOI_XFS_ReadContig(ADIO_File fd, void *buf, int count, ADIOI_XFS_Aligned_Mem_File_Read(fd, newbuf, size, offset, &err); if (err > 0) memcpy(buf, newbuf, err); nbytes += err; - free(newbuf); + ADIOI_Free(newbuf); } else nbytes += pread(fd->fd_sys, buf, size, offset); } @@ -77,7 +77,7 @@ void ADIOI_XFS_ReadContig(ADIO_File fd, void *buf, int count, if (newbuf) { ADIOI_XFS_Aligned_Mem_File_Read(fd, newbuf, len, offset, &err); if (err > 0) memcpy(buf, newbuf, err); - free(newbuf); + ADIOI_Free(newbuf); } else err = pread(fd->fd_sys, buf, len, offset); } @@ -102,6 +102,7 @@ void ADIOI_XFS_Aligned_Mem_File_Read(ADIO_File fd, void *buf, int len, ADIO_Offset offset, int *err) { int ntimes, rem, newrem, i, size, nbytes; + unsigned read_chunk_sz = fd->hints->fs_hints.xfs.read_chunk_sz; /* memory buffer is aligned, offset in file is aligned, io_size may or may not be of the right size. @@ -109,33 +110,33 @@ void ADIOI_XFS_Aligned_Mem_File_Read(ADIO_File fd, void *buf, int len, use buffered I/O for remaining. 
*/ if (!(len % fd->d_miniosz) && - (len >= fd->d_miniosz) && (len <= fd->d_maxiosz)) + (len >= fd->d_miniosz) && (len <= read_chunk_sz)) *err = pread(fd->fd_direct, buf, len, offset); else if (len < fd->d_miniosz) *err = pread(fd->fd_sys, buf, len, offset); - else if (len > fd->d_maxiosz) { - ntimes = len/(fd->d_maxiosz); - rem = len - ntimes * fd->d_maxiosz; + else if (len > read_chunk_sz) { + ntimes = len/(read_chunk_sz); + rem = len - ntimes * read_chunk_sz; nbytes = 0; for (i=0; ifd_direct, ((char *)buf) + i * fd->d_maxiosz, - fd->d_maxiosz, offset); - offset += fd->d_maxiosz; + nbytes += pread(fd->fd_direct, ((char *)buf) + i * read_chunk_sz, + read_chunk_sz, offset); + offset += read_chunk_sz; } if (rem) { if (!(rem % fd->d_miniosz)) nbytes += pread(fd->fd_direct, - ((char *)buf) + ntimes * fd->d_maxiosz, rem, offset); + ((char *)buf) + ntimes * read_chunk_sz, rem, offset); else { newrem = rem % fd->d_miniosz; size = rem - newrem; if (size) { nbytes += pread(fd->fd_direct, - ((char *)buf) + ntimes * fd->d_maxiosz, size, offset); + ((char *)buf) + ntimes * read_chunk_sz, size, offset); offset += size; } nbytes += pread(fd->fd_sys, - ((char *)buf) + ntimes*fd->d_maxiosz + size, newrem, offset); + ((char *)buf) + ntimes * read_chunk_sz + size, newrem, offset); } } *err = nbytes; diff --git a/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_wait.c b/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_wait.c deleted file mode 100644 index 93a0bdbfce..0000000000 --- a/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_wait.c +++ /dev/null @@ -1,73 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; -*- */ -/* - * - * Copyright (C) 1997 University of Chicago. - * See COPYRIGHT notice in top-level directory. 
- */ - -#include "ad_xfs.h" - -void ADIOI_XFS_ReadComplete(ADIO_Request *request, ADIO_Status *status, - int *error_code) -{ - int err; - static char myname[] = "ADIOI_XFS_READCOMPLETE"; - - if (*request == ADIO_REQUEST_NULL) { - *error_code = MPI_SUCCESS; - return; - } - - if ((*request)->queued) { - do { - err = aio_suspend64((const aiocb64_t **) &((*request)->handle), 1, 0); - } while ((err == -1) && (errno == EINTR)); - - if (err != -1) { - err = aio_return64((aiocb64_t *) (*request)->handle); - (*request)->nbytes = err; - errno = aio_error64((aiocb64_t *) (*request)->handle); - } - else (*request)->nbytes = -1; - - if (err == -1) { - *error_code = MPIO_Err_create_code(MPI_SUCCESS, - MPIR_ERR_RECOVERABLE, myname, - __LINE__, MPI_ERR_IO, "**io", - "**io %s", strerror(errno)); - } - else *error_code = MPI_SUCCESS; - } /* if ((*request)->queued) */ - else *error_code = MPI_SUCCESS; - -#ifdef HAVE_STATUS_SET_BYTES - if ((*request)->nbytes != -1) - MPIR_Status_set_bytes(status, (*request)->datatype, (*request)->nbytes); -#endif - - if ((*request)->queued != -1) { - - /* queued = -1 is an internal hack used when the request must - be completed, but the request object should not be - freed. This is used in ADIOI_Complete_async, because the user - will call MPI_Wait later, which would require status to - be filled. Ugly but works. queued = -1 should be used only - in ADIOI_Complete_async. - This should not affect the user in any way. */ - - /* if request is still queued in the system, it is also there - on ADIOI_Async_list. Delete it from there. 
*/ - if ((*request)->queued) ADIOI_Del_req_from_list(request); - - (*request)->fd->async_count--; - if ((*request)->handle) ADIOI_Free((*request)->handle); - ADIOI_Free_request((ADIOI_Req_node *) (*request)); - *request = ADIO_REQUEST_NULL; - } -} - - -void ADIOI_XFS_WriteComplete(ADIO_Request *request, ADIO_Status *status, int *error_code) -{ - ADIOI_XFS_ReadComplete(request, status, error_code); -} diff --git a/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_write.c b/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_write.c index 0c01352314..ecb9c7b801 100644 --- a/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_write.c +++ b/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_write.c @@ -13,14 +13,15 @@ /* style: allow:free:2 sig:0 */ -static void ADIOI_XFS_Aligned_Mem_File_Write(ADIO_File fd, void *buf, int len, - ADIO_Offset offset, int *err); +static int ADIOI_XFS_Aligned_Mem_File_Write(ADIO_File fd, void *buf, + ADIO_Offset len, ADIO_Offset offset); void ADIOI_XFS_WriteContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int *error_code) { - int err=-1, datatype_size, len, diff, size, nbytes; + int err=-1, datatype_size, diff, size; + ssize_t len; void *newbuf; static char myname[] = "ADIOI_XFS_WRITECONTIG"; @@ -31,44 +32,48 @@ void ADIOI_XFS_WriteContig(ADIO_File fd, void *buf, int count, if (file_ptr_type == ADIO_INDIVIDUAL) offset = fd->fp_ind; - if (!(fd->direct_write)) /* direct I/O not enabled */ + if (!(fd->direct_write)) { /* direct I/O not enabled */ err = pwrite(fd->fd_sys, buf, len, offset); - else { /* direct I/O enabled */ + if (err < 0) {goto leaving;} + } else { /* direct I/O enabled */ /* (1) if mem_aligned && file_aligned use direct I/O to write up to correct io_size use buffered I/O for remaining */ - if (!(((long) buf) % fd->d_mem) && !(offset % fd->d_miniosz)) - ADIOI_XFS_Aligned_Mem_File_Write(fd, buf, len, offset, &err); + if (!(((long) buf) % fd->d_mem) && !(offset % fd->d_miniosz)) { + 
err = ADIOI_XFS_Aligned_Mem_File_Write(fd, buf, len, offset); + if (err < 0) {goto leaving;} /* (2) if !file_aligned use buffered I/O to write up to file_aligned At that point, if still mem_aligned, use (1) else copy into aligned buf and then use (1) */ - else if (offset % fd->d_miniosz) { + } else if (offset % fd->d_miniosz) { diff = fd->d_miniosz - (offset % fd->d_miniosz); diff = ADIOI_MIN(diff, len); - nbytes = pwrite(fd->fd_sys, buf, diff, offset); + err = pwrite(fd->fd_sys, buf, diff, offset); + if (err < 0) {goto leaving;} buf = ((char *) buf) + diff; offset += diff; size = len - diff; if (!(((long) buf) % fd->d_mem)) { - ADIOI_XFS_Aligned_Mem_File_Write(fd, buf, size, offset, &err); - nbytes += err; + err = ADIOI_XFS_Aligned_Mem_File_Write(fd, buf, size, offset); + if (err < 0) {goto leaving;} } else { newbuf = (void *) memalign(XFS_MEMALIGN, size); if (newbuf) { memcpy(newbuf, buf, size); - ADIOI_XFS_Aligned_Mem_File_Write(fd, newbuf, size, offset, &err); - nbytes += err; - free(newbuf); + err = ADIOI_XFS_Aligned_Mem_File_Write(fd, newbuf, size, offset); + ADIOI_Free(newbuf); + if (err < 0) {goto leaving;} + } else { + err = pwrite(fd->fd_sys, buf, size, offset); + if (err < 0) {goto leaving;} } - else nbytes += pwrite(fd->fd_sys, buf, size, offset); } - err = nbytes; } /* (3) if !mem_aligned && file_aligned @@ -77,19 +82,22 @@ void ADIOI_XFS_WriteContig(ADIO_File fd, void *buf, int count, newbuf = (void *) memalign(XFS_MEMALIGN, len); if (newbuf) { memcpy(newbuf, buf, len); - ADIOI_XFS_Aligned_Mem_File_Write(fd, newbuf, len, offset, &err); - free(newbuf); + err = ADIOI_XFS_Aligned_Mem_File_Write(fd, newbuf, len, offset); + ADIOI_Free(newbuf); + } else { + err = pwrite(fd->fd_sys, buf, len, offset); } - else err = pwrite(fd->fd_sys, buf, len, offset); + + if (err < 0) {goto leaving;} } } - if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind += err; + if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind += len; #ifdef HAVE_STATUS_SET_BYTES - if (err != -1) 
MPIR_Status_set_bytes(status, datatype, err); + if (err != -1) MPIR_Status_set_bytes(status, datatype, len); #endif - +leaving: if (err == -1) { *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io", @@ -99,10 +107,13 @@ void ADIOI_XFS_WriteContig(ADIO_File fd, void *buf, int count, } -void ADIOI_XFS_Aligned_Mem_File_Write(ADIO_File fd, void *buf, int len, - ADIO_Offset offset, int *err) +static int +ADIOI_XFS_Aligned_Mem_File_Write(ADIO_File fd, void *buf, ADIO_Offset len, + ADIO_Offset offset) { - int ntimes, rem, newrem, i, size, nbytes; + unsigned write_chunk_sz = fd->hints->fs_hints.xfs.write_chunk_sz; + ADIO_Offset nbytes, rem, newrem, size; + int ntimes, i; /* memory buffer is aligned, offset in file is aligned, io_size may or may not be of the right size. @@ -110,42 +121,50 @@ void ADIOI_XFS_Aligned_Mem_File_Write(ADIO_File fd, void *buf, int len, use buffered I/O for remaining. */ if (!(len % fd->d_miniosz) && - (len >= fd->d_miniosz) && (len <= fd->d_maxiosz)) - *err = pwrite(fd->fd_direct, buf, len, offset); - else if (len < fd->d_miniosz) - *err = pwrite(fd->fd_sys, buf, len, offset); - else if (len > fd->d_maxiosz) { - ntimes = len/(fd->d_maxiosz); - rem = len - ntimes * fd->d_maxiosz; + (len >= fd->d_miniosz) && (len <= write_chunk_sz)) { + nbytes = pwrite(fd->fd_direct, buf, len, offset); + if (nbytes < 0) {return -1;} + } else if (len < fd->d_miniosz) { + nbytes = pwrite(fd->fd_sys, buf, len, offset); + if (nbytes < 0) {return -1;} + } else if (len > write_chunk_sz) { + ntimes = len/(write_chunk_sz); + rem = len - ntimes * write_chunk_sz; nbytes = 0; for (i=0; ifd_direct, ((char *)buf) + i * fd->d_maxiosz, - fd->d_maxiosz, offset); - offset += fd->d_maxiosz; + nbytes = pwrite(fd->fd_direct, ((char *)buf) + i * write_chunk_sz, + write_chunk_sz, offset); + offset += write_chunk_sz; + if (nbytes < 0) {return -1;} } if (rem) { - if (!(rem % fd->d_miniosz)) - nbytes += pwrite(fd->fd_direct, - ((char 
*)buf) + ntimes * fd->d_maxiosz, rem, offset); - else { + if (!(rem % fd->d_miniosz)) { + nbytes = pwrite(fd->fd_direct, + ((char *)buf) + ntimes * write_chunk_sz, rem, offset); + if (nbytes < 0) {return -1;} + } else { newrem = rem % fd->d_miniosz; size = rem - newrem; if (size) { - nbytes += pwrite(fd->fd_direct, - ((char *)buf) + ntimes * fd->d_maxiosz, size, offset); + nbytes = pwrite(fd->fd_direct, + ((char *)buf) + ntimes * write_chunk_sz, size, offset); offset += size; + if (nbytes < 0) {return -1;} } - nbytes += pwrite(fd->fd_sys, - ((char *)buf) + ntimes*fd->d_maxiosz + size, newrem, offset); + nbytes = pwrite(fd->fd_sys, + ((char *)buf) + ntimes * write_chunk_sz + size, newrem, offset); + if (nbytes < 0) {return -1;} } } - *err = nbytes; } else { rem = len % fd->d_miniosz; size = len - rem; nbytes = pwrite(fd->fd_direct, buf, size, offset); - nbytes += pwrite(fd->fd_sys, (char *)buf + size, rem, offset+size); - *err = nbytes; + if (nbytes < 0) {return -1;} + nbytes = pwrite(fd->fd_sys, (char *)buf + size, rem, offset+size); + if (nbytes < 0) {return -1;} } + + return 0; } diff --git a/ompi/mca/io/romio/romio/adio/ad_zoidfs/Makefile.am b/ompi/mca/io/romio/romio/adio/ad_zoidfs/Makefile.am new file mode 100644 index 0000000000..dfa5b419cd --- /dev/null +++ b/ompi/mca/io/romio/romio/adio/ad_zoidfs/Makefile.am @@ -0,0 +1,37 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +include $(top_srcdir)/Makefile.options + +EXTRA_DIST = README + +noinst_LTLIBRARIES = libadio_zoidf.la +libadio_zoidf_la_SOURCES = \ + ad_zoidfs.o \ + ad_zoidfs_close.o \ + ad_zoidfs_common.o \ + ad_zoidfs_delete.o \ + ad_zoidfs_fcntl.o \ + ad_zoidfs_flush.o \ + ad_zoidfs_io.o \ + ad_zoidfs_open.o \ + ad_zoidfs_resize.o \ + ad_zoidfs_features.o\ + ad_zoidfs_read_list.o \ + ad_zoidfs_write_list.o + diff --git a/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs.c b/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs.c new file mode 100644 index 0000000000..28b8ea54e6 --- /dev/null +++ b/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs.c @@ -0,0 +1,42 @@ +/* -*- Mode: C; c-basic-offset:4 ; -*- */ +/* + * + * Copyright (C) 2003 University of Chicago. + * See COPYRIGHT notice in top-level directory. + */ + +#include "ad_zoidfs.h" + +/* adioi.h has the ADIOI_Fns_struct define */ +#include "adioi.h" + +struct ADIOI_Fns_struct ADIO_ZOIDFS_operations = { + ADIOI_ZOIDFS_Open, /* Open */ + ADIOI_SCALEABLE_OpenColl, /* OpenColl */ + ADIOI_ZOIDFS_ReadContig, /* ReadContig */ + ADIOI_ZOIDFS_WriteContig, /* WriteContig */ + ADIOI_GEN_ReadStridedColl, /* ReadStridedColl */ + ADIOI_GEN_WriteStridedColl, /* WriteStridedColl */ + ADIOI_GEN_SeekIndividual, /* SeekIndividual */ + ADIOI_ZOIDFS_Fcntl, /* Fcntl */ + ADIOI_GEN_SetInfo, /* SetInfo */ + ADIOI_GEN_ReadStrided, /* ReadStrided */ + ADIOI_ZOIDFS_WriteStrided, /* WriteStrided */ + ADIOI_ZOIDFS_Close, /* Close */ + ADIOI_FAKE_IreadContig, /* IreadContig */ + ADIOI_FAKE_IwriteContig, /* IwriteContig */ + ADIOI_FAKE_IODone, /* ReadDone */ + ADIOI_FAKE_IODone, /* WriteDone */ + ADIOI_FAKE_IOComplete, /* ReadComplete */ + ADIOI_FAKE_IOComplete, /* WriteComplete */ + ADIOI_FAKE_IreadStrided, /* IreadStrided */ + ADIOI_FAKE_IwriteStrided, /* IwriteStrided */ + ADIOI_ZOIDFS_Flush, /* Flush */ + ADIOI_ZOIDFS_Resize, /* Resize */ + ADIOI_ZOIDFS_Delete, /* Delete */ + 
ADIOI_ZOIDFS_Feature, +}; + +/* + * vim: ts=8 sts=4 sw=4 noexpandtab + */ diff --git a/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs.h b/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs.h new file mode 100644 index 0000000000..03b2a57662 --- /dev/null +++ b/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs.h @@ -0,0 +1,44 @@ +/* -*- Mode: C; c-basic-offset:4 ; -*- */ +/* + * + * Copyright (C) 1997 University of Chicago. + * See COPYRIGHT notice in top-level directory. + */ + +#ifndef AD_ZOIDFS_INCLUDE +#define AD_ZOIDFS_INCLUDE + +#include "adio.h" +#ifdef HAVE_ZOIDFS_H +#include "zoidfs.h" +#endif + + +typedef zoidfs_handle_t ADIOI_ZOIDFS_object; + +void ADIOI_ZOIDFS_Open(ADIO_File fd, int *error_code); +void ADIOI_ZOIDFS_Close(ADIO_File fd, int *error_code); +void ADIOI_ZOIDFS_ReadContig(ADIO_File fd, void *buf, int count, + MPI_Datatype datatype, int file_ptr_type, + ADIO_Offset offset, ADIO_Status *status, int + *error_code); +void ADIOI_ZOIDFS_WriteContig(ADIO_File fd, void *buf, int count, + MPI_Datatype datatype, int file_ptr_type, + ADIO_Offset offset, ADIO_Status *status, int + *error_code); +void ADIOI_ZOIDFS_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int + *error_code); +void ADIOI_ZOIDFS_WriteStrided(ADIO_File fd, void *buf, int count, + MPI_Datatype datatype, int file_ptr_type, + ADIO_Offset offset, ADIO_Status *status, int + *error_code); +void ADIOI_ZOIDFS_ReadStrided(ADIO_File fd, void *buf, int count, + MPI_Datatype datatype, int file_ptr_type, + ADIO_Offset offset, ADIO_Status *status, int + *error_code); +void ADIOI_ZOIDFS_Flush(ADIO_File fd, int *error_code); +void ADIOI_ZOIDFS_Delete(char *filename, int *error_code); +void ADIOI_ZOIDFS_Resize(ADIO_File fd, ADIO_Offset size, int *error_code); +void ADIOI_ZOIDFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code); +int ADIOI_ZOIDFS_Feature(ADIO_File fd, int flag); +#endif diff --git a/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_close.c 
b/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_close.c new file mode 100644 index 0000000000..1bee6b83e9 --- /dev/null +++ b/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_close.c @@ -0,0 +1,25 @@ +/* -*- Mode: C; c-basic-offset:4 ; -*- */ +/* + * + * Copyright (C) 1997 University of Chicago. + * See COPYRIGHT notice in top-level directory. + */ + +#include "ad_zoidfs.h" + +void ADIOI_ZOIDFS_Close(ADIO_File fd, int *error_code) +{ + ADIOI_Free(fd->fs_ptr); + fd->fs_ptr = NULL; + + /* At some point or another it was decided that ROMIO would not + * explicitly flush (other than any local cache) on close, because + * there is no way to *avoid* that overhead if you implement it here + * and don't actually want it. + */ + + *error_code = MPI_SUCCESS; +} +/* + * vim: ts=8 sts=4 sw=4 noexpandtab + */ diff --git a/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_common.c b/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_common.c new file mode 100644 index 0000000000..3437359da7 --- /dev/null +++ b/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_common.c @@ -0,0 +1,126 @@ +/* -*- Mode: C; c-basic-offset:4 ; -*- */ +/* + * Copyright (C) 2003 University of Chicago. + * See COPYRIGHT notice in top-level directory. 
+ */ + +#include "ad_zoidfs.h" +#include "ad_zoidfs_common.h" +#include +#include + +/* keyval hack to both tell us if we've already initialized zoidfs and also + * close it down when mpi exits */ +int ADIOI_ZOIDFS_Initialized = MPI_KEYVAL_INVALID; + +void ADIOI_ZOIDFS_End(int *error_code) +{ + int ret; + static char myname[] = "ADIOI_ZOIDFS_END"; + + ret = zoidfs_finalize(); + + /* --BEGIN ERROR HANDLING-- */ + if (ret != 0 ) { + *error_code = MPIO_Err_create_code(MPI_SUCCESS, + MPIR_ERR_RECOVERABLE, + myname, __LINE__, + ADIOI_ZOIDFS_error_convert(ret), + "Error in zoidfs_finalize", 0); + return; + } + /* --END ERROR HANDLING-- */ + + *error_code = MPI_SUCCESS; +} + +int ADIOI_ZOIDFS_End_call(MPI_Comm comm, int keyval, + void *attribute_val, void *extra_state) +{ + int error_code; + ADIOI_ZOIDFS_End(&error_code); + MPI_Keyval_free(&keyval); + return error_code; +} + +void ADIOI_ZOIDFS_Init(int rank, int *error_code ) +{ + int ret; + static char myname[] = "ADIOI_ZOIDFS_INIT"; + + /* do nothing if we've already fired up the zoidfs interface */ + if (ADIOI_ZOIDFS_Initialized != MPI_KEYVAL_INVALID) { + *error_code = MPI_SUCCESS; + return; + } + + ret = zoidfs_init(); + if (ret < 0 ) { + *error_code = MPIO_Err_create_code(MPI_SUCCESS, + MPIR_ERR_RECOVERABLE, + myname, __LINE__, + ADIOI_ZOIDFS_error_convert(ret), + "Error in zoidfs_init", + 0); + return; + } + + MPI_Keyval_create(MPI_NULL_COPY_FN, ADIOI_ZOIDFS_End_call, + &ADIOI_ZOIDFS_Initialized, (void *)0); + /* just like romio does, we make a dummy attribute so we + * get cleaned up */ + MPI_Attr_put(MPI_COMM_SELF, ADIOI_ZOIDFS_Initialized, (void *)0); +} + +void ADIOI_ZOIDFS_makeattribs(zoidfs_sattr_t * attribs) +{ + memset(attribs, 0, sizeof(zoidfs_sattr_t)); + + attribs->mask = ZOIDFS_ATTR_MODE; + attribs->mode = 0644; +} + +int ADIOI_ZOIDFS_error_convert(int error) +{ + switch (error) + { + case ZFSERR_PERM: /* ??? */ + case ZFSERR_ACCES: + return MPI_ERR_ACCESS; + case ZFSERR_NOENT: + case ZFSERR_NXIO: /* ??? 
*/ + case ZFSERR_NODEV: /* ??? */ + return MPI_ERR_NO_SUCH_FILE; + case ZFSERR_IO: + return MPI_ERR_IO; + case ZFSERR_EXIST: + return MPI_ERR_FILE_EXISTS; + case ZFSERR_NOTDIR: /* ??? */ + case ZFSERR_ISDIR: /* ??? */ + case ZFSERR_NAMETOOLONG: + return MPI_ERR_BAD_FILE; + case ZFSERR_INVAL: + case ZFSERR_STALE: + return MPI_ERR_FILE; + case ZFSERR_FBIG: /* ??? */ + case ZFSERR_NOSPC: + return MPI_ERR_NO_SPACE; + case ZFSERR_ROFS: + return MPI_ERR_READ_ONLY; + case ZFSERR_NOTIMPL: + return MPI_ERR_UNSUPPORTED_OPERATION; + case ZFSERR_DQUOT: + return MPI_ERR_QUOTA; + /* case ZFSERR_NOTEMPTY: */ + /* case ZFSERR_WFLUSH: */ + /* case ZFSERR_OTHER: */ + case ZFSERR_NOMEM: + return MPI_ERR_INTERN; + default: + return MPI_UNDEFINED; + } +} + +/* + * vim: ts=8 sts=4 sw=4 noexpandtab + */ diff --git a/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_common.h b/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_common.h new file mode 100644 index 0000000000..b519f791dd --- /dev/null +++ b/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_common.h @@ -0,0 +1,43 @@ +/* -*- Mode: C; c-basic-offset:4 ; -*- + * vim: ts=8 sts=4 sw=4 noexpandtab + * + * Copyright (C) 1997 University of Chicago. + * See COPYRIGHT notice in top-level directory. + */ + +#ifndef _AD_ZOIDFS_COMMON_H +#define _AD_ZOIDFS_COMMON_H +#include "ad_zoidfs.h" + +/* The ESTALE problem: + * The IO forwarding protocol can respond to any call with ESTALE, which means + * the handle upon which that call operates has expired from the metadata + * cache. We thus wrap any zoidfs routine (expr) in this macro. 
+ * + * ROMIO stores the filename in the ADIOI_File structure (fd), so we can always + * re-lookup in response to ESTALE */ + +#define NO_STALE(ret, fd, handle_p, expr) \ + do { \ + (ret) = (expr); \ + while ((ret) == ZFSERR_STALE) { \ + /* lookup again */ \ + (ret) = zoidfs_lookup(NULL, NULL, (fd)->filename, \ + (zoidfs_handle_t*)((fd)->fs_ptr), ZOIDFS_NO_OP_HINT); \ + if ((ret) == ZFS_OK) { \ + *((ADIOI_ZOIDFS_object*)handle_p) \ + = *((ADIOI_ZOIDFS_object*)((fd)->fs_ptr)); \ + /* re-execute the expr with new handle */ \ + (ret) = (expr); \ + } \ + } \ + } while (0) + +void ADIOI_ZOIDFS_Init(int rank, int *error_code ); +void ADIOI_ZOIDFS_makeattribs(zoidfs_sattr_t * attribs); +void ADIOI_ZOIDFS_End(int *error_code); +int ADIOI_ZOIDFS_End_call(MPI_Comm comm, int keyval, + void *attribute_val, void *extra_state); +int ADIOI_ZOIDFS_error_convert(int error); + +#endif diff --git a/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_delete.c b/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_delete.c new file mode 100644 index 0000000000..58d3bc0bb9 --- /dev/null +++ b/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_delete.c @@ -0,0 +1,45 @@ +/* -*- Mode: C; c-basic-offset:4 ; -*- */ +/* + * + * Copyright (C) 2003 University of Chicago. + * See COPYRIGHT notice in top-level directory.
+ */ + +#include "ad_zoidfs.h" +#include "adio.h" + +#include "ad_zoidfs_common.h" + +void ADIOI_ZOIDFS_Delete(char *filename, int *error_code) +{ + int ret; + static char myname[] = "ADIOI_ZOIDFS_DELETE"; + + ADIOI_ZOIDFS_Init(0, error_code); + /* --BEGIN ERROR HANDLING-- */ + if (*error_code != MPI_SUCCESS) + { + /* ADIOI_ZOIDFS_INIT handles creating error codes itself */ + return; + } + /* --END ERROR HANDLING-- */ + + ret = zoidfs_remove(NULL, NULL, filename, NULL, ZOIDFS_NO_OP_HINT); + /* --BEGIN ERROR HANDLING-- */ + if (ret != ZFS_OK) { + *error_code = MPIO_Err_create_code(MPI_SUCCESS, + MPIR_ERR_RECOVERABLE, + myname, __LINE__, + ADIOI_ZOIDFS_error_convert(ret), + "Error in zoidfs_remove", 0); + return; + } + /* --END ERROR HANDLING-- */ + + *error_code = MPI_SUCCESS; + return; +} + +/* + * vim: ts=8 sts=4 sw=4 noexpandtab + */ diff --git a/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_fcntl.c b/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_fcntl.c new file mode 100644 index 0000000000..04cd2b8f9c --- /dev/null +++ b/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_fcntl.c @@ -0,0 +1,60 @@ +/* -*- Mode: C; c-basic-offset:4 ; -*- */ +/* + * + * Copyright (C) 1997 University of Chicago. + * See COPYRIGHT notice in top-level directory. 
+ */ + +#include "ad_zoidfs.h" +#include "adio_extern.h" +#include "ad_zoidfs_common.h" + +/* Handle ADIO fcntl requests for a ZoidFS file. + * ADIO_FCNTL_GET_FSIZE queries the size with zoidfs_getattr and stores it in + * fcntl_struct->fsize; ADIO_FCNTL_SET_DISKSPACE is forwarded to + * ADIOI_GEN_Prealloc; every other flag, including ADIO_FCNTL_SET_ATOMICITY, + * sets *error_code to an MPI_ERR_ARG-class error. */ +void ADIOI_ZOIDFS_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, + int *error_code) +{ + int ret; + zoidfs_attr_t attr; + ADIOI_ZOIDFS_object *zoidfs_obj_ptr; + static char myname[] = "ADIOI_ZOIDFS_FCNTL"; + + zoidfs_obj_ptr = (ADIOI_ZOIDFS_object*)fd->fs_ptr; + + switch(flag) { + case ADIO_FCNTL_GET_FSIZE: + attr.mask = ZOIDFS_ATTR_SIZE; + /* fsize is stored unconditionally below; start from a defined value + * so a failed getattr never hands back uninitialized stack data */ + attr.size = 0; + NO_STALE(ret, fd, zoidfs_obj_ptr, + zoidfs_getattr(zoidfs_obj_ptr, &attr, ZOIDFS_NO_OP_HINT)); + if ( !(attr.mask & ZOIDFS_ATTR_SIZE) || (ret != ZFS_OK ) ) { + /* --BEGIN ERROR HANDLING-- */ + *error_code = MPIO_Err_create_code(MPI_SUCCESS, + MPIR_ERR_RECOVERABLE, + myname, __LINE__, + ADIOI_ZOIDFS_error_convert(ret), + "Error in zoidfs_getattr", 0); + /* --END ERROR HANDLING-- */ + } + else { + *error_code = MPI_SUCCESS; + } + fcntl_struct->fsize = attr.size; + return; + + case ADIO_FCNTL_SET_DISKSPACE: + ADIOI_GEN_Prealloc(fd, fcntl_struct->diskspace, error_code); + break; + + /* --BEGIN ERROR HANDLING-- */ + case ADIO_FCNTL_SET_ATOMICITY: + default: + *error_code = MPIO_Err_create_code(MPI_SUCCESS, + MPIR_ERR_RECOVERABLE, + myname, __LINE__, + MPI_ERR_ARG, + "**flag", "**flag %d", flag); + /* --END ERROR HANDLING-- */ + } +} + +/* + * vim: ts=8 sts=4 sw=4 noexpandtab + */ diff --git a/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_features.c b/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_features.c new file mode 100644 index 0000000000..892e4ca548 --- /dev/null +++ b/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_features.c @@ -0,0 +1,16 @@ +#include "adio.h" +#include "ad_zoidfs.h" + +/* Report whether this driver supports the ADIO feature named by flag: + * returns 1 for ADIO_SCALABLE_OPEN and 0 for every other flag. fd is not + * consulted. */ +int ADIOI_ZOIDFS_Feature(ADIO_File fd, int flag) +{ + switch(flag) { + case ADIO_SCALABLE_OPEN: + return 1; + case ADIO_SHARED_FP: + case ADIO_LOCKS: + case ADIO_SEQUENTIAL: + case ADIO_DATA_SIEVING_WRITES: + default: + return 0; + } +} diff --git a/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_flush.c 
b/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_flush.c new file mode 100644 index 0000000000..6191dada1d --- /dev/null +++ b/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_flush.c @@ -0,0 +1,52 @@ +/* -*- Mode: C; c-basic-offset:4 ; -*- */ +/* + * + * Copyright (C) 1997 University of Chicago. + * See COPYRIGHT notice in top-level directory. + */ + +#include "ad_zoidfs.h" +#include "ad_zoidfs_common.h" + +/* we want to be a bit clever here: at scale, if every client sends a + * flush request, it will stress the file system with redundant + * commit requests. Instead, one process should wait for + * everyone to catch up, do the sync, then broadcast the result. + * + * fd: open file whose fs_ptr holds the ZoidFS handle. + * error_code: MPI_SUCCESS, or an MPIO error code built from the converted + * ZoidFS status when the commit on rank fd->hints->ranklist[0] fails. + */ + +void ADIOI_ZOIDFS_Flush(ADIO_File fd, int *error_code) +{ + int ret = ZFS_OK, rank, dummy=0, dummy_in=0; + ADIOI_ZOIDFS_object *zoidfs_obj_ptr; + static char myname[] = "ADIOI_ZOIDFS_FLUSH"; + + *error_code = MPI_SUCCESS; + + zoidfs_obj_ptr = (ADIOI_ZOIDFS_object*)fd->fs_ptr; + + MPI_Comm_rank(fd->comm, &rank); + + /* collective call to ensure no outstanding write requests. 
reduce is + * slightly less expensive than barrier */ + MPI_Reduce(&dummy_in, &dummy, 1, MPI_INT, MPI_SUM, + fd->hints->ranklist[0], fd->comm); + + if (rank == fd->hints->ranklist[0]) { + /* retry on a stale handle, as the other handle-based calls do */ + NO_STALE(ret, fd, zoidfs_obj_ptr, + zoidfs_commit(zoidfs_obj_ptr, ZOIDFS_NO_OP_HINT)); + } + /* every rank learns the outcome of the single commit */ + MPI_Bcast(&ret, 1, MPI_INT, fd->hints->ranklist[0], fd->comm); + + /* --BEGIN ERROR HANDLING-- */ + if (ret != ZFS_OK) { + *error_code = MPIO_Err_create_code(MPI_SUCCESS, + MPIR_ERR_RECOVERABLE, + myname, __LINE__, + ADIOI_ZOIDFS_error_convert(ret), + "Error in zoidfs_commit", 0); + } + /* --END ERROR HANDLING-- */ +} + +/* + * vim: ts=8 sts=4 sw=4 noexpandtab + */ diff --git a/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_io.c b/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_io.c new file mode 100644 index 0000000000..f785d0da84 --- /dev/null +++ b/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_io.c @@ -0,0 +1,95 @@ +/* -*- Mode: C; c-basic-offset:4 ; -*- + * + * Copyright (C) 1997 University of Chicago. + * See COPYRIGHT notice in top-level directory. 
+ */ + +#include "adio.h" +#include "adio_extern.h" +#include "ad_zoidfs.h" + +#include "ad_zoidfs_common.h" + +#define ZOIDFS_READ 0 +#define ZOIDFS_WRITE 1 + +/* Shared body of the contiguous read and write entry points. + * Transfers count elements of datatype between buf and the file with one + * zoidfs_read (flag == ZOIDFS_READ) or zoidfs_write (any other flag). The + * file position is fd->fp_ind when file_ptr_type is ADIO_INDIVIDUAL, + * otherwise offset. On success the individual file pointer (if used) and + * fd->fp_sys_posn are advanced and *error_code is MPI_SUCCESS; on failure + * *error_code carries the converted ZoidFS status and nothing is advanced. */ +static void ZOIDFS_IOContig(ADIO_File fd, void * buf, int count, + MPI_Datatype datatype, int file_ptr_type, + ADIO_Offset offset, ADIO_Status *status, + int flag, int *error_code) +{ + int ret, datatype_size; + uint64_t file_len; + size_t mem_len; + ADIOI_ZOIDFS_object *zoidfs_obj_ptr; + uint64_t file_offset = offset; + static char myname[] = "ADIOI_ZOIDFS_IOCONTIG"; + + zoidfs_obj_ptr = (ADIOI_ZOIDFS_object*)fd->fs_ptr; + + MPI_Type_size(datatype, &datatype_size); + /* widen before multiplying so a large count cannot overflow int */ + file_len = (uint64_t)datatype_size * (uint64_t)count; + mem_len = (size_t)file_len; + + if (file_ptr_type == ADIO_INDIVIDUAL) { + file_offset = fd->fp_ind; + } + + if (flag == ZOIDFS_READ) { + NO_STALE(ret, fd, zoidfs_obj_ptr, + zoidfs_read(zoidfs_obj_ptr, + 1, &buf, &mem_len, + 1, &file_offset, &file_len, ZOIDFS_NO_OP_HINT)); + } else { + NO_STALE(ret, fd, zoidfs_obj_ptr, + zoidfs_write(zoidfs_obj_ptr, + 1, (const void **)&buf, &mem_len, + 1, &file_offset, &file_len, ZOIDFS_NO_OP_HINT)); + } + /* --BEGIN ERROR HANDLING-- */ + if (ret != ZFS_OK ) { + *error_code = MPIO_Err_create_code(MPI_SUCCESS, + MPIR_ERR_RECOVERABLE, + myname, __LINE__, + ADIOI_ZOIDFS_error_convert(ret), + "Error in ZOIDFS I/O", 0); + goto fn_exit; + } + /* --END ERROR HANDLING-- */ + + if (file_ptr_type == ADIO_INDIVIDUAL) { + fd->fp_ind += file_len; + } + fd->fp_sys_posn = file_offset + file_len; + +#ifdef HAVE_STATUS_SET_BYTES + MPIR_Status_set_bytes(status, datatype, file_len); +#endif + + *error_code = MPI_SUCCESS; + +fn_exit: + return; +} + +/* Contiguous read: ZOIDFS_IOContig with flag ZOIDFS_READ. */ +void ADIOI_ZOIDFS_ReadContig(ADIO_File fd, void *buf, int count, + MPI_Datatype datatype, int file_ptr_type, + ADIO_Offset offset, ADIO_Status *status, + int *error_code) +{ + ZOIDFS_IOContig(fd, buf, count, datatype, file_ptr_type, + offset, status, ZOIDFS_READ, error_code); +} + +/* Contiguous write: ZOIDFS_IOContig with flag ZOIDFS_WRITE. */ +void ADIOI_ZOIDFS_WriteContig(ADIO_File fd, void *buf, int count, + MPI_Datatype 
datatype, int file_ptr_type, + ADIO_Offset offset, ADIO_Status *status, + int *error_code) +{ + ZOIDFS_IOContig(fd, buf, count, datatype, file_ptr_type, + offset, status, ZOIDFS_WRITE, error_code); +} + + +/* + * vim: ts=8 sts=4 sw=4 noexpandtab + */ diff --git a/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_open.c b/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_open.c new file mode 100644 index 0000000000..f0d5484223 --- /dev/null +++ b/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_open.c @@ -0,0 +1,153 @@ +/* -*- Mode: C; c-basic-offset:4 ; -*- + * vim: ts=8 sts=4 sw=4 noexpandtab + * + * Copyright (C) 2007 University of Chicago. + * See COPYRIGHT notice in top-level directory. + */ + +#include "ad_zoidfs.h" +#include "ad_zoidfs_common.h" + +/* open_status is helpful for bcasting values around */ +struct open_status_s { + int error; + zoidfs_handle_t handle; +}; +typedef struct open_status_s open_status; + +/* Resolve (or create) fname on behalf of the opening process. + * With ADIO_CREATE in access_mode the file is created via zoidfs_create, and + * an already-existing file combined with ADIO_EXCL is reported as + * ZFSERR_EXIST; otherwise the name is resolved with zoidfs_lookup. The + * status is stored in o_status->error and, when it is ZFS_OK, the handle in + * o_status->handle. nr_datafiles, strip_size and zoidfs_ptr are not used. */ +static void fake_an_open(char *fname, int access_mode, + int nr_datafiles, MPI_Offset strip_size, + ADIOI_ZOIDFS_object *zoidfs_ptr, + open_status *o_status) +{ + int ret, created; + zoidfs_sattr_t attribs; + zoidfs_handle_t handle; + + ADIOI_ZOIDFS_makeattribs(&attribs); + + /* zoidfs_create succeeds even if a file already exists, so we can do + * our job with fewer calls than in other cases. However, we need to + * be careful with ADIO_EXCL. + */ + if (access_mode & ADIO_CREATE) { + ret = zoidfs_create(NULL, NULL, + fname, &attribs, &handle, &created, ZOIDFS_NO_OP_HINT); + if ((ret == ZFS_OK) && !created && (access_mode & ADIO_EXCL)) { + /* lookup should not succeed if opened with EXCL */ + o_status->error = ZFSERR_EXIST; + return; + } + } + else { + ret = zoidfs_lookup(NULL, NULL, fname, &handle, ZOIDFS_NO_OP_HINT); + } + + o_status->error = ret; + /* handle is only filled in by a successful call; do not copy (and later + * broadcast) uninitialized stack contents on failure */ + if (ret == ZFS_OK) + o_status->handle = handle; + return; +} + + +/* ADIOI_ZOIDFS_Open: + * one process opens (or creates) the file, then broadcasts the result to the + * remaining processors. 
+ * + * ADIO_Open used to perform an optimization when MPI_MODE_CREATE (and before + * that, MPI_MODE_EXCL) was set. Because ZoidFS handles file lookup and + * creation more scalably than traditional file systems, ADIO_Open now skips any + * special handling when CREATE is set. + * + * Every rank allocates its own ADIOI_ZOIDFS_object. When the broadcast + * status is ZFS_OK the broadcast handle is copied into it, fd->fs_ptr is set + * to it and *error_code is MPI_SUCCESS; otherwise the allocation is freed, + * fd->fs_ptr is reset to NULL and *error_code carries the converted error. + * Allocation or ADIOI_ZOIDFS_Init failures return before any broadcast. */ +void ADIOI_ZOIDFS_Open(ADIO_File fd, int *error_code) +{ + int rank; + static char myname[] = "ADIOI_ZOIDFS_OPEN"; + ADIOI_ZOIDFS_object *zoidfs_obj_ptr; + + /* since one process is doing the open, that means one process is also + * doing the error checking. define a struct for both the object reference + * and the error code to broadcast to all the processors */ + + open_status o_status; + MPI_Datatype open_status_type; + MPI_Datatype types[2] = {MPI_INT, MPI_BYTE}; + int lens[2] = {1, sizeof(ADIOI_ZOIDFS_object)}; /* NOTE(review): byte count uses ADIOI_ZOIDFS_object while the field is a zoidfs_handle_t -- presumably the same type; confirm */ + MPI_Aint offsets[2]; + + memset(&o_status, 0, sizeof(o_status)); + zoidfs_obj_ptr = (ADIOI_ZOIDFS_object *) + ADIOI_Malloc(sizeof(ADIOI_ZOIDFS_object)); + /* --BEGIN ERROR HANDLING-- */ + if (zoidfs_obj_ptr == NULL) { + *error_code = MPIO_Err_create_code(MPI_SUCCESS, + MPIR_ERR_RECOVERABLE, + myname, __LINE__, + MPI_ERR_UNKNOWN, + "Error allocating memory", 0); + return; + } + /* --END ERROR HANDLING-- */ + + MPI_Comm_rank(fd->comm, &rank); + + ADIOI_ZOIDFS_Init(rank, error_code); + if (*error_code != MPI_SUCCESS) + { + /* ADIOI_ZOIDFS_INIT handles creating error codes on its own */ + ADIOI_Free(zoidfs_obj_ptr); + return; + } + + /* one process resolves name and will later bcast to others */ +#ifdef ADIOI_MPE_LOGGING + MPE_Log_event( ADIOI_MPE_open_a, 0, NULL ); +#endif + if (rank == fd->hints->ranklist[0] && fd->fs_ptr == NULL) { + fake_an_open(fd->filename, fd->access_mode, + fd->hints->striping_factor, + fd->hints->striping_unit, + zoidfs_obj_ptr, &o_status); + /* store credentials and object reference in fd */ + *zoidfs_obj_ptr = o_status.handle; + fd->fs_ptr = zoidfs_obj_ptr; + } +#ifdef ADIOI_MPE_LOGGING + MPE_Log_event( ADIOI_MPE_open_b, 0, NULL ); +#endif + + /* 
broadcast status and (possibly valid) object reference. + * the absolute addresses of the two o_status fields serve as the + * displacements of the struct type, which is why the Bcast below + * passes MPI_BOTTOM as its buffer */ + MPI_Address(&o_status.error, &offsets[0]); + MPI_Address(&o_status.handle, &offsets[1]); + + MPI_Type_struct(2, lens, offsets, types, &open_status_type); + MPI_Type_commit(&open_status_type); + + /* Assertion: if we hit this Bcast, then all processes collectively + * called this open. + * + * That's because deferred open never happens with this fs. + */ + MPI_Bcast(MPI_BOTTOM, 1, open_status_type, fd->hints->ranklist[0], + fd->comm); + MPI_Type_free(&open_status_type); + + /* --BEGIN ERROR HANDLING-- */ + if (o_status.error != ZFS_OK) + { + ADIOI_Free(zoidfs_obj_ptr); + fd->fs_ptr = NULL; + *error_code = MPIO_Err_create_code(MPI_SUCCESS, + MPIR_ERR_RECOVERABLE, + myname, __LINE__, + ADIOI_ZOIDFS_error_convert(o_status.error), + "Unknown error", 0); + /* TODO: FIX STRING */ + return; + } + /* --END ERROR HANDLING-- */ + + *zoidfs_obj_ptr = o_status.handle; + fd->fs_ptr = zoidfs_obj_ptr; + + *error_code = MPI_SUCCESS; + return; +} diff --git a/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_read_list.c b/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_read_list.c new file mode 100644 index 0000000000..ac3bf5d6bd --- /dev/null +++ b/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_read_list.c @@ -0,0 +1,826 @@ +/* -*- Mode: C; c-basic-offset:4 ; -*- + * vim: ts=8 sts=4 sw=4 noexpandtab + * + * Copyright (C) 2008 University of Chicago. + * See COPYRIGHT notice in top-level directory. + */ + +#include "adio.h" +#include "adio_extern.h" +#include "ad_zoidfs.h" + +#include "ad_zoidfs_common.h" + +/* Copied from ADIOI_PVFS2_OldReadStrided. It would be good to have fewer + * copies of this code... */ +void ADIOI_ZOIDFS_ReadStrided(ADIO_File fd, void *buf, int count, + MPI_Datatype datatype, int file_ptr_type, + ADIO_Offset offset, ADIO_Status *status, int + *error_code) +{ + /* offset is in units of etype relative to the filetype. 
*/ + ADIOI_Flatlist_node *flat_buf, *flat_file; + int i, j, k, brd_size, frd_size=0, st_index=0; + int bufsize, sum, n_etypes_in_filetype, size_in_filetype; + int n_filetypes, etype_in_filetype; + ADIO_Offset abs_off_in_filetype=0; + int filetype_size, etype_size, buftype_size; + MPI_Aint filetype_extent, buftype_extent; + int buf_count, buftype_is_contig, filetype_is_contig; + ADIO_Offset off, disp, start_off, initial_off; + int flag, st_frd_size, st_n_filetypes; + + size_t mem_list_count, file_list_count; + void ** mem_offsets; + uint64_t *file_offsets; + size_t *mem_lengths; + uint64_t *file_lengths; + int total_blks_to_read; + + int max_mem_list, max_file_list; + + int b_blks_read; + int f_data_read; + int size_read=0, n_read_lists, extra_blks; + + int end_brd_size, end_frd_size; + int start_k, start_j, new_file_read, new_buffer_read; + int start_mem_offset; + ADIOI_ZOIDFS_object * zoidfs_obj_ptr; + int err_flag=0; + MPI_Offset total_bytes_read = 0; + static char myname[] = "ADIOI_ZOIDFS_ReadStrided"; + + /* note: I don't know what zoidfs will do if you pass it a super-long list, + * so let's keep with the PVFS limit for now */ +#define MAX_ARRAY_SIZE 64 + + *error_code = MPI_SUCCESS; /* changed below if error */ + + ADIOI_Datatype_iscontig(datatype, &buftype_is_contig); + ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig); + + /* the HDF5 tests showed a bug in this list processing code (see many many + * lines down below). We added a workaround, but common HDF5 file types + * are actually contiguous and do not need the expensive workarond */ + if (!filetype_is_contig) { + flat_file = ADIOI_Flatlist; + while (flat_file->type != fd->filetype) flat_file = flat_file->next; + if (flat_file->count == 1 && !buftype_is_contig) + filetype_is_contig = 1; + } + + MPI_Type_size(fd->filetype, &filetype_size); + if ( ! 
filetype_size ) { + *error_code = MPI_SUCCESS; + return; + } + + MPI_Type_extent(fd->filetype, &filetype_extent); + MPI_Type_size(datatype, &buftype_size); + MPI_Type_extent(datatype, &buftype_extent); + etype_size = fd->etype_size; + + bufsize = buftype_size * count; + + zoidfs_obj_ptr = (ADIOI_ZOIDFS_object *)fd->fs_ptr; + + if (!buftype_is_contig && filetype_is_contig) { + +/* noncontiguous in memory, contiguous in file. */ + uint64_t file_offsets; + uint64_t file_lengths; + + ADIOI_Flatten_datatype(datatype); + flat_buf = ADIOI_Flatlist; + while (flat_buf->type != datatype) flat_buf = flat_buf->next; + + off = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind : + fd->disp + etype_size * offset; + + file_list_count = 1; + file_offsets = off; + file_lengths = 0; + total_blks_to_read = count*flat_buf->count; + b_blks_read = 0; + + /* allocate arrays according to max usage */ + if (total_blks_to_read > MAX_ARRAY_SIZE) + mem_list_count = MAX_ARRAY_SIZE; + else mem_list_count = total_blks_to_read; + mem_offsets = (void*)ADIOI_Malloc(mem_list_count*sizeof(void*)); + mem_lengths = (size_t*)ADIOI_Malloc(mem_list_count*sizeof(size_t)); + + /* TODO: CHECK RESULTS OF MEMORY ALLOCATION */ + + j = 0; + /* step through each block in memory, filling memory arrays */ + while (b_blks_read < total_blks_to_read) { + for (i=0; icount; i++) { + mem_offsets[b_blks_read % MAX_ARRAY_SIZE] = + buf + j*buftype_extent + flat_buf->indices[i]; + mem_lengths[b_blks_read % MAX_ARRAY_SIZE] = + flat_buf->blocklens[i]; + file_lengths += flat_buf->blocklens[i]; + b_blks_read++; + if (!(b_blks_read % MAX_ARRAY_SIZE) || + (b_blks_read == total_blks_to_read)) { + + /* in the case of the last read list call, + adjust mem_list_count */ + if (b_blks_read == total_blks_to_read) { + mem_list_count = total_blks_to_read % MAX_ARRAY_SIZE; + /* in case last read list call fills max arrays */ + if (!mem_list_count) mem_list_count = MAX_ARRAY_SIZE; + } +#ifdef ADIOI_MPE_LOGGING + MPE_Log_event( ADIOI_MPE_read_a, 
0, NULL ); +#endif + NO_STALE(err_flag, fd, zoidfs_obj_ptr, + zoidfs_read(zoidfs_obj_ptr, + mem_list_count, + mem_offsets, mem_lengths, + 1, &file_offsets, &file_lengths, ZOIDFS_NO_OP_HINT)); +#ifdef ADIOI_MPE_LOGGING + MPE_Log_event( ADIOI_MPE_read_b, 0, NULL ); +#endif + /* --BEGIN ERROR HANDLING-- */ + if (err_flag != ZFS_OK) { + *error_code = MPIO_Err_create_code(MPI_SUCCESS, + MPIR_ERR_RECOVERABLE, + myname, __LINE__, + ADIOI_ZOIDFS_error_convert(err_flag), + "Error in zoidfs_read", 0); + goto error_state; + } + total_bytes_read += file_lengths; + /* --END ERROR HANDLING-- */ + + /* in the case of error or the last read list call, + * leave here */ + if (err_flag || b_blks_read == total_blks_to_read) break; + + file_offsets += file_lengths; + file_lengths = 0; + } + } /* for (i=0; icount; i++) */ + j++; + } /* while (b_blks_read < total_blks_to_read) */ + ADIOI_Free(mem_offsets); + ADIOI_Free(mem_lengths); + + if (file_ptr_type == ADIO_INDIVIDUAL) + fd->fp_ind += total_bytes_read; + + fd->fp_sys_posn = -1; /* set it to null. */ + +#ifdef HAVE_STATUS_SET_BYTES + MPIR_Status_set_bytes(status, datatype, bufsize); + /* This isa temporary way of filling in status. The right way is to + keep tracke of how much data was actually read adn placed in buf + by ADIOI_BUFFERED_READ. 
*/ +#endif + ADIOI_Delete_flattened(datatype); + + return; + } /* if (!buftype_is_contig && filetype_is_contig) */ + + /* know file is noncontiguous from above */ + /* noncontiguous in file */ + + /* filetype already flattened in ADIO_Open */ + flat_file = ADIOI_Flatlist; + while (flat_file->type != fd->filetype) flat_file = flat_file->next; + + disp = fd->disp; + initial_off = offset; + + + /* for each case - ADIO_Individual pointer or explicit, find the file + offset in bytes (offset), n_filetypes (how many filetypes into + file to start), frd_size (remaining amount of data in present + file block), and st_index (start point in terms of blocks in + starting filetype) */ + if (file_ptr_type == ADIO_INDIVIDUAL) { + offset = fd->fp_ind; /* in bytes */ + n_filetypes = -1; + flag = 0; + while (!flag) { + n_filetypes++; + for (i=0; icount; i++) { + if (disp + flat_file->indices[i] + + ((ADIO_Offset) n_filetypes)*filetype_extent + + flat_file->blocklens[i] >= offset) { + st_index = i; + frd_size = disp + flat_file->indices[i] + + ((ADIO_Offset) n_filetypes)*filetype_extent + + flat_file->blocklens[i] - offset; + flag = 1; + break; + } + } + } /* while (!flag) */ + } /* if (file_ptr_type == ADIO_INDIVIDUAL) */ + else { + n_etypes_in_filetype = filetype_size/etype_size; + n_filetypes = (int) (offset / n_etypes_in_filetype); + etype_in_filetype = (int) (offset % n_etypes_in_filetype); + size_in_filetype = etype_in_filetype * etype_size; + + sum = 0; + for (i=0; icount; i++) { + sum += flat_file->blocklens[i]; + if (sum > size_in_filetype) { + st_index = i; + frd_size = sum - size_in_filetype; + abs_off_in_filetype = flat_file->indices[i] + + size_in_filetype - (sum - flat_file->blocklens[i]); + break; + } + } + + /* abs. 
offset in bytes in the file */ + offset = disp + ((ADIO_Offset) n_filetypes)*filetype_extent + + abs_off_in_filetype; + } /* else [file_ptr_type != ADIO_INDIVIDUAL] */ + + start_off = offset; + st_frd_size = frd_size; + st_n_filetypes = n_filetypes; + + if (buftype_is_contig && !filetype_is_contig) { + +/* contiguous in memory, noncontiguous in file. should be the most + common case. */ + + /* only one memory off-len pair, so no array here */ + size_t mem_lengths; + size_t mem_offsets; + + i = 0; + j = st_index; + n_filetypes = st_n_filetypes; + + mem_list_count = 1; + + /* determine how many blocks in file to read */ + f_data_read = ADIOI_MIN(st_frd_size, bufsize); + total_blks_to_read = 1; + if (j < (flat_file->count-1)) j++; + else { + j = 0; + n_filetypes++; + } + while (f_data_read < bufsize) { + f_data_read += flat_file->blocklens[j]; + total_blks_to_read++; + if (j<(flat_file->count-1)) j++; + else j = 0; + } + + j = st_index; + n_filetypes = st_n_filetypes; + n_read_lists = total_blks_to_read/MAX_ARRAY_SIZE; + extra_blks = total_blks_to_read%MAX_ARRAY_SIZE; + + mem_offsets = (size_t)buf; + mem_lengths = 0; + + /* if at least one full readlist, allocate file arrays + at max array size and don't free until very end */ + if (n_read_lists) { + file_offsets = (int64_t*)ADIOI_Malloc(MAX_ARRAY_SIZE* + sizeof(int64_t)); + file_lengths = (uint64_t*)ADIOI_Malloc(MAX_ARRAY_SIZE* + sizeof(uint64_t)); + } + /* if there's no full readlist allocate file arrays according + to needed size (extra_blks) */ + else { + file_offsets = (int64_t*)ADIOI_Malloc(extra_blks* + sizeof(int64_t)); + file_lengths = (uint64_t*)ADIOI_Malloc(extra_blks* + sizeof(uint64_t)); + } + + /* for file arrays that are of MAX_ARRAY_SIZE, build arrays */ + for (i=0; iindices[j]; + file_lengths[k] = flat_file->blocklens[j]; + mem_lengths += file_lengths[k]; + } + if (j<(flat_file->count - 1)) j++; + else { + j = 0; + n_filetypes++; + } + } /* for (k=0; kindices[j]; + if (k == (extra_blks - 1)) { + 
file_lengths[k] = bufsize - mem_lengths + - mem_offsets + (size_t)buf; + } + else file_lengths[k] = flat_file->blocklens[j]; + } /* if(i || k) */ + mem_lengths += file_lengths[k]; + if (j<(flat_file->count - 1)) j++; + else { + j = 0; + n_filetypes++; + } + } /* for (k=0; ktype != datatype) flat_buf = flat_buf->next; + + size_read = 0; + n_filetypes = st_n_filetypes; + frd_size = st_frd_size; + brd_size = flat_buf->blocklens[0]; + buf_count = 0; + start_mem_offset = 0; + start_k = k = 0; + start_j = st_index; + max_mem_list = 0; + max_file_list = 0; + + /* run through and file max_file_list and max_mem_list so that you + can allocate the file and memory arrays less than MAX_ARRAY_SIZE + if possible */ + + while (size_read < bufsize) { + k = start_k; + new_buffer_read = 0; + mem_list_count = 0; + while ((mem_list_count < MAX_ARRAY_SIZE) && + (new_buffer_read < bufsize-size_read)) { + /* find mem_list_count and file_list_count such that both are + less than MAX_ARRAY_SIZE, the sum of their lengths are + equal, and the sum of all the data read and data to be + read in the next immediate read list is less than + bufsize */ + if(mem_list_count) { + if((new_buffer_read + flat_buf->blocklens[k] + + size_read) > bufsize) { + end_brd_size = new_buffer_read + + flat_buf->blocklens[k] - (bufsize - size_read); + new_buffer_read = bufsize - size_read; + } + else { + new_buffer_read += flat_buf->blocklens[k]; + end_brd_size = flat_buf->blocklens[k]; + } + } + else { + if (brd_size > (bufsize - size_read)) { + new_buffer_read = bufsize - size_read; + brd_size = new_buffer_read; + } + else new_buffer_read = brd_size; + } + mem_list_count++; + k = (k + 1)%flat_buf->count; + } /* while ((mem_list_count < MAX_ARRAY_SIZE) && + (new_buffer_read < bufsize-size_read)) */ + j = start_j; + new_file_read = 0; + file_list_count = 0; + while ((file_list_count < MAX_ARRAY_SIZE) && + (new_file_read < new_buffer_read)) { + if(file_list_count) { + if((new_file_read + flat_file->blocklens[j]) > + 
new_buffer_read) { + end_frd_size = new_buffer_read - new_file_read; + new_file_read = new_buffer_read; + j--; + } + else { + new_file_read += flat_file->blocklens[j]; + end_frd_size = flat_file->blocklens[j]; + } + } + else { + if (frd_size > new_buffer_read) { + new_file_read = new_buffer_read; + frd_size = new_file_read; + } + else new_file_read = frd_size; + } + file_list_count++; + if (j < (flat_file->count - 1)) j++; + else j = 0; + + k = start_k; + if ((new_file_read < new_buffer_read) && + (file_list_count == MAX_ARRAY_SIZE)) { + new_buffer_read = 0; + mem_list_count = 0; + while (new_buffer_read < new_file_read) { + if(mem_list_count) { + if((new_buffer_read + flat_buf->blocklens[k]) > + new_file_read) { + end_brd_size = new_file_read - new_buffer_read; + new_buffer_read = new_file_read; + k--; + } + else { + new_buffer_read += flat_buf->blocklens[k]; + end_brd_size = flat_buf->blocklens[k]; + } + } + else { + new_buffer_read = brd_size; + if (brd_size > (bufsize - size_read)) { + new_buffer_read = bufsize - size_read; + brd_size = new_buffer_read; + } + } + mem_list_count++; + k = (k + 1)%flat_buf->count; + } /* while (new_buffer_read < new_file_read) */ + } /* if ((new_file_read < new_buffer_read) && (file_list_count + == MAX_ARRAY_SIZE)) */ + } /* while ((mem_list_count < MAX_ARRAY_SIZE) && + (new_buffer_read < bufsize-size_read)) */ + + /* fakes filling the readlist arrays of lengths found above */ + k = start_k; + j = start_j; + for (i=0; iblocklens[k] == end_brd_size) + brd_size = flat_buf->blocklens[(k+1)% + flat_buf->count]; + else { + brd_size = flat_buf->blocklens[k] - end_brd_size; + k--; + buf_count--; + } + } + } + buf_count++; + k = (k + 1)%flat_buf->count; + } /* for (i=0; iblocklens[j] == end_frd_size) + frd_size = flat_file->blocklens[(j+1)% + flat_file->count]; + else { + frd_size = flat_file->blocklens[j] - end_frd_size; + j--; + } + } + } + if (j < flat_file->count - 1) j++; + else { + j = 0; + n_filetypes++; + } + } /* for (i=0; 
iblocklens[0] ) ) || + ((mem_list_count == 1) && + (new_buffer_read < flat_buf->blocklens[0]) ) || + ((file_list_count == MAX_ARRAY_SIZE) && + (new_file_read < flat_buf->blocklens[0]) ) || + ( (mem_list_count == MAX_ARRAY_SIZE) && + (new_buffer_read < flat_file->blocklens[0])) ) + { + + ADIOI_Delete_flattened(datatype); + ADIOI_GEN_ReadStrided_naive(fd, buf, count, datatype, + file_ptr_type, initial_off, status, error_code); + return; + } + + mem_offsets = (void *)ADIOI_Malloc(max_mem_list*sizeof(void *)); + mem_lengths = (size_t*)ADIOI_Malloc(max_mem_list*sizeof(size_t)); + file_offsets = (uint64_t *)ADIOI_Malloc(max_file_list*sizeof(uint64_t)); + file_lengths = (uint64_t *)ADIOI_Malloc(max_file_list*sizeof(uint64_t)); + + size_read = 0; + n_filetypes = st_n_filetypes; + frd_size = st_frd_size; + brd_size = flat_buf->blocklens[0]; + buf_count = 0; + start_mem_offset = 0; + start_k = k = 0; + start_j = st_index; + + /* this section calculates mem_list_count and file_list_count + and also finds the possibly odd sized last array elements + in new_frd_size and new_brd_size */ + + while (size_read < bufsize) { + k = start_k; + new_buffer_read = 0; + mem_list_count = 0; + while ((mem_list_count < MAX_ARRAY_SIZE) && + (new_buffer_read < bufsize-size_read)) { + /* find mem_list_count and file_list_count such that both are + less than MAX_ARRAY_SIZE, the sum of their lengths are + equal, and the sum of all the data read and data to be + read in the next immediate read list is less than + bufsize */ + if(mem_list_count) { + if((new_buffer_read + flat_buf->blocklens[k] + + size_read) > bufsize) { + end_brd_size = new_buffer_read + + flat_buf->blocklens[k] - (bufsize - size_read); + new_buffer_read = bufsize - size_read; + } + else { + new_buffer_read += flat_buf->blocklens[k]; + end_brd_size = flat_buf->blocklens[k]; + } + } + else { + if (brd_size > (bufsize - size_read)) { + new_buffer_read = bufsize - size_read; + brd_size = new_buffer_read; + } + else new_buffer_read = 
brd_size; + } + mem_list_count++; + k = (k + 1)%flat_buf->count; + } /* while ((mem_list_count < MAX_ARRAY_SIZE) && + (new_buffer_read < bufsize-size_read)) */ + j = start_j; + new_file_read = 0; + file_list_count = 0; + while ((file_list_count < MAX_ARRAY_SIZE) && + (new_file_read < new_buffer_read)) { + if(file_list_count) { + if((new_file_read + flat_file->blocklens[j]) > + new_buffer_read) { + end_frd_size = new_buffer_read - new_file_read; + new_file_read = new_buffer_read; + j--; + } + else { + new_file_read += flat_file->blocklens[j]; + end_frd_size = flat_file->blocklens[j]; + } + } + else { + if (frd_size > new_buffer_read) { + new_file_read = new_buffer_read; + frd_size = new_file_read; + } + else new_file_read = frd_size; + } + file_list_count++; + if (j < (flat_file->count - 1)) j++; + else j = 0; + + k = start_k; + if ((new_file_read < new_buffer_read) && + (file_list_count == MAX_ARRAY_SIZE)) { + new_buffer_read = 0; + mem_list_count = 0; + while (new_buffer_read < new_file_read) { + if(mem_list_count) { + if((new_buffer_read + flat_buf->blocklens[k]) > + new_file_read) { + end_brd_size = new_file_read - new_buffer_read; + new_buffer_read = new_file_read; + k--; + } + else { + new_buffer_read += flat_buf->blocklens[k]; + end_brd_size = flat_buf->blocklens[k]; + } + } + else { + new_buffer_read = brd_size; + if (brd_size > (bufsize - size_read)) { + new_buffer_read = bufsize - size_read; + brd_size = new_buffer_read; + } + } + mem_list_count++; + k = (k + 1)%flat_buf->count; + } /* while (new_buffer_read < new_file_read) */ + } /* if ((new_file_read < new_buffer_read) && (file_list_count + == MAX_ARRAY_SIZE)) */ + } /* while ((mem_list_count < MAX_ARRAY_SIZE) && + (new_buffer_read < bufsize-size_read)) */ + + /* fills the allocated readlist arrays */ + k = start_k; + j = start_j; + for (i=0; icount) + + flat_buf->indices[k]; + if(!i) { + mem_lengths[0] = brd_size; + mem_offsets[0] += flat_buf->blocklens[k] - brd_size; + } + else { + if (i == 
(mem_list_count - 1)) { + mem_lengths[i] = end_brd_size; + if (flat_buf->blocklens[k] == end_brd_size) + brd_size = flat_buf->blocklens[(k+1)% + flat_buf->count]; + else { + brd_size = flat_buf->blocklens[k] - end_brd_size; + k--; + buf_count--; + } + } + else { + mem_lengths[i] = flat_buf->blocklens[k]; + } + } + buf_count++; + k = (k + 1)%flat_buf->count; + } /* for (i=0; iindices[j] + + ((ADIO_Offset)n_filetypes) * filetype_extent; + if (!i) { + file_lengths[0] = frd_size; + file_offsets[0] += flat_file->blocklens[j] - frd_size; + } + else { + if (i == (file_list_count - 1)) { + file_lengths[i] = end_frd_size; + if (flat_file->blocklens[j] == end_frd_size) + frd_size = flat_file->blocklens[(j+1)% + flat_file->count]; + else { + frd_size = flat_file->blocklens[j] - end_frd_size; + j--; + } + } + else file_lengths[i] = flat_file->blocklens[j]; + } + if (j < flat_file->count - 1) j++; + else { + j = 0; + n_filetypes++; + } + } /* for (i=0; ifp_ind = file_offsets[file_list_count-1]+ + file_lengths[file_list_count-1]; + } + + ADIOI_Free(file_offsets); + ADIOI_Free(file_lengths); + + if (err_flag == 0) *error_code = MPI_SUCCESS; + +error_state: + fd->fp_sys_posn = -1; /* set it to null. */ + +#ifdef HAVE_STATUS_SET_BYTES + MPIR_Status_set_bytes(status, datatype, bufsize); + /* This is a temporary way of filling in status. The right way is to + keep track of how much data was actually read and placed in buf + by ADIOI_BUFFERED_READ. */ +#endif + + if (!buftype_is_contig) ADIOI_Delete_flattened(datatype); +} + diff --git a/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_resize.c b/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_resize.c new file mode 100644 index 0000000000..de000f1e6e --- /dev/null +++ b/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_resize.c @@ -0,0 +1,53 @@ +/* -*- Mode: C; c-basic-offset:4 ; -*- */ +/* + * + * Copyright (C) 1997 University of Chicago. + * See COPYRIGHT notice in top-level directory. 
+ */ + +#include "ad_zoidfs.h" +#include "ad_zoidfs_common.h" + +/* Collective resize done scalably, in the same spirit as flush: the rank + * named by fd->hints->ranklist[0] calls zoidfs_resize and its status is + * broadcast to all ranks of fd->comm. *error_code becomes MPI_SUCCESS when + * that status is ZFS_OK, otherwise an MPIO error code built from the + * converted ZoidFS status. */ +void ADIOI_ZOIDFS_Resize(ADIO_File fd, ADIO_Offset size, int *error_code) +{ + static char myname[] = "ADIOI_ZOIDFS_RESIZE"; + ADIOI_ZOIDFS_object *handle = (ADIOI_ZOIDFS_object *)fd->fs_ptr; + int status, my_rank, root; + + MPI_Comm_rank(fd->comm, &my_rank); + root = fd->hints->ranklist[0]; + + /* MPI-IO semantics treat conflicting MPI_File_set_size requests the + * same as conflicting write requests. Thus, a resize from one + * process does not have to be visible to the other processes until a + * synchronization point is reached */ + + /* only the root talks to the file system; all ranks join the broadcast */ + if (my_rank == root) { + NO_STALE(status, fd, handle, + zoidfs_resize(handle, size, ZOIDFS_NO_OP_HINT)); + } + MPI_Bcast(&status, 1, MPI_INT, root, fd->comm); + + /* --BEGIN ERROR HANDLING-- */ + if (status != ZFS_OK) { + *error_code = MPIO_Err_create_code(MPI_SUCCESS, + MPIR_ERR_RECOVERABLE, + myname, __LINE__, + ADIOI_ZOIDFS_error_convert(status), + "Error in zoidfs_resize", 0); + return; + } + /* --END ERROR HANDLING-- */ + + *error_code = MPI_SUCCESS; +} + +/* + * vim: ts=8 sts=4 sw=4 noexpandtab + */ diff --git a/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_write_list.c b/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_write_list.c new file mode 100644 index 0000000000..5ecf0c82ed --- /dev/null +++ b/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_write_list.c @@ -0,0 +1,857 @@ +/* -*- Mode: C; c-basic-offset:4 ; -*- + * vim: ts=8 sts=4 sw=4 noexpandtab + * + * Copyright (C) 2008 University of Chicago. + * See COPYRIGHT notice in top-level directory. 
+ */ + +#include "adio.h" +#include "adio_extern.h" +#include "ad_zoidfs.h" + +#include "ad_zoidfs_common.h" + +/* Copied from ADIOI_PVFS2_OldWriteStrided. It would be good to have fewer + * copies of this code... */ +void ADIOI_ZOIDFS_WriteStrided(ADIO_File fd, void *buf, int count, + MPI_Datatype datatype, int file_ptr_type, + ADIO_Offset offset, ADIO_Status *status, + int *error_code) +{ + /* as with all the other WriteStrided functions, offset is in units of + * etype relative to the filetype */ + + /* Since zoidfs does not support file locking, can't do buffered writes + as on Unix */ + + ADIOI_Flatlist_node *flat_buf, *flat_file; + int i, j, k, bwr_size, fwr_size=0, st_index=0; + int bufsize, sum, n_etypes_in_filetype, size_in_filetype; + int n_filetypes, etype_in_filetype; + ADIO_Offset abs_off_in_filetype=0; + int filetype_size, etype_size, buftype_size; + MPI_Aint filetype_extent, buftype_extent; + int buf_count, buftype_is_contig, filetype_is_contig; + ADIO_Offset off, disp, start_off, initial_off; + int flag, st_fwr_size, st_n_filetypes; + int err_flag=0; + + size_t mem_list_count, file_list_count; + const void ** mem_offsets; + uint64_t *file_offsets; + size_t *mem_lengths; + uint64_t *file_lengths; + int total_blks_to_write; + + int max_mem_list, max_file_list; + + int b_blks_wrote; + int f_data_wrote; + int size_wrote=0, n_write_lists, extra_blks; + + int end_bwr_size, end_fwr_size; + int start_k, start_j, new_file_write, new_buffer_write; + int start_mem_offset; + ADIOI_ZOIDFS_object *zoidfs_obj_ptr; + MPI_Offset total_bytes_written=0; + static char myname[] = "ADIOI_ZOIDFS_WRITESTRIDED"; + + /* note: I don't know what zoidfs will do if you pass it a super-long list, + * so let's keep with the PVFS limit for now */ +#define MAX_ARRAY_SIZE 64 + + /* --BEGIN ERROR HANDLING-- */ + if (fd->atomicity) { + *error_code = MPIO_Err_create_code(MPI_SUCCESS, + MPIR_ERR_RECOVERABLE, + myname, __LINE__, + MPI_ERR_ARG, + "Atomic noncontiguous writes are not 
supported by ZOIDFS", 0); + return; + } + /* --END ERROR HANDLING-- */ + + ADIOI_Datatype_iscontig(datatype, &buftype_is_contig); + ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig); + + /* the HDF5 tests showed a bug in this list processing code (see many many + * lines down below). We added a workaround, but common HDF5 file types + * are actually contiguous and do not need the expensive workarond */ + if (!filetype_is_contig) { + flat_file = ADIOI_Flatlist; + while (flat_file->type != fd->filetype) flat_file = flat_file->next; + if (flat_file->count == 1 && !buftype_is_contig) + filetype_is_contig = 1; + } + + MPI_Type_size(fd->filetype, &filetype_size); + if ( ! filetype_size ) { + *error_code = MPI_SUCCESS; + return; + } + + MPI_Type_extent(fd->filetype, &filetype_extent); + MPI_Type_size(datatype, &buftype_size); + MPI_Type_extent(datatype, &buftype_extent); + etype_size = fd->etype_size; + + bufsize = buftype_size * count; + + zoidfs_obj_ptr = (ADIOI_ZOIDFS_object*)fd->fs_ptr; + + if (!buftype_is_contig && filetype_is_contig) { + +/* noncontiguous in memory, contiguous in file. 
*/ + uint64_t file_offsets; + uint64_t file_lengths; + + ADIOI_Flatten_datatype(datatype); + flat_buf = ADIOI_Flatlist; + while (flat_buf->type != datatype) flat_buf = flat_buf->next; + + if (file_ptr_type == ADIO_EXPLICIT_OFFSET) { + off = fd->disp + etype_size * offset; + } + else off = fd->fp_ind; + + file_list_count = 1; + file_offsets = off; + file_lengths = 0; + total_blks_to_write = count*flat_buf->count; + b_blks_wrote = 0; + + /* allocate arrays according to max usage */ + if (total_blks_to_write > MAX_ARRAY_SIZE) + mem_list_count = MAX_ARRAY_SIZE; + else mem_list_count = total_blks_to_write; + mem_offsets = (void*)ADIOI_Malloc(mem_list_count*sizeof(void*)); + mem_lengths = (size_t*)ADIOI_Malloc(mem_list_count*sizeof(size_t)); + + j = 0; + /* step through each block in memory, filling memory arrays */ + while (b_blks_wrote < total_blks_to_write) { + for (i=0; icount; i++) { + mem_offsets[b_blks_wrote % MAX_ARRAY_SIZE] = + buf + + j*buftype_extent + + flat_buf->indices[i]; + mem_lengths[b_blks_wrote % MAX_ARRAY_SIZE] = + flat_buf->blocklens[i]; + file_lengths += flat_buf->blocklens[i]; + b_blks_wrote++; + if (!(b_blks_wrote % MAX_ARRAY_SIZE) || + (b_blks_wrote == total_blks_to_write)) { + + /* in the case of the last write list call, + adjust mem_list_count */ + if (b_blks_wrote == total_blks_to_write) { + mem_list_count = total_blks_to_write % MAX_ARRAY_SIZE; + /* in case last write list call fills max arrays */ + if (!mem_list_count) mem_list_count = MAX_ARRAY_SIZE; + } +#ifdef ADIOI_MPE_LOGGING + MPE_Log_event( ADIOI_MPE_write_a, 0, NULL ); +#endif + NO_STALE(err_flag, fd, zoidfs_obj_ptr, + zoidfs_write(zoidfs_obj_ptr, + mem_list_count, + mem_offsets, mem_lengths, + 1, &file_offsets, &file_lengths, ZOIDFS_NO_OP_HINT)); + + /* --BEGIN ERROR HANDLING-- */ + if (err_flag != ZFS_OK) { + *error_code = MPIO_Err_create_code(MPI_SUCCESS, + MPIR_ERR_RECOVERABLE, + myname, __LINE__, + ADIOI_ZOIDFS_error_convert(err_flag), + "Error in zoidfs_write", 0); + break; + 
} +#ifdef ADIOI_MPE_LOGGING + MPE_Log_event( ADIOI_MPE_write_b, 0, NULL ); +#endif + total_bytes_written += file_lengths; + + /* in the case of error or the last write list call, + * leave here */ + /* --BEGIN ERROR HANDLING-- */ + if (err_flag) { + *error_code = MPIO_Err_create_code(MPI_SUCCESS, + MPIR_ERR_RECOVERABLE, + myname, __LINE__, + ADIOI_ZOIDFS_error_convert(err_flag), + "Error in zoidfs_write", 0); + break; + } + /* --END ERROR HANDLING-- */ + if (b_blks_wrote == total_blks_to_write) break; + + file_offsets += file_lengths; + file_lengths = 0; + } + } /* for (i=0; icount; i++) */ + j++; + } /* while (b_blks_wrote < total_blks_to_write) */ + ADIOI_Free(mem_offsets); + ADIOI_Free(mem_lengths); + + if (file_ptr_type == ADIO_INDIVIDUAL) + fd->fp_ind += total_bytes_written; + + if (!err_flag) *error_code = MPI_SUCCESS; + + fd->fp_sys_posn = -1; /* clear this. */ + +#ifdef HAVE_STATUS_SET_BYTES + MPIR_Status_set_bytes(status, datatype, bufsize); +/* This is a temporary way of filling in status. The right way is to + keep track of how much data was actually written by ADIOI_BUFFERED_WRITE. 
*/ +#endif + + ADIOI_Delete_flattened(datatype); + return; + } /* if (!buftype_is_contig && filetype_is_contig) */ + + /* already know that file is noncontiguous from above */ + /* noncontiguous in file */ + +/* filetype already flattened in ADIO_Open */ + flat_file = ADIOI_Flatlist; + while (flat_file->type != fd->filetype) flat_file = flat_file->next; + + disp = fd->disp; + initial_off = offset; + + /* for each case - ADIO_Individual pointer or explicit, find offset + (file offset in bytes), n_filetypes (how many filetypes into file + to start), fwr_size (remaining amount of data in present file + block), and st_index (start point in terms of blocks in starting + filetype) */ + if (file_ptr_type == ADIO_INDIVIDUAL) { + offset = fd->fp_ind; /* in bytes */ + n_filetypes = -1; + flag = 0; + while (!flag) { + n_filetypes++; + for (i=0; icount; i++) { + if (disp + flat_file->indices[i] + + ((ADIO_Offset) n_filetypes)*filetype_extent + + flat_file->blocklens[i] >= offset) { + st_index = i; + fwr_size = disp + flat_file->indices[i] + + ((ADIO_Offset) n_filetypes)*filetype_extent + + flat_file->blocklens[i] - offset; + flag = 1; + break; + } + } + } /* while (!flag) */ + } /* if (file_ptr_type == ADIO_INDIVIDUAL) */ + else { + n_etypes_in_filetype = filetype_size/etype_size; + n_filetypes = (int) (offset / n_etypes_in_filetype); + etype_in_filetype = (int) (offset % n_etypes_in_filetype); + size_in_filetype = etype_in_filetype * etype_size; + + sum = 0; + for (i=0; icount; i++) { + sum += flat_file->blocklens[i]; + if (sum > size_in_filetype) { + st_index = i; + fwr_size = sum - size_in_filetype; + abs_off_in_filetype = flat_file->indices[i] + + size_in_filetype - (sum - flat_file->blocklens[i]); + break; + } + } + + /* abs. 
offset in bytes in the file */ + offset = disp + ((ADIO_Offset) n_filetypes)*filetype_extent + + abs_off_in_filetype; + } /* else [file_ptr_type != ADIO_INDIVIDUAL] */ + + start_off = offset; + st_fwr_size = fwr_size; + st_n_filetypes = n_filetypes; + + if (buftype_is_contig && !filetype_is_contig) { + +/* contiguous in memory, noncontiguous in file. should be the most + common case. */ + + /* only one memory off-len pair, so no array */ + size_t mem_lengths; + size_t mem_offsets; + + i = 0; + j = st_index; + off = offset; + n_filetypes = st_n_filetypes; + + mem_list_count = 1; + + /* determine how many blocks in file to write */ + f_data_wrote = ADIOI_MIN(st_fwr_size, bufsize); + total_blks_to_write = 1; + if (j < (flat_file->count -1)) j++; + else { + j = 0; + n_filetypes++; + } + while (f_data_wrote < bufsize) { + f_data_wrote += flat_file->blocklens[j]; + total_blks_to_write++; + if (j<(flat_file->count-1)) j++; + else j = 0; + } + + j = st_index; + n_filetypes = st_n_filetypes; + n_write_lists = total_blks_to_write/MAX_ARRAY_SIZE; + extra_blks = total_blks_to_write%MAX_ARRAY_SIZE; + + mem_offsets = (size_t)buf; + mem_lengths = 0; + + /* if at least one full writelist, allocate file arrays + at max array size and don't free until very end */ + if (n_write_lists) { + file_offsets = (int64_t*)ADIOI_Malloc(MAX_ARRAY_SIZE* + sizeof(int64_t)); + file_lengths = (uint64_t*)ADIOI_Malloc(MAX_ARRAY_SIZE* + sizeof(uint64_t)); + } + /* if there's no full writelist allocate file arrays according + to needed size (extra_blks) */ + else { + file_offsets = (int64_t*)ADIOI_Malloc(extra_blks* + sizeof(int64_t)); + file_lengths = (uint64_t*)ADIOI_Malloc(extra_blks* + sizeof(uint64_t)); + } + + /* for file arrays that are of MAX_ARRAY_SIZE, build arrays */ + for (i=0; iindices[j]; + file_lengths[k] = flat_file->blocklens[j]; + mem_lengths += file_lengths[k]; + } + if (j<(flat_file->count - 1)) j++; + else { + j = 0; + n_filetypes++; + } + } /* for (k=0; kindices[j]; + /* XXX: 
double-check these casts */ + if (k == (extra_blks - 1)) { + file_lengths[k] = bufsize + - mem_lengths - mem_offsets + (size_t)buf; + } + else file_lengths[k] = flat_file->blocklens[j]; + } /* if(i || k) */ + mem_lengths += file_lengths[k]; + if (j<(flat_file->count - 1)) j++; + else { + j = 0; + n_filetypes++; + } + } /* for (k=0; ktype != datatype) flat_buf = flat_buf->next; + + size_wrote = 0; + n_filetypes = st_n_filetypes; + fwr_size = st_fwr_size; + bwr_size = flat_buf->blocklens[0]; + buf_count = 0; + start_mem_offset = 0; + start_k = k = 0; + start_j = st_index; + max_mem_list = 0; + max_file_list = 0; + + /* run through and file max_file_list and max_mem_list so that you + can allocate the file and memory arrays less than MAX_ARRAY_SIZE + if possible */ + + while (size_wrote < bufsize) { + k = start_k; + new_buffer_write = 0; + mem_list_count = 0; + while ((mem_list_count < MAX_ARRAY_SIZE) && + (new_buffer_write < bufsize-size_wrote)) { + /* find mem_list_count and file_list_count such that both are + less than MAX_ARRAY_SIZE, the sum of their lengths are + equal, and the sum of all the data written and data to be + written in the next immediate write list is less than + bufsize */ + if(mem_list_count) { + if((new_buffer_write + flat_buf->blocklens[k] + + size_wrote) > bufsize) { + end_bwr_size = new_buffer_write + + flat_buf->blocklens[k] - (bufsize - size_wrote); + new_buffer_write = bufsize - size_wrote; + } + else { + new_buffer_write += flat_buf->blocklens[k]; + end_bwr_size = flat_buf->blocklens[k]; + } + } + else { + if (bwr_size > (bufsize - size_wrote)) { + new_buffer_write = bufsize - size_wrote; + bwr_size = new_buffer_write; + } + else new_buffer_write = bwr_size; + } + mem_list_count++; + k = (k + 1)%flat_buf->count; + } /* while ((mem_list_count < MAX_ARRAY_SIZE) && + (new_buffer_write < bufsize-size_wrote)) */ + j = start_j; + new_file_write = 0; + file_list_count = 0; + while ((file_list_count < MAX_ARRAY_SIZE) && + (new_file_write < 
new_buffer_write)) { + if(file_list_count) { + if((new_file_write + flat_file->blocklens[j]) > + new_buffer_write) { + end_fwr_size = new_buffer_write - new_file_write; + new_file_write = new_buffer_write; + j--; + } + else { + new_file_write += flat_file->blocklens[j]; + end_fwr_size = flat_file->blocklens[j]; + } + } + else { + if (fwr_size > new_buffer_write) { + new_file_write = new_buffer_write; + fwr_size = new_file_write; + } + else new_file_write = fwr_size; + } + file_list_count++; + if (j < (flat_file->count - 1)) j++; + else j = 0; + + k = start_k; + if ((new_file_write < new_buffer_write) && + (file_list_count == MAX_ARRAY_SIZE)) { + new_buffer_write = 0; + mem_list_count = 0; + while (new_buffer_write < new_file_write) { + if(mem_list_count) { + if((new_buffer_write + flat_buf->blocklens[k]) > + new_file_write) { + end_bwr_size = new_file_write - + new_buffer_write; + new_buffer_write = new_file_write; + k--; + } + else { + new_buffer_write += flat_buf->blocklens[k]; + end_bwr_size = flat_buf->blocklens[k]; + } + } + else { + new_buffer_write = bwr_size; + if (bwr_size > (bufsize - size_wrote)) { + new_buffer_write = bufsize - size_wrote; + bwr_size = new_buffer_write; + } + } + mem_list_count++; + k = (k + 1)%flat_buf->count; + } /* while (new_buffer_write < new_file_write) */ + } /* if ((new_file_write < new_buffer_write) && + (file_list_count == MAX_ARRAY_SIZE)) */ + } /* while ((mem_list_count < MAX_ARRAY_SIZE) && + (new_buffer_write < bufsize-size_wrote)) */ + + /* fakes filling the writelist arrays of lengths found above */ + k = start_k; + j = start_j; + for (i=0; iblocklens[k] == end_bwr_size) + bwr_size = flat_buf->blocklens[(k+1)% + flat_buf->count]; + else { + bwr_size = flat_buf->blocklens[k] - end_bwr_size; + k--; + buf_count--; + } + } + } + buf_count++; + k = (k + 1)%flat_buf->count; + } /* for (i=0; iblocklens[j] == end_fwr_size) + fwr_size = flat_file->blocklens[(j+1)% + flat_file->count]; + else { + fwr_size = flat_file->blocklens[j] 
- end_fwr_size; + j--; + } + } + } + if (j < flat_file->count - 1) j++; + else { + j = 0; + n_filetypes++; + } + } /* for (i=0; iblocklens[0] ) ) || + ((mem_list_count == 1) && + (new_buffer_write < flat_buf->blocklens[0]) ) || + ((file_list_count == MAX_ARRAY_SIZE) && + (new_file_write < flat_buf->blocklens[0]) ) || + ( (mem_list_count == MAX_ARRAY_SIZE) && + (new_buffer_write < flat_file->blocklens[0])) ) + { + ADIOI_Delete_flattened(datatype); + ADIOI_GEN_WriteStrided_naive(fd, buf, count, datatype, + file_ptr_type, initial_off, status, error_code); + return; + } + + + mem_offsets = (void *)ADIOI_Malloc(max_mem_list*sizeof(void *)); + mem_lengths = (size_t*)ADIOI_Malloc(max_mem_list*sizeof(size_t)); + file_offsets = (uint64_t *)ADIOI_Malloc(max_file_list*sizeof(uint64_t)); + file_lengths = (uint64_t*)ADIOI_Malloc(max_file_list*sizeof(uint64_t)); + + size_wrote = 0; + n_filetypes = st_n_filetypes; + fwr_size = st_fwr_size; + bwr_size = flat_buf->blocklens[0]; + buf_count = 0; + start_mem_offset = 0; + start_k = k = 0; + start_j = st_index; + + /* this section calculates mem_list_count and file_list_count + and also finds the possibly odd sized last array elements + in new_fwr_size and new_bwr_size */ + + while (size_wrote < bufsize) { + k = start_k; + new_buffer_write = 0; + mem_list_count = 0; + while ((mem_list_count < MAX_ARRAY_SIZE) && + (new_buffer_write < bufsize-size_wrote)) { + /* find mem_list_count and file_list_count such that both are + less than MAX_ARRAY_SIZE, the sum of their lengths are + equal, and the sum of all the data written and data to be + written in the next immediate write list is less than + bufsize */ + if(mem_list_count) { + if((new_buffer_write + flat_buf->blocklens[k] + + size_wrote) > bufsize) { + end_bwr_size = new_buffer_write + + flat_buf->blocklens[k] - (bufsize - size_wrote); + new_buffer_write = bufsize - size_wrote; + } + else { + new_buffer_write += flat_buf->blocklens[k]; + end_bwr_size = flat_buf->blocklens[k]; + } + } + 
else { + if (bwr_size > (bufsize - size_wrote)) { + new_buffer_write = bufsize - size_wrote; + bwr_size = new_buffer_write; + } + else new_buffer_write = bwr_size; + } + mem_list_count++; + k = (k + 1)%flat_buf->count; + } /* while ((mem_list_count < MAX_ARRAY_SIZE) && + (new_buffer_write < bufsize-size_wrote)) */ + j = start_j; + new_file_write = 0; + file_list_count = 0; + while ((file_list_count < MAX_ARRAY_SIZE) && + (new_file_write < new_buffer_write)) { + if(file_list_count) { + if((new_file_write + flat_file->blocklens[j]) > + new_buffer_write) { + end_fwr_size = new_buffer_write - new_file_write; + new_file_write = new_buffer_write; + j--; + } + else { + new_file_write += flat_file->blocklens[j]; + end_fwr_size = flat_file->blocklens[j]; + } + } + else { + if (fwr_size > new_buffer_write) { + new_file_write = new_buffer_write; + fwr_size = new_file_write; + } + else new_file_write = fwr_size; + } + file_list_count++; + if (j < (flat_file->count - 1)) j++; + else j = 0; + + k = start_k; + if ((new_file_write < new_buffer_write) && + (file_list_count == MAX_ARRAY_SIZE)) { + new_buffer_write = 0; + mem_list_count = 0; + while (new_buffer_write < new_file_write) { + if(mem_list_count) { + if((new_buffer_write + flat_buf->blocklens[k]) > + new_file_write) { + end_bwr_size = new_file_write - + new_buffer_write; + new_buffer_write = new_file_write; + k--; + } + else { + new_buffer_write += flat_buf->blocklens[k]; + end_bwr_size = flat_buf->blocklens[k]; + } + } + else { + new_buffer_write = bwr_size; + if (bwr_size > (bufsize - size_wrote)) { + new_buffer_write = bufsize - size_wrote; + bwr_size = new_buffer_write; + } + } + mem_list_count++; + k = (k + 1)%flat_buf->count; + } /* while (new_buffer_write < new_file_write) */ + } /* if ((new_file_write < new_buffer_write) && + (file_list_count == MAX_ARRAY_SIZE)) */ + } /* while ((mem_list_count < MAX_ARRAY_SIZE) && + (new_buffer_write < bufsize-size_wrote)) */ + + /* fills the allocated writelist arrays */ + k = 
start_k; + j = start_j; + for (i=0; icount) + + flat_buf->indices[k]; + + if(!i) { + mem_lengths[0] = bwr_size; + mem_offsets[0] += flat_buf->blocklens[k] - bwr_size; + } + else { + if (i == (mem_list_count - 1)) { + mem_lengths[i] = end_bwr_size; + if (flat_buf->blocklens[k] == end_bwr_size) + bwr_size = flat_buf->blocklens[(k+1)% + flat_buf->count]; + else { + bwr_size = flat_buf->blocklens[k] - end_bwr_size; + k--; + buf_count--; + } + } + else { + mem_lengths[i] = flat_buf->blocklens[k]; + } + } + buf_count++; + k = (k + 1)%flat_buf->count; + } /* for (i=0; iindices[j] + + ((ADIO_Offset)n_filetypes) * filetype_extent; + if (!i) { + file_lengths[0] = fwr_size; + file_offsets[0] += flat_file->blocklens[j] - fwr_size; + } + else { + if (i == (file_list_count - 1)) { + file_lengths[i] = end_fwr_size; + if (flat_file->blocklens[j] == end_fwr_size) + fwr_size = flat_file->blocklens[(j+1)% + flat_file->count]; + else { + fwr_size = flat_file->blocklens[j] - end_fwr_size; + j--; + } + } + else file_lengths[i] = flat_file->blocklens[j]; + } + if (j < flat_file->count - 1) j++; + else { + j = 0; + n_filetypes++; + } + } /* for (i=0; ifp_ind = file_offsets[file_list_count-1]+ + file_lengths[file_list_count-1]; + } + ADIOI_Free(file_offsets); + ADIOI_Free(file_lengths); + + *error_code = MPI_SUCCESS; + +error_state: + fd->fp_sys_posn = -1; /* set it to null. */ + +#ifdef HAVE_STATUS_SET_BYTES + MPIR_Status_set_bytes(status, datatype, bufsize); +/* This is a temporary way of filling in status. The right way is to + keep track of how much data was actually written by ADIOI_BUFFERED_WRITE. 
*/ +#endif + + if (!buftype_is_contig) ADIOI_Delete_flattened(datatype); +} diff --git a/ompi/mca/io/romio/romio/adio/common/Makefile.am b/ompi/mca/io/romio/romio/adio/common/Makefile.am index 9dca94a663..eebfa5dd7c 100644 --- a/ompi/mca/io/romio/romio/adio/common/Makefile.am +++ b/ompi/mca/io/romio/romio/adio/common/Makefile.am @@ -26,24 +26,32 @@ include $(top_srcdir)/Makefile.options noinst_LTLIBRARIES = libadio_common.la libadio_common_la_SOURCES = \ ad_aggregate.c \ + ad_aggregate_new.c \ ad_close.c \ + ad_coll_build_req_new.c \ + ad_coll_exch_new.c \ ad_darray.c \ ad_delete.c \ ad_done.c \ ad_done_fake.c \ ad_end.c \ + ad_features.c \ ad_fcntl.c \ ad_flush.c \ ad_fstype.c \ ad_get_sh_fp.c \ ad_hints.c \ ad_init.c \ + ad_io_coll.c \ ad_iopen.c \ ad_iread.c \ ad_iread_fake.c \ ad_iwrite.c \ ad_iwrite_fake.c \ ad_open.c \ + ad_opencoll.c \ + ad_opencoll_failsafe.c \ + ad_opencoll_scalable.c \ ad_prealloc.c \ ad_read.c \ ad_read_coll.c \ @@ -69,6 +77,7 @@ libadio_common_la_SOURCES = \ flatten.c \ get_fp_posn.c \ greq_fns.c \ + heap-sort.c \ iscontig.c \ lock.c \ malloc.c \ diff --git a/ompi/mca/io/romio/romio/adio/common/ad_aggregate.c b/ompi/mca/io/romio/romio/adio/common/ad_aggregate.c index 7252f50e2c..ab5d3636e4 100644 --- a/ompi/mca/io/romio/romio/adio/common/ad_aggregate.c +++ b/ompi/mca/io/romio/romio/adio/common/ad_aggregate.c @@ -7,6 +7,10 @@ #include "adio.h" #include "adio_extern.h" +#ifdef AGGREGATION_PROFILE +#include "mpe.h" +#endif + #undef AGG_DEBUG /* This file contains four functions: @@ -79,21 +83,26 @@ int ADIOI_Calc_aggregator(ADIO_File fd, ADIOI_UNREFERENCED_ARG(fd_start); -#ifdef AGG_DEBUG -#if 0 - FPRINTF(stdout, "off = %lld, min_off = %lld, len = %lld, fd_size = %lld\n", - off, min_off, *len, fd_size); -#endif -#endif - /* get an index into our array of aggregators */ rank_index = (int) ((off - min_off + fd_size)/ fd_size - 1); + if (fd->hints->striping_unit > 0) { + /* wkliao: implementation for file domain alignment + fd_start[] and 
fd_end[] have been aligned with file lock + boundaries when returned from ADIOI_Calc_file_domains() so cannot + just use simple arithmatic as above */ + rank_index = 0; + while (off > fd_end[rank_index]) rank_index++; + } + /* we index into fd_end with rank_index, and fd_end was allocated to be no * bigger than fd->hins->cb_nodes. If we ever violate that, we're * overrunning arrays. Obviously, we should never ever hit this abort */ - if (rank_index >= fd->hints->cb_nodes) - MPI_Abort(MPI_COMM_WORLD, 1); + if (rank_index >= fd->hints->cb_nodes || rank_index < 0) { + FPRINTF(stderr, "Error in ADIOI_Calc_aggregator(): rank_index(%d) >= fd->hints->cb_nodes (%d) fd_size=%lld off=%lld\n", + rank_index,fd->hints->cb_nodes,fd_size,off); + MPI_Abort(MPI_COMM_WORLD, 1); + } /* remember here that even in Rajeev's original code it was the case that * different aggregators could end up with different amounts of data to @@ -119,19 +128,21 @@ void ADIOI_Calc_file_domains(ADIO_Offset *st_offsets, ADIO_Offset *end_offsets, int nprocs, int nprocs_for_coll, ADIO_Offset *min_st_offset_ptr, ADIO_Offset **fd_start_ptr, ADIO_Offset - **fd_end_ptr, ADIO_Offset *fd_size_ptr) + **fd_end_ptr, int min_fd_size, + ADIO_Offset *fd_size_ptr, + int striping_unit) { /* Divide the I/O workload among "nprocs_for_coll" processes. This is done by (logically) dividing the file into file domains (FDs); each process may directly access only its own file domain. */ - /* XXX: one idea: tweak the file domains so that no fd is smaller than - * a threshold (one presumably well-suited to a file system). 
We don't - * do that, but this routine would be the place for it */ - ADIO_Offset min_st_offset, max_end_offset, *fd_start, *fd_end, fd_size; int i; +#ifdef AGGREGATION_PROFILE + MPE_Log_event (5004, 0, NULL); +#endif + #ifdef AGG_DEBUG FPRINTF(stderr, "ADIOI_Calc_file_domains: %d aggregator(s)\n", nprocs_for_coll); @@ -156,6 +167,14 @@ void ADIOI_Calc_file_domains(ADIO_Offset *st_offsets, ADIO_Offset 1)/nprocs_for_coll; /* ceiling division as in HPF block distribution */ + /* Tweak the file domains so that no fd is smaller than a threshold. We + * have to strike a balance between efficency and parallelism: somewhere + * between 10k processes sending 32-byte requests and one process sending a + * 320k request is a (system-dependent) sweet spot */ + + if (fd_size < min_fd_size) + fd_size = min_fd_size; + *fd_start_ptr = (ADIO_Offset *) ADIOI_Malloc(nprocs_for_coll*sizeof(ADIO_Offset)); *fd_end_ptr = (ADIO_Offset *) @@ -164,12 +183,46 @@ void ADIOI_Calc_file_domains(ADIO_Offset *st_offsets, ADIO_Offset fd_start = *fd_start_ptr; fd_end = *fd_end_ptr; - fd_start[0] = min_st_offset; - fd_end[0] = min_st_offset + fd_size - 1; + /* Wei-keng Liao: implementation for fild domain alignment to nearest file + * lock boundary (as specified by striping_unit hint). 
Could also + * experiment with other alignment strategies here */ + if (striping_unit > 0) { + ADIO_Offset end_off; + int rem_front, rem_back; - for (i=1; i +/* #define DEBUG */ + +void ADIOI_Calc_file_realms_user_size (ADIO_File fd, int fr_size, + int nprocs_for_coll, + ADIO_Offset *file_realm_st_offs, + MPI_Datatype *file_realm_types); +void ADIOI_Calc_file_realms_aar (ADIO_File fd, int nprocs_for_coll, + int pfr_enabled, + ADIO_Offset min_st_offset, + ADIO_Offset max_end_offset, + ADIO_Offset *file_realm_st_offs, + MPI_Datatype *file_realm_types); +void ADIOI_Calc_file_realms_fsize (ADIO_File fd, + int nprocs_for_coll, + ADIO_Offset max_end_offset, + ADIO_Offset *file_realm_st_offs, + MPI_Datatype *file_realm_types); +void ADIOI_Create_fr_simpletype (int size, int nprocs_for_coll, + MPI_Datatype *simpletype); +static void align_fr (int fr_size, ADIO_Offset fr_off, int alignment, + int *aligned_fr_size, ADIO_Offset *aligned_fr_off); +void ADIOI_Verify_fr (int nprocs_for_coll, ADIO_Offset *file_realm_st_offs, + MPI_Datatype *file_realm_types); + +void ADIOI_Calc_file_realms (ADIO_File fd, ADIO_Offset min_st_offset, + ADIO_Offset max_end_offset) +{ + int nprocs_for_coll; + int file_realm_calc_type; + + MPI_Datatype *file_realm_types = NULL; + ADIO_Offset *file_realm_st_offs = NULL; + +#ifdef AGGREGATION_PROFILE + MPE_Log_event (5004, 0, NULL); +#endif +#ifdef DEBUG + printf ("ADIOI_Calc_file_realms\n"); +#endif + + nprocs_for_coll = fd->hints->cb_nodes; + file_realm_calc_type = fd->hints->cb_fr_type; + + /* If PFRs are disabled we know these pointers are not allocated */ + if (fd->hints->cb_pfr != ADIOI_HINT_ENABLE) { + fd->file_realm_st_offs = NULL; + fd->file_realm_types = NULL; + } + + if (nprocs_for_coll == 1) { + /* if there's only one aggregator, we can reset the file + * realms every single time */ + if (fd->file_realm_st_offs == NULL) + { + file_realm_st_offs = (ADIO_Offset *) + ADIOI_Malloc (sizeof(ADIO_Offset)); + file_realm_types = (MPI_Datatype *) + 
ADIOI_Malloc (sizeof(MPI_Datatype)); + } + else + { + file_realm_st_offs = fd->file_realm_st_offs; + file_realm_types = fd->file_realm_types; + } + *file_realm_st_offs = min_st_offset; + MPI_Type_contiguous ((max_end_offset - min_st_offset + 1), MPI_BYTE, + file_realm_types); + MPI_Type_commit (file_realm_types); + ADIOI_Add_contig_flattened (*file_realm_types); + } + else if (fd->file_realm_st_offs == NULL) { + file_realm_st_offs = (ADIO_Offset *) + ADIOI_Malloc (nprocs_for_coll * sizeof(ADIO_Offset)); + file_realm_types = (MPI_Datatype *) + ADIOI_Malloc (nprocs_for_coll * sizeof(MPI_Datatype)); + + if (file_realm_calc_type == ADIOI_FR_AAR) { + ADIOI_Calc_file_realms_aar (fd, nprocs_for_coll, + fd->hints->cb_pfr, + min_st_offset, max_end_offset, + file_realm_st_offs, file_realm_types); + /* flatten file realm datatype for future use - only one + * because all are the same*/ + ADIOI_Flatten_datatype (file_realm_types[0]); + } + else if (file_realm_calc_type == ADIOI_FR_FSZ) { + ADIOI_Calc_file_realms_fsize (fd, nprocs_for_coll, max_end_offset, + file_realm_st_offs, + file_realm_types); + /* flatten file realm datatype for future use - only one + * because all are the same*/ + ADIOI_Flatten_datatype (file_realm_types[0]); + } + else if (file_realm_calc_type == ADIOI_FR_USR_REALMS) { + /* copy user provided realm datatypes and realm offsets in + * hints to file descriptor. 
may also want to verify that + * the provided file realms are covering (for pfr at + * least) and non-overlapping */ + } + else if (file_realm_calc_type > 0) { + ADIOI_Calc_file_realms_user_size (fd, file_realm_calc_type, + nprocs_for_coll, + file_realm_st_offs, + file_realm_types); + /* flatten file realm datatype for future use - only one + * because all are the same */ + ADIOI_Flatten_datatype (file_realm_types[0]); + } + } + fd->file_realm_st_offs = file_realm_st_offs; + fd->file_realm_types = file_realm_types; +#ifdef AGGREGATION_PROFILE + MPE_Log_event (5005, 0, NULL); +#endif +} + +void ADIOI_Calc_file_realms_user_size (ADIO_File fd, int fr_size, + int nprocs_for_coll, + ADIO_Offset *file_realm_st_offs, + MPI_Datatype *file_realm_types) +{ + int i; + int aligned_fr_size; + ADIO_Offset aligned_fr_off; + MPI_Datatype simpletype; + + align_fr(fr_size, 0, fd->hints->cb_fr_alignment, &aligned_fr_size, + &aligned_fr_off); + fr_size = aligned_fr_size; + ADIOI_Create_fr_simpletype (fr_size, nprocs_for_coll, &simpletype); + + if (fd->hints->cb_pfr == ADIOI_HINT_ENABLE) + file_realm_st_offs[0] = 0; + else + file_realm_st_offs[0] = aligned_fr_off; + file_realm_types[0] = simpletype; +#ifdef DEBUG + printf ("file_realm[0] = (%lld, %d)\n", file_realm_st_offs[0], + fr_size); +#endif + + for (i=1; i < nprocs_for_coll; i++) + { + file_realm_st_offs[i] = file_realm_st_offs[i-1] + fr_size; + file_realm_types[i] = simpletype; +#ifdef DEBUG + printf ("file_realm[%d] = (%lld, %d)\n", i, file_realm_st_offs[i], + aligned_fr_size); +#endif + } +} + +/* takes an extra romio_cb_pfr param to decide whether file realms + * should start at byte 0 of the file*/ +void ADIOI_Calc_file_realms_aar (ADIO_File fd, int nprocs_for_coll, int cb_pfr, + ADIO_Offset min_st_offset, + ADIO_Offset max_end_offset, + ADIO_Offset *file_realm_st_offs, + MPI_Datatype *file_realm_types) +{ + int fr_size, aligned_fr_size, i; + MPI_Datatype simpletype; + ADIO_Offset aligned_start_off; + char value[9]; + + 
fr_size = (max_end_offset - min_st_offset + nprocs_for_coll) / + nprocs_for_coll; + align_fr(fr_size, min_st_offset, fd->hints->cb_fr_alignment, + &aligned_fr_size, &aligned_start_off); + fr_size = aligned_fr_size; + ADIOI_Create_fr_simpletype (fr_size, nprocs_for_coll, &simpletype); + if (cb_pfr == ADIOI_HINT_ENABLE) + file_realm_st_offs[0] = 0; + else + file_realm_st_offs[0] = aligned_start_off; + file_realm_types[0] = simpletype; + +#ifdef DEBUG + printf ("file_realm[0] = (%lld, %d)\n", file_realm_st_offs[0], + fr_size); +#endif + for (i=1; i < nprocs_for_coll; i++) + { + file_realm_st_offs[i] = file_realm_st_offs[i-1] + fr_size; + file_realm_types[i] = simpletype; +#ifdef DEBUG + printf ("file_realm[%d] = (%lld, %d)\n", i, file_realm_st_offs[i], + fr_size); +#endif + } + if (fd->hints->cb_pfr == ADIOI_HINT_ENABLE) { + sprintf (value, "%d", fr_size); + ADIOI_Info_set (fd->info, "romio_cb_fr_type", value); + } +} + +void ADIOI_Calc_file_realms_fsize (ADIO_File fd, int nprocs_for_coll, + ADIO_Offset max_end_offset, + ADIO_Offset *file_realm_st_offs, + MPI_Datatype *file_realm_types) +{ + int fr_size, aligned_fr_size, error_code, i; + int fsize; + ADIO_Offset aligned_fr_off; + ADIO_Fcntl_t fcntl_struct; + MPI_Datatype simpletype; + + ADIO_Fcntl (fd, ADIO_FCNTL_GET_FSIZE, &fcntl_struct, &error_code); + + /* use impending file size since a write call may lengthen the file */ + fsize = ADIOI_MAX (fcntl_struct.fsize, max_end_offset+1); + fr_size = (fsize + nprocs_for_coll - 1) / nprocs_for_coll; + align_fr(fr_size, 0, fd->hints->cb_fr_alignment, + &aligned_fr_size, &aligned_fr_off); + ADIOI_Create_fr_simpletype (fr_size, nprocs_for_coll, &simpletype); + + for (i=0; i < nprocs_for_coll; i++) + { + file_realm_st_offs[i] = fr_size * i; + file_realm_types[i] = simpletype; + } +} + +/* creates a datatype with an empty trailing edge */ +void ADIOI_Create_fr_simpletype (int size, int nprocs_for_coll, + MPI_Datatype *simpletype) +{ + int count=2, blocklens[2]; + MPI_Aint 
indices[2]; + MPI_Datatype old_types[2]; + + blocklens[0] = size; + blocklens[1] = 1; + indices[0] = 0; + indices[1] = size*nprocs_for_coll; + old_types[0] = MPI_BYTE; + old_types[1] = MPI_UB; + + MPI_Type_struct (count, blocklens, indices, old_types, simpletype); + + MPI_Type_commit (simpletype); +} + +/* Verify that file realms are covering (PFRs) and non-overlapping */ +void ADIOI_Verify_fr (int nprocs_for_coll, ADIO_Offset *file_realm_st_offs, + MPI_Datatype *file_realm_types) +{ +} + +int ADIOI_Agg_idx (int rank, ADIO_File fd) { + int i, cb_nodes, *ranklist; + cb_nodes = fd->hints->cb_nodes; + ranklist = fd->hints->ranklist; + + for (i=0; ihints && fd->hints->ranklist) ADIOI_Free(fd->hints->ranklist); if (fd->hints && fd->hints->cb_config_list) ADIOI_Free(fd->hints->cb_config_list); + + /* Persistent File Realms */ + if (fd->hints->cb_pfr == ADIOI_HINT_ENABLE) { + /* AAR, FSIZE, and User provided uniform File realms */ + if (1) { + ADIOI_Delete_flattened (fd->file_realm_types[0]); + MPI_Type_free (&fd->file_realm_types[0]); + } + else { + for (i=0; ihints->cb_nodes; i++) { + ADIOI_Datatype_iscontig(fd->file_realm_types[i], &is_contig); + if (!is_contig) + ADIOI_Delete_flattened(fd->file_realm_types[i]); + MPI_Type_free (&fd->file_realm_types[i]); + } + } + ADIOI_Free(fd->file_realm_st_offs); + ADIOI_Free(fd->file_realm_types); + } if (fd->hints) ADIOI_Free(fd->hints); + + + MPI_Comm_free(&(fd->comm)); /* deferred open: if we created an aggregator communicator, free it */ if (fd->agg_comm != MPI_COMM_NULL) { diff --git a/ompi/mca/io/romio/romio/adio/common/ad_coll_build_req_new.c b/ompi/mca/io/romio/romio/adio/common/ad_coll_build_req_new.c new file mode 100644 index 0000000000..3f3201bf1a --- /dev/null +++ b/ompi/mca/io/romio/romio/adio/common/ad_coll_build_req_new.c @@ -0,0 +1,2086 @@ +/* -*- Mode: C; c-basic-offset:4 ; -*- */ +/* + * + * Copyright (C) 1997 University of Chicago. + * See COPYRIGHT notice in top-level directory. 
+ */ + +#include +#include "adio.h" +#include "adio_extern.h" +#ifdef AGGREGATION_PROFILE +#include "mpe.h" +#endif +#include "heap-sort.h" + +/* +#define DEBUG1 +#define DEBUG2 +#define DEBUG3 +*/ +/* #define DEBUG_HEAP */ + +#define DTYPE_SKIP + +#ifdef DEBUG3 +static char *off_type_name[MAX_OFF_TYPE] = {"TEMP_OFFSETS", + "REAL_OFFSETS"}; +#endif + +/* Simple function to return the size of the view_state. */ +static inline ADIO_Offset view_state_get_cur_sz(view_state *tmp_view_state_p, + int op_type) +{ + flatten_state *tmp_state_p = NULL; + switch(op_type) + { + case TEMP_OFF: + tmp_state_p = &(tmp_view_state_p->tmp_state); + break; + case REAL_OFF: + tmp_state_p = &(tmp_view_state_p->cur_state); + break; + default: + fprintf(stderr, "op_type invalid\n"); + } + return tmp_state_p->cur_sz; +} + +/* Simple function to return the len of the next piece of the view_state. */ +static inline ADIO_Offset view_state_get_next_len(view_state *tmp_view_state_p, + int op_type) +{ + flatten_state *tmp_state_p = NULL; + switch(op_type) + { + case TEMP_OFF: + tmp_state_p = &(tmp_view_state_p->tmp_state); + break; + case REAL_OFF: + tmp_state_p = &(tmp_view_state_p->cur_state); + break; + default: + fprintf(stderr, "op_type invalid\n"); + } + return (ADIO_Offset) + tmp_view_state_p->flat_type_p->blocklens[tmp_state_p->idx] - + tmp_state_p->cur_reg_off; +} + +/* Add up to a region of a file view and no larger than a max size. + * The view_state is always consistent with the abs_off and where the + * index and cur_reg_off point to. The regions should be coalesced if + * possible later on. 
*/ +static inline int view_state_add_region( + ADIO_Offset max_sz, + view_state *tmp_view_state_p, + ADIO_Offset *st_reg_p, + ADIO_Offset *tmp_reg_sz_p, + int op_type) +{ + ADIOI_Flatlist_node *tmp_flat_type_p = NULL; + flatten_state *tmp_state_p = NULL; + int64_t data_sz = 0; + +#ifdef AGGREGATION_PROFILE + /* MPE_Log_event (5020, 0, NULL); */ +#endif + + switch(op_type) + { + case TEMP_OFF: + tmp_state_p = &(tmp_view_state_p->tmp_state); + break; + case REAL_OFF: + tmp_state_p = &(tmp_view_state_p->cur_state); + break; + default: + fprintf(stderr, "op_type invalid\n"); + } + + tmp_flat_type_p = tmp_view_state_p->flat_type_p; + + *st_reg_p = tmp_state_p->abs_off; + + /* Should be looking at some data (or it's a zero len blocklens + * (i.e. placeholder). */ + assert(tmp_state_p->cur_reg_off != + tmp_flat_type_p->blocklens[tmp_state_p->idx]); + /* Shouldn't have been called if the view_state is done. */ + assert(tmp_state_p->cur_sz != tmp_view_state_p->sz); + + /* Make sure we are not in a non-zero region in the flat_type */ + assert(tmp_flat_type_p->blocklens[tmp_state_p->idx] != 0); + +#ifdef DEBUG3 + fprintf(stderr, "view_state:(blocklens[%Ld]=%d,cur_reg_off=%Ld," + "max_sz=%Ld)\n", tmp_state_p->idx, + tmp_flat_type_p->blocklens[tmp_state_p->idx], + tmp_state_p->cur_reg_off, max_sz); +#endif + + /* Can it add the whole piece? 
*/ + if (tmp_flat_type_p->blocklens[tmp_state_p->idx] - + tmp_state_p->cur_reg_off <= max_sz) + { + data_sz = tmp_flat_type_p->blocklens[tmp_state_p->idx] - + tmp_state_p->cur_reg_off; + + tmp_state_p->cur_sz += data_sz; + + /* Advance the abs_off to the beginning of the next piece */ + if (tmp_flat_type_p->count == 1) + { + assert(tmp_flat_type_p->blocklens[tmp_state_p->idx] != 0); + tmp_state_p->abs_off += data_sz; +#ifdef DEBUG3 + fprintf(stderr, "view_state_add_region: %s contig type " + "(old abs_off=%Ld,abs_off=%Ld,cur_sz=%Ld,reg size=%Ld)\n", + off_type_name[op_type], tmp_state_p->abs_off - data_sz, + tmp_state_p->abs_off, tmp_state_p->cur_sz, data_sz); +#endif + } + else + { + /* Is this the last region in the datatype? */ + if (tmp_state_p->idx == (tmp_flat_type_p->count - 1)) + { + tmp_state_p->abs_off += data_sz - + tmp_flat_type_p->indices[tmp_flat_type_p->count-1] - + tmp_flat_type_p->blocklens[tmp_flat_type_p->count-1] + + tmp_view_state_p->ext; +#ifdef DEBUG3 + fprintf(stderr, "view_state_add_region: %s last region for type " + "(old abs_off=%Ld,abs_off=%Ld,cur_sz=%Ld,reg size=%Ld)\n", + off_type_name[op_type], tmp_state_p->abs_off - data_sz, + tmp_state_p->abs_off, tmp_state_p->cur_sz, data_sz); +#endif + } + else + { + tmp_state_p->abs_off += + tmp_flat_type_p->indices[tmp_state_p->idx + 1] - + (tmp_flat_type_p->indices[tmp_state_p->idx] + + tmp_state_p->cur_reg_off); +#ifdef DEBUG3 + fprintf(stderr, "view_state_add_region: %s inner region type " + "(old abs_off=%Ld,abs_off=%Ld,cur_sz=%Ld,reg size=%Ld)\n", + off_type_name[op_type], tmp_state_p->abs_off - + (tmp_flat_type_p->indices[tmp_state_p->idx + 1] - + (tmp_flat_type_p->indices[tmp_state_p->idx] + + tmp_state_p->cur_reg_off)), tmp_state_p->abs_off, + tmp_state_p->cur_sz, data_sz); +#endif + } + /* Increment idx to next non-zero region in the flat_type */ + do { + tmp_state_p->idx = + (tmp_state_p->idx + 1) % tmp_flat_type_p->count; + } while (tmp_flat_type_p->blocklens[tmp_state_p->idx] == 0); 
+ } + tmp_state_p->cur_reg_off = 0; + } + else /* Add part of the piece */ + { + data_sz = max_sz; + tmp_state_p->cur_reg_off += data_sz; + tmp_state_p->abs_off += data_sz; + tmp_state_p->cur_sz += data_sz; +#ifdef DEBUG3 + fprintf(stderr, "view_state_add_region: %s partial region type " + "(cur_reg_off=%Ld,abs_off=%Ld,cur_sz=%Ld,reg size=%Ld\n", + off_type_name[op_type], tmp_state_p->cur_reg_off, + tmp_state_p->abs_off, tmp_state_p->cur_sz, data_sz); +#endif + } + + *tmp_reg_sz_p = data_sz; +#ifdef AGGREGATION_PROFILE + /* MPE_Log_event (5021, 0, NULL); */ +#endif + return 0; +} + +/* Set up the abs_off, idx, and cur_reg_off of a view_state for the + * tmp_state or the cur_state. */ +int ADIOI_init_view_state(int file_ptr_type, + int nprocs, + view_state *view_state_arr, + int op_type) +{ + ADIOI_Flatlist_node *tmp_flat_type_p = NULL; + ADIO_Offset tmp_off_used = 0, st_reg = 0, tmp_reg_sz = 0; + int i; + flatten_state *tmp_state_p = NULL; + view_state *tmp_view_p = NULL; + + for (i = 0; i < nprocs; i++) + { + switch(op_type) + { + case TEMP_OFF: + tmp_state_p = &(view_state_arr[i].tmp_state); + break; + case REAL_OFF: + tmp_state_p = &(view_state_arr[i].cur_state); + break; + default: + fprintf(stderr, "op_type invalid\n"); + } + + tmp_view_p = &(view_state_arr[i]); + tmp_flat_type_p = tmp_view_p->flat_type_p; + + if (file_ptr_type == ADIO_INDIVIDUAL) + tmp_state_p->abs_off = tmp_view_p->fp_ind; + else + tmp_state_p->abs_off = tmp_view_p->disp; + + tmp_off_used = 0; + + /* initialize tmp_state idx */ + while (tmp_flat_type_p->blocklens[tmp_state_p->idx] == 0) + tmp_state_p->idx = (tmp_state_p->idx + 1) % tmp_flat_type_p->count; + if (file_ptr_type == ADIO_EXPLICIT_OFFSET) + tmp_state_p->abs_off += tmp_flat_type_p->indices[tmp_state_p->idx]; + + /* Initialize the abs_off by moving into the datatype + * byte_off bytes. Since we only do this in the beginning, we + * make the assumption that pieces are added whole until the last + * piece which MAY be partial. 
*/ + while (tmp_off_used != tmp_view_p->byte_off) + { + view_state_add_region( + tmp_view_p->byte_off - tmp_off_used, + &(view_state_arr[i]), &st_reg, &tmp_reg_sz, + op_type); + } + + /* Re-initialize the cur_size so that the abs_off was set to + * the proper position while the actual size = 0.*/ + tmp_state_p->cur_sz = 0; +#ifdef DEBUG1 + fprintf(stderr, "init_view_state: %s (idx=%d,byte_off=%Ld," + "abs_off=%Ld,reg_off=%Ld,sz=%Ld)\n", off_type_name[op_type], + i, tmp_view_p->byte_off, tmp_state_p->abs_off, + tmp_state_p->cur_reg_off, tmp_view_p->sz); +#endif + + } + return 0; +} + +/* Return the next file realm offset and length for this datatype state + * within a particular file realm. */ +static inline int get_next_fr_off(ADIO_File fd, + ADIO_Offset off, + ADIO_Offset fr_st_off, + MPI_Datatype *fr_type_p, + ADIO_Offset *fr_next_off_p, + ADIO_Offset *fr_max_len_p) +{ + MPI_Aint fr_extent = -1; + ADIO_Offset tmp_off, off_rem; + ADIOI_Flatlist_node *fr_node_p = ADIOI_Flatlist; + int i = -1, fr_dtype_ct = 0; + + /* Should have already been flattened in calc_file_realms() */ + while (fr_node_p->type != (*fr_type_p)) + fr_node_p = fr_node_p->next; + assert(fr_node_p != NULL); + + /* Did we get to the first region of the file realm? */ + if (off - fr_st_off < 0) + { + *fr_next_off_p = fr_st_off + fr_node_p->indices[0]; + *fr_max_len_p = fr_node_p->blocklens[0]; + return 0; + } + + /* Calculate how many times to loop through the fr_type + * and where the next fr_off is. 
*/ + MPI_Type_extent(*fr_type_p, &fr_extent); + tmp_off = off - fr_st_off; + fr_dtype_ct = tmp_off / fr_extent; + off_rem = tmp_off % fr_extent; + for (i = 0; i < fr_node_p->count; i++) + { + if (off_rem < fr_node_p->indices[i]) + { + *fr_next_off_p = fr_st_off + + (fr_dtype_ct * fr_extent) + fr_node_p->indices[i]; + *fr_max_len_p = fr_node_p->blocklens[i]; + return 0; + } + else if (off_rem < fr_node_p->indices[i] + fr_node_p->blocklens[i]) + { + *fr_next_off_p = off; + *fr_max_len_p = fr_node_p->blocklens[i] - + (off_rem - fr_node_p->indices[i]); + return off; + } + } + + /* Shouldn't get here. */ + fprintf(stderr, "get_next_fr_off: Couldn't find the correct " + "location of the next offset for this file realm.\n"); + return -1; +} + +/* Look in all the view states for the first offset within a given + * file realm. Report the end of a contiguous region within the file + * realm (possibly more than the actual view state may be able to + * process contiguously). */ +static inline int find_next_off(ADIO_File fd, + view_state *view_state_p, + ADIO_Offset fr_st_off, + MPI_Datatype *fr_type_p, + int op_type, + ADIO_Offset *cur_off_p, + ADIO_Offset *cur_reg_max_len_p) +{ + ADIOI_Flatlist_node *tmp_flat_type_p = NULL; + ADIO_Offset tmp_off = -1, fr_next_off = -1, fr_max_len = -1, + tmp_fr_max_len = -1; + int ret = -1; + flatten_state *tmp_state_p = NULL; + ADIO_Offset tmp_st_off = 0, tmp_reg_sz = 0; +#ifdef DTYPE_SKIP + int skip_type_ct; +#endif + +#ifdef AGGREGATION_PROFILE + /* MPE_Log_event (5022, 0, NULL); */ +#endif + + switch(op_type) + { + case TEMP_OFF: + tmp_state_p = &(view_state_p->tmp_state); + break; + case REAL_OFF: + tmp_state_p = &(view_state_p->cur_state); + break; + default: + fprintf(stderr, "op_type invalid\n"); + } + + tmp_flat_type_p = view_state_p->flat_type_p; + + /* Can we use this proc? 
*/ + if (tmp_state_p->cur_sz < view_state_p->sz) { + tmp_st_off = 0; + tmp_reg_sz = 0; + /* If the current region is not within the file realm, advance + * the state until it is and calculate the end of the next file + * realm in fr_max_len. */ + ret = get_next_fr_off(fd, + tmp_state_p->abs_off, + fr_st_off, + fr_type_p, + &fr_next_off, + &fr_max_len); + + while ((tmp_state_p->abs_off < fr_next_off) && + (tmp_state_p->cur_sz != view_state_p->sz)) + { + + /* While this might appear to be erroneous at first, + * view_state_add_region can only add a single piece at a + * time. Therefore, it will never overshoot the beginning + * of the next file realm. When it finally does enter the + * next file realm it will not be able to go beyond its + * first piece. */ + +#ifdef DTYPE_SKIP + if (tmp_flat_type_p->count > 1) { + /* let's see if we can skip whole datatypes */ + skip_type_ct = (fr_next_off - tmp_state_p->abs_off) / + view_state_p->ext; + if (skip_type_ct > 0) { + /* before we go on, let's check if we've actually + * finished up already */ + tmp_state_p->cur_sz += skip_type_ct * + view_state_p->type_sz; + if (tmp_state_p->cur_sz >= view_state_p->sz) { + tmp_state_p->cur_sz = view_state_p->sz; + break; + } + tmp_state_p->abs_off += skip_type_ct * view_state_p->ext; + } + } +#endif + view_state_add_region( + fr_next_off - tmp_state_p->abs_off, + view_state_p, + &tmp_st_off, + &tmp_reg_sz, + op_type); + + ret = get_next_fr_off(fd, + tmp_state_p->abs_off, + fr_st_off, + fr_type_p, + &fr_next_off, + &fr_max_len); + } + + if (tmp_state_p->cur_sz != view_state_p->sz) { + tmp_off = tmp_state_p->abs_off; + /* Calculate how much of the remaining file realm there is from the + * current offset */ + tmp_fr_max_len = fr_next_off + fr_max_len - tmp_off; + } + } + + *cur_off_p = tmp_off; + *cur_reg_max_len_p = tmp_fr_max_len; +#ifdef AGGREGATION_PROFILE + /* MPE_Log_event (5023, 0, NULL); */ +#endif + return 0; +} + +/* Upon completion of a full collective buffer, end of a file 
realm + * region (data sieving), or the end of all I/O for an aggregator, we + * should return a list of MPI_Datatypes that correspond to client + * communication into a collective buffer, a list of corresponding + * sizes, and an aggregate MPI_Datatype which will be used as a + * filetype in MPI_File_write/read on the aggregator. */ +int ADIOI_Build_agg_reqs(ADIO_File fd, int rw_type, int nprocs, + view_state *client_file_view_state_arr, + MPI_Datatype *client_comm_dtype_arr, + ADIO_Offset *client_comm_sz_arr, + ADIO_Offset *agg_dtype_offset_p, + MPI_Datatype *agg_dtype_p) +{ + MPI_Aint **client_disp_arr = NULL, *agg_disp_arr = NULL; + int **client_blk_arr = NULL, *agg_blk_arr = NULL; + ADIO_Offset tmp_coll_buf_sz = 0, st_reg = 0, act_reg_sz = 0; + ADIO_Offset cur_off = -1, cur_reg_max_len = -1; + ADIO_Offset ds_fr_end = -1; + ADIO_Offset *fr_st_off_arr = fd->file_realm_st_offs; + MPI_Datatype *fr_type_arr = fd->file_realm_types; + int *client_ol_ct_arr = NULL; + int *client_ol_cur_ct_arr = NULL; + int agg_ol_ct = 0, agg_ol_cur_ct = 0; + int cur_off_proc = -1; + int next_off_idx = -1; + int i = 0, j = 0, all_done = -1; + int agg_idx = fd->my_cb_nodes_index; + heap_t offset_heap; + ADIO_Offset next_off = -1, next_reg_max_len = -1; + + /* Used for coalescing ol pairs next to each other. 
*/ + ADIO_Offset *client_comm_next_off_arr = NULL; + ADIO_Offset agg_next_off = -1; +#ifdef AGGREGATION_PROFILE + MPE_Log_event (5016, 0, NULL); +#endif + + memset(client_comm_sz_arr, 0, nprocs*sizeof(ADIO_Offset)); + + if ((client_comm_next_off_arr = (ADIO_Offset *) + ADIOI_Malloc(nprocs*sizeof(ADIO_Offset))) == NULL) + { + fprintf(stderr, "ADIOI_Build_agg_reqs: malloc client_next_off_arr " + "failed\n"); + return -1; + } + + if ((client_ol_ct_arr = (int *) ADIOI_Calloc(nprocs, sizeof(int))) == NULL) + { + fprintf(stderr, "ADIOI_Build_agg_reqs: " + "malloc client_ol_ct_arr failed\n"); + return -1; + } + if ((client_ol_cur_ct_arr = + (int *) ADIOI_Calloc(nprocs, sizeof(int))) == NULL) + { + fprintf(stderr, "ADIOI_Build_agg_reqs: " + "malloc client_ol_cur_ct_arr failed\n"); + return -1; + } + + /* On the first pass see how many offset-length pairs are + * necessary for each client. Then allocate the correct amount of + * offset-length pairs for describing the collective buffer. All + * data is processed in order by the aggregator's file realm. On + * the second pass, set the offset-length pairs to the correct + * values. */ + for (i = 0; i < MAX_OFF_TYPE; i++) + { + memset(client_comm_next_off_arr, -1, nprocs*sizeof(ADIO_Offset)); + tmp_coll_buf_sz = 0; + ds_fr_end = -1; + + /* initialize heap */ + ADIOI_Heap_create(&offset_heap, nprocs); + offset_heap.size = 0; + + for (j=0; j 0)) { + ADIOI_Heap_insert(&offset_heap, cur_off, j, cur_reg_max_len); +#ifdef DEBUG_HEAP + printf ("initial: inserting offset %lld with " + "cur_reg_max_len = %lld for p%d\n", + cur_off, cur_reg_max_len, j); +#endif + } + + } + if (!offset_heap.size) + ADIOI_Heap_insert(&offset_heap, -1, -1, -1); + + while (tmp_coll_buf_sz < fd->hints->cb_buffer_size) + { + /* Find the next process with the next region within the + * file realm and the maximum amount that can be added for + * this particular file realm as a contiguous region. 
*/ + ADIOI_Heap_extract_min(&offset_heap, &cur_off, &cur_off_proc, + &cur_reg_max_len); +#ifdef DEBUG_HEAP + printf ("extracted cur_off %lld from proc %d\n", + cur_off, cur_off_proc); +#endif + + if (cur_off == -1) + break; + +#ifdef DEBUG3 + fprintf(stderr, "ADIOI_Build_agg_reqs: %s proc %d start/add to" + " list (max_reg_fr=%Ld,tmp_coll_buf_sz=%Ld," + "cb_buffer_size=%d)\n", off_type_name[i], cur_off_proc, + cur_reg_max_len, tmp_coll_buf_sz, + fd->hints->cb_buffer_size); +#endif + + /* We process only contiguous file realm regions if we are + * using data sieving. Note that we only do this for + * writes since reads can be data sieved across each other + * without consistency issues. */ + if ((fd->hints->ds_write == ADIOI_HINT_ENABLE || + fd->hints->ds_write == ADIOI_HINT_AUTO) && + rw_type == ADIOI_WRITE && fd->hints->cb_nodes > 1) + { +#ifdef DEBUG2 + fprintf(stderr, "ADIOI_Build_agg_reqs: " + "Warning - Data sieving writes on\n"); +#endif + if (ds_fr_end == -1) + { + ds_fr_end = cur_off + cur_reg_max_len; +#ifdef DEBUG1 + fprintf(stderr, "ADIOI_Build_agg_reqs: " + "cur_off=%Ld, cur_reg_max_len=%Ld\n" + "Data sieving file realm end initialized to %Ld\n", + cur_off, + cur_reg_max_len, + ds_fr_end); +#endif + } + else + { + /* The next off switched file realms, so we will stop + * here. */ + if (ds_fr_end != cur_off + cur_reg_max_len) + { +#ifdef DEBUG1 + fprintf(stderr, "ADIOI_Build_agg_reqs: " + "Data sieving file realm end changed from " + "%Ld to %Ld\n", ds_fr_end, + cur_off + cur_reg_max_len); +#endif + break; + } + } + } + + /* Add up to the end of the file realm or the collective + * buffer. 
*/ + if (cur_reg_max_len > (fd->hints->cb_buffer_size - + tmp_coll_buf_sz)) + cur_reg_max_len = fd->hints->cb_buffer_size - tmp_coll_buf_sz; + + view_state_add_region( + cur_reg_max_len, + &(client_file_view_state_arr[cur_off_proc]), + &st_reg, &act_reg_sz, i); + + switch(i) + { + case TEMP_OFF: + /* Increment the ol list count for each proc and + * the used part of the collective buffer if the + * next region is not adjacent to the previous + * region. */ + if (client_comm_next_off_arr[cur_off_proc] != + tmp_coll_buf_sz) + { + (client_ol_ct_arr[cur_off_proc])++; + } + client_comm_next_off_arr[cur_off_proc] = + tmp_coll_buf_sz + act_reg_sz; + + if (agg_next_off != st_reg) + agg_ol_ct++; + agg_next_off = st_reg + act_reg_sz; + break; + case REAL_OFF: + /* Add this region to the proper client ol list if + * the next region is not adjacent to the previous + * region. */ + next_off_idx = client_ol_cur_ct_arr[cur_off_proc]; + if (client_comm_next_off_arr[cur_off_proc] != + tmp_coll_buf_sz) + { + client_disp_arr[cur_off_proc][next_off_idx] = + tmp_coll_buf_sz; + client_blk_arr[cur_off_proc][next_off_idx] = + act_reg_sz; + (client_ol_cur_ct_arr[cur_off_proc])++; + } + else + { + client_blk_arr[cur_off_proc][next_off_idx - 1] + += act_reg_sz; + } + client_comm_sz_arr[cur_off_proc] += act_reg_sz; + client_comm_next_off_arr[cur_off_proc] = + tmp_coll_buf_sz + act_reg_sz; + + /* Add to the aggregator filetype if the next + * region is not adjacent to the previous + * region. 
*/ + if (agg_next_off != st_reg) + { + /* this will enable initial offsets much further into + * the file than an MPI_Aint */ + if (!agg_ol_cur_ct) + *agg_dtype_offset_p = st_reg; + agg_disp_arr[agg_ol_cur_ct] = st_reg - + (MPI_Aint) *agg_dtype_offset_p; + agg_blk_arr[agg_ol_cur_ct] = act_reg_sz; + agg_ol_cur_ct++; + } + else + { + agg_blk_arr[agg_ol_cur_ct - 1] += act_reg_sz; + } + agg_next_off = st_reg + act_reg_sz; + + break; + default: + fprintf(stderr, "ADIOI_Build_agg_reqs: Impossible type\n"); + } + tmp_coll_buf_sz += act_reg_sz; + + find_next_off(fd, + &client_file_view_state_arr[cur_off_proc], + fr_st_off_arr[agg_idx], + &(fr_type_arr[agg_idx]), + i, + &next_off, + &next_reg_max_len); + + if ((next_off != -1) || (!offset_heap.size)) { + ADIOI_Heap_insert(&offset_heap, next_off, cur_off_proc, + next_reg_max_len); +#ifdef DEBUG_HEAP + printf ("inserting offset %lld for p%d\n", next_off, + cur_off_proc); +#endif + } + } + + if (i == TEMP_OFF) + { + /* Allocate offset-length pairs for creating hindexed + * MPI_Datatypes for both the client and the aggregator. 
*/ + if ((client_disp_arr = (MPI_Aint **) + ADIOI_Malloc(nprocs*sizeof(MPI_Aint *))) == NULL) + { + fprintf(stderr, "ADIOI_Build_agg_reqs: malloc " + "client_disp_arr failed\n"); + return -1; + } + if ((client_blk_arr = (int **) ADIOI_Malloc( + nprocs*sizeof(int *))) == NULL) + { + ADIOI_Free(client_disp_arr); + fprintf(stderr, "ADIOI_Build_agg_reqs: malloc " + "client_blk_arr failed\n"); + return -1; + } + for (j = 0; j < nprocs; j++) + { + if ((client_disp_arr[j] = (MPI_Aint *) ADIOI_Malloc( + client_ol_ct_arr[j]*sizeof(MPI_Aint))) == NULL) + { + fprintf(stderr, "ADIOI_Build_agg_reqs: malloc " + "client_disp_arr[%d] failed\n", j); + return -1; + } + if ((client_blk_arr[j] = (int *) + ADIOI_Malloc(client_ol_ct_arr[j]*sizeof(int))) == NULL) + { + ADIOI_Free(client_disp_arr[j]); + fprintf(stderr, "ADIOI_Build_agg_reqs: malloc " + "client_blk_arr[%d] failed\n", j); + return -1; + } + } + + if (agg_ol_ct > 0) + { + if ((agg_disp_arr = (MPI_Aint *) ADIOI_Malloc( + agg_ol_ct*sizeof(MPI_Aint))) == NULL) + { + fprintf(stderr, + "ADIOI_Build_agg_reqs: malloc disp_arr failed\n"); + return -1; + } + if ((agg_blk_arr = (int *) + ADIOI_Malloc(agg_ol_ct*sizeof(int))) == NULL) + { + ADIOI_Free(agg_disp_arr); + fprintf(stderr, + "ADIOI_Build_agg_reqs: malloc blk_arr failed\n"); + return -1; + } + } + } + ADIOI_Heap_free(&offset_heap); + } + + /* Let the clients know if this aggregator is totally finished + * with all possible client requests. 
*/ + all_done = 1; + for (i = 0; i < nprocs; i++) + { + if ((client_file_view_state_arr[i].cur_state.cur_sz != + client_file_view_state_arr[i].sz) || + client_comm_sz_arr[i] != 0) + { + all_done = 0; + break; + } + } + if (all_done == 1) + { + for (i = 0; i < nprocs; i++) + { + client_comm_sz_arr[i] = -1; + } + } + + /* Quick check to make sure we found all the ol pairs we thought + * we did */ + for (i = 0; i < nprocs; i++) + { + if (client_ol_cur_ct_arr[i] != client_ol_ct_arr[i]) + { + fprintf(stderr, "ADIOI_Build_agg_reqs: ERROR Process %d " + "processed only %d out of %d ol pairs\n", i, + client_ol_cur_ct_arr[i], + client_ol_ct_arr[i]); + return -1; + } + } +#ifdef DEBUG1 + fprintf(stderr, "ADIOI_Build_agg_reqs:(client,ol_pairs,size_req)="); + for (i = 0; i < nprocs; i++) + { + fprintf(stderr, "(%d,%d,%Ld)", i, client_ol_ct_arr[i], + client_comm_sz_arr[i]); + if (i != nprocs - 1) + fprintf(stderr, ","); + } + fprintf(stderr, "\n"); +#endif +#ifdef DEBUG1 + fprintf(stderr, "ADIOI_Build_agg_reqs: Generated %d of %d " + "aggregate offset-length pairs\n", agg_ol_cur_ct, agg_ol_ct); +#endif +#ifdef DEBUG2 + for (i = 0; i < nprocs; i++) + { + if (client_ol_ct_arr[i] > 0) + { + fprintf(stderr, "ADIOI_Build_agg_reqs: p %d (off,len) = ", i); + for (j = 0; j < client_ol_ct_arr[i]; j++) + { + fprintf(stderr, "[%d](%d,%d) ", j, + client_disp_arr[i][j], + client_blk_arr[i][j]); + } + fprintf(stderr, "\n"); + } + } + if (agg_ol_ct) { + fprintf(stderr, "ADIOI_Build_agg_reqs:agg_type(off,len)="); + for (i = 0; i < agg_ol_ct; i++) + { + fprintf(stderr, "[%d](%d,%d)", + i, agg_disp_arr[i], agg_blk_arr[i]); + if (i != agg_ol_ct - 1) + fprintf(stderr, ","); + } + fprintf(stderr, "\n"); + } +#endif + + assert(agg_ol_cur_ct == agg_ol_ct); + + /* Create all the client and aggregate MPI_Datatypes */ + for (i = 0; i < nprocs; i++) + { + if (client_comm_sz_arr[i] > 0) + { + MPI_Type_hindexed(client_ol_ct_arr[i], client_blk_arr[i], + client_disp_arr[i], MPI_BYTE, + 
&(client_comm_dtype_arr[i])); + MPI_Type_commit(&(client_comm_dtype_arr[i])); + } + else + { + client_comm_dtype_arr[i] = MPI_BYTE; + } + ADIOI_Free(client_blk_arr[i]); + ADIOI_Free(client_disp_arr[i]); + } + ADIOI_Free(client_blk_arr); + ADIOI_Free(client_disp_arr); + + if (agg_ol_ct > 0) { + if (agg_ol_ct == 1) + MPI_Type_contiguous (agg_blk_arr[0], MPI_BYTE, agg_dtype_p); + else if (agg_ol_ct > 1) + MPI_Type_hindexed(agg_ol_ct, agg_blk_arr, agg_disp_arr, MPI_BYTE, + agg_dtype_p); + + MPI_Type_commit(agg_dtype_p); + + ADIOI_Free(agg_disp_arr); + ADIOI_Free(agg_blk_arr); + } + ADIOI_Free(client_ol_ct_arr); + ADIOI_Free(client_ol_cur_ct_arr); + ADIOI_Free(client_comm_next_off_arr); +#ifdef AGGREGATION_PROFILE + MPE_Log_event (5017, 0, NULL); +#endif + return 0; +} + +/* All sizes from all aggregators are gathered on the clients, which + * then call this function, which will generate the comm datatypes for + * each aggregator (agg_comm_dtype_arr) in the upcoming + * MPI_Alltoallw() */ +int ADIOI_Build_client_reqs(ADIO_File fd, + int nprocs, + view_state *my_mem_view_state_arr, + view_state *agg_file_view_state_arr, + ADIO_Offset *agg_comm_sz_arr, + MPI_Datatype *agg_comm_dtype_arr) +{ + MPI_Aint **agg_disp_arr = NULL; + int **agg_blk_arr = NULL; + view_state *tmp_mem_state_p = NULL, *tmp_file_state_p = NULL; + ADIO_Offset total_agg_comm_sz = 0, cur_total_agg_comm_sz = 0; + ADIO_Offset st_reg = 0, act_reg_sz = 0, tmp_reg_sz = 0; + ADIO_Offset cur_off = -1, cur_reg_max_len = -1; + ADIO_Offset tmp_cur_off = -1, tmp_cur_reg_max_len = -1; + ADIO_Offset agg_mem_st_reg = 0, agg_mem_act_reg_sz = 0; + ADIO_Offset *fr_st_off_arr = fd->file_realm_st_offs; + ADIO_Offset *agg_comm_cur_sz_arr = NULL; + MPI_Datatype *fr_type_arr = fd->file_realm_types; + int cb_node_ct = fd->hints->cb_nodes; + int *agg_ol_ct_arr = NULL; + int *agg_ol_cur_ct_arr = NULL; + int agg_fr_idx = -1, tmp_agg_fr_idx = -1; + int cur_off_proc = -1; + int i = 0, j = 0; + int agg_next_off_idx = -1; + /* Used 
for coalescing ol pairs next to each other. */ + ADIO_Offset *agg_mem_next_off_arr = NULL; +#ifdef AGGREGATION_PROFILE + MPE_Log_event (5018, 0, NULL); +#endif + +#ifdef DEBUG + fprintf(stderr, "ADIOI_Build_client_reqs:(agg,size_req)="); + for (i = 0; i < nprocs; i++) + { + int tmp_agg_idx = ADIOI_Agg_idx(i, fd); + if (tmp_agg_idx >= 0) + { + fprintf(stderr, "(%d,%Ld)", i, agg_comm_sz_arr[i]); + if (i != fd->hints->cb_nodes - 1) + fprintf(stderr, ","); + } + fprintf(stderr, "\n"); + } +#endif + + if ((agg_mem_next_off_arr = (ADIO_Offset *) ADIOI_Malloc( + nprocs*sizeof(ADIO_Offset))) == NULL) + { + fprintf(stderr, "ADIOI_Build_client_reqs: malloc agg_mem_next_off_arr" + "failed\n"); + return -1; + } + + if ((agg_comm_cur_sz_arr = (ADIO_Offset *) + ADIOI_Malloc(nprocs*sizeof(ADIO_Offset))) == NULL) + { + fprintf(stderr, "ADIOI_Build_client_reqs: malloc agg_comm_cur_sz_arr" + " failed\n"); + return -1; + } + if ((agg_ol_ct_arr = (int *) ADIOI_Calloc(nprocs, sizeof(int))) + == NULL) + { + fprintf(stderr, "ADIOI_Build_client_reqs: " + "malloc agg_ol_ct_arr failed\n"); + return -1; + } + if ((agg_ol_cur_ct_arr = (int *) ADIOI_Calloc(nprocs, sizeof(int))) + == NULL) + { + fprintf(stderr, "ADIOI_Build_client_reqs: " + "malloc agg_ol_cur_ct_arr failed\n"); + return -1; + } + + for (i = 0; i < nprocs; i++) + { + if (agg_comm_sz_arr[i] > 0) + total_agg_comm_sz += agg_comm_sz_arr[i]; + } + + /* On the first pass see how many offset-length pairs are + * necessary for each aggregator. Then allocate the correct + * amount of offset-length pairs for handling each aggregator's + * particular data size. On the last pass, we actually create the + * offset-length pairs. 
*/ + for (i = 0; i < MAX_OFF_TYPE; i++) + { + cur_total_agg_comm_sz = 0; + memset(agg_comm_cur_sz_arr, 0, nprocs*sizeof(ADIO_Offset)); + memset(agg_mem_next_off_arr, -1, nprocs*sizeof(ADIO_Offset)); + while (total_agg_comm_sz > cur_total_agg_comm_sz) + { + /* Look for the next aggregator offset among all the + * aggregators and their respective file realms. */ + cur_off = -1; + for (j = 0; j < nprocs; j++) + { + tmp_agg_fr_idx = ADIOI_Agg_idx(j, fd); + assert(tmp_agg_fr_idx < cb_node_ct); + + /* If this process is not an aggregator or we have + * finished all the bytes for this aggregator, move + * along. */ + if (tmp_agg_fr_idx < 0 || + agg_comm_cur_sz_arr[j] == agg_comm_sz_arr[j]) + { + continue; + } + + find_next_off(fd, + &(agg_file_view_state_arr[j]), + fr_st_off_arr[tmp_agg_fr_idx], + &(fr_type_arr[tmp_agg_fr_idx]), + i, + &tmp_cur_off, + &tmp_cur_reg_max_len); + if (tmp_cur_off == -1) + continue; + + if ((cur_off == -1) || + (cur_off > tmp_cur_off)) + { + agg_fr_idx = tmp_agg_fr_idx; + cur_off_proc = j; + cur_off = tmp_cur_off; + cur_reg_max_len = tmp_cur_reg_max_len; + } + } + + assert(cur_off_proc != -1); + + /* Add up to the end of the file realm or as many bytes + * are left for this particular aggregator in the client's + * filetype */ + if (cur_reg_max_len > agg_comm_sz_arr[cur_off_proc] - + agg_comm_cur_sz_arr[cur_off_proc]) + { + cur_reg_max_len = agg_comm_sz_arr[cur_off_proc] - + agg_comm_cur_sz_arr[cur_off_proc]; + } + assert(cur_reg_max_len > 0); + + view_state_add_region( + cur_reg_max_len, + &(agg_file_view_state_arr[cur_off_proc]), + &st_reg, &act_reg_sz, i); + +#ifdef DEBUG2 + fprintf(stderr, "ADIOI_Build_client_reqs: %s File region" + " (proc=%d,off=%Ld,sz=%Ld)\n", + off_type_name[i], cur_off_proc, + cur_off, act_reg_sz); +#endif + + /* Before translating the file regions to memory regions, + * we first must advance to the proper point in the + * mem_view_state for this aggregator to match the + * file_view_state. 
*/ + tmp_file_state_p = &(agg_file_view_state_arr[cur_off_proc]); + tmp_mem_state_p = &(my_mem_view_state_arr[cur_off_proc]); + assert(view_state_get_cur_sz(tmp_file_state_p, i) - act_reg_sz >= + view_state_get_cur_sz(tmp_mem_state_p, i)); + while (view_state_get_cur_sz(tmp_file_state_p, i) - act_reg_sz != + view_state_get_cur_sz(tmp_mem_state_p, i)) + { + ADIO_Offset fill_st_reg = -1, fill_reg_sz = -1; + view_state_add_region( + view_state_get_cur_sz(tmp_file_state_p, i) - act_reg_sz - + view_state_get_cur_sz(tmp_mem_state_p, i), + tmp_mem_state_p, + &fill_st_reg, + &fill_reg_sz, i); + } + + /* Based on how large the act_reg_sz 1. Figure out how + * many memory offset-length pairs are necessary. 2. Set + * the offset-length pairs. */ + tmp_reg_sz = 0; + while (tmp_reg_sz != act_reg_sz) + { + view_state_add_region( + act_reg_sz - tmp_reg_sz, + tmp_mem_state_p, + &agg_mem_st_reg, &agg_mem_act_reg_sz, + i); + tmp_reg_sz += agg_mem_act_reg_sz; + +#ifdef DEBUG2 + fprintf(stderr, "ADIOI_Build_client_reqs: Mem region %s" + "(proc=%d,off=%Ld,sz=%Ld)\n", + off_type_name[i], cur_off_proc, + agg_mem_st_reg, agg_mem_act_reg_sz); +#endif + agg_comm_cur_sz_arr[cur_off_proc] += agg_mem_act_reg_sz; + cur_total_agg_comm_sz += agg_mem_act_reg_sz; + switch(i) + { + case TEMP_OFF: + /* Increment the ol list count a particular + * aggregator if next region is not adjacent + * to the previous region. */ + if (agg_mem_next_off_arr[cur_off_proc] != + agg_mem_st_reg) + { + agg_ol_ct_arr[cur_off_proc]++; + } + agg_mem_next_off_arr[cur_off_proc] = + agg_mem_st_reg + agg_mem_act_reg_sz; + break; + case REAL_OFF: + /* Set the ol list for the memtypes that will + * map to each aggregator, coaslescing if + * possible. 
*/ + agg_next_off_idx = agg_ol_cur_ct_arr[cur_off_proc]; + if (agg_mem_next_off_arr[cur_off_proc] != + agg_mem_st_reg) + { + agg_disp_arr[cur_off_proc][agg_next_off_idx] = + agg_mem_st_reg; + agg_blk_arr[cur_off_proc][agg_next_off_idx] = + agg_mem_act_reg_sz; + (agg_ol_cur_ct_arr[cur_off_proc])++; + } + else + { + agg_blk_arr[cur_off_proc][agg_next_off_idx - 1] + += agg_mem_act_reg_sz; + } + agg_mem_next_off_arr[cur_off_proc] = + agg_mem_st_reg + agg_mem_act_reg_sz; + break; + default: + fprintf(stderr, "ADIOI_Build_client_reqs: " + "Impossible type\n"); + } + } + } + + /* On the first pass, allocate the memory structures for + * creating the MPI_hindexed type. */ + if (i == TEMP_OFF) + { + /* Allocate offset-length pairs for creating hindexed + * MPI_Datatypes for each aggregator */ + if ((agg_disp_arr = (MPI_Aint **) + ADIOI_Malloc(nprocs*sizeof(MPI_Aint *))) == NULL) + { + fprintf(stderr, + "ADIOI_Build_client_reqs: malloc agg_disp_arr failed\n"); + return -1; + } + if ((agg_blk_arr = (int **) ADIOI_Malloc(nprocs*sizeof(int *))) + == NULL) + { + ADIOI_Free(agg_disp_arr); + fprintf(stderr, + "ADIOI_Build_client_reqs: malloc agg_blk_arr failed\n"); + return -1; + } + for (j = 0; j < nprocs; j++) + { + if ((agg_disp_arr[j] = (MPI_Aint *) + ADIOI_Malloc(agg_ol_ct_arr[j]*sizeof(MPI_Aint))) == NULL) + { + fprintf(stderr, "ADIOI_Build_client_reqs: malloc " + "agg_disp_arr[%d] failed\n", j); + return -1; + } + if ((agg_blk_arr[j] = (int *) + ADIOI_Malloc(agg_ol_ct_arr[j]*sizeof(int))) == NULL) + { + ADIOI_Free(agg_disp_arr[j]); + fprintf(stderr, "ADIOI_Build_client_reqs: malloc " + "agg_blk_arr[%d] failed\n", j); + return -1; + } + } + } + } + +#ifdef DEBUG + fprintf(stderr, "ADIOI_Build_client_reqs:(agg,cur_ol_count=ol_count)="); + for (i = 0; i < nprocs; i++) + { + int tmp_agg_idx = ADIOI_Agg_idx(i, fd); + if (tmp_agg_idx >= 0) + { + fprintf(stderr, "(%d,%d=%d)", i, agg_ol_cur_ct_arr[i], + agg_ol_ct_arr[i]); + assert(agg_ol_ct_arr[i] == agg_ol_cur_ct_arr[i]); + if 
(tmp_agg_idx != fd->hints->cb_nodes - 1) + fprintf(stderr, ","); + } + } + fprintf(stderr, "\n"); +#endif + +#ifdef DEBUG2 + for (i = 0; i < nprocs; i++) + { + if (agg_ol_ct_arr[i] > 0) + { + fprintf(stderr, "ADIOI_Build_client_reqs: p %d (off,len) = ", i); + for (j = 0; j < agg_ol_ct_arr[i]; j++) + { + fprintf(stderr, "[%d](%d,%d) ", j, + agg_disp_arr[i][j], + agg_blk_arr[i][j]); + } + fprintf(stderr, "\n"); + } + } +#endif + + /* Create all the aggregator MPI_Datatypes */ + for (i = 0; i < nprocs; i++) + { + if (agg_comm_sz_arr[i] > 0) + { + MPI_Type_hindexed(agg_ol_ct_arr[i], agg_blk_arr[i], + agg_disp_arr[i], MPI_BYTE, + &(agg_comm_dtype_arr[i])); + MPI_Type_commit(&(agg_comm_dtype_arr[i])); + } + else + { + agg_comm_dtype_arr[i] = MPI_BYTE; + } + ADIOI_Free(agg_blk_arr[i]); + ADIOI_Free(agg_disp_arr[i]); + } + ADIOI_Free(agg_blk_arr); + ADIOI_Free(agg_disp_arr); + + ADIOI_Free(agg_mem_next_off_arr); + ADIOI_Free(agg_comm_cur_sz_arr); + ADIOI_Free(agg_ol_ct_arr); + ADIOI_Free(agg_ol_cur_ct_arr); +#ifdef AGGREGATION_PROFILE + MPE_Log_event (5019, 0, NULL); +#endif + return 0; +} +/* ADIOI_Build_client_pre_req allows a client to calculate the memtype + * offset-length pairs up (up to a limit - max_pre_req_sz or max + * ol_ct). It basically allows ADIOI_Build_client_req to do less work. + * If it called and there already exist some preprocessed memtype + * offset-length pairs, it will exit immediately if a limit has been + * reached or if will add on the old limites to reach the new + * limits. 
*/ + +int ADIOI_Build_client_pre_req(ADIO_File fd, + int agg_rank, int agg_idx, + view_state *my_mem_view_state_p, + view_state *agg_file_view_state_p, + ADIO_Offset max_pre_req_sz, + int max_ol_ct) +{ + ADIO_Offset act_reg_sz = 0, tmp_reg_sz = 0; + ADIO_Offset cur_off = -1, cur_reg_max_len = -1; + ADIO_Offset agg_mem_st_reg = 0, agg_mem_act_reg_sz = 0; + int agg_ol_ct = 0, agg_ol_cur_ct = 0; + int i, agg_next_off_idx = -1; + + ADIO_Offset cur_sz = 0, max_sz = 0, agg_mem_next_off = -1; + ADIO_Offset fill_st_reg = -1, fill_reg_sz = -1; + ADIO_Offset *fr_st_off_arr = fd->file_realm_st_offs; + MPI_Datatype *fr_type_arr = fd->file_realm_types; + MPI_Aint *tmp_disp_arr = NULL; + int *tmp_blk_arr = NULL, exit_loop = -1; + flatten_state *tmp_mem_state_p = NULL, *tmp_file_state_p = NULL; +#ifdef DTYPE_SKIP + int skip_type_ct; +#endif + if (agg_idx < 0 || agg_idx >= fd->hints->cb_nodes) + { + fprintf(stderr, "ADIOI_Build_client_pre_req: Invalid agg_idx %d\n", + agg_idx); + return -1; + } + + if (agg_file_view_state_p->cur_state.cur_sz == + agg_file_view_state_p->sz || max_pre_req_sz <= 0 || + max_ol_ct <= 0) + { +#ifdef DEBUG1 + fprintf(stderr, + "ADIOI_Build_client_pre_req: Nothing to preprocess\n"); +#endif + return 0; + } + + /* The new limits have already been surpassed by what already + * exists. Otherwise we will use the next restrictions */ + if ((my_mem_view_state_p->pre_sz >= max_pre_req_sz) || + (my_mem_view_state_p->pre_ol_ct >= max_ol_ct)) + { +#ifdef DEBUG1 + fprintf(stderr, + "ADIOI_Build_client_pre_req: Old values surpass new " + "pre_req values\n"); +#endif + return 0; + } + + /* General idea is to first advance the filetype to the file realm + * and then the memtype to the filetype. The memtype is advanced + * further by peeking at the filetype and then the filetype is + * advanced. 
*/ + for (i = 0; i < MAX_OFF_TYPE; i++) + { + switch(i) + { + case TEMP_OFF: + tmp_mem_state_p = &(my_mem_view_state_p->tmp_state); + tmp_file_state_p = &(agg_file_view_state_p->tmp_state); + break; + case REAL_OFF: + tmp_mem_state_p = &(my_mem_view_state_p->cur_state); + tmp_file_state_p = &(agg_file_view_state_p->cur_state); + break; + default: + fprintf(stderr, "ADIOI_Build_client_pre_req: " + "Invalid off type %d\n", i); + } + + if (i == TEMP_OFF && my_mem_view_state_p->pre_sz > 0) + { + cur_sz = my_mem_view_state_p->pre_sz; + agg_ol_ct = my_mem_view_state_p->pre_ol_ct; + /* Save the old arrays */ + tmp_disp_arr = my_mem_view_state_p->pre_disp_arr; + tmp_blk_arr = my_mem_view_state_p->pre_blk_arr; + my_mem_view_state_p->pre_disp_arr = NULL; + my_mem_view_state_p->pre_blk_arr = NULL; + agg_mem_next_off = + tmp_disp_arr[agg_ol_ct - 1] + tmp_blk_arr[agg_ol_ct - 1]; + } + else if (i == REAL_OFF && my_mem_view_state_p->pre_sz > 0) + { + cur_sz = my_mem_view_state_p->pre_sz; + agg_ol_cur_ct = my_mem_view_state_p->pre_ol_ct; + + /* Copy the old data to the new data, freeing the old + * arrays */ + memcpy(my_mem_view_state_p->pre_disp_arr, tmp_disp_arr, + my_mem_view_state_p->pre_ol_ct * sizeof(MPI_Aint)); + memcpy(my_mem_view_state_p->pre_blk_arr, tmp_blk_arr, + my_mem_view_state_p->pre_ol_ct * sizeof(int)); + + ADIOI_Free(tmp_disp_arr); + ADIOI_Free(tmp_blk_arr); + + agg_mem_next_off = + my_mem_view_state_p->pre_disp_arr[agg_ol_cur_ct - 1] + + my_mem_view_state_p->pre_blk_arr[agg_ol_cur_ct - 1]; + } + else + { + cur_sz = 0; + } + + /* Max_pre_req_sz may be larger than the amount of data left + * to preprocess */ + if (max_pre_req_sz - cur_sz > + agg_file_view_state_p->sz - tmp_file_state_p->cur_sz) + { + max_sz = cur_sz + + agg_file_view_state_p->sz - tmp_file_state_p->cur_sz; + } + else + max_sz = max_pre_req_sz; + + assert(cur_sz != max_sz); +#ifdef DEBUG1 + fprintf(stderr, + "ADIOI_Build_client_pre_req: (cur_sz=%Ld,agg_ol_ct=%d," + 
"agg_mem_next_off=%Ld,max_sz=%Ld,max_ol_ct=%d)\n", + cur_sz, agg_ol_ct, agg_mem_next_off, max_sz, max_ol_ct); +#endif + while (cur_sz < max_sz) + { + find_next_off(fd, agg_file_view_state_p, + fr_st_off_arr[agg_rank], + &(fr_type_arr[agg_rank]), + i, + &cur_off, + &cur_reg_max_len); + + /* find_next_off may show that the file_view_state is done + * even if cur_sz != max_sz since find_next_off may + * advance the file_view_state to the end here and realize + * that it is done. */ + if (cur_off == -1) + break; + + assert(cur_off != -1); + + /* Before translating the file regions to memory regions, + * we first must advance to the proper point in the + * mem_view_state for this aggregator to match the + * file_view_state. */ + while (tmp_file_state_p->cur_sz != tmp_mem_state_p->cur_sz) + { +#ifdef DTYPE_SKIP + if (my_mem_view_state_p->flat_type_p->count > 1) { + /* let's see if we can skip whole memory datatypes */ + skip_type_ct = + (tmp_file_state_p->cur_sz - tmp_mem_state_p->cur_sz) / + my_mem_view_state_p->type_sz; + if (skip_type_ct > 0) { + tmp_mem_state_p->cur_sz += + skip_type_ct * my_mem_view_state_p->type_sz; + tmp_mem_state_p->abs_off += + skip_type_ct * my_mem_view_state_p->ext; + if (tmp_mem_state_p->cur_sz == + tmp_file_state_p->cur_sz) + break; + } + } +#endif + view_state_add_region( + tmp_file_state_p->cur_sz - tmp_mem_state_p->cur_sz, + my_mem_view_state_p, + &fill_st_reg, + &fill_reg_sz, i); + } + + /* Now that the filetype and memtype are advanced to the + * same position, add memtype ol-pairs while we have not + * overstepped the min(end of the current piece in the + * file view, end of the file realm, data left in + * max_sz) */ + + if (cur_reg_max_len > + view_state_get_next_len(agg_file_view_state_p, i)) + cur_reg_max_len = + view_state_get_next_len(agg_file_view_state_p, i); + + if (cur_reg_max_len > max_sz - cur_sz) + cur_reg_max_len = max_sz - cur_sz; + + assert(cur_reg_max_len > 0); + + /* Add memtype ol pairs while we have not passed + * 
cur_reg_max_len or the max number of ol pairs + * allowed */ + act_reg_sz = 0; + exit_loop = 0; + while ((act_reg_sz < cur_reg_max_len) && + (exit_loop == 0)) + { + view_state_add_region( + cur_reg_max_len - act_reg_sz, + my_mem_view_state_p, + &agg_mem_st_reg, &agg_mem_act_reg_sz, + i); + act_reg_sz += agg_mem_act_reg_sz; + +#ifdef DEBUG2 + fprintf(stderr, "ADIOI_Build_client_pre_req: %s Mem region" + "(proc=%d,off=%Ld,sz=%Ld)\n", + off_type_name[i], agg_rank, agg_mem_st_reg, + agg_mem_act_reg_sz); +#endif + switch(i) + { + case TEMP_OFF: + /* Increment the ol list count if the next + * region is not adjacent to the previous + * region. */ + if (agg_mem_next_off != agg_mem_st_reg) + { + agg_ol_ct++; + if (agg_ol_ct == max_ol_ct) + exit_loop = 1; + } + agg_mem_next_off = + agg_mem_st_reg + agg_mem_act_reg_sz; + break; + case REAL_OFF: + /* Set the ol list for the memtype that + * will map to our aggregator, coaslescing + * if possible. */ + agg_next_off_idx = agg_ol_cur_ct; + if (agg_mem_next_off != agg_mem_st_reg) + { + my_mem_view_state_p-> + pre_disp_arr[agg_next_off_idx] = + agg_mem_st_reg; + my_mem_view_state_p-> + pre_blk_arr[agg_next_off_idx] = + agg_mem_act_reg_sz; + agg_ol_cur_ct++; + if (agg_ol_cur_ct == agg_ol_ct) + exit_loop = 1; + } + else + { + my_mem_view_state_p-> + pre_blk_arr[agg_next_off_idx - 1] + += agg_mem_act_reg_sz; + } + agg_mem_next_off = + agg_mem_st_reg + agg_mem_act_reg_sz; + break; + default: + fprintf(stderr, "ADIOI_Build_client_pre_req: " + "Impossible type\n"); + } + } + + /* Advance the filetype flatten state appropriately to + * match the data advanced in the memtype flatten state. + * Should only take at most a single view_state_add_region + * call since the memtype cannot proceed beyond the end of + * a contig piece in the file type. 
*/ + view_state_add_region(act_reg_sz - tmp_reg_sz, + agg_file_view_state_p, + &fill_st_reg, &fill_reg_sz, i); +#ifdef DEBUG2 + fprintf(stderr, "ADIOI_Build_client_pre_req: %s File region" + " (proc=%d,off=%Ld,sz=%Ld)\n", + off_type_name[i], agg_rank, fill_st_reg, fill_reg_sz); +#endif + if (fill_reg_sz != act_reg_sz) + { + fprintf(stderr, "ADIOI_Build_client_pre_req: " + "view_state_add_region failed to match the memtype\n"); + return -1; + } + + cur_sz += act_reg_sz; + } + + /* On the first pass, allocate the memory structures for + * storing the preprocessed information */ + if (i == TEMP_OFF) + { + if ((my_mem_view_state_p->pre_disp_arr = (MPI_Aint *) + ADIOI_Malloc(agg_ol_ct * sizeof(MPI_Aint))) == NULL) + { + fprintf(stderr, "ADIOI_Build_client_pre_req: malloc " + "pre_disp_arr of size %ld failed\n", + (long int)agg_ol_ct * sizeof(MPI_Aint)); + return -1; + } + if ((my_mem_view_state_p->pre_blk_arr = (int *) + ADIOI_Malloc(agg_ol_ct * sizeof(int))) == NULL) + { + ADIOI_Free(my_mem_view_state_p->pre_disp_arr); + fprintf(stderr, "ADIOI_Build_client_pre_req: malloc " + "agg_blk_arr of size %ld failed\n", + (long int)agg_ol_ct * sizeof(int)); + return -1; + } + } + } + + my_mem_view_state_p->pre_sz = cur_sz; + my_mem_view_state_p->pre_ol_ct = agg_ol_ct; + +#ifdef DEBUG1 + fprintf(stderr, "ADIOI_Build_client_pre_req:(agg=%d,cur_ol_count=%d" + "=ol_count=%d)\n", + agg_rank, my_mem_view_state_p->pre_ol_ct, agg_ol_ct); +#endif + +#ifdef DEBUG2 + if (agg_ol_ct > 0) + { + fprintf(stderr, "ADIOI_Build_client_pre_req: agg=%d,pre_sz=%Ld " + "(off,len) = \n", agg_rank, my_mem_view_state_p->pre_sz); + for (i = 0; i < my_mem_view_state_p->pre_ol_ct; i++) + { + fprintf(stderr, "[%d](%d,%d) ", i, + my_mem_view_state_p->pre_disp_arr[i], + my_mem_view_state_p->pre_blk_arr[i]); + if (i % 5 == 0 && i != 0) + fprintf(stderr, "\n"); + } + fprintf(stderr, "\n"); + } +#endif + + return 0; +} + +/* process_pre_req() allows ADIOI_Build_client_req to use the pre_req + * information. 
*/ + +static int process_pre_req(ADIO_File fd, + int agg_rank, + int agg_idx, + view_state *my_mem_view_state_p, + view_state *agg_file_view_state_p, + ADIO_Offset agg_comm_sz, + int off_type, + MPI_Aint *agg_disp_arr, + int *agg_blk_arr, + ADIO_Offset *agg_comm_pre_sz_p, + ADIO_Offset *agg_comm_cur_sz_p, + ADIO_Offset *agg_comm_sz_p, + int *agg_ol_cur_ct_p, + int *agg_ol_ct_p, + ADIO_Offset *agg_mem_next_off_p) +{ + int i, has_partial = 0; + MPI_Aint partial_disp = 0; + int partial_len = 0; + ADIO_Offset tmp_agg_comm_pre_sz = 0; + + assert (my_mem_view_state_p->pre_sz > 0); + switch(off_type) + { + case TEMP_OFF: + /* Use only some of the precalculated data */ + if (my_mem_view_state_p->pre_sz > *agg_comm_sz_p) + { + for (i = 0; i < my_mem_view_state_p->pre_ol_ct; i++) + { + if ((my_mem_view_state_p->pre_blk_arr[i] + + *agg_comm_pre_sz_p) > *agg_comm_sz_p) + { + has_partial = 1; + partial_len = *agg_comm_sz_p - *agg_comm_pre_sz_p; + *agg_comm_pre_sz_p = *agg_comm_sz_p; + i++; + break; + } + else if ((my_mem_view_state_p->pre_blk_arr[i] + + *agg_comm_pre_sz_p) == *agg_comm_sz_p) + { + *agg_comm_pre_sz_p += + my_mem_view_state_p->pre_blk_arr[i]; + i++; + break; + } + else + *agg_comm_pre_sz_p += + my_mem_view_state_p->pre_blk_arr[i]; + } + + if (has_partial == 1) + { + *agg_mem_next_off_p = + my_mem_view_state_p->pre_disp_arr[i - 1] + + partial_len; + } + else + { + *agg_mem_next_off_p = + my_mem_view_state_p->pre_disp_arr[i - 1] + + my_mem_view_state_p->pre_blk_arr[i - 1]; + } + + *agg_comm_cur_sz_p = *agg_comm_pre_sz_p; + *agg_ol_ct_p = i; + + } + else /* Use all the precalculated data */ + { + *agg_comm_pre_sz_p = my_mem_view_state_p->pre_sz; + *agg_comm_cur_sz_p = *agg_comm_pre_sz_p; + *agg_ol_ct_p = my_mem_view_state_p->pre_ol_ct; + *agg_mem_next_off_p = + my_mem_view_state_p->pre_disp_arr[ + my_mem_view_state_p->pre_ol_ct - 1] + + my_mem_view_state_p->pre_blk_arr[ + my_mem_view_state_p->pre_ol_ct - 1]; + } +#ifdef DEBUG1 + fprintf(stderr, "process_pre_req: 
TEMP_OFF " + "agg_comm_pre_sz=%Ld,agg_comm_cur_sz=%Ld,agg_ol_ct=%d\n", + *agg_comm_pre_sz_p, *agg_comm_cur_sz_p, *agg_ol_ct_p); +#endif + assert(*agg_comm_cur_sz_p <= *agg_comm_sz_p); + break; + case REAL_OFF: + /* Set the ol list for the memtype that will map to our + * aggregator, coaslescing if possible. */ + for (i = 0; i < my_mem_view_state_p->pre_ol_ct; i++) + { + agg_disp_arr[i] = my_mem_view_state_p->pre_disp_arr[i]; + agg_blk_arr[i] = my_mem_view_state_p->pre_blk_arr[i]; + + if ((my_mem_view_state_p->pre_blk_arr[i] + + tmp_agg_comm_pre_sz) > *agg_comm_pre_sz_p) + { + has_partial = 1; + agg_blk_arr[i] = *agg_comm_pre_sz_p - tmp_agg_comm_pre_sz; + tmp_agg_comm_pre_sz = *agg_comm_pre_sz_p; + partial_disp = my_mem_view_state_p->pre_disp_arr[i] + + agg_blk_arr[i]; + partial_len = my_mem_view_state_p->pre_blk_arr[i] - + agg_blk_arr[i]; + i++; + break; + } + else if ((my_mem_view_state_p->pre_blk_arr[i] + + tmp_agg_comm_pre_sz) == *agg_comm_pre_sz_p) + { + tmp_agg_comm_pre_sz += + my_mem_view_state_p->pre_blk_arr[i]; + i++; + break; + } + else + tmp_agg_comm_pre_sz += + my_mem_view_state_p->pre_blk_arr[i]; + } + *agg_mem_next_off_p = agg_disp_arr[i - 1] + agg_blk_arr[i - 1]; + *agg_ol_cur_ct_p = i; + *agg_comm_cur_sz_p = *agg_comm_pre_sz_p; + + /* Clean up the ol pairs we used */ + if ((i < my_mem_view_state_p->pre_ol_ct) || (has_partial == 1)) + { + int remain_ol_ct = + my_mem_view_state_p->pre_ol_ct - i + has_partial; + MPI_Aint *new_pre_disp_arr = NULL; + int *new_pre_blk_arr = NULL; + + if ((new_pre_disp_arr = (MPI_Aint *) + ADIOI_Malloc(remain_ol_ct * sizeof(MPI_Aint))) == NULL) + { + fprintf(stderr, "process_pre_req: malloc " + "new_pre_disp_arr failed\n"); + return -1; + } + if ((new_pre_blk_arr = (int *) + ADIOI_Malloc(remain_ol_ct * sizeof(int))) == NULL) + { + fprintf(stderr, "process_pre_req: malloc " + "new_pre_blk_arr failed\n"); + return -1; + } + + memcpy(new_pre_disp_arr, + &(my_mem_view_state_p->pre_disp_arr[i - has_partial]), + remain_ol_ct * 
sizeof(MPI_Aint)); + memcpy(new_pre_blk_arr, + &(my_mem_view_state_p->pre_blk_arr[i - has_partial]), + remain_ol_ct * sizeof(int)); + + /* Set the partial len of the first piece */ + if (has_partial == 1) + { + /* new_pre_disp_arr[remain_ol_ct - 1] = partial_disp; + new_pre_blk_arr[remain_ol_ct - 1] = partial_len; */ + new_pre_disp_arr[0] = partial_disp; + new_pre_blk_arr[0] = partial_len; + } + + ADIOI_Free(my_mem_view_state_p->pre_disp_arr); + ADIOI_Free(my_mem_view_state_p->pre_blk_arr); + + my_mem_view_state_p->pre_disp_arr = new_pre_disp_arr; + my_mem_view_state_p->pre_blk_arr = new_pre_blk_arr; + my_mem_view_state_p->pre_ol_ct = remain_ol_ct; + my_mem_view_state_p->pre_sz -= *agg_comm_pre_sz_p; + } + else /* Used all the precalculated ol pairs */ + { + ADIOI_Free(my_mem_view_state_p->pre_disp_arr); + ADIOI_Free(my_mem_view_state_p->pre_blk_arr); + + my_mem_view_state_p->pre_disp_arr = NULL; + my_mem_view_state_p->pre_blk_arr = NULL; + my_mem_view_state_p->pre_ol_ct = 0; + my_mem_view_state_p->pre_sz = 0; + } +#ifdef DEBUG1 + fprintf(stderr, "process_pre_req: REAL_OFF " + "agg_comm_pre_sz=%Ld,agg_comm_cur_sz=%Ld,agg_ol_ct=%d," + "agg_ol_cur_ct=%d\n", + *agg_comm_pre_sz_p, *agg_comm_cur_sz_p, *agg_ol_ct_p, + *agg_ol_cur_ct_p); +#endif + break; + default: + fprintf(stderr, "process_pre_req: Invalid off_type %d\n", + off_type); + } + return 0; +} + +/* ADIOI_Build_client_req() creates a memory datatype to transfer data + * to/from a particular aggregator. 
*/ + +int ADIOI_Build_client_req(ADIO_File fd, + int agg_rank, + int agg_idx, + view_state *my_mem_view_state_p, + view_state *agg_file_view_state_p, + ADIO_Offset agg_comm_sz, + MPI_Datatype *agg_comm_dtype_p) +{ + MPI_Aint *agg_disp_arr = NULL; + int *agg_blk_arr = NULL; + ADIO_Offset st_reg = 0, act_reg_sz = 0, tmp_reg_sz = 0; + ADIO_Offset cur_off = -1, cur_reg_max_len = -1; + ADIO_Offset agg_mem_st_reg = 0, agg_mem_act_reg_sz = 0; + int agg_ol_ct = 0, agg_ol_cur_ct = 0; + int i = 0, agg_next_off_idx = -1; + ADIO_Offset agg_mem_next_off = 0, agg_comm_cur_sz = 0, agg_comm_pre_sz = 0; + ADIO_Offset *fr_st_off_arr = fd->file_realm_st_offs; + MPI_Datatype *fr_type_arr = fd->file_realm_types; + flatten_state *tmp_mem_state_p = NULL, *tmp_file_state_p = NULL; +#ifdef DTYPE_SKIP + int skip_type_ct; +#endif + + if (agg_idx < 0 || agg_idx >= fd->hints->cb_nodes) + { +#ifdef DEBUG1 + fprintf(stderr, "ADIOI_Build_client_req: agg_rank %d does not map " + "to a valid node in cb_node\n", agg_rank); +#endif + return 0; + } + +#ifdef AGGREGATION_PROFILE + MPE_Log_event (5018, 0, NULL); +#endif + +#ifdef DEBUG1 + fprintf(stderr, "ADIOI_Build_client_req:(agg=%d,size_req=%Ld)\n", + agg_idx, agg_comm_sz); +#endif + + /* On the first pass see how many offset-length pairs are + * necessary for each aggregator. Then allocate the correct + * amount of offset-length pairs for handling each aggregator's + * particular data size. On the last pass, we actually create the + * offset-length pairs. 
*/ + for (i = 0; i < MAX_OFF_TYPE; i++) + { + switch(i) + { + case TEMP_OFF: + tmp_mem_state_p = &(my_mem_view_state_p->tmp_state); + tmp_file_state_p = &(agg_file_view_state_p->tmp_state); + break; + case REAL_OFF: + tmp_mem_state_p = &(my_mem_view_state_p->cur_state); + tmp_file_state_p = &(agg_file_view_state_p->cur_state); + break; + default: + fprintf(stderr, "ADIOI_Build_client_pre_req: " + "Invalid off type %d\n", i); + } + + agg_comm_cur_sz = 0; + agg_mem_next_off = -1; + + /* First try to preprocess anything we can */ + if (my_mem_view_state_p->pre_sz > 0) + { + process_pre_req(fd, + agg_rank, + agg_idx, + my_mem_view_state_p, + agg_file_view_state_p, + agg_comm_sz, + i, + agg_disp_arr, + agg_blk_arr, + &agg_comm_pre_sz, + &agg_comm_cur_sz, + &agg_comm_sz, + &agg_ol_cur_ct, + &agg_ol_ct, + &agg_mem_next_off); + } + + while (agg_comm_cur_sz < agg_comm_sz) + { + find_next_off(fd, agg_file_view_state_p, + fr_st_off_arr[agg_idx], + &(fr_type_arr[agg_idx]), + i, + &cur_off, + &cur_reg_max_len); + + assert(cur_off != -1); + + /* Add up to the end of the file realm or as many bytes + * are left for this particular aggregator in the client's + * filetype */ + if (cur_reg_max_len > (agg_comm_sz - agg_comm_cur_sz)) + { + cur_reg_max_len = agg_comm_sz - agg_comm_cur_sz; + } + assert(cur_reg_max_len > 0); + + view_state_add_region( + cur_reg_max_len, + agg_file_view_state_p, + &st_reg, &act_reg_sz, i); + +#ifdef DEBUG2 + fprintf(stderr, "ADIOI_Build_client_req: %s File region" + " (proc=%d,off=%Ld,sz=%Ld)\n", + off_type_name[i], agg_rank, cur_off, act_reg_sz); +#endif + + /* Before translating the file regions to memory regions, + * we first must advance to the proper point in the + * mem_view_state for this aggregator to match the + * file_view_state. 
*/ + + assert(tmp_file_state_p->cur_sz - act_reg_sz >= + tmp_mem_state_p->cur_sz); + + while (tmp_file_state_p->cur_sz - act_reg_sz != + tmp_mem_state_p->cur_sz) + { + ADIO_Offset fill_st_reg = -1, fill_reg_sz = -1; +#ifdef DTYPE_SKIP + if (my_mem_view_state_p->flat_type_p->count > 1) { + /* let's see if we can skip whole memory datatypes */ + skip_type_ct = + (tmp_file_state_p->cur_sz - act_reg_sz - + tmp_mem_state_p->cur_sz) / + my_mem_view_state_p->type_sz; + if (skip_type_ct > 0) { + tmp_mem_state_p->cur_sz += + skip_type_ct * my_mem_view_state_p->type_sz; + tmp_mem_state_p->abs_off += + skip_type_ct * my_mem_view_state_p->ext; + if ((tmp_mem_state_p->cur_sz - act_reg_sz) == + tmp_file_state_p->cur_sz) + break; + } + } +#endif + view_state_add_region( + tmp_file_state_p->cur_sz - + act_reg_sz - tmp_mem_state_p->cur_sz, + my_mem_view_state_p, + &fill_st_reg, + &fill_reg_sz, i); + } + + /* Based on how large the act_reg_sz is, first figure + * out how many memory offset-length pairs are + * necessary and then set the offset-length pairs. */ + tmp_reg_sz = 0; + while (tmp_reg_sz != act_reg_sz) + { + view_state_add_region( + act_reg_sz - tmp_reg_sz, + my_mem_view_state_p, + &agg_mem_st_reg, &agg_mem_act_reg_sz, + i); + tmp_reg_sz += agg_mem_act_reg_sz; + +#ifdef DEBUG2 + fprintf(stderr, "ADIOI_Build_client_req: %s Mem region" + "(off=%Ld,sz=%Ld)\n", + off_type_name[i], agg_mem_st_reg, + agg_mem_act_reg_sz); +#endif + agg_comm_cur_sz += agg_mem_act_reg_sz; + switch(i) + { + case TEMP_OFF: + /* Increment the ol list count if the next + * region is not adjacent to the previous + * region. */ + if (agg_mem_next_off != agg_mem_st_reg) + { + agg_ol_ct++; + } + agg_mem_next_off = + agg_mem_st_reg + agg_mem_act_reg_sz; + break; + case REAL_OFF: + /* Set the ol list for the memtype that + * will map to our aggregator, coaslescing + * if possible. 
*/ + agg_next_off_idx = agg_ol_cur_ct; + if (agg_mem_next_off != agg_mem_st_reg) + { + agg_disp_arr[agg_next_off_idx] = + agg_mem_st_reg; + agg_blk_arr[agg_next_off_idx] = + agg_mem_act_reg_sz; + agg_ol_cur_ct++; + } + else + { + agg_blk_arr[agg_next_off_idx - 1] + += agg_mem_act_reg_sz; + } + agg_mem_next_off = + agg_mem_st_reg + agg_mem_act_reg_sz; + break; + default: + fprintf(stderr, "ADIOI_Build_client_req: " + "Impossible type\n"); + } + } + } + + /* On the first pass, allocate the memory structures for + * creating the MPI_hindexed type. */ + if (i == TEMP_OFF) + { + /* Allocate offset-length pairs for creating hindexed + * MPI_Datatypes for each aggregator */ + if ((agg_disp_arr = (MPI_Aint *) + ADIOI_Malloc(agg_ol_ct * sizeof(MPI_Aint))) == NULL) + { + fprintf(stderr, "ADIOI_Build_client_req: malloc " + "agg_disp_arr of size %ld failed\n", + (long int)agg_ol_ct * sizeof(MPI_Aint)); + return -1; + } + if ((agg_blk_arr = (int *) + ADIOI_Malloc(agg_ol_ct * sizeof(int))) == NULL) + { + ADIOI_Free(agg_disp_arr); + fprintf(stderr, "ADIOI_Build_client_req: malloc " + "agg_blk_arr of size %ld failed\n", + (long int)agg_ol_ct * sizeof(int)); + return -1; + } + } + } + + assert(agg_ol_ct == agg_ol_cur_ct); +#ifdef DEBUG1 + fprintf(stderr, + "ADIOI_Build_client_req:(agg=%d,cur_ol_count=%d=ol_count=%d)\n", + agg_rank, agg_ol_cur_ct, agg_ol_ct); +#endif + +#ifdef DEBUG2 + if (agg_ol_ct > 0) + { + fprintf(stderr, "ADIOI_Build_client_req: p %d (off,len) = ", agg_rank); + for (i = 0; i < agg_ol_ct; i++) + { + fprintf(stderr, "[%d](%d,%d) ", i, + agg_disp_arr[i], agg_blk_arr[i]); + if (i % 5 == 0 && i != 0) + fprintf(stderr, "\n"); + } + fprintf(stderr, "\n"); + } +#endif +#ifdef DEBUG1 + fprintf(stderr, + "ADIOI_Build_client_req:(agg=%d,pre_ol_count=%d)\n", + agg_idx, my_mem_view_state_p->pre_ol_ct); +#endif + +#ifdef DEBUG2 + if (my_mem_view_state_p->pre_sz > 0) + { + fprintf(stderr, "ADIOI_Build_client_req: p %d pre(off,len) = ", + agg_idx); + for (i = 0; i < 
my_mem_view_state_p->pre_ol_ct; i++) + { + fprintf(stderr, "[%d](%d,%d) ", i, + my_mem_view_state_p->pre_disp_arr[i], + my_mem_view_state_p->pre_blk_arr[i]); + if (i % 5 == 0 && i != 0) + fprintf(stderr, "\n"); + } + fprintf(stderr, "\n"); + } +#endif + + /* Create the aggregator MPI_Datatype */ + if (agg_comm_sz > 0) + { + MPI_Type_hindexed(agg_ol_ct, agg_blk_arr, agg_disp_arr, MPI_BYTE, + agg_comm_dtype_p); + MPI_Type_commit(agg_comm_dtype_p); + } + else + { + *agg_comm_dtype_p = MPI_BYTE; + } + + ADIOI_Free(agg_blk_arr); + ADIOI_Free(agg_disp_arr); + +#ifdef AGGREGATION_PROFILE + MPE_Log_event (5019, 0, NULL); +#endif + return 0; +} + + diff --git a/ompi/mca/io/romio/romio/adio/common/ad_coll_exch_new.c b/ompi/mca/io/romio/romio/adio/common/ad_coll_exch_new.c new file mode 100644 index 0000000000..25473f6548 --- /dev/null +++ b/ompi/mca/io/romio/romio/adio/common/ad_coll_exch_new.c @@ -0,0 +1,510 @@ +/* -*- Mode: C; c-basic-offset:4 ; -*- */ +/* + * + * Copyright (C) 1997 University of Chicago. + * See COPYRIGHT notice in top-level directory. + */ + +#include +#include "adio.h" +#include "adio_extern.h" +#ifdef AGGREGATION_PROFILE +#include "mpe.h" +#endif + +/* +#define DEBUG +#define DEBUG2 +*/ + +#define COUNT_EXCH 0 +#define BLOCK_LENS 1 +#define INDICES 2 +#define FPIND_DISP_OFF_SZ 3 + + +typedef struct { + int count; + ADIO_Offset fp_ind; + ADIO_Offset disp; + ADIO_Offset byte_off; + ADIO_Offset sz; + ADIO_Offset ext; + ADIO_Offset type_sz; +} amount_and_extra_data_t; + +/* Debugging function to print out an ADIOI_Flatlist_node. 
*/ +void ADIOI_Print_flatlist_node(ADIOI_Flatlist_node *flatlist_node_p) +{ + int i; + if (flatlist_node_p == NULL) + { + fprintf(stderr, "print flatlist node of NULL ptr\n"); + return; + } + fprintf(stderr, "print flatlist node count = %d (idx,blocklen)\n", + flatlist_node_p->count); + for (i = 0; i < flatlist_node_p->count; i++) + { + if (i % 5 == 0 && i != 0) + { + fprintf(stderr, "%d=(%Ld,%Ld)\n", i, flatlist_node_p->indices[i], + flatlist_node_p->blocklens[i]); + } + else + fprintf(stderr, "%d=(%Ld,%Ld) ", i, flatlist_node_p->indices[i], + flatlist_node_p->blocklens[i]); + } + fprintf(stderr, "\n"); +} + +/* Since ADIOI_Flatten_datatype won't add a contig datatype to the + * ADIOI_Flatlist, we can force it to do so with this function. */ +ADIOI_Flatlist_node * ADIOI_Add_contig_flattened(MPI_Datatype contig_type) +{ + int contig_type_sz = -1; + ADIOI_Flatlist_node *flat_node_p = ADIOI_Flatlist; + + /* Add contig type to the end of the list if it doesn't already + * exist. */ + while (flat_node_p->next) + { + if (flat_node_p->type == contig_type) + return flat_node_p; + flat_node_p = flat_node_p->next; + } + if (flat_node_p->type == contig_type) + return flat_node_p; + + MPI_Type_size(contig_type, &contig_type_sz); + if ((flat_node_p->next = (ADIOI_Flatlist_node *) ADIOI_Malloc + (sizeof(ADIOI_Flatlist_node))) == NULL) + { + fprintf(stderr, "ADIOI_Add_contig_flattened: malloc next failed\n"); + } + flat_node_p = flat_node_p->next; + flat_node_p->type = contig_type; + if ((flat_node_p->blocklens = (ADIO_Offset *) ADIOI_Malloc(sizeof(ADIO_Offset))) == NULL) + { + fprintf(stderr, "ADIOI_Flatlist_node: malloc blocklens failed\n"); + } + if ((flat_node_p->indices = (ADIO_Offset *) + ADIOI_Malloc(sizeof(ADIO_Offset))) == NULL) + { + fprintf(stderr, "ADIOI_Flatlist_node: malloc indices failed\n"); + } + flat_node_p->blocklens[0] = contig_type_sz; + flat_node_p->indices[0] = 0; + flat_node_p->count = 1; + flat_node_p->next = NULL; + return flat_node_p; +} + +/* 
ADIOI_Exchange_file_views - Sends all the aggregators the file + * views and file view states of the clients. It fills in the + * client_file_view_state_arr for the aggregators and the + * my_mem_view_state for the client. It also initializes the + * agg_file_view_state for all clients, which is the view for each + * aggregator of a client's filetype. */ +void ADIOI_Exch_file_views(int myrank, int nprocs, int file_ptr_type, + ADIO_File fd, int count, + MPI_Datatype datatype, ADIO_Offset off, + view_state *my_mem_view_state_arr, + view_state *agg_file_view_state_arr, + view_state *client_file_view_state_arr) +{ + /* Convert my own fileview to an ADIOI_Flattened type and a + * disp. MPI_Alltoall the count of ADIOI_Flatlist nodes. + * MPI_Isend/Irecv the block_lens, indices of ADIOI_Flatlist node + * to/from each of the aggregators with the rest of the file view + * state. */ + + int i = -1, j = -1; + amount_and_extra_data_t *send_count_arr = NULL; + amount_and_extra_data_t *recv_count_arr = NULL; + int send_req_arr_sz = 0; + int recv_req_arr_sz = 0; + MPI_Request *send_req_arr = NULL, *recv_req_arr = NULL; + MPI_Status *statuses = NULL; + ADIO_Offset disp_off_sz_ext_typesz[6]; + MPI_Aint memtype_extent, filetype_extent; + int ret = -1; + + /* parameters for datatypes */ + ADIOI_Flatlist_node *flat_mem_p = NULL, *flat_file_p = NULL; + int memtype_sz = -1; + int memtype_is_contig = -1, filetype_is_contig = -1; + int filetype_sz = -1; + +#ifdef AGGREGATION_PROFILE + MPE_Log_event (5014, 0, NULL); +#endif + /* The memtype will be freed after the call. The filetype will be + * freed in the close and should have been flattened in the file + * view. 
*/ + MPI_Type_size(datatype, &memtype_sz); + MPI_Type_extent(datatype, &memtype_extent); + if (memtype_sz == memtype_extent) { + memtype_is_contig = 1; + flat_mem_p = ADIOI_Add_contig_flattened(datatype); + flat_mem_p->blocklens[0] = memtype_sz*count; + } + else { + ADIOI_Flatten_datatype(datatype); + flat_mem_p = ADIOI_Flatlist; + while (flat_mem_p->type != datatype) + flat_mem_p = flat_mem_p->next; + } + + MPI_Type_extent(fd->filetype, &filetype_extent); + MPI_Type_size(fd->filetype, &filetype_sz); + if (filetype_extent == filetype_sz) { + filetype_is_contig = 1; + flat_file_p = ADIOI_Add_contig_flattened(fd->filetype); + flat_file_p->blocklens[0] = memtype_sz*count; + filetype_extent = memtype_sz*count; + filetype_sz = filetype_extent; + } + else { + flat_file_p = ADIOI_Flatlist; + while (flat_file_p->type != fd->filetype) + flat_file_p = flat_file_p->next; + } + + disp_off_sz_ext_typesz[0] = fd->fp_ind; + disp_off_sz_ext_typesz[1] = fd->disp; + disp_off_sz_ext_typesz[2] = off; + disp_off_sz_ext_typesz[3] = memtype_sz*count; + disp_off_sz_ext_typesz[4] = (ADIO_Offset) filetype_extent; + disp_off_sz_ext_typesz[5] = (ADIO_Offset) filetype_sz; + + if (fd->hints->cb_alltoall != ADIOI_HINT_DISABLE) { + recv_count_arr = ADIOI_Calloc(nprocs, sizeof(amount_and_extra_data_t)); + send_count_arr = ADIOI_Calloc(nprocs, sizeof(amount_and_extra_data_t)); + } else { + send_count_arr = ADIOI_Calloc(fd->hints->cb_nodes, + sizeof(amount_and_extra_data_t)); + + /* only aggregators receive data */ + if (fd->is_agg) { + recv_count_arr = ADIOI_Calloc(nprocs, + sizeof(amount_and_extra_data_t)); + recv_req_arr = ADIOI_Malloc (nprocs * sizeof(MPI_Request)); + for (i=0; i < nprocs; i++) + MPI_Irecv (&recv_count_arr[i], sizeof(amount_and_extra_data_t), + MPI_BYTE, i, COUNT_EXCH, fd->comm, &recv_req_arr[i]); + } + + /* only send data to aggregators */ + send_req_arr = ADIOI_Calloc (fd->hints->cb_nodes, sizeof(MPI_Request)); + for (i=0; i < fd->hints->cb_nodes; i++) { + 
send_count_arr[i].count = flat_file_p->count; + send_count_arr[i].fp_ind = disp_off_sz_ext_typesz[0]; + send_count_arr[i].disp = disp_off_sz_ext_typesz[1]; + send_count_arr[i].byte_off = disp_off_sz_ext_typesz[2]; + send_count_arr[i].sz = disp_off_sz_ext_typesz[3]; + send_count_arr[i].ext = disp_off_sz_ext_typesz[4]; + send_count_arr[i].type_sz = disp_off_sz_ext_typesz[5]; + MPI_Isend (&send_count_arr[i], sizeof(amount_and_extra_data_t), + MPI_BYTE, fd->hints->ranklist[i], COUNT_EXCH, fd->comm, + &send_req_arr[i]); + } + } + + + /* Every client has to build mem and file view_states for each aggregator. + * We initialize their values here. and we also initialize + * send_count_arr */ + + if (memtype_is_contig) { + /* if memory is contigous, we now replace memtype_sz and + * memtype_extent with the full access size */ + memtype_sz *= count; + memtype_extent = memtype_sz; + } + + for (i = 0; i < fd->hints->cb_nodes; i++) + { + int tmp_agg_idx = fd->hints->ranklist[i]; + memset(&(my_mem_view_state_arr[tmp_agg_idx]), 0, sizeof(view_state)); + my_mem_view_state_arr[tmp_agg_idx].sz = + disp_off_sz_ext_typesz[3]; + my_mem_view_state_arr[tmp_agg_idx].ext = + (ADIO_Offset) memtype_extent; + my_mem_view_state_arr[tmp_agg_idx].type_sz = + (ADIO_Offset) memtype_sz; + my_mem_view_state_arr[tmp_agg_idx].flat_type_p = flat_mem_p; + ADIOI_init_view_state(file_ptr_type, + 1, + &(my_mem_view_state_arr[tmp_agg_idx]), + TEMP_OFF); + ADIOI_init_view_state(file_ptr_type, + 1, + &(my_mem_view_state_arr[tmp_agg_idx]), + REAL_OFF); + + memset(&(agg_file_view_state_arr[tmp_agg_idx]), 0, sizeof(view_state)); + agg_file_view_state_arr[tmp_agg_idx].fp_ind = + disp_off_sz_ext_typesz[0]; + agg_file_view_state_arr[tmp_agg_idx].disp = + disp_off_sz_ext_typesz[1]; + agg_file_view_state_arr[tmp_agg_idx].byte_off = + disp_off_sz_ext_typesz[2]; + agg_file_view_state_arr[tmp_agg_idx].sz = + disp_off_sz_ext_typesz[3]; + agg_file_view_state_arr[tmp_agg_idx].ext = + disp_off_sz_ext_typesz[4]; + 
agg_file_view_state_arr[tmp_agg_idx].type_sz = + disp_off_sz_ext_typesz[5]; + agg_file_view_state_arr[tmp_agg_idx].flat_type_p = flat_file_p; + + ADIOI_init_view_state(file_ptr_type, + 1, + &(agg_file_view_state_arr[tmp_agg_idx]), + TEMP_OFF); + ADIOI_init_view_state(file_ptr_type, + 1, + &(agg_file_view_state_arr[tmp_agg_idx]), + REAL_OFF); + + if (fd->hints->cb_alltoall != ADIOI_HINT_DISABLE) { + send_count_arr[tmp_agg_idx].count = flat_file_p->count; + send_count_arr[tmp_agg_idx].fp_ind = disp_off_sz_ext_typesz[0]; + send_count_arr[tmp_agg_idx].disp = disp_off_sz_ext_typesz[1]; + send_count_arr[tmp_agg_idx].byte_off = disp_off_sz_ext_typesz[2]; + send_count_arr[tmp_agg_idx].sz = disp_off_sz_ext_typesz[3]; + send_count_arr[tmp_agg_idx].ext = disp_off_sz_ext_typesz[4]; + send_count_arr[tmp_agg_idx].type_sz = disp_off_sz_ext_typesz[5]; + } + } + +#ifdef DEBUG2 + fprintf(stderr, "my own flattened memtype: "); + ADIOI_Print_flatlist_node(flat_mem_p); + fprintf(stderr, "my own flattened filetype: "); + ADIOI_Print_flatlist_node(flat_file_p); +#endif + + if (fd->hints->cb_alltoall != ADIOI_HINT_DISABLE) { + ret = MPI_Alltoall(send_count_arr, sizeof(amount_and_extra_data_t), + MPI_BYTE, + recv_count_arr, sizeof(amount_and_extra_data_t), + MPI_BYTE, fd->comm); + if (ret != MPI_SUCCESS) + { + fprintf(stderr, "ADIOI_Exchange_file_views: MPI_Alltoall failed " + "with error %d", ret); + return; + } + } else { + statuses = (MPI_Status *) ADIOI_Malloc(1 + nprocs * sizeof(MPI_Status)); + if (fd->is_agg) { + MPI_Waitall(nprocs, recv_req_arr, statuses); + ADIOI_Free(recv_req_arr); + } + MPI_Waitall(fd->hints->cb_nodes, send_req_arr, statuses); + ADIOI_Free(statuses); + ADIOI_Free(send_req_arr); + } +#ifdef DEBUG2 + if (fd->hints->cb_alltoall != ADIOI_HINT_DISABLE) { + fprintf(stderr, "send_count_arr:"); + for (i = 0; i < nprocs; i++) + { + fprintf(stderr, "[%d]=%d ", i, send_count_arr[i].count); + } + fprintf(stderr, "\n"); + fprintf(stderr, "recv_count_arr:"); + for (i = 0; i < 
nprocs; i++) + { + fprintf(stderr, "[%d]=%d ", i, recv_count_arr[i].count); + } + fprintf(stderr, "\n"); + } else { + fprintf(stderr, "send_count_arr:"); + for (i = 0; i < fd->hints->cb_nodes; i++) + { + fprintf(stderr, "[%d]=%d ", i, send_count_arr[i].count); + } + fprintf(stderr, "\n"); + if (fd->is_agg) { + fprintf(stderr, "recv_count_arr:"); + for (i = 0; i < nprocs; i++) + { + fprintf(stderr, "[%d]=%d ", i, recv_count_arr[i].count); + } + fprintf(stderr, "\n"); + } + } +#endif + + if (fd->hints->cb_alltoall == ADIOI_HINT_DISABLE) { + for (i=0; i < fd->hints->cb_nodes; i++) + if (send_count_arr[i].count > 0) + send_req_arr_sz++; + } + /* Figure out how many counts to send/recv */ + for (i = 0; i < nprocs; i++) + { + if (fd->hints->cb_alltoall != ADIOI_HINT_DISABLE) { + if (send_count_arr[i].count > 0) + send_req_arr_sz++; + } + /* Only aggregators should recv*/ + if (fd->is_agg) { + if (recv_count_arr[i].count > 0) + { + if ((client_file_view_state_arr[i].flat_type_p = + (ADIOI_Flatlist_node *) ADIOI_Malloc( + sizeof(ADIOI_Flatlist_node))) == NULL) + { + fprintf(stderr, "ADIOI_Exchange_file_views: malloc " + "flat_type_p failed\n"); + } + client_file_view_state_arr[i].flat_type_p->count = + recv_count_arr[i].count; + client_file_view_state_arr[i].flat_type_p->indices = + (ADIO_Offset *) ADIOI_Calloc(recv_count_arr[i].count, + sizeof(ADIO_Offset)); + client_file_view_state_arr[i].flat_type_p->blocklens = + (ADIO_Offset *) ADIOI_Calloc(recv_count_arr[i].count, + sizeof(ADIO_Offset)); + + /* Copy the extra data out of the stuff we Alltoall'd */ + memcpy (&client_file_view_state_arr[i].fp_ind, + &recv_count_arr[i].fp_ind, + 6*sizeof(ADIO_Offset)); + + recv_req_arr_sz++; + } + } + } + + /* Since ADIOI_Calloc may do other things we add the +1 + * to avoid a 0-size malloc */ + send_req_arr = (MPI_Request *) ADIOI_Calloc(2*(send_req_arr_sz)+1, + sizeof(MPI_Request)); + + j = 0; + if (recv_req_arr_sz > 0) { + assert (fd->is_agg); + recv_req_arr = (MPI_Request *) 
ADIOI_Calloc(2*(recv_req_arr_sz), + sizeof(MPI_Request)); + for (i = 0; i < nprocs; i++) { + if (recv_count_arr[i].count > 0) { + MPI_Irecv(client_file_view_state_arr[i].flat_type_p->indices, + recv_count_arr[i].count, ADIO_OFFSET, i, + INDICES, fd->comm, &recv_req_arr[j]); + j++; + MPI_Irecv(client_file_view_state_arr[i].flat_type_p->blocklens, + recv_count_arr[i].count, MPI_INT, i, + BLOCK_LENS, fd->comm, &recv_req_arr[j]); + j++; + } + } + } + + if (fd->hints->cb_alltoall != ADIOI_HINT_DISABLE) { + j = 0; + for (i = 0; i < nprocs; i++) { + if (send_count_arr[i].count > 0) { + MPI_Isend(flat_file_p->indices, + send_count_arr[i].count, ADIO_OFFSET, i, + INDICES, fd->comm, &send_req_arr[j]); + j++; + MPI_Isend(flat_file_p->blocklens, + send_count_arr[i].count, MPI_INT, i, + BLOCK_LENS, fd->comm, &send_req_arr[j]); + j++; + } + } + } else { + j = 0; + for (i = 0; i < fd->hints->cb_nodes; i++) { + if (send_count_arr[i].count > 0) { + MPI_Isend(flat_file_p->indices, + send_count_arr[i].count, ADIO_OFFSET, + fd->hints->ranklist[i], INDICES, fd->comm, + &send_req_arr[j]); + j++; + MPI_Isend(flat_file_p->blocklens, + send_count_arr[i].count, MPI_INT, + fd->hints->ranklist[i], BLOCK_LENS, fd->comm, + &send_req_arr[j]); + j++; + } + } + } + + /* Since ADIOI_Malloc may do other things we add the +1 + * to avoid a 0-size malloc */ + statuses = (MPI_Status *) + ADIOI_Malloc(1 + 2 * ADIOI_MAX(send_req_arr_sz,recv_req_arr_sz) + * sizeof(MPI_Status)); + + if (send_req_arr_sz > 0) { + MPI_Waitall(2 * send_req_arr_sz, send_req_arr, statuses); + ADIOI_Free(send_count_arr); + ADIOI_Free(send_req_arr); + } + if (recv_req_arr_sz > 0) { + MPI_Waitall(2 * recv_req_arr_sz, recv_req_arr, statuses); + ADIOI_Free(recv_count_arr); + ADIOI_Free(recv_req_arr); + } + ADIOI_Free(statuses); + + if (fd->is_agg == 1) + { + ADIOI_init_view_state(file_ptr_type, + nprocs, + client_file_view_state_arr, + TEMP_OFF); + ADIOI_init_view_state(file_ptr_type, + nprocs, + client_file_view_state_arr, + 
REAL_OFF); + } + +#ifdef DEBUG + if (fd->is_agg == 1) + { + ADIOI_Flatlist_node *fr_node_p = ADIOI_Flatlist; + for (i = 0; i < nprocs; i++) + { + fprintf(stderr, "client_file_view_state_arr[%d]=(fp_ind=%Ld," + "disp=%Ld,byte_off=%Ld,sz=%Ld,ext=%Ld\n", i, + client_file_view_state_arr[i].fp_ind, + client_file_view_state_arr[i].disp, + client_file_view_state_arr[i].byte_off, + client_file_view_state_arr[i].sz, + client_file_view_state_arr[i].ext); + } + + while (fr_node_p->type != + fd->file_realm_types[fd->my_cb_nodes_index]) + fr_node_p = fr_node_p->next; + assert(fr_node_p != NULL); + + fprintf(stderr, "my file realm (idx=%d,st_off=%Ld) ", + fd->my_cb_nodes_index, + fd->file_realm_st_offs[fd->my_cb_nodes_index]); + ADIOI_Print_flatlist_node(fr_node_p); + } +#endif + +#ifdef DEBUG2 + if (fd->is_agg == 1) + { + for (i = 0; i < nprocs; i++) + { + fprintf(stderr, "client_file_view_state_arr[%d]: ", i); + ADIOI_Print_flatlist_node( + client_file_view_state_arr[i].flat_type_p); + } + } +#endif +#ifdef AGGREGATION_PROFILE + MPE_Log_event (5015, 0, NULL); +#endif +} diff --git a/ompi/mca/io/romio/romio/adio/common/ad_darray.c b/ompi/mca/io/romio/romio/adio/common/ad_darray.c index b9b3c46d7d..faa2cf694d 100644 --- a/ompi/mca/io/romio/romio/adio/common/ad_darray.c +++ b/ompi/mca/io/romio/romio/adio/common/ad_darray.c @@ -81,7 +81,7 @@ int ADIO_Type_create_darray(int size, int rank, int ndims, tmp_size = 1; for (i=1; i=0; i--) { tmp_size *= array_of_gsizes[i+1]; - disps[1] += tmp_size*st_offsets[i]; + disps[1] += (MPI_Aint)tmp_size*st_offsets[i]; } } disps[1] *= orig_extent; disps[2] = orig_extent; - for (i=0; idim; i--) stride *= array_of_gsizes[i]; + for (i=ndims-1; i>dim; i--) stride *= (MPI_Aint)array_of_gsizes[i]; MPI_Type_hvector(mysize, 1, stride, type_old, type_new); } } - *st_offset = blksize * rank; + *st_offset = (MPI_Aint)blksize * (MPI_Aint)rank; /* in terms of no. 
of elements of type oldtype in this dimension */ if (mysize == 0) *st_offset = 0; @@ -241,10 +241,10 @@ static int MPIOI_Type_cyclic(int *array_of_gsizes, int dim, int ndims, int nproc count = local_size/blksize; rem = local_size % blksize; - stride = nprocs*blksize*orig_extent; + stride = (MPI_Aint)nprocs*(MPI_Aint)blksize*orig_extent; if (order == MPI_ORDER_FORTRAN) - for (i=0; idim; i--) stride *= array_of_gsizes[i]; + for (i=0; idim; i--) stride *= (MPI_Aint)array_of_gsizes[i]; MPI_Type_hvector(count, blksize, stride, type_old, type_new); @@ -255,7 +255,7 @@ static int MPIOI_Type_cyclic(int *array_of_gsizes, int dim, int ndims, int nproc types[0] = *type_new; types[1] = type_old; disps[0] = 0; - disps[1] = count*stride; + disps[1] = (MPI_Aint)count*stride; blklens[0] = 1; blklens[1] = rem; @@ -272,9 +272,9 @@ static int MPIOI_Type_cyclic(int *array_of_gsizes, int dim, int ndims, int nproc types[0] = MPI_LB; disps[0] = 0; types[1] = *type_new; - disps[1] = rank * blksize * orig_extent; + disps[1] = (MPI_Aint)rank * (MPI_Aint)blksize * orig_extent; types[2] = MPI_UB; - disps[2] = orig_extent * array_of_gsizes[dim]; + disps[2] = orig_extent * (MPI_Aint)array_of_gsizes[dim]; blklens[0] = blklens[1] = blklens[2] = 1; MPI_Type_struct(3, blklens, disps, types, &type_tmp); MPI_Type_free(type_new); @@ -284,7 +284,7 @@ static int MPIOI_Type_cyclic(int *array_of_gsizes, int dim, int ndims, int nproc the struct above */ } else { - *st_offset = rank * blksize; + *st_offset = (MPI_Aint)rank * (MPI_Aint)blksize; /* st_offset is in terms of no. 
of elements of type oldtype in * this dimension */ } diff --git a/ompi/mca/io/romio/romio/adio/common/ad_end.c b/ompi/mca/io/romio/romio/adio/common/ad_end.c index 3b0778a659..9b05af3e3d 100644 --- a/ompi/mca/io/romio/romio/adio/common/ad_end.c +++ b/ompi/mca/io/romio/romio/adio/common/ad_end.c @@ -17,6 +17,15 @@ void ADIO_End(int *error_code) /* FPRINTF(stderr, "reached end\n"); */ + /* if a default errhandler was set on MPI_FILE_NULL then we need to ensure + * that our reference to that errhandler is released */ +/* Open MPI: The call to PMPI_File_set_errhandler has to be done in romio/src/io_romio_file_open.c + in routine mca_io_romio_file_close() +*/ +#if 0 + PMPI_File_set_errhandler(MPI_FILE_NULL, MPI_ERRORS_RETURN); +#endif + /* delete the flattened datatype list */ curr = ADIOI_Flatlist; while (curr) { @@ -39,7 +48,7 @@ void ADIO_End(int *error_code) datarep = ADIOI_Datarep_head; while (datarep) { datarep_next = datarep->next; -#ifdef MPICH2 +#ifdef HAVE_MPIU_FUNCS MPIU_Free(datarep->name); #else ADIOI_Free(datarep->name); @@ -48,13 +57,18 @@ void ADIO_End(int *error_code) datarep = datarep_next; } + if( ADIOI_syshints != MPI_INFO_NULL) + MPI_Info_free(&ADIOI_syshints); + + MPI_Op_free(&ADIO_same_amode); + *error_code = MPI_SUCCESS; } /* This is the delete callback function associated with - ADIO_Init_keyval when MPI_COMM_WORLD is freed */ + ADIO_Init_keyval when MPI_COMM_SELF is freed */ int ADIOI_End_call(MPI_Comm comm, int keyval, void *attribute_val, void *extra_state) @@ -62,10 +76,17 @@ int ADIOI_End_call(MPI_Comm comm, int keyval, void *attribute_val, void int error_code; ADIOI_UNREFERENCED_ARG(comm); - ADIOI_UNREFERENCED_ARG(keyval); ADIOI_UNREFERENCED_ARG(attribute_val); ADIOI_UNREFERENCED_ARG(extra_state); + MPI_Keyval_free(&keyval); + + /* The end call will be called after all possible uses of this keyval, even + * if a file was opened with MPI_COMM_SELF. Note, this assumes LIFO + * MPI_COMM_SELF attribute destruction behavior mandated by MPI-2.2. 
*/ + if (ADIOI_cb_config_list_keyval != MPI_KEYVAL_INVALID) + MPI_Keyval_free(&ADIOI_cb_config_list_keyval); + ADIO_End(&error_code); return error_code; } diff --git a/ompi/mca/io/romio/romio/adio/common/ad_features.c b/ompi/mca/io/romio/romio/adio/common/ad_features.c new file mode 100644 index 0000000000..a62be219a2 --- /dev/null +++ b/ompi/mca/io/romio/romio/adio/common/ad_features.c @@ -0,0 +1,17 @@ +#include "adio.h" +/* Generic feature query: returns 1 if the feature named by flag is reported as supported, 0 otherwise. The fd argument is not consulted. */ +int ADIOI_GEN_Feature(ADIO_File fd, int flag) +{ + switch(flag) { /* the four cases below are reported as supported */ + case ADIO_LOCKS: + case ADIO_SHARED_FP: + case ADIO_ATOMIC_MODE: + case ADIO_DATA_SIEVING_WRITES: + return 1; + break; + case ADIO_SCALABLE_OPEN: /* reported as unsupported, same as any unrecognized flag */ + default: + return 0; + break; + } +} diff --git a/ompi/mca/io/romio/romio/adio/common/ad_fstype.c b/ompi/mca/io/romio/romio/adio/common/ad_fstype.c index 286efc839b..b350effb85 100644 --- a/ompi/mca/io/romio/romio/adio/common/ad_fstype.c +++ b/ompi/mca/io/romio/romio/adio/common/ad_fstype.c @@ -26,6 +26,10 @@ #include "pvfs2.h" #endif +#ifdef HAVE_ZOIDFS_H +#include "zoidfs.h" +#endif + /* Notes on detection process: * * There are three more "general" mechanisms that we use for detecting @@ -298,7 +302,7 @@ static void ADIO_FileSysType_fncall(char *filename, int *fstype, int *error_code /* --END ERROR HANDLING-- */ #endif /* STATVFS APPROACH */ -#if defined(HAVE_STRUCT_STATFS) && defined(HAVE_STATFS) +#ifdef HAVE_STRUCT_STATFS do { err = statfs(filename, &fsbuf); } while (err && (errno == ESTALE)); @@ -342,14 +346,15 @@ static void ADIO_FileSysType_fncall(char *filename, int *fstype, int *error_code } # endif -/*#if defined(LINUX) && defined(ROMIO_LUSTRE)*/ #ifdef ROMIO_LUSTRE -#define LL_SUPER_MAGIC 0x0BD00BD0 +# ifndef LL_SUPER_MAGIC +# define LL_SUPER_MAGIC 0x0BD00BD0 +# endif if (fsbuf.f_type == LL_SUPER_MAGIC) { *fstype = ADIO_LUSTRE; return; } -# endif +#endif # ifdef PAN_KERNEL_FS_CLIENT_SUPER_MAGIC if (fsbuf.f_type == PAN_KERNEL_FS_CLIENT_SUPER_MAGIC) { @@ -386,6 +391,13 @@ static void ADIO_FileSysType_fncall(char *filename, 
int *fstype, int *error_code } # endif +# ifdef XFS_SUPER_MAGIC + if (fsbuf.f_type == XFS_SUPER_MAGIC) { + *fstype = ADIO_XFS; + return; + } +# endif + # ifdef ROMIO_UFS /* if UFS support is enabled, default to that */ *fstype = ADIO_UFS; @@ -460,6 +472,8 @@ static void ADIO_FileSysType_fncall_scalable(MPI_Comm comm, char *filename, int *error_code = buf[1]; } + + /* ADIO_FileSysType_prefix - determines file system type for a file using a prefix on the file name. upper layer should have already determined @@ -511,6 +525,10 @@ static void ADIO_FileSysType_prefix(char *filename, int *fstype, int *error_code else if (!strncmp(filename, "pvfs2:", 6)||!strncmp(filename, "PVFS2:", 6)) { *fstype = ADIO_PVFS2; } + else if (!strncmp(filename, "zoidfs:", 7)|| + !strncmp(filename, "ZOIDFS:", 7)) { + *fstype = ADIO_ZOIDFS; + } else if (!strncmp(filename, "testfs:", 7) || !strncmp(filename, "TESTFS:", 7)) { @@ -801,6 +819,16 @@ void ADIO_ResolveFileType(MPI_Comm comm, char *filename, int *fstype, return; #else *ops = &ADIO_LUSTRE_operations; +#endif + } + if (file_system == ADIO_ZOIDFS) { +#ifndef ROMIO_ZOIDFS + *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, + myname, __LINE__, MPI_ERR_IO, + "**iofstypeunsupported", 0); + return; +#else + *ops = &ADIO_ZOIDFS_operations; #endif } *error_code = MPI_SUCCESS; diff --git a/ompi/mca/io/romio/romio/adio/common/ad_hints.c b/ompi/mca/io/romio/romio/adio/common/ad_hints.c index c221b6b4d3..cd755d4a7f 100644 --- a/ompi/mca/io/romio/romio/adio/common/ad_hints.c +++ b/ompi/mca/io/romio/romio/adio/common/ad_hints.c @@ -19,8 +19,18 @@ void ADIOI_GEN_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) MPI_Info info; char *value; int flag, intval, tmp_val, nprocs=0, nprocs_is_valid = 0, len; + int ok_to_override_cb_nodes=0; static char myname[] = "ADIOI_GEN_SETINFO"; + + /* if we've already set up default hints and the user has not asked us to + * process any hints (MPI_INFO_NULL), then we can short-circuit hint + 
* processing */ + if (fd->hints->initialized && fd->info == MPI_INFO_NULL) { + *error_code = MPI_SUCCESS; + return; + } + if (fd->info == MPI_INFO_NULL) MPI_Info_create(&(fd->info)); info = fd->info; @@ -37,17 +47,18 @@ void ADIOI_GEN_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) * previously initialized */ if (!fd->hints->initialized) { + /* buffer size for collective I/O */ - MPI_Info_set(info, "cb_buffer_size", ADIOI_CB_BUFFER_SIZE_DFLT); + ADIOI_Info_set(info, "cb_buffer_size", ADIOI_CB_BUFFER_SIZE_DFLT); fd->hints->cb_buffer_size = atoi(ADIOI_CB_BUFFER_SIZE_DFLT); /* default is to let romio automatically decide when to use * collective buffering */ - MPI_Info_set(info, "romio_cb_read", "automatic"); + ADIOI_Info_set(info, "romio_cb_read", "automatic"); fd->hints->cb_read = ADIOI_HINT_AUTO; - MPI_Info_set(info, "romio_cb_write", "automatic"); - fd->hints->cb_write = ADIOI_HINT_AUTO; + ADIOI_Info_set(info, "romio_cb_write", "automatic"); + fd->hints->cb_write = ADIOI_HINT_AUTO; fd->hints->cb_config_list = NULL; @@ -55,37 +66,71 @@ void ADIOI_GEN_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) MPI_Comm_size(fd->comm, &nprocs); nprocs_is_valid = 1; ADIOI_Snprintf(value, MPI_MAX_INFO_VAL+1, "%d", nprocs); - MPI_Info_set(info, "cb_nodes", value); + ADIOI_Info_set(info, "cb_nodes", value); fd->hints->cb_nodes = nprocs; /* hint indicating that no indep. 
I/O will be performed on this file */ - MPI_Info_set(info, "romio_no_indep_rw", "false"); + ADIOI_Info_set(info, "romio_no_indep_rw", "false"); fd->hints->no_indep_rw = 0; - /* deferred_open derrived from no_indep_rw and cb_{read,write} */ + + /* hint instructing the use of persistent file realms */ + ADIOI_Info_set(info, "romio_cb_pfr", "disable"); + fd->hints->cb_pfr = ADIOI_HINT_DISABLE; + + /* hint guiding the assignment of persistent file realms */ + ADIOI_Info_set(info, "romio_cb_fr_types", "aar"); + fd->hints->cb_fr_type = ADIOI_FR_AAR; + + /* hint to align file realms with a certain byte value */ + ADIOI_Info_set(info, "romio_cb_fr_alignment", "1"); + fd->hints->cb_fr_alignment = 1; + + /* hint to set a threshold percentage for a datatype's size/extent at + * which data sieving should be done in collective I/O */ + ADIOI_Info_set(info, "romio_cb_ds_threshold", "0"); + fd->hints->cb_ds_threshold = 0; + + /* hint to switch between point-to-point or all-to-all for two-phase */ + ADIOI_Info_set(info, "romio_cb_alltoall", "automatic"); + fd->hints->cb_alltoall = ADIOI_HINT_AUTO; + + /* deferred_open derived from no_indep_rw and cb_{read,write} */ fd->hints->deferred_open = 0; /* buffer size for data sieving in independent reads */ - MPI_Info_set(info, "ind_rd_buffer_size", ADIOI_IND_RD_BUFFER_SIZE_DFLT); + ADIOI_Info_set(info, "ind_rd_buffer_size", ADIOI_IND_RD_BUFFER_SIZE_DFLT); fd->hints->ind_rd_buffer_size = atoi(ADIOI_IND_RD_BUFFER_SIZE_DFLT); /* buffer size for data sieving in independent writes */ - MPI_Info_set(info, "ind_wr_buffer_size", ADIOI_IND_WR_BUFFER_SIZE_DFLT); + ADIOI_Info_set(info, "ind_wr_buffer_size", ADIOI_IND_WR_BUFFER_SIZE_DFLT); fd->hints->ind_wr_buffer_size = atoi(ADIOI_IND_WR_BUFFER_SIZE_DFLT); /* default is to let romio automatically decide when to use data * sieving */ - MPI_Info_set(info, "romio_ds_read", "automatic"); + ADIOI_Info_set(info, "romio_ds_read", "automatic"); fd->hints->ds_read = ADIOI_HINT_AUTO; - MPI_Info_set(info, 
"romio_ds_write", "automatic"); + ADIOI_Info_set(info, "romio_ds_write", "automatic"); fd->hints->ds_write = ADIOI_HINT_AUTO; + /* still to do: tune this a bit for a variety of file systems. there's + * no good default value so just leave it unset */ + fd->hints->min_fdomain_size = 0; + fd->hints->striping_unit = 0; + fd->hints->initialized = 1; + + /* ADIO_Open sets up collective buffering arrays. If we are in this + * path from say set_file_view, then we've don't want to adjust the + * array: we'll get a segfault during collective i/o. We only want to + * look at the users cb_nodes if it's open time */ + ok_to_override_cb_nodes = 1; + } /* add in user's info if supplied */ if (users_info != MPI_INFO_NULL) { - MPI_Info_get(users_info, "cb_buffer_size", MPI_MAX_INFO_VAL, + ADIOI_Info_get(users_info, "cb_buffer_size", MPI_MAX_INFO_VAL, value, &flag); if (flag && ((intval=atoi(value)) > 0)) { tmp_val = intval; @@ -100,30 +145,104 @@ void ADIOI_GEN_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) } /* --END ERROR HANDLING-- */ - MPI_Info_set(info, "cb_buffer_size", value); + ADIOI_Info_set(info, "cb_buffer_size", value); fd->hints->cb_buffer_size = intval; } + /* aligning file realms to certain sizes (e.g. 
stripe sizes) + * may benefit I/O performance */ + ADIOI_Info_get(users_info, "romio_cb_fr_alignment", MPI_MAX_INFO_VAL, + value, &flag); + if (flag && ((intval=atoi(value)) > 0)) { + tmp_val = intval; + + MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm); + /* --BEGIN ERROR HANDLING-- */ + if (tmp_val != intval) { + MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname, + "romio_cb_fr_alignment", + error_code); + return; + } + /* --END ERROR HANDLING-- */ + + ADIOI_Info_set(info, "romio_cb_fr_alignment", value); + fd->hints->cb_fr_alignment = intval; + + } + + /* for collective I/O, try to be smarter about when to do data sieving + * using a specific threshold for the datatype size/extent + * (percentage 0-100%) */ + ADIOI_Info_get(users_info, "romio_cb_ds_threshold", MPI_MAX_INFO_VAL, + value, &flag); + if (flag && ((intval=atoi(value)) > 0)) { + tmp_val = intval; + + MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm); + /* --BEGIN ERROR HANDLING-- */ + if (tmp_val != intval) { + MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname, + "romio_cb_ds_threshold", + error_code); + return; + } + /* --END ERROR HANDLING-- */ + + ADIOI_Info_set(info, "romio_cb_ds_threshold", value); + fd->hints->cb_ds_threshold = intval; + + } + ADIOI_Info_get(users_info, "romio_cb_alltoall", MPI_MAX_INFO_VAL, value, + &flag); + if (flag) { + if (!strcmp(value, "enable") || !strcmp(value, "ENABLE")) { + ADIOI_Info_set(info, "romio_cb_alltoall", value); + fd->hints->cb_read = ADIOI_HINT_ENABLE; + } + else if (!strcmp(value, "disable") || !strcmp(value, "DISABLE")) { + ADIOI_Info_set(info, "romio_cb_alltoall", value); + fd->hints->cb_read = ADIOI_HINT_DISABLE; + } + else if (!strcmp(value, "automatic") || !strcmp(value, "AUTOMATIC")) + { + ADIOI_Info_set(info, "romio_cb_alltoall", value); + fd->hints->cb_read = ADIOI_HINT_AUTO; + } + + tmp_val = fd->hints->cb_alltoall; + + MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm); + /* --BEGIN ERROR HANDLING-- */ + if (tmp_val != fd->hints->cb_alltoall) { + 
MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname, + "romio_cb_alltoall", + error_code); + return; + } + /* --END ERROR HANDLING-- */ + } /* new hints for enabling/disabling coll. buffering on * reads/writes */ - MPI_Info_get(users_info, "romio_cb_read", MPI_MAX_INFO_VAL, value, &flag); + ADIOI_Info_get(users_info, "romio_cb_read", MPI_MAX_INFO_VAL, value, + &flag); if (flag) { if (!strcmp(value, "enable") || !strcmp(value, "ENABLE")) { - MPI_Info_set(info, "romio_cb_read", value); + ADIOI_Info_set(info, "romio_cb_read", value); fd->hints->cb_read = ADIOI_HINT_ENABLE; } else if (!strcmp(value, "disable") || !strcmp(value, "DISABLE")) { /* romio_cb_read overrides no_indep_rw */ - MPI_Info_set(info, "romio_cb_read", value); - MPI_Info_set(info, "romio_no_indep_rw", "false"); + ADIOI_Info_set(info, "romio_cb_read", value); + ADIOI_Info_set(info, "romio_no_indep_rw", "false"); fd->hints->cb_read = ADIOI_HINT_DISABLE; fd->hints->no_indep_rw = ADIOI_HINT_DISABLE; } else if (!strcmp(value, "automatic") || !strcmp(value, "AUTOMATIC")) { - MPI_Info_set(info, "romio_cb_read", value); + ADIOI_Info_set(info, "romio_cb_read", value); fd->hints->cb_read = ADIOI_HINT_AUTO; } @@ -139,25 +258,25 @@ void ADIOI_GEN_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) } /* --END ERROR HANDLING-- */ } - MPI_Info_get(users_info, "romio_cb_write", MPI_MAX_INFO_VAL, value, + ADIOI_Info_get(users_info, "romio_cb_write", MPI_MAX_INFO_VAL, value, &flag); if (flag) { if (!strcmp(value, "enable") || !strcmp(value, "ENABLE")) { - MPI_Info_set(info, "romio_cb_write", value); + ADIOI_Info_set(info, "romio_cb_write", value); fd->hints->cb_write = ADIOI_HINT_ENABLE; } else if (!strcmp(value, "disable") || !strcmp(value, "DISABLE")) { /* romio_cb_write overrides no_indep_rw, too */ - MPI_Info_set(info, "romio_cb_write", value); - MPI_Info_set(info, "romio_no_indep_rw", "false"); + ADIOI_Info_set(info, "romio_cb_write", value); + ADIOI_Info_set(info, "romio_no_indep_rw", "false"); fd->hints->cb_write 
= ADIOI_HINT_DISABLE; fd->hints->no_indep_rw = ADIOI_HINT_DISABLE; } else if (!strcmp(value, "automatic") || !strcmp(value, "AUTOMATIC")) { - MPI_Info_set(info, "romio_cb_write", value); + ADIOI_Info_set(info, "romio_cb_write", value); fd->hints->cb_write = ADIOI_HINT_AUTO; } @@ -174,24 +293,79 @@ void ADIOI_GEN_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) /* --END ERROR HANDLING-- */ } + /* enable/disable persistent file realms for collective I/O */ + /* may want to check for no_indep_rdwr hint as well */ + ADIOI_Info_get(users_info, "romio_cb_pfr", MPI_MAX_INFO_VAL, value, + &flag); + if (flag) { + if (!strcmp(value, "enable") || !strcmp(value, "ENABLE")) { + ADIOI_Info_set(info, "romio_cb_pfr", value); + fd->hints->cb_pfr = ADIOI_HINT_ENABLE; + } + else if (!strcmp(value, "disable") || !strcmp(value, "DISABLE")) { + ADIOI_Info_set(info, "romio_cb_pfr", value); + fd->hints->cb_pfr = ADIOI_HINT_DISABLE; + } + else if (!strcmp(value, "automatic") || !strcmp(value, "AUTOMATIC")) + { + ADIOI_Info_set(info, "romio_cb_pfr", value); + fd->hints->cb_pfr = ADIOI_HINT_AUTO; + } + + tmp_val = fd->hints->cb_pfr; + + MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm); + /* --BEGIN ERROR HANDLING-- */ + if (tmp_val != fd->hints->cb_pfr) { + MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname, + "romio_cb_pfr", + error_code); + return; + } + /* --END ERROR HANDLING-- */ + } + + /* file realm assignment types ADIOI_FR_AAR(0), + ADIOI_FR_FSZ(-1), ADIOI_FR_USR_REALMS(-2), all others specify + a regular fr size in bytes. probably not the best way... 
*/ + ADIOI_Info_get(users_info, "romio_cb_fr_type", MPI_MAX_INFO_VAL, + value, &flag); + if (flag && ((intval=atoi(value)) >= -2)) { + tmp_val = intval; + + MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm); + /* --BEGIN ERROR HANDLING-- */ + if (tmp_val != intval) { + MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname, + "romio_cb_fr_type", + error_code); + return; + } + /* --END ERROR HANDLING-- */ + + ADIOI_Info_set(info, "romio_cb_fr_type", value); + fd->hints->cb_fr_type = intval; + + } + /* new hint for specifying no indep. read/write will be performed */ - MPI_Info_get(users_info, "romio_no_indep_rw", MPI_MAX_INFO_VAL, value, + ADIOI_Info_get(users_info, "romio_no_indep_rw", MPI_MAX_INFO_VAL, value, &flag); if (flag) { if (!strcmp(value, "true") || !strcmp(value, "TRUE")) { /* if 'no_indep_rw' set, also hint that we will do * collective buffering: if we aren't doing independent io, * then we have to do collective */ - MPI_Info_set(info, "romio_no_indep_rw", value); - MPI_Info_set(info, "romio_cb_write", "enable"); - MPI_Info_set(info, "romio_cb_read", "enable"); + ADIOI_Info_set(info, "romio_no_indep_rw", value); + ADIOI_Info_set(info, "romio_cb_write", "enable"); + ADIOI_Info_set(info, "romio_cb_read", "enable"); fd->hints->no_indep_rw = 1; fd->hints->cb_read = 1; fd->hints->cb_write = 1; tmp_val = 1; } else if (!strcmp(value, "false") || !strcmp(value, "FALSE")) { - MPI_Info_set(info, "romio_no_indep_rw", value); + ADIOI_Info_set(info, "romio_no_indep_rw", value); fd->hints->no_indep_rw = 0; tmp_val = 0; } @@ -213,86 +387,91 @@ void ADIOI_GEN_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) /* new hints for enabling/disabling data sieving on * reads/writes */ - MPI_Info_get(users_info, "romio_ds_read", MPI_MAX_INFO_VAL, value, + ADIOI_Info_get(users_info, "romio_ds_read", MPI_MAX_INFO_VAL, value, &flag); if (flag) { if (!strcmp(value, "enable") || !strcmp(value, "ENABLE")) { - MPI_Info_set(info, "romio_ds_read", value); + ADIOI_Info_set(info, "romio_ds_read", 
value); fd->hints->ds_read = ADIOI_HINT_ENABLE; } else if (!strcmp(value, "disable") || !strcmp(value, "DISABLE")) { - MPI_Info_set(info, "romio_ds_read", value); + ADIOI_Info_set(info, "romio_ds_read", value); fd->hints->ds_read = ADIOI_HINT_DISABLE; } else if (!strcmp(value, "automatic") || !strcmp(value, "AUTOMATIC")) { - MPI_Info_set(info, "romio_ds_read", value); + ADIOI_Info_set(info, "romio_ds_read", value); fd->hints->ds_read = ADIOI_HINT_AUTO; } /* otherwise ignore */ } - MPI_Info_get(users_info, "romio_ds_write", MPI_MAX_INFO_VAL, value, + ADIOI_Info_get(users_info, "romio_ds_write", MPI_MAX_INFO_VAL, value, &flag); if (flag) { if (!strcmp(value, "enable") || !strcmp(value, "ENABLE")) { - MPI_Info_set(info, "romio_ds_write", value); + ADIOI_Info_set(info, "romio_ds_write", value); fd->hints->ds_write = ADIOI_HINT_ENABLE; } else if (!strcmp(value, "disable") || !strcmp(value, "DISABLE")) { - MPI_Info_set(info, "romio_ds_write", value); + ADIOI_Info_set(info, "romio_ds_write", value); fd->hints->ds_write = ADIOI_HINT_DISABLE; } else if (!strcmp(value, "automatic") || !strcmp(value, "AUTOMATIC")) { - MPI_Info_set(info, "romio_ds_write", value); + ADIOI_Info_set(info, "romio_ds_write", value); fd->hints->ds_write = ADIOI_HINT_AUTO; } /* otherwise ignore */ } - MPI_Info_get(users_info, "cb_nodes", MPI_MAX_INFO_VAL, - value, &flag); - if (flag && ((intval=atoi(value)) > 0)) { - tmp_val = intval; + if (ok_to_override_cb_nodes) { + /* MPI_File_open path sets up some data structrues that don't + * get resized in the MPI_File_set_view path, so ignore + * cb_nodes in the set_view case */ + ADIOI_Info_get(users_info, "cb_nodes", MPI_MAX_INFO_VAL, + value, &flag); + if (flag && ((intval=atoi(value)) > 0)) { + tmp_val = intval; - MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm); - /* --BEGIN ERROR HANDLING-- */ - if (tmp_val != intval) { + MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm); + /* --BEGIN ERROR HANDLING-- */ + if (tmp_val != intval) { 
MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname, "cb_nodes", error_code); return; - } - /* --END ERROR HANDLING-- */ + } + /* --END ERROR HANDLING-- */ - if (!nprocs_is_valid) { - /* if hints were already initialized, we might not - * have already gotten this? - */ - MPI_Comm_size(fd->comm, &nprocs); - nprocs_is_valid = 1; - } - if (intval <= nprocs) { - MPI_Info_set(info, "cb_nodes", value); - fd->hints->cb_nodes = intval; - } - } + if (!nprocs_is_valid) { + /* if hints were already initialized, we might not + * have already gotten this? + */ + MPI_Comm_size(fd->comm, &nprocs); + nprocs_is_valid = 1; + } + if (intval <= nprocs) { + ADIOI_Info_set(info, "cb_nodes", value); + fd->hints->cb_nodes = intval; + } + } + } /* if (ok_to_override_cb_nodes) */ - MPI_Info_get(users_info, "ind_wr_buffer_size", MPI_MAX_INFO_VAL, + ADIOI_Info_get(users_info, "ind_wr_buffer_size", MPI_MAX_INFO_VAL, value, &flag); if (flag && ((intval = atoi(value)) > 0)) { - MPI_Info_set(info, "ind_wr_buffer_size", value); + ADIOI_Info_set(info, "ind_wr_buffer_size", value); fd->hints->ind_wr_buffer_size = intval; } - MPI_Info_get(users_info, "ind_rd_buffer_size", MPI_MAX_INFO_VAL, + ADIOI_Info_get(users_info, "ind_rd_buffer_size", MPI_MAX_INFO_VAL, value, &flag); if (flag && ((intval = atoi(value)) > 0)) { - MPI_Info_set(info, "ind_rd_buffer_size", value); + ADIOI_Info_set(info, "ind_rd_buffer_size", value); fd->hints->ind_rd_buffer_size = intval; } - MPI_Info_get(users_info, "cb_config_list", MPI_MAX_INFO_VAL, + ADIOI_Info_get(users_info, "cb_config_list", MPI_MAX_INFO_VAL, value, &flag); if (flag) { if (fd->hints->cb_config_list == NULL) { @@ -301,7 +480,7 @@ void ADIOI_GEN_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) * the cb_config_list hint will be set at file open time * either by the user or to the default */ - MPI_Info_set(info, "cb_config_list", value); + ADIOI_Info_set(info, "cb_config_list", value); len = (strlen(value)+1) * sizeof(char); fd->hints->cb_config_list = 
ADIOI_Malloc(len); if (fd->hints->cb_config_list == NULL) { @@ -314,13 +493,27 @@ void ADIOI_GEN_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) * info value with a cb_config_list value in it in a couple * of calls, which would be irritating. */ } + ADIOI_Info_get(users_info, "romio_min_fdomain_size", MPI_MAX_INFO_VAL, + value, &flag); + if ( flag && ((intval = atoi(value)) > 0) ) { + ADIOI_Info_set(info, "romio_min_fdomain_size", value); + fd->hints->min_fdomain_size = intval; + } + /* Now we use striping unit in common code so we should + process hints for it. */ + ADIOI_Info_get(users_info, "striping_unit", MPI_MAX_INFO_VAL, + value, &flag); + if ( flag && ((intval = atoi(value)) > 0) ) { + ADIOI_Info_set(info, "striping_unit", value); + fd->hints->striping_unit = intval; + } } /* handle cb_config_list default value here; avoids an extra * free/alloc and insures it is always set */ if (fd->hints->cb_config_list == NULL) { - MPI_Info_set(info, "cb_config_list", ADIOI_CB_CONFIG_LIST_DFLT); + ADIOI_Info_set(info, "cb_config_list", ADIOI_CB_CONFIG_LIST_DFLT); len = (strlen(ADIOI_CB_CONFIG_LIST_DFLT)+1) * sizeof(char); fd->hints->cb_config_list = ADIOI_Malloc(len); if (fd->hints->cb_config_list == NULL) { @@ -341,25 +534,24 @@ void ADIOI_GEN_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) * disable at the same time doesn't make sense. 
honor * romio_cb_{read,write} and force the no_indep_rw hint to * 'disable' */ - MPI_Info_set(info, "romio_no_indep_rw", "false"); + ADIOI_Info_set(info, "romio_no_indep_rw", "false"); fd->hints->no_indep_rw = 0; fd->hints->deferred_open = 0; } - if ((fd->file_system == ADIO_PIOFS) || (fd->file_system == ADIO_PVFS) || - (fd->file_system == ADIO_PVFS2) ) { - /* no data sieving for writes in PIOFS, PVFS and PVFS2, because they do not + if (ADIO_Feature(fd, ADIO_DATA_SIEVING_WRITES) == 0) { + /* disable data sieving for fs that do not support file locking */ - MPI_Info_get(info, "ind_wr_buffer_size", MPI_MAX_INFO_VAL, + ADIOI_Info_get(info, "ind_wr_buffer_size", MPI_MAX_INFO_VAL, value, &flag); if (flag) { /* get rid of this value if it is set */ - MPI_Info_delete(info, "ind_wr_buffer_size"); + ADIOI_Info_delete(info, "ind_wr_buffer_size"); } /* note: leave ind_wr_buffer_size alone; used for other cases * as well. -- Rob Ross, 04/22/2003 */ - MPI_Info_set(info, "romio_ds_write", "disable"); + ADIOI_Info_set(info, "romio_ds_write", "disable"); fd->hints->ds_write = ADIOI_HINT_DISABLE; } diff --git a/ompi/mca/io/romio/romio/adio/common/ad_init.c b/ompi/mca/io/romio/romio/adio/common/ad_init.c index 1ca0cbb2f9..7ec0dac615 100644 --- a/ompi/mca/io/romio/romio/adio/common/ad_init.c +++ b/ompi/mca/io/romio/romio/adio/common/ad_init.c @@ -6,7 +6,6 @@ */ #include "adio.h" -#include "adio_extern.h" ADIOI_Flatlist_node *ADIOI_Flatlist = NULL; ADIOI_Datarep *ADIOI_Datarep_head = NULL; @@ -22,6 +21,10 @@ MPI_Info *MPIR_Infotable = NULL; int MPIR_Infotable_ptr = 0, MPIR_Infotable_max = 0; #endif +MPI_Info ADIOI_syshints = MPI_INFO_NULL; + +MPI_Op ADIO_same_amode=MPI_OP_NULL; + #if defined(ROMIO_XFS) || defined(ROMIO_LUSTRE) int ADIOI_Direct_read = 0, ADIOI_Direct_write = 0; #endif @@ -30,6 +33,20 @@ int ADIO_Init_keyval=MPI_KEYVAL_INVALID; MPI_Errhandler ADIOI_DFLT_ERR_HANDLER = MPI_ERRORS_RETURN; + +static void my_consensus(void *invec, void *inoutvec, int *len, MPI_Datatype 
*datatype) +{ + int i, *in, *inout; + in = (int*)invec; + inout = (int*)inoutvec; + + for (i=0; i< *len; i++) { + if (in[i] != inout[i]) + inout[i] = ADIO_AMODE_NOMATCH; + } + return; +} + void ADIO_Init(int *argc, char ***argv, int *error_code) { #if defined(ROMIO_XFS) || defined(ROMIO_LUSTRE) @@ -57,6 +74,12 @@ void ADIO_Init(int *argc, char ***argv, int *error_code) else ADIOI_Direct_write = 0; #endif + /* Assume system-wide hints won't change between runs: move hint processing + * from ADIO_Open to here */ + /* FIXME should be checking error code from MPI_Info_create here */ + MPI_Info_create(&ADIOI_syshints); + ADIOI_process_system_hints(ADIOI_syshints); + #ifdef ADIOI_MPE_LOGGING { MPE_Log_get_state_eventIDs( &ADIOI_MPE_open_a, &ADIOI_MPE_open_b ); @@ -71,9 +94,12 @@ void ADIO_Init(int *argc, char ***argv, int *error_code) MPE_Log_get_state_eventIDs( &ADIOI_MPE_unlock_a, &ADIOI_MPE_unlock_b ); MPE_Log_get_state_eventIDs( &ADIOI_MPE_postwrite_a, &ADIOI_MPE_postwrite_b ); + MPE_Log_get_state_eventIDs( &ADIOI_MPE_openinternal_a, + &ADIOI_MPE_openinternal_b); + MPE_Log_get_state_eventIDs( &ADIOI_MPE_stat_a, &ADIOI_MPE_stat_b); int comm_world_rank; - PMPI_Comm_rank( MPI_COMM_WORLD, &comm_world_rank ); + MPI_Comm_rank( MPI_COMM_WORLD, &comm_world_rank ); if ( comm_world_rank == 0 ) { MPE_Describe_state( ADIOI_MPE_open_a, ADIOI_MPE_open_b, @@ -94,9 +120,12 @@ void ADIO_Init(int *argc, char ***argv, int *error_code) "unlock", "purple" ); MPE_Describe_state( ADIOI_MPE_postwrite_a, ADIOI_MPE_postwrite_b, "postwrite", "ivory" ); + MPE_Describe_state( ADIOI_MPE_openinternal_a, ADIOI_MPE_openinternal_b, "open system", "blue"); + MPE_Describe_state( ADIOI_MPE_stat_a, ADIOI_MPE_stat_b, "stat", "purple"); } } #endif *error_code = MPI_SUCCESS; + MPI_Op_create(my_consensus, 1, &ADIO_same_amode); } diff --git a/ompi/mca/io/romio/romio/adio/common/ad_io_coll.c b/ompi/mca/io/romio/romio/adio/common/ad_io_coll.c new file mode 100644 index 0000000000..6c2c275645 --- /dev/null +++ 
b/ompi/mca/io/romio/romio/adio/common/ad_io_coll.c @@ -0,0 +1,1131 @@ +/* -*- Mode: C; c-basic-offset:4 ; -*- */ +/* + * Copyright (C) 2008 University of Chicago. + * See COPYRIGHT notice in top-level directory. + */ + +#include "assert.h" +#include "adio.h" +#include "adio_extern.h" +#ifdef AGGREGATION_PROFILE +#include "mpe.h" +#endif + +/* #define ALLTOALL */ + +/* #define DEBUG */ +/* #define DEBUG2 */ /* print buffers */ + +#define USE_PRE_REQ + +static void Exch_data_amounts (ADIO_File fd, int nprocs, + ADIO_Offset *client_comm_sz_arr, + ADIO_Offset *agg_comm_sz_arr, + int *client_alltoallw_counts, + int *agg_alltoallw_counts, + int *aggregators_done); +static void post_aggregator_comm (MPI_Comm comm, int rw_type, int nproc, + void *cb_buf, + MPI_Datatype *client_comm_dtype_arr, + ADIO_Offset *client_comm_sz_arr, + MPI_Request **requests, + int *aggregators_client_count_p); + +static void post_client_comm (ADIO_File fd, int rw_type, + int agg_rank, void *buf, + MPI_Datatype agg_comm_dtype, + int agg_alltoallw_count, + MPI_Request *request); + +/* Avery Ching and Kenin Columa's reworked two-phase algorithm. 
Key features + * - persistent file domains + * - an option to use alltoall instead of point-to-point + */ +void ADIOI_IOStridedColl (ADIO_File fd, void *buf, int count, int rdwr, + MPI_Datatype datatype, int file_ptr_type, + ADIO_Offset offset, ADIO_Status *status, + int *error_code) +{ + ADIO_Offset min_st_offset=0, max_end_offset=0; + ADIO_Offset st_end_offset[2]; + ADIO_Offset *all_st_end_offsets = NULL; + int filetype_is_contig, buftype_is_contig, is_contig; + ADIO_Offset orig_fp, off; + int interleave_count = 0, i, nprocs, myrank, nprocs_for_coll; + int cb_enable; + ADIO_Offset bufsize; + MPI_Aint extent, bufextent; + int size; + int agg_rank; + + ADIO_Offset agg_disp; /* aggregated file offset */ + MPI_Datatype agg_dtype; /* aggregated file datatype */ + + int aggregators_done = 0; + ADIO_Offset buffered_io_size = 0; + + int *alltoallw_disps; + + int *alltoallw_counts; + int *client_alltoallw_counts; + int *agg_alltoallw_counts; + + char *cb_buf = NULL; + + MPI_Datatype *client_comm_dtype_arr; /* aggregator perspective */ + MPI_Datatype *agg_comm_dtype_arr; /* client perspective */ + ADIO_Offset *client_comm_sz_arr; /* aggregator perspective */ + ADIO_Offset *agg_comm_sz_arr; /* client perspective */ + + /* file views for each client and aggregator */ + view_state *client_file_view_state_arr = NULL; + view_state *agg_file_view_state_arr = NULL; + /* mem views for local process */ + view_state *my_mem_view_state_arr = NULL; + + MPI_Status *agg_comm_statuses = NULL; + MPI_Request *agg_comm_requests = NULL; + MPI_Status *client_comm_statuses = NULL; + MPI_Request *client_comm_requests = NULL; + int aggs_client_count = 0; + int clients_agg_count = 0; + + MPI_Comm_size (fd->comm, &nprocs); + MPI_Comm_rank (fd->comm, &myrank); +#ifdef DEBUG + fprintf (stderr, "p%d: entering ADIOI_IOStridedColl\n", myrank); +#endif +#ifdef AGGREGATION_PROFILE + if (rdwr == ADIOI_READ) + MPE_Log_event (5010, 0, NULL); + else + MPE_Log_event (5012, 0, NULL); +#endif + + /* I need to 
check if there are any outstanding nonblocking writes + to the file, which could potentially interfere with the writes + taking place in this collective write call. Since this is not + likely to be common, let me do the simplest thing possible here: + Each process completes all pending nonblocking operations before + completing. */ + + nprocs_for_coll = fd->hints->cb_nodes; + orig_fp = fd->fp_ind; + + if (rdwr == ADIOI_READ) + cb_enable = fd->hints->cb_read; + else + cb_enable = fd->hints->cb_write; + + /* only check for interleaving if cb_read isn't disabled */ + if (cb_enable != ADIOI_HINT_DISABLE) { + /* find the starting and ending byte of my I/O access */ + ADIOI_Calc_bounds (fd, count, datatype, file_ptr_type, offset, + &st_end_offset[0], &st_end_offset[1]); + + /* allocate an array of start/end pairs */ + all_st_end_offsets = (ADIO_Offset *) + ADIOI_Malloc (2*nprocs*sizeof(ADIO_Offset)); + MPI_Allgather (st_end_offset, 2, ADIO_OFFSET, all_st_end_offsets, 2, + ADIO_OFFSET, fd->comm); + + min_st_offset = all_st_end_offsets[0]; + max_end_offset = all_st_end_offsets[1]; + + for (i=1; ifiletype, &filetype_is_contig); + + if ((cb_enable == ADIOI_HINT_DISABLE + || (!interleave_count && (cb_enable == ADIOI_HINT_AUTO))) + && (fd->hints->cb_pfr != ADIOI_HINT_ENABLE)){ + if (cb_enable != ADIOI_HINT_DISABLE) { + ADIOI_Free (all_st_end_offsets); + } + + if (buftype_is_contig && filetype_is_contig) { + if (file_ptr_type == ADIO_EXPLICIT_OFFSET) { + off = fd->disp + (fd->etype_size) * offset; + if (rdwr == ADIOI_READ) + ADIO_ReadContig(fd, buf, count, datatype, + ADIO_EXPLICIT_OFFSET, off, status, + error_code); + else + ADIO_WriteContig(fd, buf, count, datatype, + ADIO_EXPLICIT_OFFSET, off, status, + error_code); + } + else { + if (rdwr == ADIOI_READ) + ADIO_ReadContig(fd, buf, count, datatype, ADIO_INDIVIDUAL, + 0, status, error_code); + else + ADIO_WriteContig(fd, buf, count, datatype, ADIO_INDIVIDUAL, + 0, status, error_code); + } + } + else { + if (rdwr == ADIOI_READ) 
+ ADIO_ReadStrided(fd, buf, count, datatype, file_ptr_type, + offset, status, error_code); + else + ADIO_WriteStrided(fd, buf, count, datatype, file_ptr_type, + offset, status, error_code); + } + return; + } + + MPI_Type_extent(datatype, &extent); + bufextent = extent * count; + MPI_Type_size(datatype, &size); + bufsize = size * count; + + /* Calculate file realms */ + if ((fd->hints->cb_pfr != ADIOI_HINT_ENABLE) || + (fd->file_realm_types == NULL)) + ADIOI_Calc_file_realms (fd, min_st_offset, max_end_offset); + + my_mem_view_state_arr = (view_state *) + ADIOI_Calloc (1, nprocs * sizeof(view_state)); + agg_file_view_state_arr = (view_state *) + ADIOI_Calloc (1, nprocs * sizeof(view_state)); + client_comm_sz_arr = (ADIO_Offset *) + ADIOI_Calloc (1, nprocs * sizeof(ADIO_Offset)); + + if (fd->is_agg) { + client_file_view_state_arr = (view_state *) + ADIOI_Calloc (1, nprocs * sizeof(view_state)); + } + else { + client_file_view_state_arr = NULL; + } + + /* Alltoallw doesn't like a null array even if the counts are + * zero. If you do not include this code, it will fail. */ + client_comm_dtype_arr = (MPI_Datatype *) + ADIOI_Calloc (1, nprocs * sizeof(MPI_Datatype)); + if (!fd->is_agg) + for (i = 0; i < nprocs; i++) + client_comm_dtype_arr[i] = MPI_BYTE; + + ADIOI_Exch_file_views (myrank, nprocs, file_ptr_type, fd, count, + datatype, offset, my_mem_view_state_arr, + agg_file_view_state_arr, + client_file_view_state_arr); + + agg_comm_sz_arr = (ADIO_Offset *) + ADIOI_Calloc (1, nprocs * sizeof(ADIO_Offset)); + agg_comm_dtype_arr = (MPI_Datatype *) + ADIOI_Malloc (nprocs * sizeof(MPI_Datatype)); + if (fd->is_agg) { + ADIOI_Build_agg_reqs (fd, rdwr, nprocs, + client_file_view_state_arr, + client_comm_dtype_arr, + client_comm_sz_arr, + &agg_disp, + &agg_dtype); + buffered_io_size = 0; + for (i=0; i 0) + buffered_io_size += client_comm_sz_arr[i]; + } + } +#ifdef USE_PRE_REQ + else + { + /* Example use of ADIOI_Build_client_pre_req. 
to an + * appropriate section */ + + for (i = 0; i < fd->hints->cb_nodes; i++) + { + agg_rank = fd->hints->ranklist[(i+myrank)%fd->hints->cb_nodes]; +#ifdef AGGREGATION_PROFILE + MPE_Log_event (5040, 0, NULL); +#endif + ADIOI_Build_client_pre_req( + fd, agg_rank, (i+myrank)%fd->hints->cb_nodes, + &(my_mem_view_state_arr[agg_rank]), + &(agg_file_view_state_arr[agg_rank]), + 2*1024*1024, + 64*1024); +#ifdef AGGREGATION_PROFILE + MPE_Log_event (5041, 0, NULL); +#endif + } + } +#endif + + + if (fd->is_agg) + cb_buf = (char *) ADIOI_Malloc (fd->hints->cb_buffer_size); + alltoallw_disps = (int *) ADIOI_Calloc (nprocs, sizeof(int)); + alltoallw_counts = client_alltoallw_counts = (int *) + ADIOI_Calloc (2*nprocs, sizeof(int)); + agg_alltoallw_counts = &alltoallw_counts[nprocs]; + + if (fd->hints->cb_alltoall == ADIOI_HINT_DISABLE) { + /* aggregators pre-post all Irecv's for incoming data from clients */ + if ((fd->is_agg) && (rdwr == ADIOI_WRITE)) + post_aggregator_comm(fd->comm, rdwr, nprocs, cb_buf, + client_comm_dtype_arr, + client_comm_sz_arr, + &agg_comm_requests, + &aggs_client_count); + } + /* Aggregators send amounts for data requested to clients */ + Exch_data_amounts (fd, nprocs, client_comm_sz_arr, agg_comm_sz_arr, + client_alltoallw_counts, agg_alltoallw_counts, + &aggregators_done); + +#ifdef DEBUG + fprintf (stderr, "client_alltoallw_counts[ "); + for (i=0; ihints->cb_alltoall == ADIOI_HINT_DISABLE) { + /* clients should build datatypes for local memory locations + for data communication with aggregators and post + communication as the datatypes are built */ + + client_comm_requests = (MPI_Request *) + ADIOI_Calloc (fd->hints->cb_nodes, sizeof(MPI_Request)); + + for (i = 0; i < fd->hints->cb_nodes; i++) + { + clients_agg_count = 0; + agg_rank = fd->hints->ranklist[(i+myrank)%fd->hints->cb_nodes]; + if (agg_comm_sz_arr[agg_rank] > 0) { + ADIOI_Build_client_req(fd, agg_rank, + (i+myrank)%fd->hints->cb_nodes, + &(my_mem_view_state_arr[agg_rank]), + 
&(agg_file_view_state_arr[agg_rank]), + agg_comm_sz_arr[agg_rank], + &(agg_comm_dtype_arr[agg_rank])); + +#ifdef AGGREGATION_PROFILE + if (i == 0) + MPE_Log_event (5038, 0, NULL); +#endif + post_client_comm (fd, rdwr, agg_rank, buf, + agg_comm_dtype_arr[agg_rank], + agg_alltoallw_counts[agg_rank], + &client_comm_requests[clients_agg_count]); + clients_agg_count++; + } + } +#ifdef AGGREGATION_PROFILE + if (!clients_agg_count) + MPE_Log_event(5039, 0, NULL); +#endif + + if (rdwr == ADIOI_READ) { + if (fd->is_agg && buffered_io_size) { + ADIOI_IOFiletype (fd, cb_buf, buffered_io_size, MPI_BYTE, + ADIO_EXPLICIT_OFFSET, agg_disp, agg_dtype, + ADIOI_READ, status, error_code); + if (*error_code != MPI_SUCCESS) return; + MPI_Type_free (&agg_dtype); + } + +#ifdef DEBUG + fprintf (stderr, "expecting from [agg](disp,size,cnt)="); + for (i=0; i < nprocs; i++) { + MPI_Type_size (agg_comm_dtype_arr[i], &size); + fprintf (stderr, "[%d](%d,%d,%d)", i, alltoallw_disps[i], + size, agg_alltoallw_counts[i]); + if (i != nprocs - 1) + fprintf(stderr, ","); + } + fprintf (stderr, "]\n"); + if (fd->is_agg) { + fprintf (stderr, "sending to [client](disp,size,cnt)="); + for (i=0; i < nprocs; i++) { + if (fd->is_agg) + MPI_Type_size (client_comm_dtype_arr[i], &size); + else + size = -1; + + fprintf (stderr, "[%d](%d,%d,%d)", i, alltoallw_disps[i], + size, client_alltoallw_counts[i]); + if (i != nprocs - 1) + fprintf(stderr, ","); + } + fprintf (stderr,"\n"); + } + fflush (NULL); +#endif + /* aggregators post all Isends for outgoing data to clients */ + if (fd->is_agg) + post_aggregator_comm(fd->comm, rdwr, nprocs, cb_buf, + client_comm_dtype_arr, + client_comm_sz_arr, + &agg_comm_requests, + &aggs_client_count); + + if (fd->is_agg && aggs_client_count) { + agg_comm_statuses = ADIOI_Malloc(aggs_client_count * + sizeof(MPI_Status)); + MPI_Waitall(aggs_client_count, agg_comm_requests, + agg_comm_statuses); +#ifdef AGGREGATION_PROFILE + MPE_Log_event (5033, 0, NULL); +#endif + ADIOI_Free 
(agg_comm_requests); + ADIOI_Free (agg_comm_statuses); + } + + if (clients_agg_count) { + client_comm_statuses = ADIOI_Malloc(clients_agg_count * + sizeof(MPI_Status)); + MPI_Waitall(clients_agg_count, client_comm_requests, + client_comm_statuses); +#ifdef AGGREGATION_PROFILE + MPE_Log_event (5039, 0, NULL); +#endif + ADIOI_Free (client_comm_requests); + ADIOI_Free (client_comm_statuses); + } + +#ifdef DEBUG2 + fprintf (stderr, "buffered_io_size = %lld\n", buffered_io_size); + if (fd->is_agg && buffered_io_size) { + fprintf (stderr, "buf = ["); + for (i=0; iis_agg && buffered_io_size) { + assert (aggs_client_count != 0); + /* make sure we actually have the data to write out */ + agg_comm_statuses = (MPI_Status *) + ADIOI_Malloc (aggs_client_count*sizeof(MPI_Status)); + + MPI_Waitall (aggs_client_count, agg_comm_requests, + agg_comm_statuses); +#ifdef AGGREGATION_PROFILE + MPE_Log_event (5033, 0, NULL); +#endif + ADIOI_Free (agg_comm_requests); + ADIOI_Free (agg_comm_statuses); +#ifdef DEBUG2 + fprintf (stderr, "cb_buf = ["); + for (i=0; iis_agg && buffered_io_size) { + ADIOI_IOFiletype (fd, cb_buf, buffered_io_size, MPI_BYTE, + ADIO_EXPLICIT_OFFSET, agg_disp, agg_dtype, + ADIOI_READ, status, error_code); + if (*error_code != MPI_SUCCESS) return; + MPI_Type_free (&agg_dtype); + } + +#ifdef AGGREGATION_PROFILE + MPE_Log_event (5032, 0, NULL); +#endif + MPI_Alltoallw (cb_buf, client_alltoallw_counts, alltoallw_disps, + client_comm_dtype_arr, + buf, agg_alltoallw_counts , alltoallw_disps, + agg_comm_dtype_arr, + fd->comm); +#ifdef AGGREGATION_PROFILE + MPE_Log_event (5033, 0, NULL); +#endif + } + else { /* Write Case */ +#ifdef AGGREGATION_PROFILE + MPE_Log_event (5032, 0, NULL); +#endif + MPI_Alltoallw (buf, agg_alltoallw_counts, alltoallw_disps, + agg_comm_dtype_arr, + cb_buf, client_alltoallw_counts, alltoallw_disps, + client_comm_dtype_arr, + fd->comm); +#ifdef AGGREGATION_PROFILE + MPE_Log_event (5033, 0, NULL); +#endif + if (fd->is_agg && buffered_io_size) { + 
ADIOI_IOFiletype (fd, cb_buf, buffered_io_size, MPI_BYTE, + ADIO_EXPLICIT_OFFSET, agg_disp, agg_dtype, + ADIOI_WRITE, status, error_code); + if (*error_code != MPI_SUCCESS) return; + MPI_Type_free (&agg_dtype); + } + } + } + + /* Free (uncommit) datatypes for reuse */ + if (fd->is_agg) { + if (buffered_io_size > 0) { + for (i=0; i 0) + MPI_Type_free (&client_comm_dtype_arr[i]); + } + } + } + for (i=0; i 0) + MPI_Type_free (&agg_comm_dtype_arr[i]); + } + + /* figure out next set up requests */ + if (fd->is_agg) { + ADIOI_Build_agg_reqs (fd, rdwr, nprocs, + client_file_view_state_arr, + client_comm_dtype_arr, + client_comm_sz_arr, + &agg_disp, + &agg_dtype); + buffered_io_size = 0; + for (i=0; i 0) + buffered_io_size += client_comm_sz_arr[i]; + } + } +#ifdef USE_PRE_REQ + else { + /* Example use of ADIOI_Build_client_pre_req. to an + * appropriate section */ + for (i = 0; i < fd->hints->cb_nodes; i++) + { + agg_rank = fd->hints->ranklist[(i+myrank)%fd->hints->cb_nodes]; +#ifdef AGGREGATION_PROFILE + MPE_Log_event (5040, 0, NULL); +#endif + ADIOI_Build_client_pre_req( + fd, agg_rank, (i+myrank)%fd->hints->cb_nodes, + &(my_mem_view_state_arr[agg_rank]), + &(agg_file_view_state_arr[agg_rank]), + 2*1024*1024, + 64*1024); +#ifdef AGGREGATION_PROFILE + MPE_Log_event (5041, 0, NULL); +#endif + } + } +#endif + + /* aggregators pre-post all Irecv's for incoming data from + * clients. 
if nothing is needed, agg_comm_requests is not + * allocated */ + if (fd->hints->cb_alltoall == ADIOI_HINT_DISABLE) { + if ((fd->is_agg) && (rdwr == ADIOI_WRITE)) + post_aggregator_comm(fd->comm, rdwr, nprocs, cb_buf, + client_comm_dtype_arr, + client_comm_sz_arr, + &agg_comm_requests, + &aggs_client_count); + } + + /* Aggregators send amounts for data requested to clients */ + Exch_data_amounts (fd, nprocs, client_comm_sz_arr, agg_comm_sz_arr, + client_alltoallw_counts, agg_alltoallw_counts, + &aggregators_done); + + } + + /* Clean up */ + + if (fd->hints->cb_pfr != ADIOI_HINT_ENABLE) { + /* AAR, FSIZE, and User provided uniform File realms */ + if (1) { + ADIOI_Delete_flattened (fd->file_realm_types[0]); + MPI_Type_free (&fd->file_realm_types[0]); + } + else { + for (i=0; ihints->cb_nodes; i++) { + ADIOI_Datatype_iscontig(fd->file_realm_types[i], &is_contig); + if (!is_contig) + ADIOI_Delete_flattened(fd->file_realm_types[i]); + MPI_Type_free (&fd->file_realm_types[i]); + } + } + ADIOI_Free (fd->file_realm_types); + ADIOI_Free (fd->file_realm_st_offs); + } + + /* This memtype must be deleted from the ADIOI_Flatlist or else it + * will match incorrectly with other datatypes which use this + * pointer. 
*/ + ADIOI_Delete_flattened(datatype); + ADIOI_Delete_flattened(fd->filetype); + + if (fd->is_agg) { + if (buffered_io_size > 0) + MPI_Type_free (&agg_dtype); + for (i=0; iindices); + ADIOI_Free (client_file_view_state_arr[i].flat_type_p->blocklens); + ADIOI_Free (client_file_view_state_arr[i].flat_type_p); + } + ADIOI_Free (client_file_view_state_arr); + ADIOI_Free (cb_buf); + } + for (i = 0; i 0) + MPI_Type_free (&agg_comm_dtype_arr[i]); + + ADIOI_Free (client_comm_sz_arr); + ADIOI_Free (client_comm_dtype_arr); + ADIOI_Free (my_mem_view_state_arr); + ADIOI_Free (agg_file_view_state_arr); + ADIOI_Free (agg_comm_sz_arr); + ADIOI_Free (agg_comm_dtype_arr); + ADIOI_Free (alltoallw_disps); + ADIOI_Free (alltoallw_counts); + ADIOI_Free (all_st_end_offsets); + +#ifdef HAVE_STATUS_SET_BYTES + MPIR_Status_set_bytes(status, datatype, bufsize); + /* This is a temporary way of filling in status. The right way is + * to keep track of how much data was actually read and placed in + * buf during collective I/O. */ +#endif + fd->fp_sys_posn = -1; /* set it to null. */ +#ifdef AGGREGATION_PROFILE + if (rdwr == ADIOI_READ) + MPE_Log_event (5011, 0, NULL); + else + MPE_Log_event (5013, 0, NULL); +#endif +} + + +/* Some of this code is from the old Calc_my_off_len() function. + * It calculates the 1st and last byte accessed */ +void ADIOI_Calc_bounds (ADIO_File fd, int count, MPI_Datatype buftype, + int file_ptr_type, ADIO_Offset offset, + ADIO_Offset *st_offset, ADIO_Offset *end_offset) +{ + int filetype_size, buftype_size, etype_size; + int i, sum; + MPI_Aint filetype_extent; + ADIO_Offset total_io; + int filetype_is_contig; + int remainder; + ADIOI_Flatlist_node *flat_file; + + ADIO_Offset st_byte_off, end_byte_off; + +#ifdef AGGREGATION_PROFILE + MPE_Log_event (5000, 0, NULL); +#endif + + if (!count) { + /* Max signed positive value for ADIO_Offset + * (arch. dependent?). is there a better way? 
*/ + memset (st_offset, 8, sizeof(ADIO_Offset)); + *st_offset = *st_offset / 2; + *end_offset = -1; + return; + } + + ADIOI_Datatype_iscontig (fd->filetype, &filetype_is_contig); + + MPI_Type_size (fd->filetype, &filetype_size); + MPI_Type_extent (fd->filetype, &filetype_extent); + MPI_Type_size (fd->etype, &etype_size); + MPI_Type_size (buftype, &buftype_size); + + total_io = buftype_size * count; + + if (filetype_is_contig) { + if (file_ptr_type == ADIO_INDIVIDUAL) + st_byte_off = fd->fp_ind; + else + st_byte_off = fd->disp + etype_size * offset; + + end_byte_off = st_byte_off + total_io - 1; + } + else { + flat_file = ADIOI_Flatlist; + while (flat_file->type != fd->filetype) flat_file = flat_file->next; + + /* we need to take care of some weirdness since fd->fp_ind + points at an accessible byte in file. the first accessible + byte in the file is not necessarily the first byte, nor is + it necessarily the first off/len pair in the filetype. */ + if (file_ptr_type == ADIO_INDIVIDUAL) { + st_byte_off = fd->fp_ind; + /* find end byte of I/O (may be in middle of an etype) */ + + /* calculate byte starting point of first filetype */ + end_byte_off = (ADIO_Offset) + ((fd->fp_ind - fd->disp - flat_file->indices[0]) / + filetype_extent) * filetype_extent + fd->disp + + flat_file->indices[0]; + /* number of absolute bytes into first filetype */ + remainder = (fd->fp_ind - fd->disp - flat_file->indices[0]) % + filetype_extent; + if (remainder) { + /* find how many file viewable bytes into first filetype */ + sum = 0; + for (i=0; icount; i++) { + sum += flat_file->blocklens[i]; + if ((flat_file->indices[i] - flat_file->indices[0] + + flat_file->blocklens[i]) >= remainder) { + sum -= (flat_file->blocklens[i] - (sum - remainder)); + break; + } + } + total_io += sum; + } + /* byte starting point of last filetype */ + end_byte_off += (total_io - 1) / filetype_size * filetype_extent; + /* number of bytes into last filetype */ + remainder = total_io % filetype_size; + if 
(!remainder) { + for (i=flat_file->count - 1; i>=0; i--) { + if (flat_file->blocklens[i]) break; + } + assert (i > -1); + end_byte_off += flat_file->indices[i] + + flat_file->blocklens[i] - 1; + end_byte_off -= flat_file->indices[0]; + } + else { + sum = 0; + for (i=0; icount; i++) { + sum += flat_file->blocklens[i]; + if (sum >= remainder) { + end_byte_off += flat_file->indices[i] + + flat_file->blocklens[i] - sum + remainder - 1; + break; + } + } + end_byte_off -= flat_file->indices[0]; + } + } + else { + /* find starting byte of I/O (must be aligned with an etype) */ + /* byte starting point of starting filetype */ + st_byte_off = fd->disp + ((offset * etype_size) / filetype_size) * + filetype_extent; + /* number of file viewable bytes into starting filetype */ + remainder = (etype_size * offset) % filetype_size; + + sum = 0; + for (i=0; icount; i++) { + sum += flat_file->blocklens[i]; + if (sum >= remainder) { + if (sum == remainder) + st_byte_off += flat_file->indices[i+1]; + else + st_byte_off += flat_file->indices[i] + + flat_file->blocklens[i] - sum + remainder; + break; + } + } + + /* find end byte of I/O (may be in middle of an etype) */ + /* byte starting point of last filetype */ + end_byte_off = fd->disp + (offset * etype_size + total_io) / + filetype_size * filetype_extent; + /* number of bytes into last filetype */ + remainder = (offset * etype_size + total_io) % filetype_size; + + if (!remainder) { + /* the last non-zero off/len pair */ + for (i=flat_file->count-1; i>=0; i--) { + if (flat_file->blocklens[i]) break; + } + assert (i >= 0); + /* back up a whole filetype, and put back up to the + * last byte of the last non-zero offlen pair */ + /* end_byte_off = (end_byte_off - filetype_extent) + + flat_file->indices[i] + + flat_file->blocklens[i] - 1; */ + /* equivalent of above commented out equation */ + end_byte_off -= filetype_extent - flat_file->indices[i] - + flat_file->blocklens[i] + 1; + } + else { + sum = 0; + for (i=0; icount; i++) { + sum 
+= flat_file->blocklens[i]; + if (sum >= remainder) { + end_byte_off += flat_file->indices[i] + + flat_file->blocklens[i] - sum + remainder - 1; + break; + } + } + } + } + } + + *st_offset = st_byte_off; + *end_offset = end_byte_off; +#ifdef DEBUG + printf ("st_offset = %lld\nend_offset = %lld\n", + st_byte_off, end_byte_off); +#endif +#ifdef AGGREGATION_PROFILE + MPE_Log_event (5001, 0, NULL); +#endif +} + +/* wrapper function for ADIO_WriteStrided and ADIO_ReadStrided. Used + * by new 2 phase code to pass an arbitrary file type directly to + * WriteStrided call without affecting existing code. For the new 2 + * phase code, we really only need to set a custom_ftype, and we can + * assume that this uses MPI_BYTE for the etype, and disp is 0 */ +void ADIOI_IOFiletype(ADIO_File fd, void *buf, int count, + MPI_Datatype datatype, int file_ptr_type, + ADIO_Offset offset, MPI_Datatype custom_ftype, + int rdwr, ADIO_Status *status, int *error_code) +{ + MPI_Datatype user_filetype; + MPI_Datatype user_etype; + ADIO_Offset user_disp; + int user_ind_wr_buffer_size; + int user_ind_rd_buffer_size; + int f_is_contig, m_is_contig; + int user_ds_read, user_ds_write; + MPI_Aint f_extent; + int f_size; + int f_ds_percent; /* size/extent */ + +#ifdef AGGREGATION_PROFILE + if (rdwr == ADIOI_READ) + MPE_Log_event(5006, 0, NULL); + else + MPE_Log_event(5008, 0, NULL); +#endif + MPI_Type_extent(custom_ftype, &f_extent); + MPI_Type_size(custom_ftype, &f_size); + f_ds_percent = 100 * f_size / f_extent; + + /* temporarily store file view information */ + user_filetype = fd->filetype; + user_etype = fd->etype; + user_disp = fd->disp; + user_ds_read = fd->hints->ds_read; + user_ds_write = fd->hints->ds_write; + /* temporarily override the independent I/O datasieve buffer size */ + user_ind_wr_buffer_size = fd->hints->ind_wr_buffer_size; + user_ind_rd_buffer_size = fd->hints->ind_rd_buffer_size; + + /* set new values for temporary file view */ + fd->filetype = custom_ftype; + fd->etype = 
MPI_BYTE; + /* set new values for independent I/O datasieve buffer size */ + fd->hints->ind_wr_buffer_size = fd->hints->cb_buffer_size; + fd->hints->ind_rd_buffer_size = fd->hints->cb_buffer_size; + /* decide whether or not to do datasieving */ +#ifdef DEBUG + printf ("f_ds_percent = %d cb_ds_threshold = %d\n", f_ds_percent, + fd->hints->cb_ds_threshold); +#endif + if (f_ds_percent >= fd->hints->cb_ds_threshold) { + fd->hints->ds_read = ADIOI_HINT_ENABLE; + fd->hints->ds_write = ADIOI_HINT_ENABLE; + } + else { + fd->hints->ds_read = ADIOI_HINT_DISABLE; + fd->hints->ds_write = ADIOI_HINT_DISABLE; + } + + /* flatten the new filetype since the strided calls expect it to + * have been flattened in set file view. in the two phase code, + * the datatype passed down should always be MPI_BYTE, and + * therefore contiguous, but just for completeness sake, we'll + * check the memory datatype anyway */ + ADIOI_Datatype_iscontig(custom_ftype, &f_is_contig); + ADIOI_Datatype_iscontig(datatype, &m_is_contig); + if (!f_is_contig) + ADIOI_Flatten_datatype (custom_ftype); + + /* make appropriate Read/Write calls. Let ROMIO figure out file + * system specific stuff. 
*/ + if (f_is_contig && m_is_contig) { + fd->disp = 0; + if (rdwr == ADIOI_READ) + ADIO_ReadContig(fd, buf, count, datatype, file_ptr_type, offset, + status, error_code); + else + ADIO_WriteContig(fd, buf, count, datatype, file_ptr_type, offset, + status, error_code); + } + else { + fd->disp = offset; + if (rdwr == ADIOI_READ) + ADIO_ReadStrided(fd, buf, count, datatype, file_ptr_type, 0, + status, error_code); + else + ADIO_WriteStrided(fd, buf, count, datatype, file_ptr_type, 0, + status, error_code); + } + + /* Delete flattened temporary filetype */ + if (!f_is_contig) + ADIOI_Delete_flattened (custom_ftype); + + /* restore the user specified file view to cover our tracks */ + fd->filetype = user_filetype; + fd->etype = user_etype; + fd->disp = user_disp; + fd->hints->ds_read = user_ds_read; + fd->hints->ds_write = user_ds_write; + fd->hints->ind_wr_buffer_size = user_ind_wr_buffer_size; + fd->hints->ind_rd_buffer_size = user_ind_rd_buffer_size; +#ifdef AGGREGATION_PROFILE + if (rdwr == ADIOI_READ) + MPE_Log_event (5007, 0, NULL); + else + MPE_Log_event (5009, 0, NULL); +#endif +} + +static void Exch_data_amounts (ADIO_File fd, int nprocs, + ADIO_Offset *client_comm_sz_arr, + ADIO_Offset *agg_comm_sz_arr, + int *client_alltoallw_counts, + int *agg_alltoallw_counts, + int *aggregators_done) +{ + int i; + int recv_idx; + MPI_Request *recv_requests; + MPI_Request *send_requests; + MPI_Status status; + MPI_Status *send_statuses; + /* Aggregators send amounts for data requested to clients */ + if (fd->hints->cb_alltoall != ADIOI_HINT_DISABLE) { + MPI_Alltoall (client_comm_sz_arr, sizeof(ADIO_Offset), MPI_BYTE, + agg_comm_sz_arr, sizeof(ADIO_Offset), MPI_BYTE, + fd->comm); + + if (fd->is_agg) { + for (i=0; i 0) + client_alltoallw_counts[i] = 1; + else + client_alltoallw_counts[i] = 0; + } + *aggregators_done = 0; + for (i=0; i 0) + agg_alltoallw_counts[i] = 1; + else + agg_alltoallw_counts[i] = 0; + } + } else { + /* let's see if we can't reduce some communication as 
well as + * overlap some communication and work */ + + recv_requests = ADIOI_Malloc (fd->hints->cb_nodes * sizeof(MPI_Request)); + /* post all receives - only receive from aggregators */ + for (i = 0; i < fd->hints->cb_nodes; i++) + MPI_Irecv (&agg_comm_sz_arr[fd->hints->ranklist[i]], + sizeof(ADIO_Offset), MPI_BYTE, fd->hints->ranklist[i], + AMT_TAG, fd->comm, &recv_requests[i]); + + /* Barrier is needed here if we're worried about unexpected + * messages being dropped */ + /* MPI_Barrier (fd->comm); */ + send_requests = NULL; + if (fd->is_agg) { + /* only aggregators send data */ + send_requests = ADIOI_Malloc (nprocs * sizeof(MPI_Request)); + + /* post all sends */ + for (i = 0; i < nprocs; i++) { + MPI_Isend (&client_comm_sz_arr[i], sizeof(ADIO_Offset), + MPI_BYTE, i, AMT_TAG, fd->comm, &send_requests[i]); + + if (client_comm_sz_arr[i] > 0) + client_alltoallw_counts[i] = 1; + else + client_alltoallw_counts[i] = 0; + } + } + + *aggregators_done = 0; + for (i=0; i < fd->hints->cb_nodes; i++) { + MPI_Waitany (fd->hints->cb_nodes, recv_requests, &recv_idx, &status); + if (agg_comm_sz_arr[fd->hints->ranklist[recv_idx]] == -1) + *aggregators_done = *aggregators_done + 1; + else if (agg_comm_sz_arr[fd->hints->ranklist[recv_idx]] > 0) + agg_alltoallw_counts[fd->hints->ranklist[recv_idx]] = 1; + else + agg_alltoallw_counts[fd->hints->ranklist[recv_idx]] = 0; + } + + ADIOI_Free (recv_requests); + if (fd->is_agg) { + /* wait for all sends to complete */ + send_statuses = ADIOI_Malloc (nprocs * sizeof (MPI_Status)); + MPI_Waitall (nprocs, send_requests, send_statuses); + ADIOI_Free (send_requests); + ADIOI_Free (send_statuses); + } + } +} + +static void post_aggregator_comm (MPI_Comm comm, int rw_type, + int nproc, void *cb_buf, + MPI_Datatype *client_comm_dtype_arr, + ADIO_Offset *client_comm_sz_arr, + MPI_Request **requests_p, + int *aggs_client_count_p) +{ + int aggs_client_count = 0; + MPI_Request *requests; + int i; + +#ifdef DEBUG + printf ("posting aggregator 
communication\n"); +#endif + + for (i=0; i < nproc; i++) + if (client_comm_sz_arr[i] > 0) + aggs_client_count++; +#ifdef DEBUG + printf ("aggregator needs to talk to %d clients\n", + aggs_client_count); +#endif + *aggs_client_count_p = aggs_client_count; + if (aggs_client_count) { + requests = (MPI_Request *) + ADIOI_Malloc (aggs_client_count * sizeof(MPI_Request)); + aggs_client_count = 0; +#ifdef AGGREGATION_PROFILE + MPE_Log_event (5032, 0, NULL); +#endif + for (i=0; i < nproc; i++) { + if (client_comm_sz_arr[i] > 0) { + if (rw_type == ADIOI_WRITE) + MPI_Irecv (cb_buf, 1, client_comm_dtype_arr[i], i, + DATA_TAG, comm, + &requests[aggs_client_count]); + else + MPI_Isend (cb_buf, 1, client_comm_dtype_arr[i], i, + DATA_TAG, comm, + &requests[aggs_client_count]); + + aggs_client_count++; + } + } + *requests_p = requests; + } +} + +static void post_client_comm (ADIO_File fd, int rw_type, + int agg_rank, void *buf, + MPI_Datatype agg_comm_dtype, + int agg_alltoallw_count, + MPI_Request *request) +{ + if (agg_alltoallw_count) { + if (rw_type == ADIOI_READ) + MPI_Irecv (buf, 1, agg_comm_dtype, agg_rank, DATA_TAG, fd->comm, + request); + else + MPI_Isend (buf, 1, agg_comm_dtype, agg_rank, DATA_TAG, fd->comm, + request); + } +} + + + diff --git a/ompi/mca/io/romio/romio/adio/common/ad_iread.c b/ompi/mca/io/romio/romio/adio/common/ad_iread.c index 34fce2c1da..2a38295348 100644 --- a/ompi/mca/io/romio/romio/adio/common/ad_iread.c +++ b/ompi/mca/io/romio/romio/adio/common/ad_iread.c @@ -45,6 +45,7 @@ void ADIOI_GEN_IreadContig(ADIO_File fd, void *buf, int count, static char myname[] = "ADIOI_GEN_IREADCONTIG"; MPI_Type_size(datatype, &typesize); + ADIOI_Assert((count * typesize) == ((ADIO_Offset)(unsigned)count * (ADIO_Offset)typesize)); len = count * typesize; if (file_ptr_type == ADIO_INDIVIDUAL) offset = fd->fp_ind; @@ -84,7 +85,7 @@ void ADIOI_GEN_IreadStrided(ADIO_File fd, void *buf, int count, if (*error_code == MPI_SUCCESS) { MPI_Type_size(datatype, &typesize); - 
nbytes = count*typesize; + nbytes = (MPI_Offset)count*(MPI_Offset)typesize; } MPIO_Completed_request_create(&fd, nbytes, error_code, request); } diff --git a/ompi/mca/io/romio/romio/adio/common/ad_iread_fake.c b/ompi/mca/io/romio/romio/adio/common/ad_iread_fake.c index 1a35164ccc..c6a2562bd8 100644 --- a/ompi/mca/io/romio/romio/adio/common/ad_iread_fake.c +++ b/ompi/mca/io/romio/romio/adio/common/ad_iread_fake.c @@ -21,12 +21,13 @@ void ADIOI_FAKE_IreadContig(ADIO_File fd, void *buf, int count, MPI_Offset len; MPI_Type_size(datatype, &typesize); - len = count * typesize; + len = (MPI_Offset)count * (MPI_Offset)typesize; /* Call the blocking function. It will create an error code * if necessary. */ - ADIO_ReadContig(fd, buf, len, MPI_BYTE, file_ptr_type, offset, + ADIOI_Assert(len == (int) len); /* the count is an int parm */ + ADIO_ReadContig(fd, buf, (int)len, MPI_BYTE, file_ptr_type, offset, &status, error_code); if (*error_code != MPI_SUCCESS) { len=0; @@ -54,7 +55,7 @@ void ADIOI_FAKE_IreadStrided(ADIO_File fd, void *buf, int count, offset, &status, error_code); if (*error_code == MPI_SUCCESS) { MPI_Type_size(datatype, &typesize); - nbytes = count*typesize; + nbytes = (MPI_Offset)count*(MPI_Offset)typesize; } MPIO_Completed_request_create(&fd, nbytes, error_code, request); } diff --git a/ompi/mca/io/romio/romio/adio/common/ad_iwrite.c b/ompi/mca/io/romio/romio/adio/common/ad_iwrite.c index e97a062960..5a18198fb1 100644 --- a/ompi/mca/io/romio/romio/adio/common/ad_iwrite.c +++ b/ompi/mca/io/romio/romio/adio/common/ad_iwrite.c @@ -55,6 +55,7 @@ void ADIOI_GEN_IwriteContig(ADIO_File fd, void *buf, int count, MPI_Type_size(datatype, &typesize); len = count * typesize; + ADIOI_Assert(len == (int)((ADIO_Offset)count * (ADIO_Offset)typesize)); /* the count is an int parm */ if (file_ptr_type == ADIO_INDIVIDUAL) offset = fd->fp_ind; aio_errno = ADIOI_GEN_aio(fd, buf, len, offset, 1, request); @@ -85,10 +86,25 @@ int ADIOI_GEN_aio(ADIO_File fd, void *buf, int len, 
ADIO_Offset offset, int error_code; struct aiocb *aiocbp; ADIOI_AIO_Request *aio_req; - + MPI_Status status; +#if defined(ROMIO_XFS) + unsigned maxiosz = wr ? fd->hints->fs_hints.xfs.write_chunk_sz : + fd->hints->fs_hints.xfs.read_chunk_sz; +#endif /* ROMIO_XFS */ fd_sys = fd->fd_sys; +#if defined(ROMIO_XFS) + /* Use Direct I/O if desired and properly aligned */ + if (fd->fns == &ADIO_XFS_operations && + ((wr && fd->direct_write) || (!wr && fd->direct_read)) && + !(((long) buf) % fd->d_mem) && !(offset % fd->d_miniosz) && + !(len % fd->d_miniosz) && (len >= fd->d_miniosz) && + (len <= maxiosz)) { + fd_sys = fd->fd_direct; + } +#endif /* ROMIO_XFS */ + aio_req = (ADIOI_AIO_Request*)ADIOI_Calloc(sizeof(ADIOI_AIO_Request), 1); aiocbp = (struct aiocb *) ADIOI_Calloc(sizeof(struct aiocb), 1); aiocbp->aio_offset = offset; @@ -133,10 +149,10 @@ int ADIOI_GEN_aio(ADIO_File fd, void *buf, int len, ADIO_Offset offset, treat this as a blocking request and return. */ if (wr) ADIO_WriteContig(fd, buf, len, MPI_BYTE, - ADIO_EXPLICIT_OFFSET, offset, NULL, &error_code); + ADIO_EXPLICIT_OFFSET, offset, &status, &error_code); else ADIO_ReadContig(fd, buf, len, MPI_BYTE, - ADIO_EXPLICIT_OFFSET, offset, NULL, &error_code); + ADIO_EXPLICIT_OFFSET, offset, &status, &error_code); MPIO_Completed_request_create(&fd, len, &error_code, request); return 0; @@ -178,7 +194,7 @@ void ADIOI_GEN_IwriteStrided(ADIO_File fd, void *buf, int count, if (*error_code == MPI_SUCCESS) { MPI_Type_size(datatype, &typesize); - nbytes = count * typesize; + nbytes = (MPI_Offset)count * (MPI_Offset)typesize; } MPIO_Completed_request_create(&fd, nbytes, error_code, request); } @@ -202,7 +218,6 @@ int ADIOI_GEN_aio_poll_fn(void *extra_state, MPI_Status *status) } else if (errno == 0) { int n = aio_return(aio_req->aiocbp); aio_req->nbytes = n; - MPIR_Nest_incr(); errcode = MPI_Grequest_complete(aio_req->req); /* --BEGIN ERROR HANDLING-- */ if (errcode != MPI_SUCCESS) { @@ -213,7 +228,6 @@ int 
ADIOI_GEN_aio_poll_fn(void *extra_state, MPI_Status *status) 0); } /* --END ERROR HANDLING-- */ - MPIR_Nest_decr(); } return errcode; } @@ -263,7 +277,6 @@ int ADIOI_GEN_aio_wait_fn(int count, void ** array_of_states, if (errno == 0) { int n = aio_return(aio_reqlist[i]->aiocbp); aio_reqlist[i]->nbytes = n; - MPIR_Nest_incr(); errcode = MPI_Grequest_complete(aio_reqlist[i]->req); if (errcode != MPI_SUCCESS) { errcode = MPIO_Err_create_code(MPI_SUCCESS, @@ -272,7 +285,6 @@ int ADIOI_GEN_aio_wait_fn(int count, void ** array_of_states, __LINE__, MPI_ERR_IO, "**mpi_grequest_complete", 0); } - MPIR_Nest_decr(); ADIOI_Free(aio_reqlist[i]->aiocbp); aio_reqlist[i]->aiocbp = NULL; cblist[i] = NULL; @@ -289,27 +301,6 @@ int ADIOI_GEN_aio_wait_fn(int count, void ** array_of_states, return errcode; } -int ADIOI_GEN_aio_query_fn(void *extra_state, MPI_Status *status) -{ - ADIOI_AIO_Request *aio_req; - - aio_req = (ADIOI_AIO_Request *)extra_state; - - - MPI_Status_set_elements(status, MPI_BYTE, aio_req->nbytes); - - /* do i need to nest_incr/nest_decr here? 
*/ - /* can never cancel so always true */ - MPI_Status_set_cancelled(status, 0); - - /* choose not to return a value for this */ - status->MPI_SOURCE = MPI_UNDEFINED; - /* tag has no meaning for this generalized request */ - status->MPI_TAG = MPI_UNDEFINED; - /* this generalized request never fails */ - return MPI_SUCCESS; -} - int ADIOI_GEN_aio_free_fn(void *extra_state) { ADIOI_AIO_Request *aio_req; @@ -322,6 +313,26 @@ int ADIOI_GEN_aio_free_fn(void *extra_state) return MPI_SUCCESS; } #endif /* working AIO */ + +int ADIOI_GEN_aio_query_fn(void *extra_state, MPI_Status *status) +{ + ADIOI_AIO_Request *aio_req; + + aio_req = (ADIOI_AIO_Request *)extra_state; + + + MPI_Status_set_elements(status, MPI_BYTE, aio_req->nbytes); + + /* can never cancel so always true */ + MPI_Status_set_cancelled(status, 0); + + /* choose not to return a value for this */ + status->MPI_SOURCE = MPI_UNDEFINED; + /* tag has no meaning for this generalized request */ + status->MPI_TAG = MPI_UNDEFINED; + /* this generalized request never fails */ + return MPI_SUCCESS; +} /* * vim: ts=8 sts=4 sw=4 noexpandtab */ diff --git a/ompi/mca/io/romio/romio/adio/common/ad_iwrite_fake.c b/ompi/mca/io/romio/romio/adio/common/ad_iwrite_fake.c index 5fce4e3291..1df7d9d436 100644 --- a/ompi/mca/io/romio/romio/adio/common/ad_iwrite_fake.c +++ b/ompi/mca/io/romio/romio/adio/common/ad_iwrite_fake.c @@ -18,20 +18,22 @@ void ADIOI_FAKE_IwriteContig(ADIO_File fd, void *buf, int count, int *error_code) { ADIO_Status status; - int len, typesize; + MPI_Offset len; + int typesize; MPI_Offset nbytes=0; MPI_Type_size(datatype, &typesize); - len = count * typesize; + len = (MPI_Offset)count * (MPI_Offset)typesize; /* Call the blocking function. It will create an error code * if necessary. 
*/ - ADIO_WriteContig(fd, buf, len, MPI_BYTE, file_ptr_type, offset, + ADIOI_Assert(len == (int) len); /* the count is an int parm */ + ADIO_WriteContig(fd, buf, (int)len, MPI_BYTE, file_ptr_type, offset, &status, error_code); if (*error_code == MPI_SUCCESS) { MPI_Type_size(datatype, &typesize); - nbytes = count*typesize; + nbytes = (MPI_Offset)count*(MPI_Offset)typesize; } MPIO_Completed_request_create(&fd, nbytes, error_code, request); @@ -57,7 +59,7 @@ void ADIOI_FAKE_IwriteStrided(ADIO_File fd, void *buf, int count, offset, &status, error_code); if (*error_code == MPI_SUCCESS) { MPI_Type_size(datatype, &typesize); - nbytes = count * typesize; + nbytes = (MPI_Offset)count * (MPI_Offset)typesize; } MPIO_Completed_request_create(&fd, nbytes, error_code, request); } diff --git a/ompi/mca/io/romio/romio/adio/common/ad_open.c b/ompi/mca/io/romio/romio/adio/common/ad_open.c index d89f6ac64c..20a9eadd1c 100644 --- a/ompi/mca/io/romio/romio/adio/common/ad_open.c +++ b/ompi/mca/io/romio/romio/adio/common/ad_open.c @@ -27,7 +27,7 @@ MPI_File ADIO_Open(MPI_Comm orig_comm, { MPI_File mpi_fh; ADIO_File fd; - int orig_amode_excl, orig_amode_wronly, err, rank, procs; + int err, rank, procs; static char myname[] = "ADIO_OPEN"; int max_error_code; MPI_Info dupinfo; @@ -59,6 +59,9 @@ MPI_File ADIO_Open(MPI_Comm orig_comm, fd->filetype = filetype; /* MPI_BYTE by default */ fd->etype_size = 1; /* default etype is MPI_BYTE */ + fd->file_realm_st_offs = NULL; + fd->file_realm_types = NULL; + fd->perm = perm; fd->async_count = 0; @@ -70,7 +73,7 @@ MPI_File ADIO_Open(MPI_Comm orig_comm, MPI_Comm_rank(comm, &rank); MPI_Comm_size(comm, &procs); /* create and initialize info object */ - fd->hints = (ADIOI_Hints *)ADIOI_Malloc(sizeof(struct ADIOI_Hints_struct)); + fd->hints = (ADIOI_Hints *)ADIOI_Calloc(1, sizeof(struct ADIOI_Hints_struct)); if (fd->hints == NULL) { /* NEED TO HANDLE ENOMEM ERRORS */ } @@ -79,18 +82,13 @@ MPI_File ADIO_Open(MPI_Comm orig_comm, fd->hints->initialized = 0; 
fd->info = MPI_INFO_NULL; - if (info == MPI_INFO_NULL) - *error_code = MPI_Info_create(&dupinfo); - else - *error_code = MPI_Info_dup(info, &dupinfo); - if (*error_code != MPI_SUCCESS) - goto fn_exit; - - ADIOI_process_system_hints(dupinfo); + ADIOI_incorporate_system_hints(info, ADIOI_syshints, &dupinfo); ADIO_SetInfo(fd, dupinfo, &err); - *error_code = MPI_Info_free(&dupinfo); - if (*error_code != MPI_SUCCESS) - goto fn_exit; + if (dupinfo != MPI_INFO_NULL) { + *error_code = MPI_Info_free(&dupinfo); + if (*error_code != MPI_SUCCESS) + goto fn_exit; + } /* deferred open: * we can only do this optimization if 'fd->hints->deferred_open' is set @@ -103,9 +101,9 @@ MPI_File ADIO_Open(MPI_Comm orig_comm, && uses_generic_write(fd))) { fd->hints->deferred_open = 0; } - if (fd->file_system == ADIO_PVFS2) - /* disable deferred open on PVFS2 so that scalable broadcast will - * always use the propper communicator */ + if (ADIO_Feature(fd, ADIO_SCALABLE_OPEN)) + /* disable deferred open on these fs so that scalable broadcast + * will always use the propper communicator */ fd->hints->deferred_open = 0; @@ -123,134 +121,30 @@ MPI_File ADIO_Open(MPI_Comm orig_comm, * IO */ fd->agg_comm = MPI_COMM_NULL; fd->is_open = 0; + fd->my_cb_nodes_index = -2; + fd->is_agg = is_aggregator(rank, fd); if (fd->hints->deferred_open) { /* MPI_Comm_split will create a communication group of aggregators. * for non-aggregators it will return MPI_COMM_NULL . 
we rely on * fd->agg_comm == MPI_COMM_NULL for non-aggregators in several * tests in the code */ - if (is_aggregator(rank, fd)) { + if (fd->is_agg) { MPI_Comm_split(fd->comm, 1, 0, &aggregator_comm); fd->agg_comm = aggregator_comm; } else { MPI_Comm_split(fd->comm, MPI_UNDEFINED, 0, &aggregator_comm); fd->agg_comm = aggregator_comm; } + } - orig_amode_excl = access_mode; + /* actual opens start here */ + /* generic open: one process opens to create the file, all others open */ + /* nfs open: everybody opens or else you'll end up with "file not found" + * due to stupid nfs consistency semantics */ + /* scalable open: one process opens and broadcasts results to everyone */ - /* optimization: by having just one process create a file, close it, then - * have all N processes open it, we can possibly avoid contention for write - * locks on a directory for some file systems. - * - * we used to special-case EXCL|CREATE, since when N processes are trying - * to create a file exclusively, only 1 will succeed and the rest will - * (spuriously) fail. Since we are now carrying out the CREATE on one - * process anyway, the EXCL case falls out and we don't need to explicitly - * worry about it, other than turning off both the EXCL and CREATE flags - */ - /* pvfs2 handles opens specially, so it is actually more efficent for that - * file system if we skip this optimization */ - /* NFS handles opens especially poorly, so we cannot use this optimization - * on that FS */ - if (fd->file_system == ADIO_NFS) { - /* no optimizations for NFS: */ - if ((access_mode & ADIO_CREATE) && (access_mode & ADIO_EXCL)) { - /* the open should fail if the file exists. Only *1* process should - check this. Otherwise, if all processes try to check and the file - does not exist, one process will create the file and others who - reach later will return error. 
*/ - if(rank == fd->hints->ranklist[0]) { - fd->access_mode = access_mode; - (*(fd->fns->ADIOI_xxx_Open))(fd, error_code); - MPI_Bcast(error_code, 1, MPI_INT, \ - fd->hints->ranklist[0], fd->comm); - /* if no error, close the file and reopen normally below */ - if (*error_code == MPI_SUCCESS) - (*(fd->fns->ADIOI_xxx_Close))(fd, error_code); - } - else MPI_Bcast(error_code, 1, MPI_INT, - fd->hints->ranklist[0], fd->comm); - if (*error_code != MPI_SUCCESS) { - goto fn_exit; - } - else { - /* turn off EXCL for real open */ - access_mode = access_mode ^ ADIO_EXCL; - } - } - } else { - - /* the actual optimized create on one, open on all */ - if (access_mode & ADIO_CREATE && fd->file_system != ADIO_PVFS2) { - if(rank == fd->hints->ranklist[0]) { - /* remove delete_on_close flag if set */ - if (access_mode & ADIO_DELETE_ON_CLOSE) - fd->access_mode = access_mode ^ ADIO_DELETE_ON_CLOSE; - else - fd->access_mode = access_mode; - - (*(fd->fns->ADIOI_xxx_Open))(fd, error_code); - MPI_Bcast(error_code, 1, MPI_INT, \ - fd->hints->ranklist[0], fd->comm); - /* if no error, close the file and reopen normally below */ - if (*error_code == MPI_SUCCESS) - (*(fd->fns->ADIOI_xxx_Close))(fd, error_code); - - fd->access_mode = access_mode; /* back to original */ - } - else MPI_Bcast(error_code, 1, MPI_INT, fd->hints->ranklist[0], fd->comm); - - if (*error_code != MPI_SUCCESS) { - goto fn_exit; - } - else { - /* turn off CREAT (and EXCL if set) for real multi-processor open */ - access_mode ^= ADIO_CREATE; - if (access_mode & ADIO_EXCL) - access_mode ^= ADIO_EXCL; - } - } - } - - /* if we are doing deferred open, non-aggregators should return now */ - if (fd->hints->deferred_open ) { - if (fd->agg_comm == MPI_COMM_NULL) { - /* we might have turned off EXCL for the aggregators. 
- * restore access_mode that non-aggregators get the right - * value from get_amode */ - fd->access_mode = orig_amode_excl; - *error_code = MPI_SUCCESS; - goto fn_exit; - } - } - -/* For writing with data sieving, a read-modify-write is needed. If - the file is opened for write_only, the read will fail. Therefore, - if write_only, open the file as read_write, but record it as write_only - in fd, so that get_amode returns the right answer. */ - - orig_amode_wronly = access_mode; - if (access_mode & ADIO_WRONLY) { - access_mode = access_mode ^ ADIO_WRONLY; - access_mode = access_mode | ADIO_RDWR; - } - fd->access_mode = access_mode; - - (*(fd->fns->ADIOI_xxx_Open))(fd, error_code); - - /* if error, may be it was due to the change in amode above. - therefore, reopen with access mode provided by the user.*/ - fd->access_mode = orig_amode_wronly; - if (*error_code != MPI_SUCCESS) - (*(fd->fns->ADIOI_xxx_Open))(fd, error_code); - - /* if we turned off EXCL earlier, then we should turn it back on */ - if (fd->access_mode != orig_amode_excl) fd->access_mode = orig_amode_excl; - - /* for deferred open: this process has opened the file (because if we are - * not an aggregaor and we are doing deferred open, we returned earlier)*/ - fd->is_open = 1; + ADIOI_OpenColl(fd, rank, access_mode, error_code); fn_exit: MPI_Allreduce(error_code, &max_error_code, 1, MPI_INT, MPI_MAX, comm); @@ -300,10 +194,18 @@ MPI_File ADIO_Open(MPI_Comm orig_comm, int is_aggregator(int rank, ADIO_File fd ) { int i; - for (i=0; i< fd->hints->cb_nodes; i++ ) { - if ( rank == fd->hints->ranklist[i] ) - return 1; + if (fd->my_cb_nodes_index == -2) { + for (i=0; i< fd->hints->cb_nodes; i++ ) { + if ( rank == fd->hints->ranklist[i] ) { + fd->my_cb_nodes_index = i; + return 1; + } + } + fd->my_cb_nodes_index = -1; } + else if (fd->my_cb_nodes_index != -1) + return 1; + return 0; } @@ -369,7 +271,7 @@ static int build_cb_config_list(ADIO_File fd, /* TEMPORARY -- REMOVE WHEN NO LONGER UPDATING INFO FOR 
FS-INDEP. */ value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char)); ADIOI_Snprintf(value, MPI_MAX_INFO_VAL+1, "%d", rank_ct); - MPI_Info_set(fd->info, "cb_nodes", value); + ADIOI_Info_set(fd->info, "cb_nodes", value); ADIOI_Free(value); } diff --git a/ompi/mca/io/romio/romio/adio/common/ad_opencoll.c b/ompi/mca/io/romio/romio/adio/common/ad_opencoll.c new file mode 100644 index 0000000000..243fe84b84 --- /dev/null +++ b/ompi/mca/io/romio/romio/adio/common/ad_opencoll.c @@ -0,0 +1,108 @@ +/* -*- Mode: C; c-basic-offset:4 ; -*- */ +/* + * + * Copyright (C) 2007 UChicago/Argonne LLC + * See COPYRIGHT notice in top-level directory. + */ + +#include "adio.h" + +/* Generic version of a "collective open". Assumes a "real" underlying + * file system (meaning no wonky consistency semantics like NFS). + * + * optimization: by having just one process create a file, close it, + * then have all N processes open it, we can possibly avoid contention + * for write locks on a directory for some file systems. + * + * Happy side-effect: exclusive create (error if file already exists) + * just falls out + * + * Note: this is not a "scalable open" (c.f. "The impact of file systems + * on MPI-IO scalability"). 
+ */ + +void ADIOI_GEN_OpenColl(ADIO_File fd, int rank, + int access_mode, int *error_code) +{ + int orig_amode_excl, orig_amode_wronly; + MPI_Comm tmp_comm; + + orig_amode_excl = access_mode; + + if (access_mode & ADIO_CREATE ){ + if(rank == fd->hints->ranklist[0]) { + /* remove delete_on_close flag if set */ + if (access_mode & ADIO_DELETE_ON_CLOSE) + fd->access_mode = access_mode ^ ADIO_DELETE_ON_CLOSE; + else + fd->access_mode = access_mode; + + tmp_comm = fd->comm; + fd->comm = MPI_COMM_SELF; + (*(fd->fns->ADIOI_xxx_Open))(fd, error_code); + fd->comm = tmp_comm; + MPI_Bcast(error_code, 1, MPI_INT, \ + fd->hints->ranklist[0], fd->comm); + /* if no error, close the file and reopen normally below */ + if (*error_code == MPI_SUCCESS) + (*(fd->fns->ADIOI_xxx_Close))(fd, error_code); + + fd->access_mode = access_mode; /* back to original */ + } + else MPI_Bcast(error_code, 1, MPI_INT, fd->hints->ranklist[0], fd->comm); + + if (*error_code != MPI_SUCCESS) { + return; + } + else { + /* turn off CREAT (and EXCL if set) for real multi-processor open */ + access_mode ^= ADIO_CREATE; + if (access_mode & ADIO_EXCL) + access_mode ^= ADIO_EXCL; + } + } + + /* if we are doing deferred open, non-aggregators should return now */ + if (fd->hints->deferred_open ) { + if (fd->agg_comm == MPI_COMM_NULL) { + /* we might have turned off EXCL for the aggregators. + * restore access_mode that non-aggregators get the right + * value from get_amode */ + fd->access_mode = orig_amode_excl; + *error_code = MPI_SUCCESS; + return; + } + } + +/* For writing with data sieving, a read-modify-write is needed. If + the file is opened for write_only, the read will fail. Therefore, + if write_only, open the file as read_write, but record it as write_only + in fd, so that get_amode returns the right answer. 
*/ + + orig_amode_wronly = access_mode; + if (access_mode & ADIO_WRONLY) { + access_mode = access_mode ^ ADIO_WRONLY; + access_mode = access_mode | ADIO_RDWR; + } + fd->access_mode = access_mode; + + (*(fd->fns->ADIOI_xxx_Open))(fd, error_code); + + /* if error, may be it was due to the change in amode above. + therefore, reopen with access mode provided by the user.*/ + fd->access_mode = orig_amode_wronly; + if (*error_code != MPI_SUCCESS) + (*(fd->fns->ADIOI_xxx_Open))(fd, error_code); + + /* if we turned off EXCL earlier, then we should turn it back on */ + if (fd->access_mode != orig_amode_excl) fd->access_mode = orig_amode_excl; + + /* for deferred open: this process has opened the file (because if we are + * not an aggregaor and we are doing deferred open, we returned earlier)*/ + fd->is_open = 1; + +} + +/* + * vim: ts=8 sts=4 sw=4 noexpandtab + */ diff --git a/ompi/mca/io/romio/romio/adio/common/ad_opencoll_failsafe.c b/ompi/mca/io/romio/romio/adio/common/ad_opencoll_failsafe.c new file mode 100644 index 0000000000..4dcdfc729d --- /dev/null +++ b/ompi/mca/io/romio/romio/adio/common/ad_opencoll_failsafe.c @@ -0,0 +1,89 @@ +/* -*- Mode: C; c-basic-offset:4 ; -*- */ +/* + * + * Copyright (C) 2007 UChicago/Argonne LLC + * See COPYRIGHT notice in top-level directory. + */ + +#include "adio.h" + + +/* this "collective" open is useful for frankly broken file systems such + * as NFS where a create from one client might not be immediately + * visible on another */ + +void ADIOI_FAILSAFE_OpenColl(ADIO_File fd, int rank, + int access_mode, int *error_code) +{ + int orig_amode_excl, orig_amode_wronly; + + orig_amode_excl = access_mode; + if ((access_mode & ADIO_CREATE) && (access_mode & ADIO_EXCL)) { + /* the open should fail if the file exists. Only *1* process + * should check this. Otherwise, if all processes try to check + * and the file does not exist, one process will create the file + * and others who reach later will return error. 
*/ + if(rank == fd->hints->ranklist[0]) { + fd->access_mode = access_mode; + (*(fd->fns->ADIOI_xxx_Open))(fd, error_code); + MPI_Bcast(error_code, 1, MPI_INT, \ + fd->hints->ranklist[0], fd->comm); + /* if no error, close the file and reopen normally below */ + if (*error_code == MPI_SUCCESS) + (*(fd->fns->ADIOI_xxx_Close))(fd, error_code); + } + else MPI_Bcast(error_code, 1, MPI_INT, + fd->hints->ranklist[0], fd->comm); + if (*error_code != MPI_SUCCESS) { + return; + } + else { + /* turn off EXCL for real open */ + access_mode = access_mode ^ ADIO_EXCL; + } + } + /* if we are doing deferred open, non-aggregators should return now */ + if (fd->hints->deferred_open ) { + if (fd->agg_comm == MPI_COMM_NULL) { + /* we might have turned off EXCL for the aggregators. + * restore access_mode that non-aggregators get the right + * value from get_amode */ + fd->access_mode = orig_amode_excl; + *error_code = MPI_SUCCESS; + return; + } + } + +/* For writing with data sieving, a read-modify-write is needed. If + the file is opened for write_only, the read will fail. Therefore, + if write_only, open the file as read_write, but record it as write_only + in fd, so that get_amode returns the right answer. */ + + orig_amode_wronly = access_mode; + if (access_mode & ADIO_WRONLY) { + access_mode = access_mode ^ ADIO_WRONLY; + access_mode = access_mode | ADIO_RDWR; + } + fd->access_mode = access_mode; + + (*(fd->fns->ADIOI_xxx_Open))(fd, error_code); + + /* if error, may be it was due to the change in amode above. 
+ therefore, reopen with access mode provided by the user.*/ + fd->access_mode = orig_amode_wronly; + if (*error_code != MPI_SUCCESS) + (*(fd->fns->ADIOI_xxx_Open))(fd, error_code); + + /* if we turned off EXCL earlier, then we should turn it back on */ + if (fd->access_mode != orig_amode_excl) fd->access_mode = orig_amode_excl; + + /* for deferred open: this process has opened the file (because if we are + * not an aggregaor and we are doing deferred open, we returned earlier)*/ + fd->is_open = 1; +} + + + +/* + * vim: ts=8 sts=4 sw=4 noexpandtab + */ diff --git a/ompi/mca/io/romio/romio/adio/common/ad_opencoll_scalable.c b/ompi/mca/io/romio/romio/adio/common/ad_opencoll_scalable.c new file mode 100644 index 0000000000..eee8520c6d --- /dev/null +++ b/ompi/mca/io/romio/romio/adio/common/ad_opencoll_scalable.c @@ -0,0 +1,57 @@ +/* -*- Mode: C; c-basic-offset:4 ; -*- */ +/* + * + * Copyright (C) 2007 UChicago/Argonne LLC + * See COPYRIGHT notice in top-level directory. + */ + +#include "adio.h" + +/* + * Scalable open: for file systems capable of having one process + * create/open a file and broadcast the result to everyone else. + * - Does not need one process to create the file + * - Does not need special handling for CREATE|EXCL + */ +void ADIOI_SCALEABLE_OpenColl(ADIO_File fd, int rank, + int access_mode, int *error_code) +{ + int orig_amode_wronly; + + /* if we are doing deferred open, non-aggregators should return now */ + if (fd->hints->deferred_open ) { + if (fd->agg_comm == MPI_COMM_NULL) { + *error_code = MPI_SUCCESS; + return; + } + } + + /* For writing with data sieving, a read-modify-write is needed. If + the file is opened for write_only, the read will fail. Therefore, + if write_only, open the file as read_write, but record it as + write_only in fd, so that get_amode returns the right answer. 
*/ + + orig_amode_wronly = access_mode; + if (access_mode & ADIO_WRONLY) { + access_mode = access_mode ^ ADIO_WRONLY; + access_mode = access_mode | ADIO_RDWR; + } + fd->access_mode = access_mode; + + (*(fd->fns->ADIOI_xxx_Open))(fd, error_code); + + /* if error, may be it was due to the change in amode above. + therefore, reopen with access mode provided by the user.*/ + fd->access_mode = orig_amode_wronly; + if (*error_code != MPI_SUCCESS) + (*(fd->fns->ADIOI_xxx_Open))(fd, error_code); + + /* for deferred open: this process has opened the file (because if we are + * not an aggregaor and we are doing deferred open, we returned earlier)*/ + fd->is_open = 1; + +} + +/* + * vim: ts=8 sts=4 sw=4 noexpandtab + */ diff --git a/ompi/mca/io/romio/romio/adio/common/ad_prealloc.c b/ompi/mca/io/romio/romio/adio/common/ad_prealloc.c index 4b44ccb3c3..cd18dbb283 100644 --- a/ompi/mca/io/romio/romio/adio/common/ad_prealloc.c +++ b/ompi/mca/io/romio/romio/adio/common/ad_prealloc.c @@ -47,7 +47,10 @@ void ADIOI_GEN_Prealloc(ADIO_File fd, ADIO_Offset diskspace, int *error_code) for (i=0; i #endif +#ifdef AGGREGATION_PROFILE +#include "mpe.h" +#endif void ADIOI_GEN_ReadContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int *error_code) { - int err = -1, datatype_size, len; + int err = -1, datatype_size; + ADIO_Offset len; static char myname[] = "ADIOI_GEN_READCONTIG"; +#ifdef AGGREGATION_PROFILE + MPE_Log_event (5034, 0, NULL); +#endif MPI_Type_size(datatype, &datatype_size); - len = datatype_size * count; + len = (ADIO_Offset)datatype_size * (ADIO_Offset)count; + ADIOI_Assert(len == (unsigned int) len); /* read takes an unsigned int parm */ if (file_ptr_type == ADIO_INDIVIDUAL) { offset = fd->fp_ind; @@ -50,7 +58,7 @@ void ADIOI_GEN_ReadContig(ADIO_File fd, void *buf, int count, #ifdef ADIOI_MPE_LOGGING MPE_Log_event( ADIOI_MPE_read_a, 0, NULL ); #endif - err = read(fd->fd_sys, buf, len); + err = 
read(fd->fd_sys, buf, (unsigned int)len); #ifdef ADIOI_MPE_LOGGING MPE_Log_event( ADIOI_MPE_read_b, 0, NULL ); #endif @@ -77,4 +85,7 @@ void ADIOI_GEN_ReadContig(ADIO_File fd, void *buf, int count, #endif *error_code = MPI_SUCCESS; +#ifdef AGGREGATION_PROFILE + MPE_Log_event (5035, 0, NULL); +#endif } diff --git a/ompi/mca/io/romio/romio/adio/common/ad_read_coll.c b/ompi/mca/io/romio/romio/adio/common/ad_read_coll.c index 303fb53ba5..ddbcd9157d 100644 --- a/ompi/mca/io/romio/romio/adio/common/ad_read_coll.c +++ b/ompi/mca/io/romio/romio/adio/common/ad_read_coll.c @@ -8,18 +8,25 @@ #include "adio.h" #include "adio_extern.h" +#ifdef USE_DBG_LOGGING + #define RDCOLL_DEBUG 1 +#endif +#ifdef AGGREGATION_PROFILE +#include "mpe.h" +#endif + /* prototypes of functions used for collective reads only. */ static void ADIOI_Read_and_exch(ADIO_File fd, void *buf, MPI_Datatype datatype, int nprocs, int myrank, ADIOI_Access *others_req, ADIO_Offset *offset_list, - int *len_list, int contig_access_count, + ADIO_Offset *len_list, int contig_access_count, ADIO_Offset min_st_offset, ADIO_Offset fd_size, ADIO_Offset *fd_start, ADIO_Offset *fd_end, int *buf_idx, int *error_code); static void ADIOI_R_Exchange_data(ADIO_File fd, void *buf, ADIOI_Flatlist_node - *flat_buf, ADIO_Offset *offset_list, int + *flat_buf, ADIO_Offset *offset_list, ADIO_Offset *len_list, int *send_size, int *recv_size, int *count, int *start_pos, int *partial_send, @@ -34,8 +41,8 @@ static void ADIOI_R_Exchange_data(ADIO_File fd, void *buf, ADIOI_Flatlist_node MPI_Aint buftype_extent, int *buf_idx); static void ADIOI_Fill_user_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node *flat_buf, char **recv_buf, ADIO_Offset - *offset_list, int *len_list, - int *recv_size, + *offset_list, ADIO_Offset *len_list, + unsigned *recv_size, MPI_Request *requests, MPI_Status *statuses, int *recd_from_proc, int nprocs, int contig_access_count, @@ -70,12 +77,20 @@ void ADIOI_GEN_ReadStridedColl(ADIO_File fd, void *buf, int count, 
ADIO_Offset start_offset, end_offset, orig_fp, fd_size, min_st_offset, off; ADIO_Offset *offset_list = NULL, *st_offsets = NULL, *fd_start = NULL, *fd_end = NULL, *end_offsets = NULL; - int *len_list = NULL, *buf_idx = NULL; + ADIO_Offset *len_list = NULL; + int *buf_idx = NULL; #ifdef HAVE_STATUS_SET_BYTES int bufsize, size; #endif + if (fd->hints->cb_pfr != ADIOI_HINT_DISABLE) { + ADIOI_IOStridedColl (fd, buf, count, ADIOI_READ, datatype, + file_ptr_type, offset, status, error_code); + return; + } + + MPI_Comm_size(fd->comm, &nprocs); MPI_Comm_rank(fd->comm, &myrank); @@ -85,24 +100,26 @@ void ADIOI_GEN_ReadStridedColl(ADIO_File fd, void *buf, int count, /* only check for interleaving if cb_read isn't disabled */ if (fd->hints->cb_read != ADIOI_HINT_DISABLE) { - /* For this process's request, calculate the list of offsets and - lengths in the file and determine the start and end offsets. */ + /* For this process's request, calculate the list of offsets and + lengths in the file and determine the start and end offsets. */ - /* Note: end_offset points to the last byte-offset that will be accessed. - e.g., if start_offset=0 and 100 bytes to be read, end_offset=99*/ + /* Note: end_offset points to the last byte-offset that will be accessed. + e.g., if start_offset=0 and 100 bytes to be read, end_offset=99*/ ADIOI_Calc_my_off_len(fd, count, datatype, file_ptr_type, offset, &offset_list, &len_list, &start_offset, &end_offset, &contig_access_count); - /* for (i=0; ihints->min_fdomain_size, &fd_size, + fd->hints->striping_unit); /* calculate where the portions of the access requests of this process * are located in terms of the file domains. 
this could be on the same @@ -257,20 +276,30 @@ void ADIOI_GEN_ReadStridedColl(ADIO_File fd, void *buf, int count, void ADIOI_Calc_my_off_len(ADIO_File fd, int bufcount, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset - offset, ADIO_Offset **offset_list_ptr, int + offset, ADIO_Offset **offset_list_ptr, ADIO_Offset **len_list_ptr, ADIO_Offset *start_offset_ptr, ADIO_Offset *end_offset_ptr, int *contig_access_count_ptr) { - int filetype_size, buftype_size, etype_size; - int i, j, k, frd_size=0, old_frd_size=0, st_index=0; - int n_filetypes, etype_in_filetype; + int filetype_size, etype_size; + unsigned buftype_size; + int i, j, k; + ADIO_Offset i_offset; + ADIO_Offset frd_size=0, old_frd_size=0; + int st_index=0; + ADIO_Offset n_filetypes, etype_in_filetype; ADIO_Offset abs_off_in_filetype=0; - int bufsize, sum, n_etypes_in_filetype, size_in_filetype; - int contig_access_count, *len_list, flag, filetype_is_contig; + ADIO_Offset bufsize; + ADIO_Offset sum, n_etypes_in_filetype, size_in_filetype; + int contig_access_count, filetype_is_contig; + ADIO_Offset *len_list; MPI_Aint filetype_extent, filetype_lb; ADIOI_Flatlist_node *flat_file; ADIO_Offset *offset_list, off, end_offset=0, disp; + +#ifdef AGGREGATION_PROFILE + MPE_Log_event (5028, 0, NULL); +#endif /* For this process's request, calculate the list of offsets and lengths in the file and determine the start and end offsets. */ @@ -280,19 +309,19 @@ void ADIOI_Calc_my_off_len(ADIO_File fd, int bufcount, MPI_Datatype MPI_Type_size(fd->filetype, &filetype_size); MPI_Type_extent(fd->filetype, &filetype_extent); MPI_Type_lb(fd->filetype, &filetype_lb); - MPI_Type_size(datatype, &buftype_size); + MPI_Type_size(datatype, (int*)&buftype_size); etype_size = fd->etype_size; if ( ! 
filetype_size ) { *contig_access_count_ptr = 0; *offset_list_ptr = (ADIO_Offset *) ADIOI_Malloc(2*sizeof(ADIO_Offset)); - *len_list_ptr = (int *) ADIOI_Malloc(2*sizeof(int)); + *len_list_ptr = (ADIO_Offset *) ADIOI_Malloc(2*sizeof(ADIO_Offset)); /* 2 is for consistency. everywhere I malloc one more than needed */ offset_list = *offset_list_ptr; len_list = *len_list_ptr; offset_list[0] = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind : - fd->disp + etype_size * offset; + fd->disp + (ADIO_Offset)etype_size * offset; len_list[0] = 0; *start_offset_ptr = offset_list[0]; *end_offset_ptr = offset_list[0] + len_list[0] - 1; @@ -303,14 +332,14 @@ void ADIOI_Calc_my_off_len(ADIO_File fd, int bufcount, MPI_Datatype if (filetype_is_contig) { *contig_access_count_ptr = 1; *offset_list_ptr = (ADIO_Offset *) ADIOI_Malloc(2*sizeof(ADIO_Offset)); - *len_list_ptr = (int *) ADIOI_Malloc(2*sizeof(int)); + *len_list_ptr = (ADIO_Offset *) ADIOI_Malloc(2*sizeof(ADIO_Offset)); /* 2 is for consistency. everywhere I malloc one more than needed */ offset_list = *offset_list_ptr; len_list = *len_list_ptr; offset_list[0] = (file_ptr_type == ADIO_INDIVIDUAL) ? 
fd->fp_ind : - fd->disp + etype_size * offset; - len_list[0] = bufcount * buftype_size; + fd->disp + (ADIO_Offset)etype_size * offset; + len_list[0] = (ADIO_Offset)bufcount * (ADIO_Offset)buftype_size; *start_offset_ptr = offset_list[0]; *end_offset_ptr = offset_list[0] + len_list[0] - 1; @@ -327,31 +356,47 @@ void ADIOI_Calc_my_off_len(ADIO_File fd, int bufcount, MPI_Datatype while (flat_file->type != fd->filetype) flat_file = flat_file->next; disp = fd->disp; +#ifdef RDCOLL_DEBUG + { + int ii; + DBG_FPRINTF(stderr, "flattened %3d : ", flat_file->count ); + for (ii=0; iicount; ii++) { + DBG_FPRINTF(stderr, "%16qd:%-16qd", flat_file->indices[ii], flat_file->blocklens[ii] ); + } + DBG_FPRINTF(stderr, "\n" ); + } +#endif if (file_ptr_type == ADIO_INDIVIDUAL) { - offset = fd->fp_ind; /* in bytes */ - n_filetypes = -1; - flag = 0; - while (!flag) { - n_filetypes++; - for (i=0; icount; i++) { - if (disp + flat_file->indices[i] + - (ADIO_Offset) n_filetypes*filetype_extent + - flat_file->blocklens[i] >= offset) - { - st_index = i; - frd_size = (int) (disp + flat_file->indices[i] + - (ADIO_Offset) n_filetypes*filetype_extent - + flat_file->blocklens[i] - offset); - flag = 1; + /* Wei-keng reworked type processing to be a bit more efficient */ + offset = fd->fp_ind - disp; + n_filetypes = (offset - flat_file->indices[0]) / filetype_extent; + offset -= (ADIO_Offset)n_filetypes * filetype_extent; + /* now offset is local to this extent */ + + /* find the block where offset is located, skip blocklens[i]==0 */ + for (i=0; icount; i++) { + ADIO_Offset dist; + if (flat_file->blocklens[i] == 0) continue; + dist = flat_file->indices[i] + flat_file->blocklens[i] - offset; + /* frd_size is from offset to the end of block i */ + if (dist == 0) { + i++; + offset = flat_file->indices[i]; + frd_size = flat_file->blocklens[i]; break; - } + } + if (dist > 0) { + frd_size = dist; + break; } } - } + st_index = i; /* starting index in flat_file->indices[] */ + offset += disp + 
(ADIO_Offset)n_filetypes*filetype_extent; + } else { n_etypes_in_filetype = filetype_size/etype_size; - n_filetypes = (int) (offset / n_etypes_in_filetype); - etype_in_filetype = (int) (offset % n_etypes_in_filetype); + n_filetypes = offset / n_etypes_in_filetype; + etype_in_filetype = offset % n_etypes_in_filetype; size_in_filetype = etype_in_filetype * etype_size; sum = 0; @@ -367,29 +412,29 @@ void ADIOI_Calc_my_off_len(ADIO_File fd, int bufcount, MPI_Datatype } /* abs. offset in bytes in the file */ - offset = disp + (ADIO_Offset) n_filetypes*filetype_extent + + offset = disp + n_filetypes* (ADIO_Offset)filetype_extent + abs_off_in_filetype; } /* calculate how much space to allocate for offset_list, len_list */ old_frd_size = frd_size; - contig_access_count = i = 0; + contig_access_count = i_offset = 0; j = st_index; - bufsize = buftype_size * bufcount; + bufsize = (ADIO_Offset)buftype_size * (ADIO_Offset)bufcount; frd_size = ADIOI_MIN(frd_size, bufsize); - while (i < bufsize) { + while (i_offset < bufsize) { if (frd_size) contig_access_count++; - i += frd_size; + i_offset += frd_size; j = (j + 1) % flat_file->count; - frd_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i); + frd_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i_offset); } /* allocate space for offset_list and len_list */ *offset_list_ptr = (ADIO_Offset *) ADIOI_Malloc((contig_access_count+1)*sizeof(ADIO_Offset)); - *len_list_ptr = (int *) ADIOI_Malloc((contig_access_count+1)*sizeof(int)); + *len_list_ptr = (ADIO_Offset *) ADIOI_Malloc((contig_access_count+1)*sizeof(ADIO_Offset)); /* +1 to avoid a 0-size malloc */ offset_list = *offset_list_ptr; @@ -399,17 +444,17 @@ void ADIOI_Calc_my_off_len(ADIO_File fd, int bufcount, MPI_Datatype *start_offset_ptr = offset; /* calculated above */ - i = k = 0; + i_offset = k = 0; j = st_index; off = offset; frd_size = ADIOI_MIN(old_frd_size, bufsize); - while (i < bufsize) { + while (i_offset < bufsize) { if (frd_size) { offset_list[k] = off; len_list[k] 
= frd_size; k++; } - i += frd_size; + i_offset += frd_size; end_offset = off + frd_size - 1; /* Note: end_offset points to the last byte-offset that will be accessed. @@ -417,7 +462,7 @@ void ADIOI_Calc_my_off_len(ADIO_File fd, int bufcount, MPI_Datatype if (off + frd_size < disp + flat_file->indices[j] + flat_file->blocklens[j] + - (ADIO_Offset) n_filetypes*filetype_extent) + n_filetypes* (ADIO_Offset)filetype_extent) { off += frd_size; /* did not reach end of contiguous block in filetype. @@ -425,17 +470,17 @@ void ADIOI_Calc_my_off_len(ADIO_File fd, int bufcount, MPI_Datatype */ } else { - if (j < (flat_file->count - 1)) j++; - else { - /* hit end of flattened filetype; - * start at beginning again - */ - j = 0; - n_filetypes++; + j = (j+1) % flat_file->count; + n_filetypes += (j == 0) ? 1 : 0; + while (flat_file->blocklens[j]==0) { + j = (j+1) % flat_file->count; + n_filetypes += (j == 0) ? 1 : 0; + /* hit end of flattened filetype; start at beginning + * again */ } off = disp + flat_file->indices[j] + - (ADIO_Offset) n_filetypes*filetype_extent; - frd_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i); + n_filetypes* (ADIO_Offset)filetype_extent; + frd_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i_offset); } } @@ -443,15 +488,18 @@ void ADIOI_Calc_my_off_len(ADIO_File fd, int bufcount, MPI_Datatype if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind = off; *contig_access_count_ptr = contig_access_count; - *end_offset_ptr = end_offset; + *end_offset_ptr = end_offset; } +#ifdef AGGREGATION_PROFILE + MPE_Log_event (5029, 0, NULL); +#endif } static void ADIOI_Read_and_exch(ADIO_File fd, void *buf, MPI_Datatype datatype, int nprocs, int myrank, ADIOI_Access *others_req, ADIO_Offset *offset_list, - int *len_list, int contig_access_count, ADIO_Offset + ADIO_Offset *len_list, int contig_access_count, ADIO_Offset min_st_offset, ADIO_Offset fd_size, ADIO_Offset *fd_start, ADIO_Offset *fd_end, int *buf_idx, int *error_code) @@ -466,12 +514,14 @@ static void 
ADIOI_Read_and_exch(ADIO_File fd, void *buf, MPI_Datatype array from a file, where each local array is 8Mbytes, requiring at least another 8Mbytes of temp space is unacceptable. */ - int i, j, m, size, ntimes, max_ntimes, buftype_is_contig; + int i, j, m, ntimes, max_ntimes, buftype_is_contig; ADIO_Offset st_loc=-1, end_loc=-1, off, done, real_off, req_off; char *read_buf = NULL, *tmp_buf; int *curr_offlen_ptr, *count, *send_size, *recv_size; - int *partial_send, *recd_from_proc, *start_pos, for_next_iter; - int real_size, req_len, flag, for_curr_iter, rank; + int *partial_send, *recd_from_proc, *start_pos; + /* Not convinced end_loc-st_loc couldn't be > int, so make these offsets*/ + ADIO_Offset real_size, size, for_curr_iter, for_next_iter; + int req_len, flag, rank; MPI_Status status; ADIOI_Flatlist_node *flat_buf=NULL; MPI_Aint buftype_extent; @@ -601,7 +651,7 @@ static void ADIOI_Read_and_exch(ADIO_File fd, void *buf, MPI_Datatype minus what was satisfied in previous iteration req_size = size corresponding to req_off */ - size = (int) (ADIOI_MIN(coll_bufsize, end_loc-st_loc+1-done)); + size = ADIOI_MIN((unsigned)coll_bufsize, end_loc-st_loc+1-done); real_off = off - for_curr_iter; real_size = size + for_curr_iter; @@ -609,7 +659,9 @@ static void ADIOI_Read_and_exch(ADIO_File fd, void *buf, MPI_Datatype for_next_iter = 0; for (i=0; icomm, requests+j); j++; - /* FPRINTF(stderr, "node %d, recv_size %d, tag %d \n", - myrank, recv_size[i], myrank+i+100*iter); */ +#ifdef RDCOLL_DEBUG + DBG_FPRINTF(stderr, "node %d, recv_size %d, tag %d \n", + myrank, recv_size[i], myrank+i+100*iter); +#endif } } @@ -822,7 +883,7 @@ static void ADIOI_R_Exchange_data(ADIO_File fd, void *buf, ADIOI_Flatlist_node /* if noncontiguous, to the copies from the recv buffers */ if (!buftype_is_contig) ADIOI_Fill_user_buffer(fd, buf, flat_buf, recv_buf, - offset_list, len_list, recv_size, + offset_list, len_list, (unsigned*)recv_size, requests, statuses, recd_from_proc, nprocs, 
contig_access_count, min_st_offset, fd_size, fd_start, fd_end, @@ -840,9 +901,11 @@ static void ADIOI_R_Exchange_data(ADIO_File fd, void *buf, ADIOI_Flatlist_node if (recv_size[i]) ADIOI_Free(recv_buf[i]); ADIOI_Free(recv_buf); } +#ifdef AGGREGATION_PROFILE + MPE_Log_event (5033, 0, NULL); +#endif } - #define ADIOI_BUF_INCR \ { \ while (buf_incr) { \ @@ -856,7 +919,7 @@ static void ADIOI_R_Exchange_data(ADIO_File fd, void *buf, ADIOI_Flatlist_node n_buftypes++; \ } \ user_buf_idx = flat_buf->indices[flat_buf_idx] + \ - n_buftypes*buftype_extent; \ + (ADIO_Offset)n_buftypes*(ADIO_Offset)buftype_extent; \ flat_buf_sz = flat_buf->blocklens[flat_buf_idx]; \ } \ buf_incr -= size_in_buf; \ @@ -868,9 +931,11 @@ static void ADIOI_R_Exchange_data(ADIO_File fd, void *buf, ADIOI_Flatlist_node { \ while (size) { \ size_in_buf = ADIOI_MIN(size, flat_buf_sz); \ + ADIOI_Assert((((ADIO_Offset)(MPIR_Upint)buf) + user_buf_idx) == (ADIO_Offset)(MPIR_Upint)((MPIR_Upint)buf + user_buf_idx)); \ + ADIOI_Assert(size_in_buf == (size_t)size_in_buf); \ memcpy(((char *) buf) + user_buf_idx, \ &(recv_buf[p][recv_buf_idx[p]]), size_in_buf); \ - recv_buf_idx[p] += size_in_buf; \ + recv_buf_idx[p] += size_in_buf; /* already tested (size_t)size_in_buf*/ \ user_buf_idx += size_in_buf; \ flat_buf_sz -= size_in_buf; \ if (!flat_buf_sz) { \ @@ -880,7 +945,7 @@ static void ADIOI_R_Exchange_data(ADIO_File fd, void *buf, ADIOI_Flatlist_node n_buftypes++; \ } \ user_buf_idx = flat_buf->indices[flat_buf_idx] + \ - n_buftypes*buftype_extent; \ + (ADIO_Offset)n_buftypes*(ADIO_Offset)buftype_extent; \ flat_buf_sz = flat_buf->blocklens[flat_buf_idx]; \ } \ size -= size_in_buf; \ @@ -889,11 +954,10 @@ static void ADIOI_R_Exchange_data(ADIO_File fd, void *buf, ADIOI_Flatlist_node ADIOI_BUF_INCR \ } - static void ADIOI_Fill_user_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node *flat_buf, char **recv_buf, ADIO_Offset - *offset_list, int *len_list, - int *recv_size, + *offset_list, ADIO_Offset *len_list, + 
unsigned *recv_size, MPI_Request *requests, MPI_Status *statuses, int *recd_from_proc, int nprocs, int contig_access_count, @@ -902,12 +966,15 @@ static void ADIOI_Fill_user_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node ADIO_Offset *fd_end, MPI_Aint buftype_extent) { + /* this function is only called if buftype is not contig */ - int i, p, flat_buf_idx, size, buf_incr; - int flat_buf_sz, size_in_buf, n_buftypes; + int i, p, flat_buf_idx; + ADIO_Offset flat_buf_sz, size_in_buf, buf_incr, size; + int n_buftypes; ADIO_Offset off, len, rem_len, user_buf_idx; - int *curr_from_proc, *done_from_proc, *recv_buf_idx; + /* Not sure unsigned is necessary, but it makes the math safer */ + unsigned *curr_from_proc, *done_from_proc, *recv_buf_idx; ADIOI_UNREFERENCED_ARG(requests); ADIOI_UNREFERENCED_ARG(statuses); @@ -918,9 +985,9 @@ static void ADIOI_Fill_user_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node filled into user buffer in previous iterations user_buf_idx = current location in user buffer recv_buf_idx[p] = current location in recv_buf of proc. 
p */ - curr_from_proc = (int *) ADIOI_Malloc(nprocs * sizeof(int)); - done_from_proc = (int *) ADIOI_Malloc(nprocs * sizeof(int)); - recv_buf_idx = (int *) ADIOI_Malloc(nprocs * sizeof(int)); + curr_from_proc = (unsigned *) ADIOI_Malloc(nprocs * sizeof(unsigned)); + done_from_proc = (unsigned *) ADIOI_Malloc(nprocs * sizeof(unsigned)); + recv_buf_idx = (unsigned *) ADIOI_Malloc(nprocs * sizeof(unsigned)); for (i=0; i < nprocs; i++) { recv_buf_idx[i] = curr_from_proc[i] = 0; @@ -938,7 +1005,7 @@ static void ADIOI_Fill_user_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node for (i=0; i done_from_proc[p]) { if (done_from_proc[p] > curr_from_proc[p]) { - size = (int)ADIOI_MIN(curr_from_proc[p] + len - + size = ADIOI_MIN(curr_from_proc[p] + len - done_from_proc[p], recv_size[p]-recv_buf_idx[p]); buf_incr = done_from_proc[p] - curr_from_proc[p]; ADIOI_BUF_INCR - buf_incr = (int)(curr_from_proc[p]+len-done_from_proc[p]); + buf_incr = curr_from_proc[p]+len-done_from_proc[p]; + ADIOI_Assert((done_from_proc[p] + size) == (unsigned)((ADIO_Offset)done_from_proc[p] + size)); curr_from_proc[p] = done_from_proc[p] + size; ADIOI_BUF_COPY } else { - size = (int)ADIOI_MIN(len,recv_size[p]-recv_buf_idx[p]); - buf_incr = (int)len; - curr_from_proc[p] += size; + size = ADIOI_MIN(len,recv_size[p]-recv_buf_idx[p]); + buf_incr = len; + ADIOI_Assert((curr_from_proc[p] + size) == (unsigned)((ADIO_Offset)curr_from_proc[p] + size)); + curr_from_proc[p] += (unsigned) size; ADIOI_BUF_COPY } } else { - curr_from_proc[p] += (int)len; - buf_incr = (int)len; + ADIOI_Assert((curr_from_proc[p] + len) == (unsigned)((ADIO_Offset)curr_from_proc[p] + len)); + curr_from_proc[p] += (unsigned) len; + buf_incr = len; ADIOI_BUF_INCR } } else { - buf_incr = (int)len; + buf_incr = len; ADIOI_BUF_INCR } off += len; diff --git a/ompi/mca/io/romio/romio/adio/common/ad_read_str.c b/ompi/mca/io/romio/romio/adio/common/ad_read_str.c index a387ab970a..61d3c73abe 100644 --- 
a/ompi/mca/io/romio/romio/adio/common/ad_read_str.c +++ b/ompi/mca/io/romio/romio/adio/common/ad_read_str.c @@ -12,12 +12,13 @@ { \ if (req_off >= readbuf_off + readbuf_len) { \ readbuf_off = req_off; \ - readbuf_len = (int) (ADIOI_MIN(max_bufsize, end_offset-readbuf_off+1));\ + readbuf_len = (unsigned) (ADIOI_MIN(max_bufsize, end_offset-readbuf_off+1));\ ADIO_ReadContig(fd, readbuf, readbuf_len, MPI_BYTE, \ ADIO_EXPLICIT_OFFSET, readbuf_off, &status1, error_code); \ if (*error_code != MPI_SUCCESS) return; \ } \ while (req_len > readbuf_off + readbuf_len - req_off) { \ + ADIOI_Assert((readbuf_off + readbuf_len - req_off) == (int) (readbuf_off + readbuf_len - req_off));\ partial_read = (int) (readbuf_off + readbuf_len - req_off); \ tmp_buf = (char *) ADIOI_Malloc(partial_read); \ memcpy(tmp_buf, readbuf+readbuf_len-partial_read, partial_read); \ @@ -26,13 +27,14 @@ memcpy(readbuf, tmp_buf, partial_read); \ ADIOI_Free(tmp_buf); \ readbuf_off += readbuf_len-partial_read; \ - readbuf_len = (int) (partial_read + ADIOI_MIN(max_bufsize, \ + readbuf_len = (unsigned) (partial_read + ADIOI_MIN(max_bufsize, \ end_offset-readbuf_off+1)); \ ADIO_ReadContig(fd, readbuf+partial_read, readbuf_len-partial_read, \ MPI_BYTE, ADIO_EXPLICIT_OFFSET, readbuf_off+partial_read, \ &status1, error_code); \ if (*error_code != MPI_SUCCESS) return; \ } \ + ADIOI_Assert(req_len == (size_t)req_len); \ memcpy((char *)buf + userbuf_off, readbuf+req_off-readbuf_off, req_len); \ } @@ -42,21 +44,25 @@ void ADIOI_GEN_ReadStrided(ADIO_File fd, void *buf, int count, ADIO_Offset offset, ADIO_Status *status, int *error_code) { + + /* offset is in units of etype relative to the filetype. 
*/ ADIOI_Flatlist_node *flat_buf, *flat_file; - int i, j, k, brd_size, frd_size=0, st_index=0; - int bufsize, num, size, sum, n_etypes_in_filetype, size_in_filetype; - int n_filetypes, etype_in_filetype; - ADIO_Offset abs_off_in_filetype=0; - int filetype_size, etype_size, buftype_size, req_len, partial_read; + ADIO_Offset i_offset, new_brd_size, brd_size, size; + int i, j, k, st_index=0; + unsigned num, bufsize; + int n_etypes_in_filetype; + ADIO_Offset n_filetypes, etype_in_filetype, st_n_filetypes, size_in_filetype; + ADIO_Offset abs_off_in_filetype=0, new_frd_size, frd_size=0, st_frd_size; + int filetype_size, etype_size, buftype_size, partial_read; MPI_Aint filetype_extent, buftype_extent; int buf_count, buftype_is_contig, filetype_is_contig; - ADIO_Offset userbuf_off; + ADIO_Offset userbuf_off, req_len, sum; ADIO_Offset off, req_off, disp, end_offset=0, readbuf_off, start_off; char *readbuf, *tmp_buf, *value; - int flag, st_frd_size, st_n_filetypes, readbuf_len; - int new_brd_size, new_frd_size, info_flag, max_bufsize; + int info_flag; + unsigned max_bufsize, readbuf_len; ADIO_Status status1; if (fd->hints->ds_read == ADIOI_HINT_DISABLE) { @@ -90,12 +96,13 @@ void ADIOI_GEN_ReadStrided(ADIO_File fd, void *buf, int count, MPI_Type_extent(datatype, &buftype_extent); etype_size = fd->etype_size; + ADIOI_Assert((buftype_size * count) == ((ADIO_Offset)(unsigned)buftype_size * (ADIO_Offset)count)); bufsize = buftype_size * count; /* get max_bufsize from the info object. */ value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char)); - MPI_Info_get(fd->info, "ind_rd_buffer_size", MPI_MAX_INFO_VAL, value, + ADIOI_Info_get(fd->info, "ind_rd_buffer_size", MPI_MAX_INFO_VAL, value, &info_flag); max_bufsize = atoi(value); ADIOI_Free(value); @@ -110,16 +117,16 @@ void ADIOI_GEN_ReadStrided(ADIO_File fd, void *buf, int count, while (flat_buf->type != datatype) flat_buf = flat_buf->next; off = (file_ptr_type == ADIO_INDIVIDUAL) ? 
fd->fp_ind : - fd->disp + etype_size * offset; + fd->disp + (ADIO_Offset)etype_size * offset; start_off = off; end_offset = off + bufsize - 1; readbuf_off = off; readbuf = (char *) ADIOI_Malloc(max_bufsize); - readbuf_len = (int) (ADIOI_MIN(max_bufsize, end_offset-readbuf_off+1)); + readbuf_len = (unsigned) (ADIOI_MIN(max_bufsize, end_offset-readbuf_off+1)); /* if atomicity is true, lock (exclusive) the region to be accessed */ - if ((fd->atomicity) && (fd->file_system != ADIO_PIOFS) && (fd->file_system != ADIO_PVFS)) + if ((fd->atomicity) && ADIO_Feature(fd, ADIO_LOCKS)) ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1); ADIO_ReadContig(fd, readbuf, readbuf_len, MPI_BYTE, @@ -127,15 +134,17 @@ void ADIOI_GEN_ReadStrided(ADIO_File fd, void *buf, int count, if (*error_code != MPI_SUCCESS) return; for (j=0; jcount; i++) { - userbuf_off = j*buftype_extent + flat_buf->indices[i]; - req_off = off; - req_len = flat_buf->blocklens[i]; - ADIOI_BUFFERED_READ - off += flat_buf->blocklens[i]; - } + { + for (i=0; icount; i++) { + userbuf_off = (ADIO_Offset)j*(ADIO_Offset)buftype_extent + flat_buf->indices[i]; + req_off = off; + req_len = flat_buf->blocklens[i]; + ADIOI_BUFFERED_READ + off += flat_buf->blocklens[i]; + } + } - if ((fd->atomicity) && (fd->file_system != ADIO_PIOFS) && (fd->file_system != ADIO_PVFS)) + if ((fd->atomicity) && ADIO_Feature(fd, ADIO_LOCKS)) ADIOI_UNLOCK(fd, start_off, SEEK_SET, end_offset-start_off+1); if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind = off; @@ -151,29 +160,36 @@ void ADIOI_GEN_ReadStrided(ADIO_File fd, void *buf, int count, disp = fd->disp; if (file_ptr_type == ADIO_INDIVIDUAL) { - offset = fd->fp_ind; /* in bytes */ - n_filetypes = -1; - flag = 0; - while (!flag) { - n_filetypes++; - for (i=0; icount; i++) { - if (disp + flat_file->indices[i] + - (ADIO_Offset) n_filetypes*filetype_extent + flat_file->blocklens[i] - >= offset) { - st_index = i; - frd_size = (int) (disp + flat_file->indices[i] + - (ADIO_Offset) 
n_filetypes*filetype_extent - + flat_file->blocklens[i] - offset); - flag = 1; - break; - } + /* Wei-keng reworked type processing to be a bit more efficient */ + offset = fd->fp_ind - disp; + n_filetypes = (offset - flat_file->indices[0]) / filetype_extent; + offset -= (ADIO_Offset)n_filetypes * filetype_extent; + /* now offset is local to this extent */ + + /* find the block where offset is located, skip blocklens[i]==0 */ + for (i=0; icount; i++) { + ADIO_Offset dist; + if (flat_file->blocklens[i] == 0) continue; + dist = flat_file->indices[i] + flat_file->blocklens[i] - offset; + /* frd_size is from offset to the end of block i */ + if (dist == 0) { + i++; + offset = flat_file->indices[i]; + frd_size = flat_file->blocklens[i]; + break; + } + if (dist > 0) { + frd_size = dist; + break; } } - } + st_index = i; /* starting index in flat_file->indices[] */ + offset += disp + (ADIO_Offset)n_filetypes*filetype_extent; + } else { n_etypes_in_filetype = filetype_size/etype_size; - n_filetypes = (int) (offset / n_etypes_in_filetype); - etype_in_filetype = (int) (offset % n_etypes_in_filetype); + n_filetypes = offset / n_etypes_in_filetype; + etype_in_filetype = offset % n_etypes_in_filetype; size_in_filetype = etype_in_filetype * etype_size; sum = 0; @@ -189,36 +205,67 @@ void ADIOI_GEN_ReadStrided(ADIO_File fd, void *buf, int count, } /* abs. offset in bytes in the file */ - offset = disp + (ADIO_Offset) n_filetypes*filetype_extent + abs_off_in_filetype; + offset = disp + (ADIO_Offset) n_filetypes*filetype_extent + + abs_off_in_filetype; } start_off = offset; + /* Wei-keng Liao: read request is within a single flat_file contig + * block e.g. 
with subarray types that actually describe the whole + * array */ + if (buftype_is_contig && bufsize <= frd_size) { + ADIO_ReadContig(fd, buf, bufsize, MPI_BYTE, ADIO_EXPLICIT_OFFSET, + offset, status, error_code); + + if (file_ptr_type == ADIO_INDIVIDUAL) { + /* update MPI-IO file pointer to point to the first byte that + * can be accessed in the fileview. */ + fd->fp_ind = offset + bufsize; + if (bufsize == frd_size) { + do { + st_index++; + if (st_index == flat_file->count) { + st_index = 0; + n_filetypes++; + } + } while (flat_file->blocklens[st_index] == 0); + fd->fp_ind = disp + flat_file->indices[st_index] + + n_filetypes*filetype_extent; + } + } + fd->fp_sys_posn = -1; /* set it to null. */ +#ifdef HAVE_STATUS_SET_BYTES + MPIR_Status_set_bytes(status, datatype, bufsize); +#endif + return; + } + /* Calculate end_offset, the last byte-offset that will be accessed. e.g., if start_offset=0 and 100 bytes to be read, end_offset=99*/ st_frd_size = frd_size; st_n_filetypes = n_filetypes; - i = 0; + i_offset = 0; j = st_index; off = offset; frd_size = ADIOI_MIN(st_frd_size, bufsize); - while (i < bufsize) { - i += frd_size; + while (i_offset < bufsize) { + i_offset += frd_size; end_offset = off + frd_size - 1; - if (j < (flat_file->count - 1)) j++; - else { - j = 0; - n_filetypes++; + j = (j+1) % flat_file->count; + n_filetypes += (j == 0) ? 1 : 0; + while (flat_file->blocklens[j]==0) { + j = (j+1) % flat_file->count; + n_filetypes += (j == 0) ? 
1 : 0; } - - off = disp + flat_file->indices[j] + (ADIO_Offset) n_filetypes*filetype_extent; - frd_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i); + off = disp + flat_file->indices[j] + n_filetypes*(ADIO_Offset)filetype_extent; + frd_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i_offset); } /* if atomicity is true, lock (exclusive) the region to be accessed */ - if ((fd->atomicity) && (fd->file_system != ADIO_PIOFS) && (fd->file_system != ADIO_PVFS)) + if ((fd->atomicity) && ADIO_Feature(fd, ADIO_LOCKS)) ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1); readbuf_off = 0; @@ -230,12 +277,12 @@ void ADIOI_GEN_ReadStrided(ADIO_File fd, void *buf, int count, /* contiguous in memory, noncontiguous in file. should be the most common case. */ - i = 0; + i_offset = 0; j = st_index; off = offset; n_filetypes = st_n_filetypes; frd_size = ADIOI_MIN(st_frd_size, bufsize); - while (i < bufsize) { + while (i_offset < bufsize) { if (frd_size) { /* TYPE_UB and TYPE_LB can result in frd_size = 0. save system call in such cases */ @@ -244,25 +291,26 @@ void ADIOI_GEN_ReadStrided(ADIO_File fd, void *buf, int count, req_off = off; req_len = frd_size; - userbuf_off = i; + userbuf_off = i_offset; ADIOI_BUFFERED_READ } - i += frd_size; + i_offset += frd_size; if (off + frd_size < disp + flat_file->indices[j] + - flat_file->blocklens[j] + (ADIO_Offset) n_filetypes*filetype_extent) + flat_file->blocklens[j] + n_filetypes*(ADIO_Offset)filetype_extent) off += frd_size; /* did not reach end of contiguous block in filetype. no more I/O needed. off is incremented by frd_size. */ else { - if (j < (flat_file->count - 1)) j++; - else { - j = 0; - n_filetypes++; - } + j = (j+1) % flat_file->count; + n_filetypes += (j == 0) ? 1 : 0; + while (flat_file->blocklens[j]==0) { + j = (j+1) % flat_file->count; + n_filetypes += (j == 0) ? 
1 : 0; + } off = disp + flat_file->indices[j] + - (ADIO_Offset) n_filetypes*filetype_extent; - frd_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i); + n_filetypes*(ADIO_Offset)filetype_extent; + frd_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i_offset); } } } @@ -274,7 +322,7 @@ void ADIOI_GEN_ReadStrided(ADIO_File fd, void *buf, int count, while (flat_buf->type != datatype) flat_buf = flat_buf->next; k = num = buf_count = 0; - i = (int) (flat_buf->indices[0]); + i_offset = flat_buf->indices[0]; j = st_index; off = offset; n_filetypes = st_n_filetypes; @@ -289,7 +337,7 @@ void ADIOI_GEN_ReadStrided(ADIO_File fd, void *buf, int count, req_off = off; req_len = size; - userbuf_off = i; + userbuf_off = i_offset; ADIOI_BUFFERED_READ } @@ -298,18 +346,18 @@ void ADIOI_GEN_ReadStrided(ADIO_File fd, void *buf, int count, if (size == frd_size) { /* reached end of contiguous block in file */ - if (j < (flat_file->count - 1)) j++; - else { - j = 0; - n_filetypes++; - } - + j = (j+1) % flat_file->count; + n_filetypes += (j == 0) ? 1 : 0; + while (flat_file->blocklens[j]==0) { + j = (j+1) % flat_file->count; + n_filetypes += (j == 0) ? 
1 : 0; + } off = disp + flat_file->indices[j] + - (ADIO_Offset) n_filetypes*filetype_extent; + n_filetypes*(ADIO_Offset)filetype_extent; new_frd_size = flat_file->blocklens[j]; if (size != brd_size) { - i += size; + i_offset += size; new_brd_size -= size; } } @@ -319,7 +367,7 @@ void ADIOI_GEN_ReadStrided(ADIO_File fd, void *buf, int count, k = (k + 1)%flat_buf->count; buf_count++; - i = (int) (buftype_extent*(buf_count/flat_buf->count) + + i_offset = ((ADIO_Offset)buftype_extent*(ADIO_Offset)(buf_count/flat_buf->count) + flat_buf->indices[k]); new_brd_size = flat_buf->blocklens[k]; if (size != frd_size) { @@ -327,13 +375,14 @@ void ADIOI_GEN_ReadStrided(ADIO_File fd, void *buf, int count, new_frd_size -= size; } } + ADIOI_Assert(((ADIO_Offset)num + size) == (unsigned)(num + size)); num += size; frd_size = new_frd_size; brd_size = new_brd_size; } } - if ((fd->atomicity) && (fd->file_system != ADIO_PIOFS) && (fd->file_system != ADIO_PVFS)) + if ((fd->atomicity) && ADIO_Feature(fd, ADIO_LOCKS)) ADIOI_UNLOCK(fd, start_off, SEEK_SET, end_offset-start_off+1); if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind = off; diff --git a/ompi/mca/io/romio/romio/adio/common/ad_read_str_naive.c b/ompi/mca/io/romio/romio/adio/common/ad_read_str_naive.c index 26d00e3e13..e2d74c4064 100644 --- a/ompi/mca/io/romio/romio/adio/common/ad_read_str_naive.c +++ b/ompi/mca/io/romio/romio/adio/common/ad_read_str_naive.c @@ -16,11 +16,13 @@ void ADIOI_GEN_ReadStrided_naive(ADIO_File fd, void *buf, int count, /* offset is in units of etype relative to the filetype. 
*/ ADIOI_Flatlist_node *flat_buf, *flat_file; - int brd_size, frd_size=0, b_index; - int bufsize, size, sum, n_etypes_in_filetype, size_in_filetype; - int n_filetypes, etype_in_filetype; + ADIO_Offset size, brd_size, frd_size=0, req_len, sum; + int b_index; + int n_etypes_in_filetype; + ADIO_Offset n_filetypes, etype_in_filetype; ADIO_Offset abs_off_in_filetype=0; - int filetype_size, etype_size, buftype_size, req_len; + unsigned bufsize, filetype_size, buftype_size, size_in_filetype; + int etype_size; MPI_Aint filetype_extent, buftype_extent; int buf_count, buftype_is_contig, filetype_is_contig; ADIO_Offset userbuf_off; @@ -32,17 +34,18 @@ void ADIOI_GEN_ReadStrided_naive(ADIO_File fd, void *buf, int count, ADIOI_Datatype_iscontig(buftype, &buftype_is_contig); ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig); - MPI_Type_size(fd->filetype, &filetype_size); + MPI_Type_size(fd->filetype, (int*)&filetype_size); if ( ! filetype_size ) { *error_code = MPI_SUCCESS; return; } MPI_Type_extent(fd->filetype, &filetype_extent); - MPI_Type_size(buftype, &buftype_size); + MPI_Type_size(buftype,(int*) &buftype_size); MPI_Type_extent(buftype, &buftype_extent); etype_size = fd->etype_size; + ADIOI_Assert((buftype_size * count) == ((ADIO_Offset)buftype_size * (ADIO_Offset)count)); bufsize = buftype_size * count; /* contiguous in buftype and filetype is handled elsewhere */ @@ -62,8 +65,7 @@ void ADIOI_GEN_ReadStrided_naive(ADIO_File fd, void *buf, int count, end_offset = off + bufsize - 1; /* if atomicity is true, lock (exclusive) the region to be accessed */ - if ((fd->atomicity) && (fd->file_system != ADIO_PIOFS) && - (fd->file_system != ADIO_PVFS) && (fd->file_system != ADIO_PVFS2)) + if ((fd->atomicity) && ADIO_Feature(fd, ADIO_LOCKS)) { ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1); } @@ -73,11 +75,13 @@ void ADIOI_GEN_ReadStrided_naive(ADIO_File fd, void *buf, int count, */ for (b_count=0; b_count < count; b_count++) { for (b_index=0; b_index < 
flat_buf->count; b_index++) { - userbuf_off = b_count*buftype_extent + + userbuf_off = (ADIO_Offset)b_count*(ADIO_Offset)buftype_extent + flat_buf->indices[b_index]; req_off = off; req_len = flat_buf->blocklens[b_index]; + ADIOI_Assert((((ADIO_Offset)(MPIR_Upint)buf) + userbuf_off) == (ADIO_Offset)(MPIR_Upint)((MPIR_Upint)buf + userbuf_off)); + ADIOI_Assert(req_len == (int) req_len); ADIO_ReadContig(fd, (char *) buf + userbuf_off, req_len, @@ -93,8 +97,7 @@ void ADIOI_GEN_ReadStrided_naive(ADIO_File fd, void *buf, int count, } } - if ((fd->atomicity) && (fd->file_system != ADIO_PIOFS) && - (fd->file_system != ADIO_PVFS) && (fd->file_system != ADIO_PVFS2)) + if ((fd->atomicity) && ADIO_Feature(fd, ADIO_LOCKS)) { ADIOI_UNLOCK(fd, start_off, SEEK_SET, end_offset-start_off+1); } @@ -104,7 +107,9 @@ void ADIOI_GEN_ReadStrided_naive(ADIO_File fd, void *buf, int count, } else { /* noncontiguous in file */ - int f_index, st_frd_size, st_index = 0, st_n_filetypes; + int f_index, st_index = 0; + ADIO_Offset st_n_filetypes; + ADIO_Offset st_frd_size; int flag; /* First we're going to calculate a set of values for use in all @@ -134,15 +139,15 @@ void ADIOI_GEN_ReadStrided_naive(ADIO_File fd, void *buf, int count, n_filetypes++; for (f_index=0; f_index < flat_file->count; f_index++) { if (disp + flat_file->indices[f_index] + - (ADIO_Offset) n_filetypes*filetype_extent + + n_filetypes*(ADIO_Offset)filetype_extent + flat_file->blocklens[f_index] >= start_off) { /* this block contains our starting position */ st_index = f_index; - frd_size = (int) (disp + flat_file->indices[f_index] + - (ADIO_Offset) n_filetypes*filetype_extent + - flat_file->blocklens[f_index] - start_off); + frd_size = disp + flat_file->indices[f_index] + + n_filetypes*(ADIO_Offset)filetype_extent + + flat_file->blocklens[f_index] - start_off; flag = 1; break; } @@ -151,9 +156,9 @@ void ADIOI_GEN_ReadStrided_naive(ADIO_File fd, void *buf, int count, } else { n_etypes_in_filetype = filetype_size/etype_size; - 
n_filetypes = (int) (offset / n_etypes_in_filetype); + n_filetypes = offset / n_etypes_in_filetype; etype_in_filetype = (int) (offset % n_etypes_in_filetype); - size_in_filetype = etype_in_filetype * etype_size; + size_in_filetype = (unsigned)etype_in_filetype * (unsigned)etype_size; sum = 0; for (f_index=0; f_index < flat_file->count; f_index++) { @@ -169,7 +174,7 @@ void ADIOI_GEN_ReadStrided_naive(ADIO_File fd, void *buf, int count, } /* abs. offset in bytes in the file */ - start_off = disp + (ADIO_Offset) n_filetypes*filetype_extent + + start_off = disp + n_filetypes*(ADIO_Offset)filetype_extent + abs_off_in_filetype; } @@ -198,9 +203,9 @@ void ADIOI_GEN_ReadStrided_naive(ADIO_File fd, void *buf, int count, } off = disp + flat_file->indices[f_index] + - (ADIO_Offset) n_filetypes*filetype_extent; + n_filetypes*(ADIO_Offset)filetype_extent; frd_size = ADIOI_MIN(flat_file->blocklens[f_index], - bufsize-(int)userbuf_off); + bufsize-(unsigned)userbuf_off); } /* End of calculations. At this point the following values have @@ -213,8 +218,7 @@ void ADIOI_GEN_ReadStrided_naive(ADIO_File fd, void *buf, int count, */ /* if atomicity is true, lock (exclusive) the region to be accessed */ - if ((fd->atomicity) && (fd->file_system != ADIO_PIOFS) && - (fd->file_system != ADIO_PVFS) && (fd->file_system != ADIO_PVFS2)) + if ((fd->atomicity) && ADIO_Feature(fd, ADIO_LOCKS)) { ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1); } @@ -238,6 +242,8 @@ void ADIOI_GEN_ReadStrided_naive(ADIO_File fd, void *buf, int count, req_off = off; req_len = frd_size; + ADIOI_Assert((((ADIO_Offset)(MPIR_Upint)buf) + userbuf_off) == (ADIO_Offset)(MPIR_Upint)((MPIR_Upint)buf + userbuf_off)); + ADIOI_Assert(req_len == (int) req_len); ADIO_ReadContig(fd, (char *) buf + userbuf_off, req_len, @@ -252,7 +258,7 @@ void ADIOI_GEN_ReadStrided_naive(ADIO_File fd, void *buf, int count, if (off + frd_size < disp + flat_file->indices[f_index] + flat_file->blocklens[f_index] + - (ADIO_Offset) 
n_filetypes*filetype_extent) + n_filetypes*(ADIO_Offset)filetype_extent) { /* important that this value be correct, as it is * used to set the offset in the fd near the end of @@ -270,14 +276,14 @@ void ADIOI_GEN_ReadStrided_naive(ADIO_File fd, void *buf, int count, n_filetypes++; } off = disp + flat_file->indices[f_index] + - (ADIO_Offset) n_filetypes*filetype_extent; + n_filetypes*(ADIO_Offset)filetype_extent; frd_size = ADIOI_MIN(flat_file->blocklens[f_index], - bufsize-(int)userbuf_off); + bufsize-(unsigned)userbuf_off); } } } else { - int i, tmp_bufsize = 0; + ADIO_Offset i_offset, tmp_bufsize = 0; /* noncontiguous in memory as well as in file */ ADIOI_Flatten_datatype(buftype); @@ -285,7 +291,7 @@ void ADIOI_GEN_ReadStrided_naive(ADIO_File fd, void *buf, int count, while (flat_buf->type != buftype) flat_buf = flat_buf->next; b_index = buf_count = 0; - i = (int) (flat_buf->indices[0]); + i_offset = flat_buf->indices[0]; f_index = st_index; off = start_off; n_filetypes = st_n_filetypes; @@ -294,14 +300,16 @@ void ADIOI_GEN_ReadStrided_naive(ADIO_File fd, void *buf, int count, /* while we haven't read size * count bytes, keep going */ while (tmp_bufsize < bufsize) { - int new_brd_size = brd_size, new_frd_size = frd_size; + ADIO_Offset new_brd_size = brd_size, new_frd_size = frd_size; size = ADIOI_MIN(frd_size, brd_size); if (size) { req_off = off; req_len = size; - userbuf_off = i; + userbuf_off = i_offset; + ADIOI_Assert((((ADIO_Offset)(MPIR_Upint)buf) + userbuf_off) == (ADIO_Offset)(MPIR_Upint)((MPIR_Upint)buf + userbuf_off)); + ADIOI_Assert(req_len == (int) req_len); ADIO_ReadContig(fd, (char *) buf + userbuf_off, req_len, @@ -322,11 +330,11 @@ void ADIOI_GEN_ReadStrided_naive(ADIO_File fd, void *buf, int count, } off = disp + flat_file->indices[f_index] + - (ADIO_Offset) n_filetypes*filetype_extent; + n_filetypes*(ADIO_Offset)filetype_extent; new_frd_size = flat_file->blocklens[f_index]; if (size != brd_size) { - i += size; + i_offset += size; new_brd_size 
-= size; } } @@ -336,8 +344,8 @@ void ADIOI_GEN_ReadStrided_naive(ADIO_File fd, void *buf, int count, b_index = (b_index + 1)%flat_buf->count; buf_count++; - i = (int) (buftype_extent*(buf_count/flat_buf->count) + - flat_buf->indices[b_index]); + i_offset = buftype_extent*(buf_count/flat_buf->count) + + flat_buf->indices[b_index]; new_brd_size = flat_buf->blocklens[b_index]; if (size != frd_size) { off += size; diff --git a/ompi/mca/io/romio/romio/adio/common/ad_resize.c b/ompi/mca/io/romio/romio/adio/common/ad_resize.c index 18e1d1ff17..bf38296c58 100644 --- a/ompi/mca/io/romio/romio/adio/common/ad_resize.c +++ b/ompi/mca/io/romio/romio/adio/common/ad_resize.c @@ -20,7 +20,8 @@ void ADIOI_GEN_Resize(ADIO_File fd, ADIO_Offset size, int *error_code) /* first aggregator performs ftruncate() */ if (rank == fd->hints->ranklist[0]) { - err = ftruncate(fd->fd_sys, size); + ADIOI_Assert(size == (off_t) size); + err = ftruncate(fd->fd_sys, (off_t)size); } /* bcast return value */ diff --git a/ompi/mca/io/romio/romio/adio/common/ad_seek.c b/ompi/mca/io/romio/romio/adio/common/ad_seek.c index 18482f065c..2fc19c54f4 100644 --- a/ompi/mca/io/romio/romio/adio/common/ad_seek.c +++ b/ompi/mca/io/romio/romio/adio/common/ad_seek.c @@ -22,10 +22,12 @@ ADIO_Offset ADIOI_GEN_SeekIndividual(ADIO_File fd, ADIO_Offset offset, ADIO_Offset off; ADIOI_Flatlist_node *flat_file; - int i, n_etypes_in_filetype, n_filetypes, etype_in_filetype; + int i; + ADIO_Offset n_etypes_in_filetype, n_filetypes, etype_in_filetype; ADIO_Offset abs_off_in_filetype=0; - int size_in_filetype, sum; - int filetype_size, etype_size, filetype_is_contig; + ADIO_Offset size_in_filetype, sum; + unsigned filetype_size; + int etype_size, filetype_is_contig; MPI_Aint filetype_extent; ADIOI_UNREFERENCED_ARG(whence); @@ -33,13 +35,13 @@ ADIO_Offset ADIOI_GEN_SeekIndividual(ADIO_File fd, ADIO_Offset offset, ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig); etype_size = fd->etype_size; - if (filetype_is_contig) off 
= fd->disp + etype_size * offset; + if (filetype_is_contig) off = fd->disp + (ADIO_Offset)etype_size * offset; else { flat_file = ADIOI_Flatlist; while (flat_file->type != fd->filetype) flat_file = flat_file->next; MPI_Type_extent(fd->filetype, &filetype_extent); - MPI_Type_size(fd->filetype, &filetype_size); + MPI_Type_size(fd->filetype, (int*)&filetype_size); if ( ! filetype_size ) { /* Since offset relative to the filetype size, we can't do compute the offset when that result is zero. @@ -49,8 +51,8 @@ ADIO_Offset ADIOI_GEN_SeekIndividual(ADIO_File fd, ADIO_Offset offset, } n_etypes_in_filetype = filetype_size/etype_size; - n_filetypes = (int) (offset / n_etypes_in_filetype); - etype_in_filetype = (int) (offset % n_etypes_in_filetype); + n_filetypes = offset / n_etypes_in_filetype; + etype_in_filetype = offset % n_etypes_in_filetype; size_in_filetype = etype_in_filetype * etype_size; sum = 0; @@ -64,7 +66,7 @@ ADIO_Offset ADIOI_GEN_SeekIndividual(ADIO_File fd, ADIO_Offset offset, } /* abs. 
offset in bytes in the file */ - off = fd->disp + (ADIO_Offset) n_filetypes * filetype_extent + + off = fd->disp + n_filetypes * filetype_extent + abs_off_in_filetype; } diff --git a/ompi/mca/io/romio/romio/adio/common/ad_subarray.c b/ompi/mca/io/romio/romio/adio/common/ad_subarray.c index 4f99ae2535..f9a32e3c02 100644 --- a/ompi/mca/io/romio/romio/adio/common/ad_subarray.c +++ b/ompi/mca/io/romio/romio/adio/common/ad_subarray.c @@ -32,9 +32,9 @@ int ADIO_Type_create_subarray(int ndims, array_of_subsizes[0], array_of_sizes[0], oldtype, &tmp1); - size = array_of_sizes[0]*extent; + size = (MPI_Aint)array_of_sizes[0]*extent; for (i=2; i=0; i--) { - size *= array_of_sizes[i+1]; + size *= (MPI_Aint)array_of_sizes[i+1]; MPI_Type_hvector(array_of_subsizes[i], 1, size, tmp1, &tmp2); MPI_Type_free(&tmp1); tmp1 = tmp2; @@ -74,15 +74,15 @@ int ADIO_Type_create_subarray(int ndims, disps[1] = array_of_starts[ndims-1]; size = 1; for (i=ndims-2; i>=0; i--) { - size *= array_of_sizes[i+1]; - disps[1] += size*array_of_starts[i]; + size *= (MPI_Aint)array_of_sizes[i+1]; + disps[1] += size*(MPI_Aint)array_of_starts[i]; } } disps[1] *= extent; disps[2] = extent; - for (i=0; i #endif +#ifdef AGGREGATION_PROFILE +#include "mpe.h" +#endif void ADIOI_GEN_WriteContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int *error_code) { - int err = -1, datatype_size, len; + int err = -1, datatype_size; + ADIO_Offset len; static char myname[] = "ADIOI_GEN_WRITECONTIG"; +#ifdef AGGREGATION_PROFILE + MPE_Log_event (5036, 0, NULL); +#endif + MPI_Type_size(datatype, &datatype_size); - len = datatype_size * count; + len = (ADIO_Offset)datatype_size * (ADIO_Offset)count; + ADIOI_Assert(len == (unsigned int) len); /* read takes an unsigned int parm */ if (file_ptr_type == ADIO_INDIVIDUAL) { offset = fd->fp_ind; @@ -50,7 +59,7 @@ void ADIOI_GEN_WriteContig(ADIO_File fd, void *buf, int count, #ifdef ADIOI_MPE_LOGGING MPE_Log_event( 
ADIOI_MPE_write_a, 0, NULL ); #endif - err = write(fd->fd_sys, buf, len); + err = write(fd->fd_sys, buf, (unsigned int)len); #ifdef ADIOI_MPE_LOGGING MPE_Log_event( ADIOI_MPE_write_b, 0, NULL ); #endif @@ -77,4 +86,7 @@ void ADIOI_GEN_WriteContig(ADIO_File fd, void *buf, int count, #endif *error_code = MPI_SUCCESS; +#ifdef AGGREGATION_PROFILE + MPE_Log_event (5037, 0, NULL); +#endif } diff --git a/ompi/mca/io/romio/romio/adio/common/ad_write_coll.c b/ompi/mca/io/romio/romio/adio/common/ad_write_coll.c index f71ec67860..c547b2a367 100644 --- a/ompi/mca/io/romio/romio/adio/common/ad_write_coll.c +++ b/ompi/mca/io/romio/romio/adio/common/ad_write_coll.c @@ -8,18 +8,22 @@ #include "adio.h" #include "adio_extern.h" +#ifdef AGGREGATION_PROFILE +#include "mpe.h" +#endif + /* prototypes of functions used for collective writes only. */ static void ADIOI_Exch_and_write(ADIO_File fd, void *buf, MPI_Datatype datatype, int nprocs, int myrank, ADIOI_Access *others_req, ADIO_Offset *offset_list, - int *len_list, int contig_access_count, ADIO_Offset + ADIO_Offset *len_list, int contig_access_count, ADIO_Offset min_st_offset, ADIO_Offset fd_size, ADIO_Offset *fd_start, ADIO_Offset *fd_end, int *buf_idx, int *error_code); static void ADIOI_W_Exchange_data(ADIO_File fd, void *buf, char *write_buf, ADIOI_Flatlist_node *flat_buf, ADIO_Offset - *offset_list, int *len_list, int *send_size, + *offset_list, ADIO_Offset *len_list, int *send_size, int *recv_size, ADIO_Offset off, int size, int *count, int *start_pos, int *partial_recv, int *sent_to_proc, int nprocs, @@ -33,7 +37,7 @@ static void ADIOI_W_Exchange_data(ADIO_File fd, void *buf, char *write_buf, MPI_Aint buftype_extent, int *buf_idx, int *error_code); static void ADIOI_Fill_send_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node *flat_buf, char **send_buf, ADIO_Offset - *offset_list, int *len_list, int *send_size, + *offset_list, ADIO_Offset *len_list, int *send_size, MPI_Request *requests, int *sent_to_proc, int nprocs, int 
myrank, int contig_access_count, ADIO_Offset @@ -42,7 +46,7 @@ static void ADIOI_Fill_send_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node int *send_buf_idx, int *curr_to_proc, int *done_to_proc, int iter, MPI_Aint buftype_extent); -static void ADIOI_Heap_merge(ADIOI_Access *others_req, int *count, +void ADIOI_Heap_merge(ADIOI_Access *others_req, int *count, ADIO_Offset *srt_off, int *srt_len, int *start_pos, int nprocs, int nprocs_recv, int total_elements); @@ -72,9 +76,15 @@ void ADIOI_GEN_WriteStridedColl(ADIO_File fd, void *buf, int count, ADIO_Offset orig_fp, start_offset, end_offset, fd_size, min_st_offset, off; ADIO_Offset *offset_list = NULL, *st_offsets = NULL, *fd_start = NULL, *fd_end = NULL, *end_offsets = NULL; - int *buf_idx = NULL, *len_list = NULL; + int *buf_idx = NULL; + ADIO_Offset *len_list = NULL; int old_error, tmp_error; + if (fd->hints->cb_pfr != ADIOI_HINT_DISABLE) { + ADIOI_IOStridedColl (fd, buf, count, ADIOI_WRITE, datatype, + file_ptr_type, offset, status, error_code); + return; + } MPI_Comm_size(fd->comm, &nprocs); MPI_Comm_rank(fd->comm, &myrank); @@ -136,7 +146,7 @@ void ADIOI_GEN_WriteStridedColl(ADIO_File fd, void *buf, int count, if (buftype_is_contig && filetype_is_contig) { if (file_ptr_type == ADIO_EXPLICIT_OFFSET) { - off = fd->disp + (fd->etype_size) * offset; + off = fd->disp + (ADIO_Offset)(fd->etype_size) * offset; ADIO_WriteContig(fd, buf, count, datatype, ADIO_EXPLICIT_OFFSET, off, status, error_code); @@ -156,7 +166,9 @@ void ADIOI_GEN_WriteStridedColl(ADIO_File fd, void *buf, int count, ADIOI_Calc_file_domains(st_offsets, end_offsets, nprocs, nprocs_for_coll, &min_st_offset, - &fd_start, &fd_end, &fd_size); + &fd_start, &fd_end, + fd->hints->min_fdomain_size, &fd_size, + fd->hints->striping_unit); /* calculate what portions of the access requests of this process are @@ -225,6 +237,9 @@ void ADIOI_GEN_WriteStridedColl(ADIO_File fd, void *buf, int count, #ifdef ADIOI_MPE_LOGGING MPE_Log_event( ADIOI_MPE_postwrite_b, 
0, NULL ); #endif +#ifdef AGGREGATION_PROFILE + MPE_Log_event (5012, 0, NULL); +#endif if ( (old_error != MPI_SUCCESS) && (old_error != MPI_ERR_IO) ) *error_code = old_error; @@ -264,6 +279,9 @@ void ADIOI_GEN_WriteStridedColl(ADIO_File fd, void *buf, int count, #endif fd->fp_sys_posn = -1; /* set it to null. */ +#ifdef AGGREGATION_PROFILE + MPE_Log_event (5013, 0, NULL); +#endif } @@ -276,9 +294,8 @@ static void ADIOI_Exch_and_write(ADIO_File fd, void *buf, MPI_Datatype int myrank, ADIOI_Access *others_req, ADIO_Offset *offset_list, - int *len_list, int contig_access_count, - ADIO_Offset - min_st_offset, ADIO_Offset fd_size, + ADIO_Offset *len_list, int contig_access_count, + ADIO_Offset min_st_offset, ADIO_Offset fd_size, ADIO_Offset *fd_start, ADIO_Offset *fd_end, int *buf_idx, int *error_code) { @@ -291,7 +308,9 @@ static void ADIOI_Exch_and_write(ADIO_File fd, void *buf, MPI_Datatype array to a file, where each local array is 8Mbytes, requiring at least another 8Mbytes of temp space is unacceptable. */ - int hole, i, j, m, size=0, ntimes, max_ntimes, buftype_is_contig; + /* Not convinced end_loc-st_loc couldn't be > int, so make these offsets*/ + ADIO_Offset size=0; + int hole, i, j, m, ntimes, max_ntimes, buftype_is_contig; ADIO_Offset st_loc=-1, end_loc=-1, off, done, req_off; char *write_buf=NULL; int *curr_offlen_ptr, *count, *send_size, req_len, *recv_size; @@ -312,7 +331,7 @@ static void ADIOI_Exch_and_write(ADIO_File fd, void *buf, MPI_Datatype That gives the no. of communication phases as well. 
*/ value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char)); - MPI_Info_get(fd->info, "cb_buffer_size", MPI_MAX_INFO_VAL, value, + ADIOI_Info_get(fd->info, "cb_buffer_size", MPI_MAX_INFO_VAL, value, &info_flag); coll_bufsize = atoi(value); ADIOI_Free(value); @@ -421,7 +440,7 @@ static void ADIOI_Exch_and_write(ADIO_File fd, void *buf, MPI_Datatype for (i=0; i < nprocs; i++) count[i] = recv_size[i] = 0; - size = (int) (ADIOI_MIN(coll_bufsize, end_loc-st_loc+1-done)); + size = ADIOI_MIN((unsigned)coll_bufsize, end_loc-st_loc+1-done); for (i=0; i < nprocs; i++) { if (others_req[i].count) { @@ -445,12 +464,14 @@ static void ADIOI_Exch_and_write(ADIO_File fd, void *buf, MPI_Datatype } if (req_off < off + size) { count[i]++; + ADIOI_Assert((((ADIO_Offset)(MPIR_Upint)write_buf)+req_off-off) == (ADIO_Offset)(MPIR_Upint)(write_buf+req_off-off)); MPI_Address(write_buf+req_off-off, &(others_req[i].mem_ptrs[j])); - recv_size[i] += (int)(ADIOI_MIN(off + (ADIO_Offset)size - - req_off, req_len)); + ADIOI_Assert((off + size - req_off) == (int)(off + size - req_off)); + recv_size[i] += (int)(ADIOI_MIN(off + size - req_off, + (unsigned)req_len)); - if (off+size-req_off < req_len) + if (off+size-req_off < (unsigned)req_len) { partial_recv[i] = (int) (off + size - req_off); @@ -494,7 +515,8 @@ static void ADIOI_Exch_and_write(ADIO_File fd, void *buf, MPI_Datatype if (count[i]) flag = 1; if (flag) { - ADIO_WriteContig(fd, write_buf, size, MPI_BYTE, ADIO_EXPLICIT_OFFSET, + ADIOI_Assert(size == (int)size); + ADIO_WriteContig(fd, write_buf, (int)size, MPI_BYTE, ADIO_EXPLICIT_OFFSET, off, &status, error_code); if (*error_code != MPI_SUCCESS) return; } @@ -537,7 +559,7 @@ static void ADIOI_Exch_and_write(ADIO_File fd, void *buf, MPI_Datatype */ static void ADIOI_W_Exchange_data(ADIO_File fd, void *buf, char *write_buf, ADIOI_Flatlist_node *flat_buf, ADIO_Offset - *offset_list, int *len_list, int *send_size, + *offset_list, ADIO_Offset *len_list, int *send_size, int *recv_size, 
ADIO_Offset off, int size, int *count, int *start_pos, int *partial_recv, @@ -558,7 +580,7 @@ static void ADIOI_W_Exchange_data(ADIO_File fd, void *buf, char *write_buf, MPI_Request *requests, *send_req; MPI_Datatype *recv_types; MPI_Status *statuses, status; - int *srt_len, sum, sum_recv; + int *srt_len, sum; ADIO_Offset *srt_off; static char myname[] = "ADIOI_W_EXCHANGE_DATA"; @@ -617,26 +639,27 @@ static void ADIOI_W_Exchange_data(ADIO_File fd, void *buf, char *write_buf, } ADIOI_Free(tmp_len); -/* check if there are any holes */ + /* check if there are any holes. If yes, must do read-modify-write. + * holes can be in three places. 'middle' is what you'd expect: the + * processes are operating on noncontigous data. But holes can also show + * up at the beginning or end of the file domain (see John Bent ROMIO REQ + * #835). Missing these holes would result in us writing more data than + * recieved by everyone else. */ + *hole = 0; - for (i=0; i srt_len[0]) srt_len[0] = new_len; + } + else + break; } - if (size > sum_recv) *hole = 1; + if (i < sum || size != srt_len[0]) /* hole in middle or end */ + *hole = 1; } ADIOI_Free(srt_off); @@ -687,6 +710,9 @@ static void ADIOI_W_Exchange_data(ADIO_File fd, void *buf, char *write_buf, /* post sends. if buftype_is_contig, data can be directly sent from user buf at location given by buf_idx. else use send_buf. 
*/ +#ifdef AGGREGATION_PROFILE + MPE_Log_event (5032, 0, NULL); +#endif if (buftype_is_contig) { j = 0; for (i=0; i < nprocs; i++) @@ -761,6 +787,9 @@ static void ADIOI_W_Exchange_data(ADIO_File fd, void *buf, char *write_buf, MPI_Waitall(nprocs_send+nprocs_recv, requests, statuses); #endif +#ifdef AGGREGATION_PROFILE + MPE_Log_event (5033, 0, NULL); +#endif ADIOI_Free(statuses); ADIOI_Free(requests); if (!buftype_is_contig && nprocs_send) { @@ -770,7 +799,6 @@ static void ADIOI_W_Exchange_data(ADIO_File fd, void *buf, char *write_buf, } } - #define ADIOI_BUF_INCR \ { \ while (buf_incr) { \ @@ -784,7 +812,7 @@ static void ADIOI_W_Exchange_data(ADIO_File fd, void *buf, char *write_buf, n_buftypes++; \ } \ user_buf_idx = flat_buf->indices[flat_buf_idx] + \ - n_buftypes*buftype_extent; \ + (ADIO_Offset)n_buftypes*(ADIO_Offset)buftype_extent; \ flat_buf_sz = flat_buf->blocklens[flat_buf_idx]; \ } \ buf_incr -= size_in_buf; \ @@ -796,6 +824,8 @@ static void ADIOI_W_Exchange_data(ADIO_File fd, void *buf, char *write_buf, { \ while (size) { \ size_in_buf = ADIOI_MIN(size, flat_buf_sz); \ + ADIOI_Assert((((ADIO_Offset)(MPIR_Upint)buf) + user_buf_idx) == (ADIO_Offset)(MPIR_Upint)((MPIR_Upint)buf + user_buf_idx)); \ + ADIOI_Assert(size_in_buf == (size_t)size_in_buf); \ memcpy(&(send_buf[p][send_buf_idx[p]]), \ ((char *) buf) + user_buf_idx, size_in_buf); \ send_buf_idx[p] += size_in_buf; \ @@ -808,7 +838,7 @@ static void ADIOI_W_Exchange_data(ADIO_File fd, void *buf, char *write_buf, n_buftypes++; \ } \ user_buf_idx = flat_buf->indices[flat_buf_idx] + \ - n_buftypes*buftype_extent; \ + (ADIO_Offset)n_buftypes*(ADIO_Offset)buftype_extent; \ flat_buf_sz = flat_buf->blocklens[flat_buf_idx]; \ } \ size -= size_in_buf; \ @@ -819,9 +849,11 @@ static void ADIOI_W_Exchange_data(ADIO_File fd, void *buf, char *write_buf, + + static void ADIOI_Fill_send_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node *flat_buf, char **send_buf, ADIO_Offset - *offset_list, int *len_list, int 
*send_size, + *offset_list, ADIO_Offset *len_list, int *send_size, MPI_Request *requests, int *sent_to_proc, int nprocs, int myrank, int contig_access_count, @@ -833,8 +865,9 @@ static void ADIOI_Fill_send_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node { /* this function is only called if buftype is not contig */ - int i, p, flat_buf_idx, size; - int flat_buf_sz, buf_incr, size_in_buf, jj, n_buftypes; + int i, p, flat_buf_idx; + ADIO_Offset flat_buf_sz, size_in_buf, buf_incr, size; + int jj, n_buftypes; ADIO_Offset off, len, rem_len, user_buf_idx; /* curr_to_proc[p] = amount of data sent to proc. p that has already @@ -861,7 +894,7 @@ static void ADIOI_Fill_send_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node for (i=0; i done_to_proc[p]) { if (done_to_proc[p] > curr_to_proc[p]) { - size = (int)ADIOI_MIN(curr_to_proc[p] + len - + size = ADIOI_MIN(curr_to_proc[p] + len - done_to_proc[p], send_size[p]-send_buf_idx[p]); buf_incr = done_to_proc[p] - curr_to_proc[p]; ADIOI_BUF_INCR - buf_incr = (int)(curr_to_proc[p] + len - done_to_proc[p]); + ADIOI_Assert((curr_to_proc[p] + len - done_to_proc[p]) == (unsigned)(curr_to_proc[p] + len - done_to_proc[p])); + buf_incr = curr_to_proc[p] + len - done_to_proc[p]; + ADIOI_Assert((done_to_proc[p] + size) == (unsigned)(done_to_proc[p] + size)); curr_to_proc[p] = done_to_proc[p] + size; ADIOI_BUF_COPY } else { - size = (int)ADIOI_MIN(len,send_size[p]-send_buf_idx[p]); - buf_incr = (int)len; + size = ADIOI_MIN(len,send_size[p]-send_buf_idx[p]); + buf_incr = len; + ADIOI_Assert((curr_to_proc[p] + size) == (unsigned)((ADIO_Offset)curr_to_proc[p] + size)); curr_to_proc[p] += size; ADIOI_BUF_COPY } @@ -902,13 +938,14 @@ static void ADIOI_Fill_send_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node } } else { - curr_to_proc[p] += (int)len; - buf_incr = (int)len; + ADIOI_Assert((curr_to_proc[p] + len) == (unsigned)((ADIO_Offset)curr_to_proc[p] + len)); + curr_to_proc[p] += len; + buf_incr = len; ADIOI_BUF_INCR } } else { - 
buf_incr = (int)len; + buf_incr = len; ADIOI_BUF_INCR } off += len; @@ -921,7 +958,7 @@ static void ADIOI_Fill_send_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node -static void ADIOI_Heap_merge(ADIOI_Access *others_req, int *count, +void ADIOI_Heap_merge(ADIOI_Access *others_req, int *count, ADIO_Offset *srt_off, int *srt_len, int *start_pos, int nprocs, int nprocs_recv, int total_elements) { diff --git a/ompi/mca/io/romio/romio/adio/common/ad_write_nolock.c b/ompi/mca/io/romio/romio/adio/common/ad_write_nolock.c index e36996202a..091b8ec42b 100644 --- a/ompi/mca/io/romio/romio/adio/common/ad_write_nolock.c +++ b/ompi/mca/io/romio/romio/adio/common/ad_write_nolock.c @@ -27,16 +27,18 @@ void ADIOI_NOLOCK_WriteStrided(ADIO_File fd, void *buf, int count, /* offset is in units of etype relative to the filetype. */ ADIOI_Flatlist_node *flat_buf, *flat_file; - int i, j, k, err=-1, bwr_size, fwr_size=0, st_index=0; - int bufsize, num, size, sum, n_etypes_in_filetype, size_in_filetype; - int n_filetypes, etype_in_filetype; - ADIO_Offset abs_off_in_filetype=0; + int j, k, err=-1, st_index=0; + ADIO_Offset fwr_size=0, bwr_size, new_bwr_size, new_fwr_size, i_offset, num; + unsigned bufsize; + int n_etypes_in_filetype; + ADIO_Offset n_filetypes, etype_in_filetype, size, sum; + ADIO_Offset abs_off_in_filetype=0, size_in_filetype; int filetype_size, etype_size, buftype_size; MPI_Aint filetype_extent, buftype_extent, indx; int buf_count, buftype_is_contig, filetype_is_contig; ADIO_Offset off, disp; - int flag, new_bwr_size, new_fwr_size, err_flag=0; - static char myname[] = "ADIOI_PVFS_WRITESTRIDED"; + int flag, err_flag=0; + static char myname[] = "ADIOI_NOLOCK_WRITESTRIDED"; #ifdef IO_DEBUG int rank,nprocs; #endif @@ -70,6 +72,7 @@ void ADIOI_NOLOCK_WriteStrided(ADIO_File fd, void *buf, int count, MPI_Type_extent(datatype, &buftype_extent); etype_size = fd->etype_size; + ADIOI_Assert((buftype_size * count) == ((ADIO_Offset)(unsigned)buftype_size * (ADIO_Offset)count)); 
bufsize = buftype_size * count; if (!buftype_is_contig && filetype_is_contig) { @@ -100,6 +103,7 @@ void ADIOI_NOLOCK_WriteStrided(ADIO_File fd, void *buf, int count, * is also handled. */ for (j=0; jcount; i++) { if (flat_buf->blocklens[i] > combine_buf_remain && combine_buf != combine_buf_ptr) { /* there is data in the buffer; write out the buffer so far */ @@ -134,12 +138,14 @@ void ADIOI_NOLOCK_WriteStrided(ADIO_File fd, void *buf, int count, rank, nprocs, off, flat_buf->blocklens[i]); #endif + ADIOI_Assert(flat_buf->blocklens[i] == (unsigned)flat_buf->blocklens[i]); + ADIOI_Assert((((ADIO_Offset)(MPIR_Upint)buf) + (ADIO_Offset)j*(ADIO_Offset)buftype_extent + flat_buf->indices[i]) == (ADIO_Offset)((MPIR_Upint)buf + (ADIO_Offset)j*(ADIO_Offset)buftype_extent + flat_buf->indices[i])); #ifdef ADIOI_MPE_LOGGING MPE_Log_event( ADIOI_MPE_write_a, 0, NULL ); #endif err = write(fd->fd_sys, - ((char *) buf) + j*buftype_extent + flat_buf->indices[i], - flat_buf->blocklens[i]); + ((char *) buf) + (ADIO_Offset)j*(ADIO_Offset)buftype_extent + flat_buf->indices[i], + (unsigned)flat_buf->blocklens[i]); #ifdef ADIOI_MPE_LOGGING MPE_Log_event( ADIOI_MPE_write_b, 0, NULL ); #endif @@ -206,14 +212,15 @@ void ADIOI_NOLOCK_WriteStrided(ADIO_File fd, void *buf, int count, n_filetypes = -1; flag = 0; while (!flag) { + int i; n_filetypes++; for (i=0; icount; i++) { if (disp + flat_file->indices[i] + - (ADIO_Offset) n_filetypes*filetype_extent + flat_file->blocklens[i] + n_filetypes*(ADIO_Offset)filetype_extent + flat_file->blocklens[i] >= offset) { st_index = i; fwr_size = disp + flat_file->indices[i] + - (ADIO_Offset) n_filetypes*filetype_extent + n_filetypes*(ADIO_Offset)filetype_extent + flat_file->blocklens[i] - offset; flag = 1; break; @@ -222,9 +229,10 @@ void ADIOI_NOLOCK_WriteStrided(ADIO_File fd, void *buf, int count, } } else { + int i; n_etypes_in_filetype = filetype_size/etype_size; - n_filetypes = (int) (offset / n_etypes_in_filetype); - etype_in_filetype = (int) (offset 
% n_etypes_in_filetype); + n_filetypes = offset / n_etypes_in_filetype; + etype_in_filetype = offset % n_etypes_in_filetype; size_in_filetype = etype_in_filetype * etype_size; sum = 0; @@ -240,7 +248,7 @@ void ADIOI_NOLOCK_WriteStrided(ADIO_File fd, void *buf, int count, } /* abs. offset in bytes in the file */ - offset = disp + (ADIO_Offset) n_filetypes*filetype_extent + abs_off_in_filetype; + offset = disp + n_filetypes*(ADIO_Offset)filetype_extent + abs_off_in_filetype; } if (buftype_is_contig && !filetype_is_contig) { @@ -248,11 +256,11 @@ void ADIOI_NOLOCK_WriteStrided(ADIO_File fd, void *buf, int count, /* contiguous in memory, noncontiguous in file. should be the most common case. */ - i = 0; + i_offset = 0; j = st_index; off = offset; fwr_size = ADIOI_MIN(fwr_size, bufsize); - while (i < bufsize) { + while (i_offset < bufsize) { if (fwr_size) { /* TYPE_UB and TYPE_LB can result in fwr_size = 0. save system call in such cases */ @@ -271,16 +279,16 @@ void ADIOI_NOLOCK_WriteStrided(ADIO_File fd, void *buf, int count, #ifdef ADIOI_MPE_LOGGING MPE_Log_event(ADIOI_MPE_write_a, 0, NULL); #endif - err = write(fd->fd_sys, ((char *) buf) + i, fwr_size); + err = write(fd->fd_sys, ((char *) buf) + i_offset, fwr_size); #ifdef ADIOI_MPE_LOGGING MPE_Log_event(ADIOI_MPE_write_b, 0, NULL); #endif if (err == -1) err_flag = 1; } - i += fwr_size; + i_offset += fwr_size; if (off + fwr_size < disp + flat_file->indices[j] + - flat_file->blocklens[j] + (ADIO_Offset) n_filetypes*filetype_extent) + flat_file->blocklens[j] + n_filetypes*(ADIO_Offset)filetype_extent) off += fwr_size; /* did not reach end of contiguous block in filetype. no more I/O needed. off is incremented by fwr_size. 
*/ @@ -291,8 +299,8 @@ void ADIOI_NOLOCK_WriteStrided(ADIO_File fd, void *buf, int count, n_filetypes++; } off = disp + flat_file->indices[j] + - (ADIO_Offset) n_filetypes*filetype_extent; - fwr_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i); + n_filetypes*(ADIO_Offset)filetype_extent; + fwr_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i_offset); } } } @@ -327,6 +335,8 @@ void ADIOI_NOLOCK_WriteStrided(ADIO_File fd, void *buf, int count, #ifdef ADIOI_MPE_LOGGING MPE_Log_event( ADIOI_MPE_write_a, 0, NULL ); #endif + ADIOI_Assert(size == (size_t) size); + ADIOI_Assert(off == (off_t) off); err = write(fd->fd_sys, ((char *) buf) + indx, size); #ifdef ADIOI_MPE_LOGGING MPE_Log_event( ADIOI_MPE_write_b, 0, NULL ); @@ -346,7 +356,7 @@ void ADIOI_NOLOCK_WriteStrided(ADIO_File fd, void *buf, int count, } off = disp + flat_file->indices[j] + - (ADIO_Offset) n_filetypes*filetype_extent; + n_filetypes*(ADIO_Offset)filetype_extent; new_fwr_size = flat_file->blocklens[j]; if (size != bwr_size) { diff --git a/ompi/mca/io/romio/romio/adio/common/ad_write_str.c b/ompi/mca/io/romio/romio/adio/common/ad_write_str.c index 8ad385a221..b13fb183c0 100644 --- a/ompi/mca/io/romio/romio/adio/common/ad_write_str.c +++ b/ompi/mca/io/romio/romio/adio/common/ad_write_str.c @@ -24,7 +24,7 @@ } \ } \ writebuf_off = req_off; \ - writebuf_len = (int) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\ + writebuf_len = (unsigned) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\ if (!(fd->atomicity)) ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \ ADIO_ReadContig(fd, writebuf, writebuf_len, MPI_BYTE, \ ADIO_EXPLICIT_OFFSET, writebuf_off, &status1, error_code); \ @@ -36,7 +36,8 @@ return; \ } \ } \ - write_sz = (int) (ADIOI_MIN(req_len, writebuf_off + writebuf_len - req_off)); \ + write_sz = (unsigned) (ADIOI_MIN(req_len, writebuf_off + writebuf_len - req_off)); \ + ADIOI_Assert((ADIO_Offset)write_sz == ADIOI_MIN(req_len, writebuf_off + writebuf_len - req_off));\ 
memcpy(writebuf+req_off-writebuf_off, (char *)buf +userbuf_off, write_sz);\ while (write_sz != req_len) { \ ADIO_WriteContig(fd, writebuf, writebuf_len, MPI_BYTE, \ @@ -52,7 +53,7 @@ req_len -= write_sz; \ userbuf_off += write_sz; \ writebuf_off += writebuf_len; \ - writebuf_len = (int) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\ + writebuf_len = (unsigned) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\ if (!(fd->atomicity)) ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \ ADIO_ReadContig(fd, writebuf, writebuf_len, MPI_BYTE, \ ADIO_EXPLICIT_OFFSET, writebuf_off, &status1, error_code); \ @@ -84,9 +85,10 @@ return; \ } \ writebuf_off = req_off; \ - writebuf_len = (int) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\ + writebuf_len = (unsigned) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\ } \ - write_sz = (int) (ADIOI_MIN(req_len, writebuf_off + writebuf_len - req_off)); \ + write_sz = (unsigned) (ADIOI_MIN(req_len, writebuf_off + writebuf_len - req_off)); \ + ADIOI_Assert((ADIO_Offset)write_sz == ADIOI_MIN(req_len, writebuf_off + writebuf_len - req_off));\ memcpy(writebuf+req_off-writebuf_off, (char *)buf +userbuf_off, write_sz);\ while (write_sz != req_len) { \ ADIO_WriteContig(fd, writebuf, writebuf_len, MPI_BYTE, \ @@ -101,40 +103,41 @@ req_len -= write_sz; \ userbuf_off += write_sz; \ writebuf_off += writebuf_len; \ - writebuf_len = (int) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\ + writebuf_len = (unsigned) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\ write_sz = ADIOI_MIN(req_len, writebuf_len); \ memcpy(writebuf, (char *)buf + userbuf_off, write_sz);\ } \ } - - void ADIOI_GEN_WriteStrided(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int *error_code) { + /* offset is in units of etype relative to the filetype. 
*/ ADIOI_Flatlist_node *flat_buf, *flat_file; - int i, j, k, bwr_size, fwr_size=0, st_index=0; - int bufsize, num, size, sum, n_etypes_in_filetype, size_in_filetype; - int n_filetypes, etype_in_filetype; + ADIO_Offset i_offset, sum, size_in_filetype; + int i, j, k, st_index=0; + int n_etypes_in_filetype; + ADIO_Offset num, size, n_filetypes, etype_in_filetype, st_n_filetypes; ADIO_Offset abs_off_in_filetype=0; - int filetype_size, etype_size, buftype_size, req_len; + int filetype_size, etype_size, buftype_size; MPI_Aint filetype_extent, buftype_extent; int buf_count, buftype_is_contig, filetype_is_contig; ADIO_Offset userbuf_off; ADIO_Offset off, req_off, disp, end_offset=0, writebuf_off, start_off; char *writebuf; - int flag, st_fwr_size, st_n_filetypes, writebuf_len, write_sz; + unsigned bufsize, writebuf_len, max_bufsize, write_sz; ADIO_Status status1; - int new_bwr_size, new_fwr_size, max_bufsize; + ADIO_Offset new_bwr_size, new_fwr_size, st_fwr_size, fwr_size=0, bwr_size, req_len; static char myname[] = "ADIOI_GEN_WriteStrided"; if (fd->hints->ds_write == ADIOI_HINT_DISABLE) { /* if user has disabled data sieving on reads, use naive * approach instead. */ + ADIOI_GEN_WriteStrided_naive(fd, buf, count, @@ -146,6 +149,7 @@ void ADIOI_GEN_WriteStrided(ADIO_File fd, void *buf, int count, return; } + *error_code = MPI_SUCCESS; /* changed below if error */ ADIOI_Datatype_iscontig(datatype, &buftype_is_contig); @@ -162,6 +166,7 @@ void ADIOI_GEN_WriteStrided(ADIO_File fd, void *buf, int count, MPI_Type_extent(datatype, &buftype_extent); etype_size = fd->etype_size; + ADIOI_Assert((buftype_size * count) == ((ADIO_Offset)(unsigned)buftype_size * (ADIO_Offset)count)); bufsize = buftype_size * count; /* get max_bufsize from the info object. */ @@ -177,26 +182,28 @@ void ADIOI_GEN_WriteStrided(ADIO_File fd, void *buf, int count, while (flat_buf->type != datatype) flat_buf = flat_buf->next; off = (file_ptr_type == ADIO_INDIVIDUAL) ? 
fd->fp_ind : - fd->disp + etype_size * offset; + fd->disp + (ADIO_Offset)etype_size * offset; start_off = off; end_offset = off + bufsize - 1; writebuf_off = off; writebuf = (char *) ADIOI_Malloc(max_bufsize); - writebuf_len = (int) (ADIOI_MIN(max_bufsize, end_offset-writebuf_off+1)); + writebuf_len = (unsigned) (ADIOI_MIN(max_bufsize, end_offset-writebuf_off+1)); /* if atomicity is true, lock the region to be accessed */ if (fd->atomicity) ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1); for (j=0; jcount; i++) { - userbuf_off = j*buftype_extent + flat_buf->indices[i]; - req_off = off; - req_len = flat_buf->blocklens[i]; - ADIOI_BUFFERED_WRITE_WITHOUT_READ - off += flat_buf->blocklens[i]; - } + { + for (i=0; icount; i++) { + userbuf_off = (ADIO_Offset)j*(ADIO_Offset)buftype_extent + flat_buf->indices[i]; + req_off = off; + req_len = flat_buf->blocklens[i]; + ADIOI_BUFFERED_WRITE_WITHOUT_READ + off += flat_buf->blocklens[i]; + } + } /* write the buffer out finally */ ADIO_WriteContig(fd, writebuf, writebuf_len, MPI_BYTE, ADIO_EXPLICIT_OFFSET, @@ -220,29 +227,36 @@ void ADIOI_GEN_WriteStrided(ADIO_File fd, void *buf, int count, disp = fd->disp; if (file_ptr_type == ADIO_INDIVIDUAL) { - offset = fd->fp_ind; /* in bytes */ - n_filetypes = -1; - flag = 0; - while (!flag) { - n_filetypes++; - for (i=0; icount; i++) { - if (disp + flat_file->indices[i] + - (ADIO_Offset) n_filetypes*filetype_extent + flat_file->blocklens[i] - >= offset) { - st_index = i; - fwr_size = (int) (disp + flat_file->indices[i] + - (ADIO_Offset) n_filetypes*filetype_extent - + flat_file->blocklens[i] - offset); - flag = 1; - break; - } - } - } - } + /* Wei-keng reworked type processing to be a bit more efficient */ + offset = fd->fp_ind - disp; + n_filetypes = (offset - flat_file->indices[0]) / filetype_extent; + offset -= (ADIO_Offset)n_filetypes * filetype_extent; + /* now offset is local to this extent */ + + /* find the block where offset is located, skip blocklens[i]==0 */ + 
for (i=0; icount; i++) { + ADIO_Offset dist; + if (flat_file->blocklens[i] == 0) continue; + dist = flat_file->indices[i] + flat_file->blocklens[i] - offset; + /* fwr_size is from offset to the end of block i */ + if (dist == 0) { + i++; + offset = flat_file->indices[i]; + fwr_size = flat_file->blocklens[i]; + break; + } + if (dist > 0) { + fwr_size = dist; + break; + } + } + st_index = i; /* starting index in flat_file->indices[] */ + offset += disp + (ADIO_Offset)n_filetypes*filetype_extent; + } else { n_etypes_in_filetype = filetype_size/etype_size; - n_filetypes = (int) (offset / n_etypes_in_filetype); - etype_in_filetype = (int) (offset % n_etypes_in_filetype); + n_filetypes = offset / n_etypes_in_filetype; + etype_in_filetype = offset % n_etypes_in_filetype; size_in_filetype = etype_in_filetype * etype_size; sum = 0; @@ -258,32 +272,65 @@ void ADIOI_GEN_WriteStrided(ADIO_File fd, void *buf, int count, } /* abs. offset in bytes in the file */ - offset = disp + (ADIO_Offset) n_filetypes*filetype_extent + abs_off_in_filetype; + offset = disp + (ADIO_Offset) n_filetypes*filetype_extent + + abs_off_in_filetype; } start_off = offset; + /* Wei-keng Liao:write request is within single flat_file contig block*/ + /* this could happen, for example, with subarray types that are + * actually fairly contiguous */ + if (buftype_is_contig && bufsize <= fwr_size) { + ADIO_WriteContig(fd, buf, bufsize, MPI_BYTE, ADIO_EXPLICIT_OFFSET, + offset, status, error_code); + + if (file_ptr_type == ADIO_INDIVIDUAL) { + /* update MPI-IO file pointer to point to the first byte + * that can be accessed in the fileview. */ + fd->fp_ind = offset + bufsize; + if (bufsize == fwr_size) { + do { + st_index++; + if (st_index == flat_file->count) { + st_index = 0; + n_filetypes++; + } + } while (flat_file->blocklens[st_index] == 0); + fd->fp_ind = disp + flat_file->indices[st_index] + + (ADIO_Offset)n_filetypes*filetype_extent; + } + } + fd->fp_sys_posn = -1; /* set it to null. 
*/ +#ifdef HAVE_STATUS_SET_BYTES + MPIR_Status_set_bytes(status, datatype, bufsize); +#endif + return; + } + /* Calculate end_offset, the last byte-offset that will be accessed. e.g., if start_offset=0 and 100 bytes to be write, end_offset=99*/ st_fwr_size = fwr_size; st_n_filetypes = n_filetypes; - i = 0; + i_offset = 0; j = st_index; off = offset; fwr_size = ADIOI_MIN(st_fwr_size, bufsize); - while (i < bufsize) { - i += fwr_size; + while (i_offset < bufsize) { + i_offset += fwr_size; end_offset = off + fwr_size - 1; - if (j < (flat_file->count - 1)) j++; - else { - j = 0; - n_filetypes++; - } + j = (j+1) % flat_file->count; + n_filetypes += (j == 0) ? 1 : 0; + while (flat_file->blocklens[j]==0) { + j = (j+1) % flat_file->count; + n_filetypes += (j == 0) ? 1 : 0; + } - off = disp + flat_file->indices[j] + (ADIO_Offset) n_filetypes*filetype_extent; - fwr_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i); + off = disp + flat_file->indices[j] + + n_filetypes*(ADIO_Offset)filetype_extent; + fwr_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i_offset); } /* if atomicity is true, lock the region to be accessed */ @@ -300,39 +347,41 @@ void ADIOI_GEN_WriteStrided(ADIO_File fd, void *buf, int count, /* contiguous in memory, noncontiguous in file. should be the most common case. */ - i = 0; + i_offset = 0; j = st_index; off = offset; n_filetypes = st_n_filetypes; fwr_size = ADIOI_MIN(st_fwr_size, bufsize); - while (i < bufsize) { + while (i_offset < bufsize) { if (fwr_size) { /* TYPE_UB and TYPE_LB can result in fwr_size = 0. 
save system call in such cases */ /* lseek(fd->fd_sys, off, SEEK_SET); - err = write(fd->fd_sys, ((char *) buf) + i, fwr_size);*/ + err = write(fd->fd_sys, ((char *) buf) + i_offset, fwr_size);*/ req_off = off; req_len = fwr_size; - userbuf_off = i; + userbuf_off = i_offset; ADIOI_BUFFERED_WRITE } - i += fwr_size; + i_offset += fwr_size; if (off + fwr_size < disp + flat_file->indices[j] + - flat_file->blocklens[j] + (ADIO_Offset) n_filetypes*filetype_extent) + flat_file->blocklens[j] + n_filetypes*(ADIO_Offset)filetype_extent) off += fwr_size; /* did not reach end of contiguous block in filetype. no more I/O needed. off is incremented by fwr_size. */ else { - if (j < (flat_file->count - 1)) j++; - else { - j = 0; - n_filetypes++; - } + j = (j+1) % flat_file->count; + n_filetypes += (j == 0) ? 1 : 0; + while (flat_file->blocklens[j]==0) { + j = (j+1) % flat_file->count; + n_filetypes += (j == 0) ? 1 : 0; + } off = disp + flat_file->indices[j] + - (ADIO_Offset) n_filetypes*filetype_extent; - fwr_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i); + n_filetypes*(ADIO_Offset)filetype_extent; + fwr_size = ADIOI_MIN(flat_file->blocklens[j], + bufsize-i_offset); } } } @@ -344,7 +393,7 @@ void ADIOI_GEN_WriteStrided(ADIO_File fd, void *buf, int count, while (flat_buf->type != datatype) flat_buf = flat_buf->next; k = num = buf_count = 0; - i = (int) (flat_buf->indices[0]); + i_offset = flat_buf->indices[0]; j = st_index; off = offset; n_filetypes = st_n_filetypes; @@ -355,11 +404,11 @@ void ADIOI_GEN_WriteStrided(ADIO_File fd, void *buf, int count, size = ADIOI_MIN(fwr_size, bwr_size); if (size) { /* lseek(fd->fd_sys, off, SEEK_SET); - err = write(fd->fd_sys, ((char *) buf) + i, size); */ + err = write(fd->fd_sys, ((char *) buf) + i_offset, size); */ req_off = off; req_len = size; - userbuf_off = i; + userbuf_off = i_offset; ADIOI_BUFFERED_WRITE } @@ -368,18 +417,19 @@ void ADIOI_GEN_WriteStrided(ADIO_File fd, void *buf, int count, if (size == fwr_size) { /* reached end 
of contiguous block in file */ - if (j < (flat_file->count - 1)) j++; - else { - j = 0; - n_filetypes++; + j = (j+1) % flat_file->count; + n_filetypes += (j == 0) ? 1 : 0; + while (flat_file->blocklens[j]==0) { + j = (j+1) % flat_file->count; + n_filetypes += (j == 0) ? 1 : 0; } off = disp + flat_file->indices[j] + - (ADIO_Offset) n_filetypes*filetype_extent; + n_filetypes*(ADIO_Offset)filetype_extent; new_fwr_size = flat_file->blocklens[j]; if (size != bwr_size) { - i += size; + i_offset += size; new_bwr_size -= size; } } @@ -389,8 +439,8 @@ void ADIOI_GEN_WriteStrided(ADIO_File fd, void *buf, int count, k = (k + 1)%flat_buf->count; buf_count++; - i = (int) (buftype_extent*(buf_count/flat_buf->count) + - flat_buf->indices[k]); + i_offset = (ADIO_Offset)buftype_extent*(ADIO_Offset)(buf_count/flat_buf->count) + + flat_buf->indices[k]; new_bwr_size = flat_buf->blocklens[k]; if (size != fwr_size) { off += size; diff --git a/ompi/mca/io/romio/romio/adio/common/ad_write_str_naive.c b/ompi/mca/io/romio/romio/adio/common/ad_write_str_naive.c index c8247fb888..6cd859e952 100644 --- a/ompi/mca/io/romio/romio/adio/common/ad_write_str_naive.c +++ b/ompi/mca/io/romio/romio/adio/common/ad_write_str_naive.c @@ -17,11 +17,13 @@ void ADIOI_GEN_WriteStrided_naive(ADIO_File fd, void *buf, int count, ADIOI_Flatlist_node *flat_buf, *flat_file; /* bwr == buffer write; fwr == file write */ - int bwr_size, fwr_size=0, b_index; - int bufsize, size, sum, n_etypes_in_filetype, size_in_filetype; - int n_filetypes, etype_in_filetype; - ADIO_Offset abs_off_in_filetype=0; - int filetype_size, etype_size, buftype_size, req_len; + ADIO_Offset bwr_size, fwr_size=0, sum, size_in_filetype; + int b_index; + unsigned bufsize; + int n_etypes_in_filetype; + ADIO_Offset size, n_filetypes, etype_in_filetype; + ADIO_Offset abs_off_in_filetype=0, req_len; + int filetype_size, etype_size, buftype_size; MPI_Aint filetype_extent, buftype_extent; int buf_count, buftype_is_contig, filetype_is_contig; ADIO_Offset 
userbuf_off; @@ -44,6 +46,7 @@ void ADIOI_GEN_WriteStrided_naive(ADIO_File fd, void *buf, int count, MPI_Type_extent(buftype, &buftype_extent); etype_size = fd->etype_size; + ADIOI_Assert((buftype_size * count) == ((ADIO_Offset)(unsigned)buftype_size * (ADIO_Offset)count)); bufsize = buftype_size * count; /* contiguous in buftype and filetype is handled elsewhere */ @@ -57,14 +60,13 @@ void ADIOI_GEN_WriteStrided_naive(ADIO_File fd, void *buf, int count, while (flat_buf->type != buftype) flat_buf = flat_buf->next; off = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind : - fd->disp + etype_size * offset; + fd->disp + (ADIO_Offset)etype_size * offset; start_off = off; end_offset = off + bufsize - 1; /* if atomicity is true, lock (exclusive) the region to be accessed */ - if ((fd->atomicity) && (fd->file_system != ADIO_PIOFS) && - (fd->file_system != ADIO_PVFS) && (fd->file_system != ADIO_PVFS2)) + if ((fd->atomicity) && ADIO_Feature(fd, ADIO_LOCKS)) { ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1); } @@ -74,14 +76,16 @@ void ADIOI_GEN_WriteStrided_naive(ADIO_File fd, void *buf, int count, */ for (b_count=0; b_count < count; b_count++) { for (b_index=0; b_index < flat_buf->count; b_index++) { - userbuf_off = b_count*buftype_extent + + userbuf_off = (ADIO_Offset)b_count*(ADIO_Offset)buftype_extent + flat_buf->indices[b_index]; req_off = off; req_len = flat_buf->blocklens[b_index]; + ADIOI_Assert(req_len == (int) req_len); + ADIOI_Assert((((ADIO_Offset)(MPIR_Upint)buf) + userbuf_off) == (ADIO_Offset)(MPIR_Upint)((MPIR_Upint)buf + userbuf_off)); ADIO_WriteContig(fd, (char *) buf + userbuf_off, - req_len, + (int)req_len, MPI_BYTE, ADIO_EXPLICIT_OFFSET, req_off, @@ -94,8 +98,7 @@ void ADIOI_GEN_WriteStrided_naive(ADIO_File fd, void *buf, int count, } } - if ((fd->atomicity) && (fd->file_system != ADIO_PIOFS) && - (fd->file_system != ADIO_PVFS) && (fd->file_system != ADIO_PVFS2)) + if ((fd->atomicity) && ADIO_Feature(fd, ADIO_LOCKS)) { ADIOI_UNLOCK(fd, 
start_off, SEEK_SET, end_offset-start_off+1); } @@ -105,7 +108,8 @@ void ADIOI_GEN_WriteStrided_naive(ADIO_File fd, void *buf, int count, } else { /* noncontiguous in file */ - int f_index, st_fwr_size, st_index = 0, st_n_filetypes; + int f_index, st_index = 0; + ADIO_Offset st_fwr_size, st_n_filetypes; int flag; /* First we're going to calculate a set of values for use in all @@ -135,15 +139,15 @@ void ADIOI_GEN_WriteStrided_naive(ADIO_File fd, void *buf, int count, n_filetypes++; for (f_index=0; f_index < flat_file->count; f_index++) { if (disp + flat_file->indices[f_index] + - (ADIO_Offset) n_filetypes*filetype_extent + + n_filetypes*(ADIO_Offset)filetype_extent + flat_file->blocklens[f_index] >= start_off) { /* this block contains our starting position */ st_index = f_index; - fwr_size = (int) (disp + flat_file->indices[f_index] + - (ADIO_Offset) n_filetypes*filetype_extent + - flat_file->blocklens[f_index] - start_off); + fwr_size = disp + flat_file->indices[f_index] + + n_filetypes*(ADIO_Offset)filetype_extent + + flat_file->blocklens[f_index] - start_off; flag = 1; break; } @@ -152,8 +156,8 @@ void ADIOI_GEN_WriteStrided_naive(ADIO_File fd, void *buf, int count, } else { n_etypes_in_filetype = filetype_size/etype_size; - n_filetypes = (int) (offset / n_etypes_in_filetype); - etype_in_filetype = (int) (offset % n_etypes_in_filetype); + n_filetypes = offset / n_etypes_in_filetype; + etype_in_filetype = offset % n_etypes_in_filetype; size_in_filetype = etype_in_filetype * etype_size; sum = 0; @@ -170,7 +174,7 @@ void ADIOI_GEN_WriteStrided_naive(ADIO_File fd, void *buf, int count, } /* abs. 
offset in bytes in the file */ - start_off = disp + (ADIO_Offset) n_filetypes*filetype_extent + + start_off = disp + n_filetypes*(ADIO_Offset)filetype_extent + abs_off_in_filetype; } @@ -199,9 +203,9 @@ void ADIOI_GEN_WriteStrided_naive(ADIO_File fd, void *buf, int count, } off = disp + flat_file->indices[f_index] + - (ADIO_Offset) n_filetypes*filetype_extent; + n_filetypes*(ADIO_Offset)filetype_extent; fwr_size = ADIOI_MIN(flat_file->blocklens[f_index], - bufsize-(int)userbuf_off); + bufsize-(unsigned)userbuf_off); } /* End of calculations. At this point the following values have @@ -214,8 +218,7 @@ void ADIOI_GEN_WriteStrided_naive(ADIO_File fd, void *buf, int count, */ /* if atomicity is true, lock (exclusive) the region to be accessed */ - if ((fd->atomicity) && (fd->file_system != ADIO_PIOFS) && - (fd->file_system != ADIO_PVFS) && (fd->file_system != ADIO_PVFS2)) + if ((fd->atomicity) && ADIO_Feature(fd, ADIO_LOCKS)) { ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1); } @@ -239,9 +242,11 @@ void ADIOI_GEN_WriteStrided_naive(ADIO_File fd, void *buf, int count, req_off = off; req_len = fwr_size; + ADIOI_Assert(req_len == (int) req_len); + ADIOI_Assert((((ADIO_Offset)(MPIR_Upint)buf) + userbuf_off) == (ADIO_Offset)(MPIR_Upint)((MPIR_Upint)buf + userbuf_off)); ADIO_WriteContig(fd, (char *) buf + userbuf_off, - req_len, + (int)req_len, MPI_BYTE, ADIO_EXPLICIT_OFFSET, req_off, @@ -253,7 +258,7 @@ void ADIOI_GEN_WriteStrided_naive(ADIO_File fd, void *buf, int count, if (off + fwr_size < disp + flat_file->indices[f_index] + flat_file->blocklens[f_index] + - (ADIO_Offset) n_filetypes*filetype_extent) + n_filetypes*(ADIO_Offset)filetype_extent) { /* important that this value be correct, as it is * used to set the offset in the fd near the end of @@ -271,14 +276,14 @@ void ADIOI_GEN_WriteStrided_naive(ADIO_File fd, void *buf, int count, n_filetypes++; } off = disp + flat_file->indices[f_index] + - (ADIO_Offset) n_filetypes*filetype_extent; + 
n_filetypes*(ADIO_Offset)filetype_extent; fwr_size = ADIOI_MIN(flat_file->blocklens[f_index], - bufsize-(int)userbuf_off); + bufsize-(unsigned)userbuf_off); } } } else { - int i, tmp_bufsize = 0; + ADIO_Offset i_offset, tmp_bufsize = 0; /* noncontiguous in memory as well as in file */ ADIOI_Flatten_datatype(buftype); @@ -286,7 +291,7 @@ void ADIOI_GEN_WriteStrided_naive(ADIO_File fd, void *buf, int count, while (flat_buf->type != buftype) flat_buf = flat_buf->next; b_index = buf_count = 0; - i = (int) (flat_buf->indices[0]); + i_offset = flat_buf->indices[0]; f_index = st_index; off = start_off; n_filetypes = st_n_filetypes; @@ -295,17 +300,19 @@ void ADIOI_GEN_WriteStrided_naive(ADIO_File fd, void *buf, int count, /* while we haven't read size * count bytes, keep going */ while (tmp_bufsize < bufsize) { - int new_bwr_size = bwr_size, new_fwr_size = fwr_size; + ADIO_Offset new_bwr_size = bwr_size, new_fwr_size = fwr_size; size = ADIOI_MIN(fwr_size, bwr_size); if (size) { req_off = off; req_len = size; - userbuf_off = i; + userbuf_off = i_offset; + ADIOI_Assert(req_len == (int) req_len); + ADIOI_Assert((((ADIO_Offset)(MPIR_Upint)buf) + userbuf_off) == (ADIO_Offset)(MPIR_Upint)((MPIR_Upint)buf + userbuf_off)); ADIO_WriteContig(fd, (char *) buf + userbuf_off, - req_len, + (int)req_len, MPI_BYTE, ADIO_EXPLICIT_OFFSET, req_off, @@ -323,11 +330,11 @@ void ADIOI_GEN_WriteStrided_naive(ADIO_File fd, void *buf, int count, } off = disp + flat_file->indices[f_index] + - (ADIO_Offset) n_filetypes*filetype_extent; + n_filetypes*(ADIO_Offset)filetype_extent; new_fwr_size = flat_file->blocklens[f_index]; if (size != bwr_size) { - i += size; + i_offset += size; new_bwr_size -= size; } } @@ -337,8 +344,8 @@ void ADIOI_GEN_WriteStrided_naive(ADIO_File fd, void *buf, int count, b_index = (b_index + 1)%flat_buf->count; buf_count++; - i = (int) (buftype_extent*(buf_count/flat_buf->count) + - flat_buf->indices[b_index]); + i_offset = 
(ADIO_Offset)buftype_extent*(ADIO_Offset)(buf_count/flat_buf->count) + + flat_buf->indices[b_index]; new_bwr_size = flat_buf->blocklens[b_index]; if (size != fwr_size) { off += size; @@ -352,8 +359,7 @@ void ADIOI_GEN_WriteStrided_naive(ADIO_File fd, void *buf, int count, } /* unlock the file region if we locked it */ - if ((fd->atomicity) && (fd->file_system != ADIO_PIOFS) && - (fd->file_system != ADIO_PVFS) && (fd->file_system != ADIO_PVFS2)) + if ((fd->atomicity) && ADIO_Feature(fd, ADIO_LOCKS)) { ADIOI_UNLOCK(fd, start_off, SEEK_SET, end_offset-start_off+1); } diff --git a/ompi/mca/io/romio/romio/adio/common/byte_offset.c b/ompi/mca/io/romio/romio/adio/common/byte_offset.c index e23acf34e9..ce88cf1893 100644 --- a/ompi/mca/io/romio/romio/adio/common/byte_offset.c +++ b/ompi/mca/io/romio/romio/adio/common/byte_offset.c @@ -14,10 +14,10 @@ void ADIOI_Get_byte_offset(ADIO_File fd, ADIO_Offset offset, ADIO_Offset *disp) { ADIOI_Flatlist_node *flat_file; - int i, sum, n_etypes_in_filetype, size_in_filetype; - int n_filetypes, etype_in_filetype; - ADIO_Offset abs_off_in_filetype=0; - int filetype_size, etype_size, filetype_is_contig; + int i; + ADIO_Offset n_filetypes, etype_in_filetype, sum, abs_off_in_filetype=0, size_in_filetype; + unsigned n_etypes_in_filetype, filetype_size, etype_size; + int filetype_is_contig; MPI_Aint filetype_extent; ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig); @@ -29,10 +29,10 @@ void ADIOI_Get_byte_offset(ADIO_File fd, ADIO_Offset offset, ADIO_Offset *disp) flat_file = ADIOI_Flatlist; while (flat_file->type != fd->filetype) flat_file = flat_file->next; - MPI_Type_size(fd->filetype, &filetype_size); + MPI_Type_size(fd->filetype, (int*)&filetype_size); n_etypes_in_filetype = filetype_size/etype_size; - n_filetypes = (int) (offset / n_etypes_in_filetype); - etype_in_filetype = (int) (offset % n_etypes_in_filetype); + n_filetypes = offset / n_etypes_in_filetype; + etype_in_filetype = offset % n_etypes_in_filetype; 
size_in_filetype = etype_in_filetype * etype_size; sum = 0; @@ -47,6 +47,6 @@ void ADIOI_Get_byte_offset(ADIO_File fd, ADIO_Offset offset, ADIO_Offset *disp) /* abs. offset in bytes in the file */ MPI_Type_extent(fd->filetype, &filetype_extent); - *disp = fd->disp + (ADIO_Offset) n_filetypes*filetype_extent + abs_off_in_filetype; + *disp = fd->disp + n_filetypes * ADIOI_AINT_CAST_TO_OFFSET filetype_extent + abs_off_in_filetype; } } diff --git a/ompi/mca/io/romio/romio/adio/common/cb_config_list.c b/ompi/mca/io/romio/romio/adio/common/cb_config_list.c index 1f18f6a0cc..bb2c1eeff9 100644 --- a/ompi/mca/io/romio/romio/adio/common/cb_config_list.c +++ b/ompi/mca/io/romio/romio/adio/common/cb_config_list.c @@ -35,7 +35,7 @@ #undef CB_CONFIG_LIST_DEBUG /* a couple of globals keep things simple */ -static int cb_config_list_keyval = MPI_KEYVAL_INVALID; +int ADIOI_cb_config_list_keyval = MPI_KEYVAL_INVALID; static char *yylval; static char *token_ptr; @@ -83,7 +83,7 @@ int ADIOI_cb_bcast_rank_map(ADIO_File fd) * FS-INDEP. 
*/ value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char)); ADIOI_Snprintf(value, MPI_MAX_INFO_VAL+1, "%d", fd->hints->cb_nodes); - MPI_Info_set(fd->info, "cb_nodes", value); + ADIOI_Info_set(fd->info, "cb_nodes", value); ADIOI_Free(value); return 0; @@ -111,14 +111,16 @@ int ADIOI_cb_gather_name_array(MPI_Comm comm, ADIO_cb_name_array array = NULL; int alloc_size; - if (cb_config_list_keyval == MPI_KEYVAL_INVALID) { + if (ADIOI_cb_config_list_keyval == MPI_KEYVAL_INVALID) { + /* cleaned up by ADIOI_End_call */ MPI_Keyval_create((MPI_Copy_function *) ADIOI_cb_copy_name_array, (MPI_Delete_function *) ADIOI_cb_delete_name_array, - &cb_config_list_keyval, NULL); + &ADIOI_cb_config_list_keyval, NULL); } else { - MPI_Attr_get(comm, cb_config_list_keyval, (void *) &array, &found); + MPI_Attr_get(comm, ADIOI_cb_config_list_keyval, (void *) &array, &found); if (found) { + ADIOI_Assert(array != NULL); *arrayp = array; return 0; } @@ -231,8 +233,8 @@ int ADIOI_cb_gather_name_array(MPI_Comm comm, * it next time an open is performed on this same comm, and on the * dupcomm, so we can use it in I/O operations. 
*/ - MPI_Attr_put(comm, cb_config_list_keyval, array); - MPI_Attr_put(dupcomm, cb_config_list_keyval, array); + MPI_Attr_put(comm, ADIOI_cb_config_list_keyval, array); + MPI_Attr_put(dupcomm, ADIOI_cb_config_list_keyval, array); *arrayp = array; return 0; } @@ -362,7 +364,7 @@ int ADIOI_cb_config_list_parse(char *config_list, /* ADIOI_cb_copy_name_array() - attribute copy routine */ int ADIOI_cb_copy_name_array(MPI_Comm comm, - int *keyval, + int keyval, void *extra, void *attr_in, void **attr_out, @@ -371,11 +373,11 @@ int ADIOI_cb_copy_name_array(MPI_Comm comm, ADIO_cb_name_array array; ADIOI_UNREFERENCED_ARG(comm); - ADIOI_UNREFERENCED_ARG(keyval); + ADIOI_UNREFERENCED_ARG(keyval); ADIOI_UNREFERENCED_ARG(extra); array = (ADIO_cb_name_array) attr_in; - array->refct++; + if (array != NULL) array->refct++; *attr_out = attr_in; *flag = 1; /* make a copy in the new communicator */ @@ -386,17 +388,17 @@ int ADIOI_cb_copy_name_array(MPI_Comm comm, /* ADIOI_cb_delete_name_array() - attribute destructor */ int ADIOI_cb_delete_name_array(MPI_Comm comm, - int *keyval, + int keyval, void *attr_val, void *extra) { ADIO_cb_name_array array; ADIOI_UNREFERENCED_ARG(comm); - ADIOI_UNREFERENCED_ARG(keyval); ADIOI_UNREFERENCED_ARG(extra); array = (ADIO_cb_name_array) attr_val; + ADIOI_Assert(array != NULL); array->refct--; if (array->refct <= 0) { @@ -411,7 +413,6 @@ int ADIOI_cb_delete_name_array(MPI_Comm comm, if (array->names != NULL) ADIOI_Free(array->names); ADIOI_Free(array); } - return MPI_SUCCESS; } @@ -679,19 +680,32 @@ static int get_max_procs(int cb_nodes) * * Returns a token of types defined at top of this file. 
*/ +#ifdef ROMIO_BGL +/* On BlueGene, the ',' character shows up in get_processor_name, so we have to + * use a different delimiter */ +#define COLON ':' +#define COMMA ';' +#define DELIMS ":;" +#else +/* these tokens work for every other platform */ +#define COLON ':' +#define COMMA ',' +#define DELIMS ":," +#endif + static int cb_config_list_lex(void) { int slen; if (*token_ptr == '\0') return AGG_EOS; - slen = (int)strcspn(token_ptr, ":,"); + slen = (int)strcspn(token_ptr, DELIMS); - if (*token_ptr == ':') { + if (*token_ptr == COLON) { token_ptr++; return AGG_COLON; } - if (*token_ptr == ',') { + if (*token_ptr == COMMA) { token_ptr++; return AGG_COMMA; } diff --git a/ompi/mca/io/romio/romio/adio/common/eof_offset.c b/ompi/mca/io/romio/romio/adio/common/eof_offset.c index e8ddbd3072..981efa087a 100644 --- a/ompi/mca/io/romio/romio/adio/common/eof_offset.c +++ b/ompi/mca/io/romio/romio/adio/common/eof_offset.c @@ -13,9 +13,10 @@ void ADIOI_Get_eof_offset(ADIO_File fd, ADIO_Offset *eof_offset) { - int error_code, filetype_is_contig, etype_size, filetype_size; - ADIO_Offset fsize, disp, sum=0, size_in_file; - int n_filetypes, flag, i, rem; + unsigned filetype_size; + int error_code, filetype_is_contig, etype_size; + ADIO_Offset fsize, disp, sum=0, size_in_file, n_filetypes, rem; + int flag, i; ADIO_Fcntl_t *fcntl_struct; MPI_Aint filetype_extent; ADIOI_Flatlist_node *flat_file; @@ -43,7 +44,7 @@ void ADIOI_Get_eof_offset(ADIO_File fd, ADIO_Offset *eof_offset) while (flat_file->type != fd->filetype) flat_file = flat_file->next; - MPI_Type_size(fd->filetype, &filetype_size); + MPI_Type_size(fd->filetype, (int*)&filetype_size); MPI_Type_extent(fd->filetype, &filetype_extent); disp = fd->disp; @@ -55,14 +56,14 @@ void ADIOI_Get_eof_offset(ADIO_File fd, ADIO_Offset *eof_offset) for (i=0; icount; i++) { sum += flat_file->blocklens[i]; if (disp + flat_file->indices[i] + - (ADIO_Offset) n_filetypes*filetype_extent + + n_filetypes* ADIOI_AINT_CAST_TO_OFFSET filetype_extent 
+ flat_file->blocklens[i] >= fsize) { if (disp + flat_file->indices[i] + - (ADIO_Offset) n_filetypes*filetype_extent >= fsize) + n_filetypes * ADIOI_AINT_CAST_TO_OFFSET filetype_extent >= fsize) sum -= flat_file->blocklens[i]; else { - rem = (int) (disp + flat_file->indices[i] + - (ADIO_Offset) n_filetypes*filetype_extent + rem = (disp + flat_file->indices[i] + + n_filetypes* ADIOI_AINT_CAST_TO_OFFSET filetype_extent + flat_file->blocklens[i] - fsize); sum -= rem; } @@ -71,7 +72,7 @@ void ADIOI_Get_eof_offset(ADIO_File fd, ADIO_Offset *eof_offset) } } } - size_in_file = (ADIO_Offset) n_filetypes*filetype_size + sum; + size_in_file = n_filetypes*(ADIO_Offset)filetype_size + sum; *eof_offset = (size_in_file+etype_size-1)/etype_size; /* ceiling division */ } } diff --git a/ompi/mca/io/romio/romio/adio/common/flatten.c b/ompi/mca/io/romio/romio/adio/common/flatten.c index e644d2b138..06a456cc41 100644 --- a/ompi/mca/io/romio/romio/adio/common/flatten.c +++ b/ompi/mca/io/romio/romio/adio/common/flatten.c @@ -1,8 +1,5 @@ -/* -*- Mode: C; c-basic-offset:4 ; -*- - * vim: ts=8 sts=4 sw=4 noexpandtab - * - * $Id: flatten.c,v 1.24 2006/07/05 20:40:13 robl Exp $ - * +/* -*- Mode: C; c-basic-offset:4 ; -*- */ +/* * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -12,14 +9,15 @@ /* #ifdef MPISGI #include "mpisgi2.h" #endif */ +#ifdef ROMIO_INSIDE_MPICH2 +#include "mpid_datatype.h" +#endif + +#ifdef USE_DBG_LOGGING + #define FLATTEN_DEBUG 1 +#endif void ADIOI_Optimize_flattened(ADIOI_Flatlist_node *flat_type); -void ADIOI_Flatten_copy_type(ADIOI_Flatlist_node *flat, - int old_type_start, - int old_type_end, - int new_type_start, - ADIO_Offset offset_adjustment); - /* flatten datatype and add it to Flatlist */ void ADIOI_Flatten_datatype(MPI_Datatype datatype) { @@ -29,16 +27,25 @@ void ADIOI_Flatten_datatype(MPI_Datatype datatype) int curr_index=0, is_contig; ADIOI_Flatlist_node *flat, *prev=0; +#ifdef ROMIO_INSIDE_MPICH2 + if(MPIU_DBG_SELECTED(DATATYPE,TYPICAL)) MPIDU_Datatype_debug(datatype, 4); /* use -env MPICH_DBG_OUTPUT=stdout */ +#endif /* check if necessary to flatten. */ /* is it entirely contiguous? */ ADIOI_Datatype_iscontig(datatype, &is_contig); + #ifdef FLATTEN_DEBUG + DBG_FPRINTF(stderr,"ADIOI_Flatten_datatype:: is_contig %#X\n",is_contig); + #endif if (is_contig) return; /* has it already been flattened? 
*/ flat = ADIOI_Flatlist; while (flat) { if (flat->type == datatype) { + #ifdef FLATTEN_DEBUG + DBG_FPRINTF(stderr,"ADIOI_Flatten_datatype:: found datatype %#X\n", datatype); + #endif return; } else { @@ -58,39 +65,42 @@ void ADIOI_Flatten_datatype(MPI_Datatype datatype) flat->indices = NULL; flat->count = ADIOI_Count_contiguous_blocks(datatype, &curr_index); -#if 0 - FPRINTF(stderr, "cur_idx = %d\n", curr_index); +#ifdef FLATTEN_DEBUG + DBG_FPRINTF(stderr,"ADIOI_Flatten_datatype:: count %#X, cur_idx = %#X\n",flat->count,curr_index); #endif -/* FPRINTF(stderr, "%d\n", flat->count);*/ +/* DBG_FPRINTF(stderr, "%d\n", flat->count);*/ if (flat->count) { - flat->blocklens = (int *) ADIOI_Malloc(flat->count * sizeof(int)); - flat->indices = (ADIO_Offset *) ADIOI_Malloc(flat->count * \ - sizeof(ADIO_Offset)); + flat->blocklens = (ADIO_Offset *) ADIOI_Malloc(flat->count * sizeof(ADIO_Offset)); + flat->indices = (ADIO_Offset *) ADIOI_Malloc(flat->count * sizeof(ADIO_Offset)); } curr_index = 0; #ifdef HAVE_MPIR_TYPE_FLATTEN flatten_idx = (MPI_Aint) flat->count; MPIR_Type_flatten(datatype, flat->indices, flat->blocklens, &flatten_idx); + #ifdef FLATTEN_DEBUG + DBG_FPRINTF(stderr,"ADIOI_Flatten_datatype:: MPIR_Type_flatten\n"); + #endif #else ADIOI_Flatten(datatype, flat, 0, &curr_index); + #ifdef FLATTEN_DEBUG + DBG_FPRINTF(stderr,"ADIOI_Flatten_datatype:: ADIOI_Flatten\n"); + #endif ADIOI_Optimize_flattened(flat); #endif /* debug */ -#if 0 +#ifdef FLATTEN_DEBUG { int i; - FPRINTF(stderr, "blens: "); for (i=0; icount; i++) - FPRINTF(stderr, "%d ", flat->blocklens[i]); - FPRINTF(stderr, "\n\n"); - FPRINTF(stderr, "indices: "); - for (i=0; icount; i++) - FPRINTF(stderr, "%ld ", (long) flat->indices[i]); - FPRINTF(stderr, "\n\n"); - } + DBG_FPRINTF(stderr,"ADIOI_Flatten_datatype:: i %#X, blocklens %#llX, indices %#llX\n", + i, + flat->blocklens[i], + flat->indices[i] + ); + } #endif } @@ -103,22 +113,46 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, 
ADIO_Offset st_offset, int *curr_index) { int i, j, k, m, n, num, basic_num, prev_index; - int top_count, combiner, old_combiner, old_is_contig; - int old_size, nints, nadds, ntypes, old_nints, old_nadds, old_ntypes; - MPI_Aint old_extent; + int combiner, old_combiner, old_is_contig; + int nints, nadds, ntypes, old_nints, old_nadds, old_ntypes; + /* By using ADIO_Offset we preserve +/- sign and + avoid >2G integer arithmetic problems */ + ADIO_Offset top_count; + /* By using unsigned we avoid >2G integer arithmetic problems */ + unsigned old_size; + MPI_Aint old_extent;/* Assume extents are non-negative */ int *ints; - MPI_Aint *adds; + MPI_Aint *adds; /* Make no assumptions about +/- sign on these */ MPI_Datatype *types; - MPI_Type_get_envelope(datatype, &nints, &nadds, &ntypes, &combiner); ints = (int *) ADIOI_Malloc((nints+1)*sizeof(int)); adds = (MPI_Aint *) ADIOI_Malloc((nadds+1)*sizeof(MPI_Aint)); types = (MPI_Datatype *) ADIOI_Malloc((ntypes+1)*sizeof(MPI_Datatype)); MPI_Type_get_contents(datatype, nints, nadds, ntypes, ints, adds, types); + #ifdef FLATTEN_DEBUG + DBG_FPRINTF(stderr,"ADIOI_Flatten:: st_offset %#llX, curr_index %#X\n",st_offset,*curr_index); + DBG_FPRINTF(stderr,"ADIOI_Flatten:: nints %#X, nadds %#X, ntypes %#X\n",nints, nadds, ntypes); + for(i=0; i< nints; ++i) + { + DBG_FPRINTF(stderr,"ADIOI_Flatten:: ints[%d]=%#X\n",i,ints[i]); + } + for(i=0; i< nadds; ++i) + { + DBG_FPRINTF(stderr,"ADIOI_Flatten:: adds[%d]="MPI_AINT_FMT_HEX_SPEC"\n",i,adds[i]); + } + for(i=0; i< ntypes; ++i) + { + DBG_FPRINTF(stderr,"ADIOI_Flatten:: types[%d]=%#llX\n",i,(unsigned long long)(unsigned long)types[i]); + } + if(MPIU_DBG_SELECTED(DATATYPE,TYPICAL)) MPIDU_Datatype_debug(datatype, 4); /* use -env MPICH_DBG_OUTPUT=stdout */ + #endif switch (combiner) { #ifdef MPIIMPL_HAVE_MPI_COMBINER_DUP case MPI_COMBINER_DUP: + #ifdef FLATTEN_DEBUG + DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_DUP\n"); + #endif MPI_Type_get_envelope(types[0], &old_nints, &old_nadds, 
&old_ntypes, &old_combiner); ADIOI_Datatype_iscontig(types[0], &old_is_contig); @@ -131,6 +165,9 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, { int dims = ints[0]; MPI_Datatype stype; + #ifdef FLATTEN_DEBUG + DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_SUBARRAY\n"); + #endif ADIO_Type_create_subarray(dims, &ints[1], /* sizes */ @@ -149,6 +186,9 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, { int dims = ints[2]; MPI_Datatype dtype; + #ifdef FLATTEN_DEBUG + DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_DARRAY\n"); + #endif ADIO_Type_create_darray(ints[0], /* size */ ints[1], /* rank */ @@ -160,12 +200,23 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, ints[4*dims+3], /* order */ types[0], &dtype); + #ifdef FLATTEN_DEBUG + DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_DARRAY indices[%#X] %#llX, flat->blocklens[%#X] %#llX, st_offset %#llX, curr_index %#X);\n", + 0, flat->indices[0], 0, flat->blocklens[0], st_offset, *curr_index); + #endif ADIOI_Flatten(dtype, flat, st_offset, curr_index); + #ifdef FLATTEN_DEBUG + DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_DARRAY >ADIOI_Flatten(dtype, flat->indices[%#X] %#llX, flat->blocklens[%#X] %#llX, st_offset %#llX, curr_index %#X);\n", + 0, flat->indices[0], 0, flat->blocklens[0], st_offset, *curr_index); + #endif MPI_Type_free(&dtype); } break; #endif case MPI_COMBINER_CONTIGUOUS: + #ifdef FLATTEN_DEBUG + DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_CONTIGUOUS\n"); + #endif top_count = ints[0]; MPI_Type_get_envelope(types[0], &old_nints, &old_nadds, &old_ntypes, &old_combiner); @@ -179,8 +230,11 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, /* simplest case, made up of basic or contiguous types */ j = *curr_index; flat->indices[j] = st_offset; - MPI_Type_size(types[0], &old_size); + MPI_Type_size(types[0], (int*)&old_size); flat->blocklens[j] = top_count * old_size; + #ifdef FLATTEN_DEBUG + 
DBG_FPRINTF(stderr,"ADIOI_Flatten:: simple flat->indices[%#X] %#llX, flat->blocklens[%#X] %#llX\n",j, flat->indices[j], j, flat->blocklens[j]); + #endif (*curr_index)++; } else { @@ -192,8 +246,11 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, MPI_Type_extent(types[0], &old_extent); for (m=1; mindices[j] = flat->indices[j-num] + old_extent; + flat->indices[j] = flat->indices[j-num] + ADIOI_AINT_CAST_TO_OFFSET old_extent; flat->blocklens[j] = flat->blocklens[j-num]; + #ifdef FLATTEN_DEBUG + DBG_FPRINTF(stderr,"ADIOI_Flatten:: derived flat->indices[%#X] %#llX, flat->blocklens[%#X] %#llX\n",j, flat->indices[j], j, flat->blocklens[j]); + #endif j++; } } @@ -202,6 +259,9 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, break; case MPI_COMBINER_VECTOR: + #ifdef FLATTEN_DEBUG + DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_VECTOR\n"); + #endif top_count = ints[0]; MPI_Type_get_envelope(types[0], &old_nints, &old_nadds, &old_ntypes, &old_combiner); @@ -213,19 +273,24 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, if (prev_index == *curr_index) { /* simplest case, vector of basic or contiguous types */ + /* By using ADIO_Offset we preserve +/- sign and + avoid >2G integer arithmetic problems */ + ADIO_Offset blocklength = ints[1], stride = ints[2]; j = *curr_index; flat->indices[j] = st_offset; - MPI_Type_size(types[0], &old_size); - flat->blocklens[j] = ints[1] * old_size; + MPI_Type_size(types[0], (int*)&old_size); + flat->blocklens[j] = blocklength * old_size; for (i=j+1; iindices[i] = flat->indices[i-1] + - (unsigned) ints[2] * (unsigned) old_size; + flat->indices[i] = flat->indices[i-1] + stride * old_size; flat->blocklens[i] = flat->blocklens[j]; } *curr_index = i; } else { /* vector of noncontiguous derived types */ + /* By using ADIO_Offset we preserve +/- sign and + avoid >2G integer arithmetic problems */ + ADIO_Offset blocklength = ints[1], stride = ints[2]; j = *curr_index; num = 
*curr_index - prev_index; @@ -233,9 +298,9 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, /* The noncontiguous types have to be replicated blocklen times and then strided. Replicate the first one. */ MPI_Type_extent(types[0], &old_extent); - for (m=1; mindices[j] = flat->indices[j-num] + old_extent; + flat->indices[j] = flat->indices[j-num] + ADIOI_AINT_CAST_TO_OFFSET old_extent; flat->blocklens[j] = flat->blocklens[j-num]; j++; } @@ -246,8 +311,7 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, num = *curr_index - prev_index; for (i=1; iindices[j] = flat->indices[j-num] + ints[2] - *old_extent; + flat->indices[j] = flat->indices[j-num] + stride * ADIOI_AINT_CAST_TO_OFFSET old_extent; flat->blocklens[j] = flat->blocklens[j-num]; j++; } @@ -258,6 +322,9 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, case MPI_COMBINER_HVECTOR: case MPI_COMBINER_HVECTOR_INTEGER: + #ifdef FLATTEN_DEBUG + DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_HVECTOR_INTEGER\n"); + #endif top_count = ints[0]; MPI_Type_get_envelope(types[0], &old_nints, &old_nadds, &old_ntypes, &old_combiner); @@ -269,10 +336,13 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, if (prev_index == *curr_index) { /* simplest case, vector of basic or contiguous types */ + /* By using ADIO_Offset we preserve +/- sign and + avoid >2G integer arithmetic problems */ + ADIO_Offset blocklength = ints[1]; j = *curr_index; flat->indices[j] = st_offset; - MPI_Type_size(types[0], &old_size); - flat->blocklens[j] = ints[1] * old_size; + MPI_Type_size(types[0], (int*)&old_size); + flat->blocklens[j] = blocklength * old_size; for (i=j+1; iindices[i] = flat->indices[i-1] + adds[0]; flat->blocklens[i] = flat->blocklens[j]; @@ -281,6 +351,9 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, } else { /* vector of noncontiguous derived types */ + /* By using ADIO_Offset we preserve +/- sign and + avoid >2G integer 
arithmetic problems */ + ADIO_Offset blocklength = ints[1]; j = *curr_index; num = *curr_index - prev_index; @@ -288,9 +361,9 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, /* The noncontiguous types have to be replicated blocklen times and then strided. Replicate the first one. */ MPI_Type_extent(types[0], &old_extent); - for (m=1; mindices[j] = flat->indices[j-num] + old_extent; + flat->indices[j] = flat->indices[j-num] + ADIOI_AINT_CAST_TO_OFFSET old_extent; flat->blocklens[j] = flat->blocklens[j-num]; j++; } @@ -311,6 +384,9 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, break; case MPI_COMBINER_INDEXED: + #ifdef FLATTEN_DEBUG + DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_INDEXED\n"); + #endif top_count = ints[0]; MPI_Type_get_envelope(types[0], &old_nints, &old_nadds, &old_ntypes, &old_combiner); @@ -319,15 +395,23 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, prev_index = *curr_index; if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig)) - ADIOI_Flatten(types[0], flat, - st_offset+ints[top_count+1]*old_extent, curr_index); + { + /* By using ADIO_Offset we preserve +/- sign and + avoid >2G integer arithmetic problems */ + ADIO_Offset stride = ints[top_count+1]; + ADIOI_Flatten(types[0], flat, + st_offset+stride* ADIOI_AINT_CAST_TO_OFFSET old_extent, curr_index); + } if (prev_index == *curr_index) { /* simplest case, indexed type made up of basic or contiguous types */ j = *curr_index; for (i=j; iindices[i] = st_offset + ints[top_count+1+i-j]*old_extent; - flat->blocklens[i] = (int) (ints[1+i-j]*old_extent); + /* By using ADIO_Offset we preserve +/- sign and + avoid >2G integer arithmetic problems */ + ADIO_Offset blocklength = ints[1+i-j], stride = ints[top_count+1+i-j]; + flat->indices[i] = st_offset + stride* ADIOI_AINT_CAST_TO_OFFSET old_extent; + flat->blocklens[i] = blocklength* ADIOI_AINT_CAST_TO_OFFSET old_extent; } *curr_index = i; } @@ -342,7 +426,7 @@ void 
ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, and then strided. Replicate the first one. */ for (m=1; mindices[j] = flat->indices[j-num] + old_extent; + flat->indices[j] = flat->indices[j-num] + ADIOI_AINT_CAST_TO_OFFSET old_extent; flat->blocklens[j] = flat->blocklens[j-num]; j++; } @@ -354,15 +438,17 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, num = *curr_index - prev_index; prev_index = *curr_index; for (m=0; mindices[j] = flat->indices[j-num] + - (ints[top_count+1+i]-ints[top_count+i])*old_extent; + /* By using ADIO_Offset we preserve +/- sign and + avoid >2G integer arithmetic problems */ + ADIO_Offset stride = ints[top_count+1+i]-ints[top_count+i]; + flat->indices[j] = flat->indices[j-num] + stride* ADIOI_AINT_CAST_TO_OFFSET old_extent; flat->blocklens[j] = flat->blocklens[j-num]; j++; } *curr_index = j; for (m=1; mindices[j] = flat->indices[j-basic_num] + old_extent; + flat->indices[j] = flat->indices[j-basic_num] + ADIOI_AINT_CAST_TO_OFFSET old_extent; flat->blocklens[j] = flat->blocklens[j-basic_num]; j++; } @@ -373,6 +459,9 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, break; case MPI_COMBINER_INDEXED_BLOCK: + #ifdef FLATTEN_DEBUG + DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_INDEXED_BLOCK\n"); + #endif top_count = ints[0]; MPI_Type_get_envelope(types[0], &old_nints, &old_nadds, &old_ntypes, &old_combiner); @@ -381,15 +470,23 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, prev_index = *curr_index; if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig)) - ADIOI_Flatten(types[0], flat, - st_offset+ints[1+1]*old_extent, curr_index); + { + /* By using ADIO_Offset we preserve +/- sign and + avoid >2G integer arithmetic problems */ + ADIO_Offset stride = ints[1+1]; + ADIOI_Flatten(types[0], flat, + st_offset+stride* ADIOI_AINT_CAST_TO_OFFSET old_extent, curr_index); + } if (prev_index == *curr_index) { /* simplest case, indexed type made up of basic or 
contiguous types */ j = *curr_index; for (i=j; iindices[i] = st_offset + ints[1+1+i-j]*old_extent; - flat->blocklens[i] = (int) (ints[1]*old_extent); + /* By using ADIO_Offset we preserve +/- sign and + avoid >2G integer arithmetic problems */ + ADIO_Offset blocklength = ints[1], stride = ints[1+1+i-j]; + flat->indices[i] = st_offset + stride* ADIOI_AINT_CAST_TO_OFFSET old_extent; + flat->blocklens[i] = blocklength* ADIOI_AINT_CAST_TO_OFFSET old_extent; } *curr_index = i; } @@ -403,7 +500,7 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, and then strided. Replicate the first one. */ for (m=1; mindices[j] = flat->indices[j-num] + old_extent; + flat->indices[j] = flat->indices[j-num] + ADIOI_AINT_CAST_TO_OFFSET old_extent; flat->blocklens[j] = flat->blocklens[j-num]; j++; } @@ -414,7 +511,10 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, num = *curr_index - prev_index; for (i=1; iindices[j] = flat->indices[j-num] + (ints[2+i]-ints[1+i])*old_extent; + /* By using ADIO_Offset we preserve +/- sign and + avoid >2G integer arithmetic problems */ + ADIO_Offset stride = ints[2+i]-ints[1+i]; + flat->indices[j] = flat->indices[j-num] + stride* ADIOI_AINT_CAST_TO_OFFSET old_extent; flat->blocklens[j] = flat->blocklens[j-num]; j++; } @@ -425,6 +525,9 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, case MPI_COMBINER_HINDEXED: case MPI_COMBINER_HINDEXED_INTEGER: + #ifdef FLATTEN_DEBUG + DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_HINDEXED_INTEGER\n"); + #endif top_count = ints[0]; MPI_Type_get_envelope(types[0], &old_nints, &old_nadds, &old_ntypes, &old_combiner); @@ -432,15 +535,20 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, prev_index = *curr_index; if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig)) - ADIOI_Flatten(types[0], flat, st_offset+adds[0], curr_index); + { + ADIOI_Flatten(types[0], flat, st_offset+adds[0], curr_index); + } if (prev_index == *curr_index) 
{ /* simplest case, indexed type made up of basic or contiguous types */ j = *curr_index; - MPI_Type_size(types[0], &old_size); + MPI_Type_size(types[0], (int*)&old_size); for (i=j; i2G integer arithmetic problems */ + ADIO_Offset blocklength = ints[1+i-j]; flat->indices[i] = st_offset + adds[i-j]; - flat->blocklens[i] = ints[1+i-j]*old_size; + flat->blocklens[i] = blocklength*old_size; } *curr_index = i; } @@ -456,7 +564,7 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, MPI_Type_extent(types[0], &old_extent); for (m=1; mindices[j] = flat->indices[j-num] + old_extent; + flat->indices[j] = flat->indices[j-num] + ADIOI_AINT_CAST_TO_OFFSET old_extent; flat->blocklens[j] = flat->blocklens[j-num]; j++; } @@ -475,7 +583,7 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, *curr_index = j; for (m=1; mindices[j] = flat->indices[j-basic_num] + old_extent; + flat->indices[j] = flat->indices[j-basic_num] + ADIOI_AINT_CAST_TO_OFFSET old_extent; flat->blocklens[j] = flat->blocklens[j-basic_num]; j++; } @@ -487,6 +595,9 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, case MPI_COMBINER_STRUCT: case MPI_COMBINER_STRUCT_INTEGER: + #ifdef FLATTEN_DEBUG + DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_STRUCT_INTEGER\n"); + #endif top_count = ints[0]; for (n=0; n2G integer arithmetic problems */ + ADIO_Offset blocklength = ints[1+n]; j = *curr_index; flat->indices[j] = st_offset + adds[n]; - MPI_Type_size(types[n], &old_size); - flat->blocklens[j] = ints[1+n] * old_size; + MPI_Type_size(types[n], (int*)&old_size); + flat->blocklens[j] = blocklength * old_size; + #ifdef FLATTEN_DEBUG + DBG_FPRINTF(stderr,"ADIOI_Flatten:: simple adds[%#X] "MPI_AINT_FMT_HEX_SPEC", flat->indices[%#X] %#llX, flat->blocklens[%#X] %#llX\n",n,adds[n],j, flat->indices[j], j, flat->blocklens[j]); + #endif (*curr_index)++; } else { @@ -515,8 +632,11 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, 
MPI_Type_extent(types[n], &old_extent); for (m=1; mindices[j] = flat->indices[j-num] + old_extent; + flat->indices[j] = flat->indices[j-num] + ADIOI_AINT_CAST_TO_OFFSET old_extent; flat->blocklens[j] = flat->blocklens[j-num]; + #ifdef FLATTEN_DEBUG + DBG_FPRINTF(stderr,"ADIOI_Flatten:: simple old_extent "MPI_AINT_FMT_HEX_SPEC", flat->indices[%#X] %#llX, flat->blocklens[%#X] %#llX\n",old_extent,j, flat->indices[j], j, flat->blocklens[j]); + #endif j++; } } @@ -525,9 +645,63 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, } break; + case MPI_COMBINER_RESIZED: + #ifdef FLATTEN_DEBUG + DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_RESIZED\n"); + #endif + + /* This is done similar to a type_struct with an lb, datatype, ub */ + + /* handle the Lb */ + j = *curr_index; + flat->indices[j] = st_offset + adds[0]; + flat->blocklens[j] = 0; + + #ifdef FLATTEN_DEBUG + DBG_FPRINTF(stderr,"ADIOI_Flatten:: simple adds[%#X] "MPI_AINT_FMT_HEX_SPEC", flat->indices[%#X] %#llX, flat->blocklens[%#X] %#llX\n",0,adds[0],j, flat->indices[j], j, flat->blocklens[j]); + #endif + + (*curr_index)++; + + /* handle the datatype */ + + MPI_Type_get_envelope(types[0], &old_nints, &old_nadds, + &old_ntypes, &old_combiner); + ADIOI_Datatype_iscontig(types[0], &old_is_contig); + + if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig)) { + ADIOI_Flatten(types[0], flat, st_offset+adds[0], curr_index); + } + else { + /* current type is basic or contiguous */ + j = *curr_index; + flat->indices[j] = st_offset; + MPI_Type_size(types[0], (int*)&old_size); + flat->blocklens[j] = old_size; + + #ifdef FLATTEN_DEBUG + DBG_FPRINTF(stderr,"ADIOI_Flatten:: simple adds[%#X] "MPI_AINT_FMT_HEX_SPEC", flat->indices[%#X] %#llX, flat->blocklens[%#X] %#llX\n",0,adds[0],j, flat->indices[j], j, flat->blocklens[j]); + #endif + + (*curr_index)++; + } + + /* take care of the extent as a UB */ + j = *curr_index; + flat->indices[j] = st_offset + adds[0] + adds[1]; + flat->blocklens[j] = 0; + + 
#ifdef FLATTEN_DEBUG + DBG_FPRINTF(stderr,"ADIOI_Flatten:: simple adds[%#X] "MPI_AINT_FMT_HEX_SPEC", flat->indices[%#X] %#llX, flat->blocklens[%#X] %#llX\n",1,adds[1],j, flat->indices[j], j, flat->blocklens[j]); + #endif + + (*curr_index)++; + + break; + default: /* TODO: FIXME (requires changing prototypes to return errors...) */ - FPRINTF(stderr, "Error: Unsupported datatype passed to ADIOI_Flatten\n"); + DBG_FPRINTF(stderr, "Error: Unsupported datatype passed to ADIOI_Flatten\n"); MPI_Abort(MPI_COMM_WORLD, 1); } @@ -545,6 +719,10 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, ADIOI_Free(adds); ADIOI_Free(types); + #ifdef FLATTEN_DEBUG + DBG_FPRINTF(stderr,"ADIOI_Flatten:: return st_offset %#llX, curr_index %#X\n",st_offset,*curr_index); + #endif + } /********************************************************/ @@ -569,7 +747,7 @@ int ADIOI_Count_contiguous_blocks(MPI_Datatype datatype, int *curr_index) int top_count, combiner, old_combiner, old_is_contig; int nints, nadds, ntypes, old_nints, old_nadds, old_ntypes; int *ints; - MPI_Aint *adds; + MPI_Aint *adds; /* Make no assumptions about +/- sign on these */ MPI_Datatype *types; MPI_Type_get_envelope(datatype, &nints, &nadds, &ntypes, &combiner); @@ -789,9 +967,32 @@ int ADIOI_Count_contiguous_blocks(MPI_Datatype datatype, int *curr_index) } } break; + + case MPI_COMBINER_RESIZED: + /* treat it as a struct with lb, type, ub */ + + /* add 2 for lb and ub */ + (*curr_index) += 2; + count += 2; + + /* add for datatype */ + MPI_Type_get_envelope(types[0], &old_nints, &old_nadds, + &old_ntypes, &old_combiner); + ADIOI_Datatype_iscontig(types[0], &old_is_contig); + + if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig)) { + count += ADIOI_Count_contiguous_blocks(types[0], curr_index); + } + else { + /* basic or contiguous type */ + count++; + (*curr_index)++; + } + break; + default: /* TODO: FIXME */ - FPRINTF(stderr, "Error: Unsupported datatype passed to ADIOI_Count_contiguous_blocks, 
combiner = %d\n", combiner); + DBG_FPRINTF(stderr, "Error: Unsupported datatype passed to ADIOI_Count_contiguous_blocks, combiner = %d\n", combiner); MPI_Abort(MPI_COMM_WORLD, 1); } @@ -812,6 +1013,53 @@ int ADIOI_Count_contiguous_blocks(MPI_Datatype datatype, int *curr_index) #endif /* HAVE_MPIR_TYPE_GET_CONTIG_BLOCKS */ } +/* removezeros() make a second pass over the + * flattented type knocking out zero-length blocks, but leave first and last + * alone (they mark LB and UB) */ + +static void removezeros(ADIOI_Flatlist_node *flat_type) +{ + int i,j,opt_blocks; + ADIO_Offset *opt_blocklens; + ADIO_Offset *opt_indices; + + /* short-circuit: there is nothing to do if there are + * - 1 block: what can we remove? + * - 2 blocks: either both blocks are data (and not zero) + * or one block is the UB or LB */ + if (flat_type->count <= 2) return; + + opt_blocks = 2; /* LB and UB */ + for (i=1; i < flat_type->count -1; i++) { + if(flat_type->blocklens[i] != 0) + opt_blocks++; + } + /* no optimization possible */ + if (opt_blocks == flat_type->count) return; + opt_blocklens = (ADIO_Offset *) ADIOI_Malloc(opt_blocks * sizeof(ADIO_Offset)); + opt_indices = (ADIO_Offset *)ADIOI_Malloc(opt_blocks*sizeof(ADIO_Offset)); + + /* fill in new blocklists, keeping first and last no matter what */ + opt_blocklens[0] = flat_type->blocklens[0]; + opt_indices[0] = flat_type->indices[0]; + j = 1; /* always two entries: one for LB and UB ([0] and [j])*/ + for (i=1; i< flat_type->count -1; i++) { + if( flat_type->blocklens[i] != 0) { + opt_indices[j] = flat_type->indices[i]; + opt_blocklens[j] = flat_type->blocklens[i]; + j++; + } + } + opt_indices[j] = flat_type->indices[flat_type->count -1]; + opt_blocklens[j] = flat_type->blocklens[flat_type->count -1]; + + flat_type->count = opt_blocks; + ADIOI_Free(flat_type->blocklens); + ADIOI_Free(flat_type->indices); + flat_type->blocklens = opt_blocklens; + flat_type->indices = opt_indices; + return; +} 
/****************************************************************/ @@ -822,14 +1070,14 @@ int ADIOI_Count_contiguous_blocks(MPI_Datatype datatype, int *curr_index) * contiguous operations). * * NOTE: a further optimization would be to remove zero length blocks. However, - * we do not do this as parts of the code use the presence of zero length - * blocks to indicate UB and LB. + * the first and last blocks must remain as zero length first or last block + * indicates UB and LB. * */ void ADIOI_Optimize_flattened(ADIOI_Flatlist_node *flat_type) { int i, j, opt_blocks; - int *opt_blocklens; + ADIO_Offset *opt_blocklens; ADIO_Offset *opt_indices; opt_blocks = 1; @@ -844,7 +1092,7 @@ void ADIOI_Optimize_flattened(ADIOI_Flatlist_node *flat_type) /* if we can't reduce the number of blocks, quit now */ if (opt_blocks == flat_type->count) return; - opt_blocklens = (int *) ADIOI_Malloc(opt_blocks * sizeof(int)); + opt_blocklens = (ADIO_Offset *) ADIOI_Malloc(opt_blocks * sizeof(ADIO_Offset)); opt_indices = (ADIO_Offset *)ADIOI_Malloc(opt_blocks*sizeof(ADIO_Offset)); /* fill in new blocklists */ @@ -866,6 +1114,7 @@ void ADIOI_Optimize_flattened(ADIOI_Flatlist_node *flat_type) ADIOI_Free(flat_type->indices); flat_type->blocklens = opt_blocklens; flat_type->indices = opt_indices; + removezeros(flat_type); return; } @@ -885,26 +1134,3 @@ void ADIOI_Delete_flattened(MPI_Datatype datatype) ADIOI_Free(flat); } } - -/* ADIOI_Flatten_copy_type() - * flat - pointer to flatlist node holding offset and lengths - * start - starting index of src type in arrays - * end - one larger than ending index of src type (makes loop clean) - * offset_adjustment - amount to add to "indices" (offset) component - * of each off/len pair copied - */ -void ADIOI_Flatten_copy_type(ADIOI_Flatlist_node *flat, - int old_type_start, - int old_type_end, - int new_type_start, - ADIO_Offset offset_adjustment) -{ - int i, out_index = new_type_start; - - for (i=old_type_start; i < old_type_end; i++) { - 
flat->indices[out_index] = flat->indices[i] + offset_adjustment; - flat->blocklens[out_index] = flat->blocklens[i]; - out_index++; - } -} - diff --git a/ompi/mca/io/romio/romio/adio/common/get_fp_posn.c b/ompi/mca/io/romio/romio/adio/common/get_fp_posn.c index a5a422a7c7..1d7cab8c4b 100644 --- a/ompi/mca/io/romio/romio/adio/common/get_fp_posn.c +++ b/ompi/mca/io/romio/romio/adio/common/get_fp_posn.c @@ -7,6 +7,7 @@ #include "adio.h" #include "adio_extern.h" +#include "adioi.h" /* returns the current position of the individual file pointer in etype units relative to the current view. */ @@ -14,10 +15,11 @@ void ADIOI_Get_position(ADIO_File fd, ADIO_Offset *offset) { ADIOI_Flatlist_node *flat_file; - int i, n_filetypes, flag, frd_size; - int filetype_size, etype_size, filetype_is_contig; + int i, flag; + unsigned filetype_size; + int etype_size, filetype_is_contig; MPI_Aint filetype_extent; - ADIO_Offset disp, byte_offset, sum=0, size_in_file; + ADIO_Offset disp, byte_offset, sum=0, size_in_file, n_filetypes, frd_size; ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig); etype_size = fd->etype_size; @@ -28,7 +30,7 @@ void ADIOI_Get_position(ADIO_File fd, ADIO_Offset *offset) flat_file = ADIOI_Flatlist; while (flat_file->type != fd->filetype) flat_file = flat_file->next; - MPI_Type_size(fd->filetype, &filetype_size); + MPI_Type_size(fd->filetype, (int*)&filetype_size); MPI_Type_extent(fd->filetype, &filetype_extent); disp = fd->disp; @@ -41,18 +43,18 @@ void ADIOI_Get_position(ADIO_File fd, ADIO_Offset *offset) for (i=0; icount; i++) { sum += flat_file->blocklens[i]; if (disp + flat_file->indices[i] + - (ADIO_Offset) n_filetypes*filetype_extent + flat_file->blocklens[i] + n_filetypes* ADIOI_AINT_CAST_TO_OFFSET filetype_extent + flat_file->blocklens[i] >= byte_offset) { - frd_size = (int) (disp + flat_file->indices[i] + - (ADIO_Offset) n_filetypes*filetype_extent - + flat_file->blocklens[i] - byte_offset); + frd_size = disp + flat_file->indices[i] + + 
n_filetypes * ADIOI_AINT_CAST_TO_OFFSET filetype_extent + + flat_file->blocklens[i] - byte_offset; sum -= frd_size; flag = 1; break; } } } - size_in_file = (ADIO_Offset) n_filetypes*filetype_size + sum; + size_in_file = n_filetypes * (ADIO_Offset)filetype_size + sum; *offset = size_in_file/etype_size; } } diff --git a/ompi/mca/io/romio/romio/adio/common/heap-sort.c b/ompi/mca/io/romio/romio/adio/common/heap-sort.c new file mode 100644 index 0000000000..63b6c1c8b5 --- /dev/null +++ b/ompi/mca/io/romio/romio/adio/common/heap-sort.c @@ -0,0 +1,133 @@ +/* -*- Mode: C; c-basic-offset:4 ; -*- */ +/* + * + * Copyright (C) 2008 University of Chicago. + * See COPYRIGHT notice in top-level directory. + */ + +#include +#include +#include +#include +#include "heap-sort.h" + +#define NOEXP2 + +static void heapify(heap_t *heap, int i); + +/* From Introduction To Algorithms by Cormen, Leiserson, and Rivest */ + +static inline int parent(int i) { + return (i/2); +} + +static inline int left(int i) { + return (2*i); +} + +static inline int right(int i) { + return (2*i + 1); +} + +int ADIOI_Heap_create(heap_t *heap, int size) { + heap->size = size; + heap->nodes = (heap_node_t *) ADIOI_Calloc (size, sizeof(heap_node_t)); + if (heap->nodes == NULL) + return 1; + else + return 0; +} + +void ADIOI_Heap_free(heap_t *heap) { + ADIOI_Free(heap->nodes); +} + +/* should suppress unused warnings on GCC */ +static void build_heap(heap_t *heap) ATTRIBUTE((unused, used)); + +static void build_heap(heap_t *heap) +{ + int i; + for (i=(heap->size/2-1); i >= 0; i--) + heapify(heap, i); +} + +static void heapify(heap_t *heap, int i) { + int l, r, smallest; + heap_node_t *nodes; + heap_node_t tmp_node; + + nodes = heap->nodes; + + l = left(i); + r = right(i); + + if ((l <= heap->size) && (nodes[l].offset < nodes[i].offset)) + smallest = l; + else + smallest = i; + + if ((r <= heap->size) && (nodes[r].offset < nodes[smallest].offset)) + smallest = r; + + if (smallest != i) { + tmp_node = nodes[i]; + 
nodes[i] = nodes[smallest]; + nodes[smallest] = tmp_node; + heapify(heap, smallest); + } +} + +void ADIOI_Heap_insert(heap_t *heap, ADIO_Offset offset, int proc, + ADIO_Offset reg_max_len) { + heap_node_t *nodes; + int i; + nodes = heap->nodes; + i = ++heap->size - 1; + while ((i > 0) && (nodes[parent(i)].offset > offset)) { + nodes[i] = nodes[parent(i)]; + i = parent(i); + } + nodes[i].offset = offset; + nodes[i].proc = proc; + nodes[i].reg_max_len = reg_max_len; +} + +void ADIOI_Heap_extract_min(heap_t *heap, ADIO_Offset* offset, int *proc, + ADIO_Offset *reg_max_len) { + heap_node_t *nodes; + nodes = heap->nodes; + + assert (heap->size > 0); + *offset = nodes[0].offset; + *proc = nodes[0].proc; + *reg_max_len = nodes[0].reg_max_len; + nodes[0] = nodes[heap->size-1]; + heap->size--; + heapify(heap, 0); +} + +/* should suppress unused warnings on GCC */ +static void print_heap(heap_t *heap) ATTRIBUTE((unused, used)); + +static void print_heap(heap_t *heap) +{ +#ifndef NOEXP2 + int i; + double level = 0; + int next_level_idx = 1; + + printf ("heap->size = %d\n", heap->size); + printf ("offsets:\n"); + for (i=0; i < heap->size; i++) { + printf ("%lld ", heap->nodes[i].offset); + + if ((i+1) == next_level_idx) { + printf ("\n"); + next_level_idx += (int) exp2(level+1); + level++; + } + } + printf ("\n"); +#endif +} diff --git a/ompi/mca/io/romio/romio/adio/common/iscontig.c b/ompi/mca/io/romio/romio/adio/common/iscontig.c index a96a8d8808..1b47035cc6 100644 --- a/ompi/mca/io/romio/romio/adio/common/iscontig.c +++ b/ompi/mca/io/romio/romio/adio/common/iscontig.c @@ -60,7 +60,8 @@ void ADIOI_Datatype_iscontig(MPI_Datatype datatype, int *flag) #elif defined(OMPI_BUILDING) && OMPI_BUILDING -/* This function is included in Open MPI source code */ +/* void ADIOI_Datatype_iscontig(MPI_Datatype datatype, int *flag) is defined + * and implemented in OpenMPI itself */ #else diff --git a/ompi/mca/io/romio/romio/adio/common/lock.c b/ompi/mca/io/romio/romio/adio/common/lock.c 
index d21ec4d0ed..fda652c524 100644 --- a/ompi/mca/io/romio/romio/adio/common/lock.c +++ b/ompi/mca/io/romio/romio/adio/common/lock.c @@ -93,7 +93,7 @@ int ADIOI_Set_lock(FDTYPE fd, int cmd, int type, ADIO_Offset offset, int whence, int ADIOI_Set_lock(FDTYPE fd, int cmd, int type, ADIO_Offset offset, int whence, ADIO_Offset len) { - int err, error_code; + int err, error_code, err_count = 0, sav_errno; struct flock lock; if (len == 0) return MPI_SUCCESS; @@ -120,16 +120,42 @@ int ADIOI_Set_lock(FDTYPE fd, int cmd, int type, ADIO_Offset offset, int whence, lock.l_len = len; #endif + sav_errno = errno; /* save previous errno in case we recover from retryable errors */ errno = 0; do { err = fcntl(fd, cmd, &lock); - } while (err && (errno == EINTR)); +#ifdef USE_DBG_LOGGING +/* if (MPIU_DBG_SELECTED(ROMIO,TERSE)) */ + { + if (err && ((errno == EINTR) || (errno == EINPROGRESS))) + { + if((err_count < 5) || (err_count > 9995)) + { + fprintf(stderr, "File locking failed in ADIOI_Set_lock(fd %#X,cmd %s/%#X,type %s/%#X,whence %#X) with return value %#X and errno %#X. Retry (%d).\n", + fd, + ((cmd == F_GETLK )? "F_GETLK" : + ((cmd == F_SETLK )? "F_SETLK" : + ((cmd == F_SETLKW )? "F_SETLKW" : "UNEXPECTED"))), + cmd, + ((type == F_RDLCK )? "F_RDLCK" : + ((type == F_WRLCK )? "F_WRLCK" : + ((type == F_UNLCK )? 
"F_UNLOCK" : "UNEXPECTED"))), + type, + whence, err, errno, err_count); + perror("ADIOI_Set_lock:"); + fprintf(stderr,"ADIOI_Set_lock:offset %#llx, length %#llx\n",(unsigned long long)offset, (unsigned long long)len); + } + } + } +#endif + } while (err && ((errno == EINTR) || ((errno == EINPROGRESS) && (++err_count < 10000)))); if (err && (errno != EBADF)) { /* FIXME: This should use the error message system, especially for MPICH2 */ FPRINTF(stderr, "File locking failed in ADIOI_Set_lock(fd %X,cmd %s/%X,type %s/%X,whence %X) with return value %X and errno %X.\n" - "If the file system is NFS, you need to use NFS version 3, ensure that the lockd daemon is running on all the machines, and mount the directory with the 'noac' option (no attribute caching).\n", + "- If the file system is NFS, you need to use NFS version 3, ensure that the lockd daemon is running on all the machines, and mount the directory with the 'noac' option (no attribute caching).\n" + "- If the file system is LUSTRE, ensure that the directory is mounted with the 'flock' option.\n", fd, ((cmd == F_GETLK )? "F_GETLK" : ((cmd == F_SETLK )? "F_SETLK" : @@ -145,6 +171,9 @@ int ADIOI_Set_lock(FDTYPE fd, int cmd, int type, ADIO_Offset offset, int whence, MPI_Abort(MPI_COMM_WORLD, 1); } + if(!err) /* report fcntl failure errno's (EBADF), otherwise */ + errno = sav_errno; /* restore previous errno in case we recovered from retryable errors */ + error_code = (err == 0) ? MPI_SUCCESS : MPI_ERR_UNKNOWN; return error_code; } diff --git a/ompi/mca/io/romio/romio/adio/common/malloc.c b/ompi/mca/io/romio/romio/adio/common/malloc.c index ee9546528f..55306842bd 100644 --- a/ompi/mca/io/romio/romio/adio/common/malloc.c +++ b/ompi/mca/io/romio/romio/adio/common/malloc.c @@ -14,16 +14,11 @@ Later on, add some tracing and error checking, similar to MPID_trmalloc. 
*/ -/* can't include adio.h here, because of the macro, so - * include romioconf.h to make sure config-time defines get included */ - -#include "romioconf.h" +#include "adio.h" #include "mpi.h" #include #include #include "mpipr.h" -/* Open MPI: This seemes to have been missing */ -#include "adio.h" #ifdef HAVE_MALLOC_H #include @@ -35,66 +30,84 @@ /* style: allow:calloc:1 sig:0 */ /* style: allow:realloc:1 sig:0 */ - #define FPRINTF fprintf -void *ADIOI_Malloc_fn(size_t size, int lineno, char *fname); -void *ADIOI_Calloc_fn(size_t nelem, size_t elsize, int lineno, char *fname); -void *ADIOI_Realloc_fn(void *ptr, size_t size, int lineno, char *fname); -void ADIOI_Free_fn(void *ptr, int lineno, char *fname); -void *ADIOI_Malloc_fn(size_t size, int lineno, char *fname) +void *ADIOI_Malloc_fn(size_t size, int lineno, const char *fname); +void *ADIOI_Calloc_fn(size_t nelem, size_t elsize, int lineno, const char *fname); +void *ADIOI_Realloc_fn(void *ptr, size_t size, int lineno, const char *fname); +void ADIOI_Free_fn(void *ptr, int lineno, const char *fname); + +void *ADIOI_Malloc_fn(size_t size, int lineno, const char *fname) { void *new; #ifdef ROMIO_XFS new = (void *) memalign(XFS_MEMALIGN, size); +#else +#ifdef HAVE_MPIU_FUNCS + new = (void *) MPIU_Malloc(size); #else new = (void *) malloc(size); +#endif #endif if (!new) { FPRINTF(stderr, "Out of memory in file %s, line %d\n", fname, lineno); MPI_Abort(MPI_COMM_WORLD, 1); } - + DBG_FPRINTF(stderr, "ADIOI_Malloc %s:<%d> %p (%#zX)\n", fname, lineno, new, size); return new; } -void *ADIOI_Calloc_fn(size_t nelem, size_t elsize, int lineno, char *fname) +void *ADIOI_Calloc_fn(size_t nelem, size_t elsize, int lineno, const char *fname) { void *new; +#ifdef HAVE_MPIU_FUNCS + new = (void *) MPIU_Calloc(nelem, elsize); +#else new = (void *) calloc(nelem, elsize); +#endif if (!new) { FPRINTF(stderr, "Out of memory in file %s, line %d\n", fname, lineno); MPI_Abort(MPI_COMM_WORLD, 1); } - + DBG_FPRINTF(stderr, "ADIOI_Calloc 
%s:<%d> %p\n", fname, lineno, new); return new; } -void *ADIOI_Realloc_fn(void *ptr, size_t size, int lineno, char *fname) +void *ADIOI_Realloc_fn(void *ptr, size_t size, int lineno, const char *fname) { void *new; +#ifdef HAVE_MPIU_FUNCS + new = (void *) MPIU_Realloc(ptr, size); +#else new = (void *) realloc(ptr, size); +#endif if (!new) { FPRINTF(stderr, "realloc failed in file %s, line %d\n", fname, lineno); MPI_Abort(MPI_COMM_WORLD, 1); } + DBG_FPRINTF(stderr, "ADIOI_Realloc %s:<%d> %p\n", fname, lineno, new); return new; } -void ADIOI_Free_fn(void *ptr, int lineno, char *fname) +void ADIOI_Free_fn(void *ptr, int lineno, const char *fname) { + DBG_FPRINTF(stderr, "ADIOI_Free %s:<%d> %p\n", fname, lineno, ptr); if (!ptr) { FPRINTF(stderr, "Attempt to free null pointer in file %s, line %d\n", fname, lineno); MPI_Abort(MPI_COMM_WORLD, 1); } +#ifdef HAVE_MPIU_FUNCS + MPIU_Free(ptr); +#else free(ptr); +#endif } diff --git a/ompi/mca/io/romio/romio/adio/common/system_hints.c b/ompi/mca/io/romio/romio/adio/common/system_hints.c index 361f16addb..bd01d3b005 100644 --- a/ompi/mca/io/romio/romio/adio/common/system_hints.c +++ b/ompi/mca/io/romio/romio/adio/common/system_hints.c @@ -40,6 +40,28 @@ #define ROMIO_HINT_DEFAULT_CFG "/etc/romio-hints" #define ROMIO_HINT_ENV_VAR "ROMIO_HINTS" + /* should suppress unused warnings on GCC */ +static void dump_keys(MPI_Info info) ATTRIBUTE((unused, used)); + +/* debug function: a routine I want in the library to make my life easier when + * using a source debugger. 
please ignore any "defined but not used" warnings + */ +static void dump_keys(MPI_Info info) +{ + int i, nkeys, flag; + char key[MPI_MAX_INFO_KEY]; + char value[MPI_MAX_INFO_VAL]; + + MPI_Info_get_nkeys(info, &nkeys); + + for (i=0; ifns->ADIOI_xxx_OpenColl))(fd, rank, access_mode, error_code) + #define ADIO_ReadContig(fd,buf,count,datatype,file_ptr_type,offset,status,error_code) \ (*(fd->fns->ADIOI_xxx_ReadContig))(fd,buf,count,datatype,file_ptr_type,offset,status,error_code) @@ -269,19 +287,31 @@ struct ADIOI_Fns_struct { #define ADIO_SetInfo(fd, users_info, error_code) \ (*(fd->fns->ADIOI_xxx_SetInfo))(fd, users_info, error_code) +#define ADIO_Feature(fd, flag) \ + (*(fd->fns->ADIOI_xxx_Feature))(fd, flag) + /* structure for storing access info of this process's request from the file domain of other processes, and vice-versa. used as array of structures indexed by process number. */ typedef struct { ADIO_Offset *offsets; /* array of offsets */ - int *lens; /* array of lengths */ + int *lens; /* array of lengths */ + /* consider aints or offsets for lens? Seems to be used as in-memory + buffer lengths, so it should be < 2G and ok as an int */ MPI_Aint *mem_ptrs; /* array of pointers. used in the read/write phase to indicate where the data is stored in memory */ int count; /* size of above arrays */ } ADIOI_Access; +/* structure for storing generic offset/length pairs. 
used to describe + file realms among other things */ +typedef struct { + ADIO_Offset *offsets; /* array of offsets */ + int *lens; /* array of lengths */ + int count; /* size of above arrays */ +} ADIOI_Offlen; /* prototypes for ADIO internal functions */ @@ -292,21 +322,29 @@ void ADIOI_Flatten(MPI_Datatype type, ADIOI_Flatlist_node *flat, void ADIOI_Delete_flattened(MPI_Datatype datatype); int ADIOI_Count_contiguous_blocks(MPI_Datatype type, int *curr_index); void ADIOI_Complete_async(int *error_code); -void *ADIOI_Malloc_fn(size_t size, int lineno, char *fname); -void *ADIOI_Calloc_fn(size_t nelem, size_t elsize, int lineno, char *fname); -void *ADIOI_Realloc_fn(void *ptr, size_t size, int lineno, char *fname); -void ADIOI_Free_fn(void *ptr, int lineno, char *fname); +void *ADIOI_Malloc_fn(size_t size, int lineno, const char *fname); +void *ADIOI_Calloc_fn(size_t nelem, size_t elsize, int lineno, const char *fname); +void *ADIOI_Realloc_fn(void *ptr, size_t size, int lineno, const char *fname); +void ADIOI_Free_fn(void *ptr, int lineno, const char *fname); void ADIOI_Datatype_iscontig(MPI_Datatype datatype, int *flag); void ADIOI_Get_position(ADIO_File fd, ADIO_Offset *offset); void ADIOI_Get_eof_offset(ADIO_File fd, ADIO_Offset *eof_offset); void ADIOI_Get_byte_offset(ADIO_File fd, ADIO_Offset offset, ADIO_Offset *disp); void ADIOI_process_system_hints(MPI_Info info); +void ADIOI_incorporate_system_hints(MPI_Info info, MPI_Info sysinfo, + MPI_Info *new_info); void ADIOI_GEN_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int *error_code); void ADIOI_GEN_Flush(ADIO_File fd, int *error_code); +void ADIOI_GEN_OpenColl(ADIO_File fd, int rank, + int access_mode, int *error_code); +void ADIOI_SCALEABLE_OpenColl(ADIO_File fd, int rank, + int access_mode, int *error_code); +void ADIOI_FAILSAFE_OpenColl(ADIO_File fd, int rank, + int access_mode, int *error_code); void ADIOI_GEN_Delete(char *filename, int *error_code); void ADIOI_GEN_ReadContig(ADIO_File fd, 
void *buf, int count, MPI_Datatype datatype, int file_ptr_type, @@ -347,6 +385,8 @@ int ADIOI_GEN_aio_wait_fn(int count, void **array_of_states, double timeout, ADIO_Status *status); int ADIOI_GEN_aio_query_fn(void *extra_state, ADIO_Status *status); int ADIOI_GEN_aio_free_fn(void *extra_state); +int ADIOI_GEN_Feature(ADIO_File fd, int feature); + void ADIOI_GEN_ReadStrided_naive(ADIO_File fd, void *buf, int count, MPI_Datatype buftype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int @@ -373,7 +413,7 @@ void ADIOI_GEN_WriteStridedColl(ADIO_File fd, void *buf, int count, *error_code); void ADIOI_Calc_my_off_len(ADIO_File fd, int bufcount, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset - offset, ADIO_Offset **offset_list_ptr, int + offset, ADIO_Offset **offset_list_ptr, ADIO_Offset **len_list_ptr, ADIO_Offset *start_offset_ptr, ADIO_Offset *end_offset_ptr, int *contig_access_count_ptr); @@ -381,7 +421,9 @@ void ADIOI_Calc_file_domains(ADIO_Offset *st_offsets, ADIO_Offset *end_offsets, int nprocs, int nprocs_for_coll, ADIO_Offset *min_st_offset_ptr, ADIO_Offset **fd_start_ptr, ADIO_Offset - **fd_end_ptr, ADIO_Offset *fd_size_ptr); + **fd_end_ptr, int min_fd_size, + ADIO_Offset *fd_size_ptr, + int striping_unit); int ADIOI_Calc_aggregator(ADIO_File fd, ADIO_Offset off, ADIO_Offset min_off, @@ -390,7 +432,7 @@ int ADIOI_Calc_aggregator(ADIO_File fd, ADIO_Offset *fd_start, ADIO_Offset *fd_end); void ADIOI_Calc_my_req(ADIO_File fd, ADIO_Offset *offset_list, - int *len_list, int + ADIO_Offset *len_list, int contig_access_count, ADIO_Offset min_st_offset, ADIO_Offset *fd_start, ADIO_Offset *fd_end, ADIO_Offset fd_size, @@ -405,6 +447,107 @@ void ADIOI_Calc_others_req(ADIO_File fd, int count_my_req_procs, int nprocs, int myrank, int *count_others_req_procs_ptr, ADIOI_Access **others_req_ptr); + +/* KC && AC - New Collective I/O internals*/ + +#define TEMP_OFF 0 +#define REAL_OFF 1 +#define MAX_OFF_TYPE 2 + +/* Communication Tags */ +#define DATA_TAG 30 
+#define AMT_TAG 31 + +/* cb_fr_type user size is non-zero */ +#define ADIOI_FR_AAR 0 +#define ADIOI_FR_FSZ -1 +#define ADIOI_FR_USR_REALMS -2 + +typedef struct flatten_state +{ + ADIO_Offset abs_off; + ADIO_Offset cur_sz; + ADIO_Offset idx; + ADIO_Offset cur_reg_off; +} flatten_state; + +typedef struct view_state +{ + ADIO_Offset fp_ind; /* file view params*/ + ADIO_Offset disp; /* file view params*/ + ADIO_Offset byte_off; + ADIO_Offset sz; + ADIO_Offset ext; /* preserved extent from MPI_Type_extent */ + ADIO_Offset type_sz; + + /* Current state */ + flatten_state cur_state; + /* Scratch state for counting up ol pairs */ + flatten_state tmp_state; + + /* Preprocessed data amount and ol pairs */ + ADIO_Offset pre_sz; + int pre_ol_ct; + MPI_Aint *pre_disp_arr; + int *pre_blk_arr; + + ADIOI_Flatlist_node *flat_type_p; +} view_state; + +void ADIOI_Calc_bounds (ADIO_File fd, int count, MPI_Datatype buftype, + int file_ptr_type, ADIO_Offset offset, + ADIO_Offset *st_offset, ADIO_Offset *end_offset); +int ADIOI_Agg_idx (int rank, ADIO_File fd); +void ADIOI_Calc_file_realms (ADIO_File fd, ADIO_Offset min_st_offset, + ADIO_Offset max_end_offset); +void ADIOI_IOFiletype(ADIO_File fd, void *buf, int count, + MPI_Datatype datatype, int file_ptr_type, + ADIO_Offset offset, MPI_Datatype custom_ftype, + int rdwr, ADIO_Status *status, int + *error_code); +void ADIOI_IOStridedColl(ADIO_File fd, void *buf, int count, int rdwr, + MPI_Datatype datatype, int file_ptr_type, + ADIO_Offset offset, ADIO_Status *status, int + *error_code); +void ADIOI_Print_flatlist_node(ADIOI_Flatlist_node *flatlist_node_p); +ADIOI_Flatlist_node * ADIOI_Add_contig_flattened(MPI_Datatype contig_type); +void ADIOI_Exch_file_views(int myrank, int nprocs, int file_ptr_type, + ADIO_File fd, int count, + MPI_Datatype datatype, ADIO_Offset off, + view_state *my_mem_view_state_arr, + view_state *agg_file_view_state_arr, + view_state *client_file_view_state_arr); +int ADIOI_init_view_state(int file_ptr_type, + 
int nprocs, + view_state *view_state_arr, + int op_type); +int ADIOI_Build_agg_reqs(ADIO_File fd, int rw_type, int nprocs, + view_state *client_file_view_state_arr, + MPI_Datatype *client_comm_dtype_arr, + ADIO_Offset *client_comm_sz_arr, + ADIO_Offset *agg_dtype_offset_p, + MPI_Datatype *agg_dtype_p); +int ADIOI_Build_client_reqs(ADIO_File fd, + int nprocs, + view_state *my_mem_view_state_arr, + view_state *agg_file_view_state_arr, + ADIO_Offset *agg_comm_sz_arr, + MPI_Datatype *agg_comm_dtype_arr); +int ADIOI_Build_client_pre_req(ADIO_File fd, + int agg_rank, + int agg_idx, + view_state *my_mem_view_state_p, + view_state *agg_file_view_state_p, + ADIO_Offset max_pre_req_sz, + int max_ol_ct); +int ADIOI_Build_client_req(ADIO_File fd, + int agg_rank, + int agg_idx, + view_state *my_mem_view_state_p, + view_state *agg_file_view_state_p, + ADIO_Offset agg_comm_sz, + MPI_Datatype *agg_comm_dtype_p); + ADIO_Offset ADIOI_GEN_SeekIndividual(ADIO_File fd, ADIO_Offset offset, int whence, int *error_code); void ADIOI_GEN_Resize(ADIO_File fd, ADIO_Offset size, int *error_code); @@ -585,6 +728,23 @@ int ADIOI_Strncpy( char *outstr, const char *instr, size_t maxlen ); int ADIOI_Strnapp( char *, const char *, size_t ); char *ADIOI_Strdup( const char * ); +/* the current MPI standard is not const-correct, and modern compilers warn + * about the following sort of code: + * + * MPI_Info_set(info, "key", "val"); + * + * reminding us that "key" and "val" are const. We use the following macros to + * cast away the const and suppress the warning. 
*/ +#define ADIOI_Info_set(info_,key_str_,val_) \ + MPI_Info_set((info_),((char*)key_str_),(char*)(val_)) +#define ADIOI_Info_get(info_,key_str_,val_len_,val_,flag_) \ + MPI_Info_get((info_),((char*)key_str_),(val_len_),(val_),(flag_)) +#define ADIOI_Info_get_valuelen(info_,key_str_,val_len_,flag_) \ + MPI_Info_get_valuelen((info_),((char*)key_str_),(val_len_),(flag_)) +#define ADIOI_Info_delete(info_,key_str_) \ + MPI_Info_delete((info_),((char*)key_str_)) + + /* Provide a fallback snprintf for systems that do not have one */ /* Define attribute as empty if it has no definition */ #ifndef ATTRIBUTE @@ -644,7 +804,55 @@ int ADIOI_MPE_unlock_a; int ADIOI_MPE_unlock_b; int ADIOI_MPE_postwrite_a; int ADIOI_MPE_postwrite_b; +int ADIOI_MPE_openinternal_a; +int ADIOI_MPE_openinternal_b; +int ADIOI_MPE_stat_a; +int ADIOI_MPE_stat_b; #endif +#ifdef ROMIO_INSIDE_MPICH2 +/* Assert that this MPI_Aint value can be cast to a ptr value without problem.*/ +/* Basic idea is the value should be unchanged after casting + (no loss of (meaningful) high order bytes in 8 byte MPI_Aint + to (possible) 4 byte ptr cast) */ +/* Should work even on 64bit or old 32bit configs */ + /* Use MPID_Ensure_Aint_fits_in_pointer from mpiutil.h and + MPI_AINT_CAST_TO_VOID_PTR from configure (mpi.h) */ + #include "mpiimpl.h" + + #define ADIOI_AINT_CAST_TO_VOID_PTR (void*)(MPIR_Pint) + /* The next two casts are only used when you don't want sign extension + when casting a (possible 4 byte) aint to a (8 byte) long long or offset */ + #define ADIOI_AINT_CAST_TO_LONG_LONG (long long) + #define ADIOI_AINT_CAST_TO_OFFSET ADIOI_AINT_CAST_TO_LONG_LONG + + #define ADIOI_ENSURE_AINT_FITS_IN_PTR(aint_value) MPID_Ensure_Aint_fits_in_pointer(aint_value) + #define ADIOI_Assert MPIU_Assert +#else + #include + #define ADIOI_AINT_CAST_TO_VOID_PTR (void*) + #define ADIOI_AINT_CAST_TO_LONG_LONG (long long) + #define ADIOI_AINT_CAST_TO_OFFSET ADIOI_AINT_CAST_TO_LONG_LONG + #define ADIOI_ENSURE_AINT_FITS_IN_PTR(aint_value) 
+ #define ADIOI_Assert assert + #define MPIR_Upint unsigned int + #define MPIU_THREADPRIV_DECL +#endif + +#ifdef USE_DBG_LOGGING /*todo fix dependency on mpich?*/ +/* DBGT_FPRINTF terse level printing */ +#define DBGT_FPRINTF if (MPIU_DBG_SELECTED(ROMIO,VERBOSE)) fprintf(stderr,"%s:%d:",__FILE__,__LINE__); \ +if (MPIU_DBG_SELECTED(ROMIO,TERSE)) fprintf +/* DBG_FPRINTF default (typical level) printing */ +#define DBG_FPRINTF if (MPIU_DBG_SELECTED(ROMIO,VERBOSE)) fprintf(stderr,"%s:%d:",__FILE__,__LINE__); \ +if (MPIU_DBG_SELECTED(ROMIO,TYPICAL)) fprintf +/* DBGV_FPRINTF verbose level printing */ +#define DBGV_FPRINTF if (MPIU_DBG_SELECTED(ROMIO,VERBOSE)) fprintf(stderr,"%s:%d:",__FILE__,__LINE__); \ + if (MPIU_DBG_SELECTED(ROMIO,VERBOSE)) fprintf +#else /* compile it out */ +#define DBGT_FPRINTF if (0) fprintf +#define DBG_FPRINTF if (0) fprintf +#define DBGV_FPRINTF if (0) fprintf +#endif #endif diff --git a/ompi/mca/io/romio/romio/adio/include/adioi_errmsg.h b/ompi/mca/io/romio/romio/adio/include/adioi_errmsg.h index e17c1b01ca..79f9c9e1cd 100644 --- a/ompi/mca/io/romio/romio/adio/include/adioi_errmsg.h +++ b/ompi/mca/io/romio/romio/adio/include/adioi_errmsg.h @@ -1,6 +1,5 @@ /* -*- Mode: C; c-basic-offset:4 ; -*- */ -/* $Id: adioi_errmsg.h,v 1.5 2005/05/23 23:27:49 rross Exp $ - * +/* * (C) 2001 by Argonne National Laboratory. * See COPYRIGHT in top-level directory. */ diff --git a/ompi/mca/io/romio/romio/adio/include/adioi_error.h b/ompi/mca/io/romio/romio/adio/include/adioi_error.h index 448acf340d..d7c3ad233a 100644 --- a/ompi/mca/io/romio/romio/adio/include/adioi_error.h +++ b/ompi/mca/io/romio/romio/adio/include/adioi_error.h @@ -1,6 +1,5 @@ /* -*- Mode: C; c-basic-offset:4 ; -*- */ -/* $Id: adioi_error.h,v 1.12 2006/01/05 23:53:58 robl Exp $ - * +/* * (C) 2001 by Argonne National Laboratory. * See COPYRIGHT in top-level directory. 
*/ @@ -33,6 +32,17 @@ if (count < 0) { \ goto fn_exit; \ } +#define MPIO_CHECK_COUNT_SIZE(fh, count, datatype_size, myname, error_code) \ +if (count*datatype_size != (ADIO_Offset)(unsigned)count*(ADIO_Offset)(unsigned)datatype_size) { \ + error_code = MPIO_Err_create_code(MPI_SUCCESS, \ + MPIR_ERR_RECOVERABLE, \ + myname, __LINE__, \ + MPI_ERR_ARG, \ + "**iobadcount", 0); \ + error_code = MPIO_Err_return_file(fh, error_code); \ + goto fn_exit; \ +} + #define MPIO_CHECK_DATATYPE(fh, datatype, myname, error_code) \ if (datatype == MPI_DATATYPE_NULL) { \ error_code = MPIO_Err_create_code(MPI_SUCCESS, \ diff --git a/ompi/mca/io/romio/romio/adio/include/adioi_fs_proto.h b/ompi/mca/io/romio/romio/adio/include/adioi_fs_proto.h index 2fc7f7f0f8..88c3a838fc 100644 --- a/ompi/mca/io/romio/romio/adio/include/adioi_fs_proto.h +++ b/ompi/mca/io/romio/romio/adio/include/adioi_fs_proto.h @@ -89,4 +89,9 @@ extern struct ADIOI_Fns_struct ADIO_BGLOCKLESS_operations; extern struct ADIOI_Fns_struct ADIO_GRIDFTP_operations; #endif +#ifdef ROMIO_ZOIDFS +/* prototypes are in adio/ad_zoidfs/ad_zoidfs.h */ +extern struct ADIOI_Fns_struct ADIO_ZOIDFS_operations; +#endif + #endif diff --git a/ompi/mca/io/romio/romio/adio/include/heap-sort.h b/ompi/mca/io/romio/romio/adio/include/heap-sort.h new file mode 100644 index 0000000000..903a1e8060 --- /dev/null +++ b/ompi/mca/io/romio/romio/adio/include/heap-sort.h @@ -0,0 +1,22 @@ +#include "adio.h" + +typedef struct { + ADIO_Offset offset; + int proc; + ADIO_Offset reg_max_len; +} heap_node_t; + +typedef struct { + heap_node_t *nodes; + int size; +} heap_t; + +/*static inline int parent(heap_t *heap, int i); +static inline int left(heap_t *heap, int i); +static inline int right(heap_t *heap, int i); */ +void ADIOI_Heap_free(heap_t *heap); +int ADIOI_Heap_create(heap_t *heap, int size); +void ADIOI_Heap_insert(heap_t *heap, ADIO_Offset offset, int proc, + ADIO_Offset reg_max_len); +void ADIOI_Heap_extract_min(heap_t *heap, ADIO_Offset* key, int 
*proc, + ADIO_Offset *reg_max_len); diff --git a/ompi/mca/io/romio/romio/adio/include/mpio_error.h b/ompi/mca/io/romio/romio/adio/include/mpio_error.h index 66c7a10433..2a5e524cfa 100644 --- a/ompi/mca/io/romio/romio/adio/include/mpio_error.h +++ b/ompi/mca/io/romio/romio/adio/include/mpio_error.h @@ -1,6 +1,5 @@ /* -*- Mode: C; c-basic-offset:4 ; -*- */ -/* $Id: mpio_error.h,v 1.6 2005/05/23 23:27:50 rross Exp $ - * +/* * (C) 2001 by Argonne National Laboratory. * See COPYRIGHT in top-level directory. */ diff --git a/ompi/mca/io/romio/romio/adio/include/mpipr.h b/ompi/mca/io/romio/romio/adio/include/mpipr.h index f14fe22f23..a609f7bf45 100644 --- a/ompi/mca/io/romio/romio/adio/include/mpipr.h +++ b/ompi/mca/io/romio/romio/adio/include/mpipr.h @@ -24,6 +24,8 @@ #define MPI_Alltoall PMPI_Alltoall #undef MPI_Alltoallv #define MPI_Alltoallv PMPI_Alltoallv +#undef MPI_Alltoallw +#define MPI_Alltoallw PMPI_Alltoallw #undef MPI_Attr_delete #define MPI_Attr_delete PMPI_Attr_delete #undef MPI_Attr_get @@ -150,26 +152,6 @@ #define MPI_Group_union PMPI_Group_union #undef MPI_Ibsend #define MPI_Ibsend PMPI_Ibsend -#if 0 -#undef MPI_Info_create -#define MPI_Info_create PMPI_Info_create -#undef MPI_Info_delete -#define MPI_Info_delete PMPI_Info_delete -#undef MPI_Info_dup -#define MPI_Info_dup PMPI_Info_dup -#undef MPI_Info_free -#define MPI_Info_free PMPI_Info_free -#undef MPI_Info_get -#define MPI_Info_get PMPI_Info_get -#undef MPI_Info_get_nkeys -#define MPI_Info_get_nkeys PMPI_Info_get_nkeys -#undef MPI_Info_get_nthkey -#define MPI_Info_get_nthkey PMPI_Info_get_nthkey -#undef MPI_Info_get_valuelen -#define MPI_Info_get_valuelen PMPI_Info_get_valuelen -#undef MPI_Info_set -#define MPI_Info_set PMPI_Info_set -#endif /* only conditionally set the info */ #undef MPI_Init #define MPI_Init PMPI_Init #undef MPI_Initialized @@ -392,4 +374,13 @@ #define MPI_File_f2c PMPI_File_f2c #endif +#undef MPI_Type_get_attr +#define MPI_Type_get_attr PMPI_Type_get_attr +#undef MPI_Type_set_attr 
+#define MPI_Type_set_attr PMPI_Type_set_attr +#undef MPI_Comm_set_attr +#define MPI_Comm_set_attr PMPI_Comm_set_attr +#undef MPI_Type_create_keyval +#define MPI_Type_create_keyval PMPI_Type_create_keyval + #endif diff --git a/ompi/mca/io/romio/romio/autogen.sh b/ompi/mca/io/romio/romio/autogen.sh new file mode 100644 index 0000000000..458232464a --- /dev/null +++ b/ompi/mca/io/romio/romio/autogen.sh @@ -0,0 +1,2 @@ +: +autoreconf -ivf -I confdb diff --git a/ompi/mca/io/romio/romio/common/dataloop/.state-cache b/ompi/mca/io/romio/romio/common/dataloop/.state-cache deleted file mode 100644 index 37fee8c743..0000000000 --- a/ompi/mca/io/romio/romio/common/dataloop/.state-cache +++ /dev/null @@ -1,64 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/ompi/mca/io/romio/romio/common/dataloop/darray_support.c b/ompi/mca/io/romio/romio/common/dataloop/darray_support.c index da1270ac06..6a7d323429 100644 --- a/ompi/mca/io/romio/romio/common/dataloop/darray_support.c +++ b/ompi/mca/io/romio/romio/common/dataloop/darray_support.c @@ -34,7 +34,7 @@ int PREPEND_PREFIX(Type_convert_darray)(int size, int procs, tmp_rank, i, tmp_size, blklens[3], *coords; MPI_Aint *st_offsets, orig_extent, disps[3]; - PMPI_Type_extent(oldtype, &orig_extent); + MPI_Type_extent(oldtype, &orig_extent); /* calculate position in Cartesian grid as MPI would (row-major ordering) */ @@ -78,7 +78,7 @@ int PREPEND_PREFIX(Type_convert_darray)(int size, st_offsets+i); break; } - if (i) PMPI_Type_free(&type_old); + if (i) MPI_Type_free(&type_old); type_old = type_new; } @@ -116,7 +116,7 @@ int PREPEND_PREFIX(Type_convert_darray)(int size, type_old, &type_new, st_offsets+i); break; } - if (i != ndims-1) PMPI_Type_free(&type_old); + if (i != ndims-1) MPI_Type_free(&type_old); type_old = type_new; } @@ -140,9 +140,9 @@ int PREPEND_PREFIX(Type_convert_darray)(int size, types[1] = type_new; types[2] = MPI_UB; - 
PMPI_Type_struct(3, blklens, disps, types, newtype); + MPI_Type_struct(3, blklens, disps, types, newtype); - PMPI_Type_free(&type_new); + MPI_Type_free(&type_new); DLOOP_Free(st_offsets); DLOOP_Free(coords); return MPI_SUCCESS; @@ -187,18 +187,18 @@ static int MPIOI_Type_block(int *array_of_gsizes, int dim, int ndims, int nprocs stride = orig_extent; if (order == MPI_ORDER_FORTRAN) { if (dim == 0) - PMPI_Type_contiguous(mysize, type_old, type_new); + MPI_Type_contiguous(mysize, type_old, type_new); else { for (i=0; idim; i--) stride *= array_of_gsizes[i]; - PMPI_Type_hvector(mysize, 1, stride, type_old, type_new); + MPI_Type_hvector(mysize, 1, stride, type_old, type_new); } } @@ -252,7 +252,7 @@ static int MPIOI_Type_cyclic(int *array_of_gsizes, int dim, int ndims, int nproc for (i=0; idim; i--) stride *= array_of_gsizes[i]; - PMPI_Type_hvector(count, blksize, stride, type_old, type_new); + MPI_Type_hvector(count, blksize, stride, type_old, type_new); if (rem) { /* if the last block is of size less than blksize, include @@ -265,9 +265,9 @@ static int MPIOI_Type_cyclic(int *array_of_gsizes, int dim, int ndims, int nproc blklens[0] = 1; blklens[1] = rem; - PMPI_Type_struct(2, blklens, disps, types, &type_tmp); + MPI_Type_struct(2, blklens, disps, types, &type_tmp); - PMPI_Type_free(type_new); + MPI_Type_free(type_new); *type_new = type_tmp; } @@ -282,8 +282,8 @@ static int MPIOI_Type_cyclic(int *array_of_gsizes, int dim, int ndims, int nproc types[2] = MPI_UB; disps[2] = orig_extent * array_of_gsizes[dim]; blklens[0] = blklens[1] = blklens[2] = 1; - PMPI_Type_struct(3, blklens, disps, types, &type_tmp); - PMPI_Type_free(type_new); + MPI_Type_struct(3, blklens, disps, types, &type_tmp); + MPI_Type_free(type_new); *type_new = type_tmp; *st_offset = 0; /* set it to 0 because it is taken care of in diff --git a/ompi/mca/io/romio/romio/common/dataloop/dataloop_create.c b/ompi/mca/io/romio/romio/common/dataloop/dataloop_create.c index 91217d00f1..36edb4d280 100644 --- 
a/ompi/mca/io/romio/romio/common/dataloop/dataloop_create.c +++ b/ompi/mca/io/romio/romio/common/dataloop/dataloop_create.c @@ -38,7 +38,7 @@ void PREPEND_PREFIX(Dataloop_create)(MPI_Datatype type, MPI_Aint stride; MPI_Aint *disps; - PMPI_Type_get_envelope(type, &nr_ints, &nr_aints, &nr_types, &combiner); + MPI_Type_get_envelope(type, &nr_ints, &nr_aints, &nr_types, &combiner); /* some named types do need dataloops; handle separately. */ if (combiner == MPI_COMBINER_NAMED) { @@ -93,7 +93,7 @@ void PREPEND_PREFIX(Dataloop_create)(MPI_Datatype type, * note: in the struct case below we'll handle any additional * types "below" the current one. */ - PMPI_Type_get_envelope(types[0], &dummy1, &dummy2, &dummy3, + MPI_Type_get_envelope(types[0], &dummy1, &dummy2, &dummy3, &type0_combiner); if (type0_combiner != MPI_COMBINER_NAMED) { @@ -228,7 +228,7 @@ void PREPEND_PREFIX(Dataloop_create)(MPI_Datatype type, case MPI_COMBINER_STRUCT: for (i = 1; i < ints[0]; i++) { int type_combiner; - PMPI_Type_get_envelope(types[i], &dummy1, &dummy2, &dummy3, + MPI_Type_get_envelope(types[i], &dummy1, &dummy2, &dummy3, &type_combiner); if (type_combiner != MPI_COMBINER_NAMED) { @@ -288,7 +288,7 @@ void PREPEND_PREFIX(Dataloop_create)(MPI_Datatype type, dldepth_p, flag); - PMPI_Type_free(&tmptype); + MPI_Type_free(&tmptype); break; case MPI_COMBINER_DARRAY: ndims = ints[2]; @@ -309,7 +309,7 @@ void PREPEND_PREFIX(Dataloop_create)(MPI_Datatype type, dldepth_p, flag); - PMPI_Type_free(&tmptype); + MPI_Type_free(&tmptype); break; case MPI_COMBINER_F90_REAL: case MPI_COMBINER_F90_COMPLEX: diff --git a/ompi/mca/io/romio/romio/common/dataloop/dataloop_create.h b/ompi/mca/io/romio/romio/common/dataloop/dataloop_create.h index 3fbd42ee4a..db645d5a90 100644 --- a/ompi/mca/io/romio/romio/common/dataloop/dataloop_create.h +++ b/ompi/mca/io/romio/romio/common/dataloop/dataloop_create.h @@ -80,16 +80,4 @@ int PREPEND_PREFIX(Type_convert_darray)(int size, MPI_Datatype oldtype, MPI_Datatype *newtype); 
-#if 0 -/* Helper functions for accessing datatype contents */ -void PREPEND_PREFIX(Type_access_contents)(MPI_Datatype type, - int **ints_p, - MPI_Aint **aints_p, - MPI_Datatype **types_p); -void PREPEND_PREFIX(Type_release_contents)(MPI_Datatype type, - int **ints_p, - MPI_Aint **aints_p, - MPI_Datatype **types_p); -#endif - #endif diff --git a/ompi/mca/io/romio/romio/common/dataloop/dataloop_parts.h b/ompi/mca/io/romio/romio/common/dataloop/dataloop_parts.h index 3f2a1c3509..5894dad2d0 100644 --- a/ompi/mca/io/romio/romio/common/dataloop/dataloop_parts.h +++ b/ompi/mca/io/romio/romio/common/dataloop/dataloop_parts.h @@ -204,9 +204,6 @@ typedef struct DLOOP_Dataloop_common { this union, 'count', allows quick access to the shared 'count' field in the five dataloop structure. . extent - The extent of the dataloop -#if 0 -- handle - handle for the corresponding 'MPI_Datatype'. -#endif Module: Datatype diff --git a/ompi/mca/io/romio/romio/common/dataloop/romio_dataloop.c b/ompi/mca/io/romio/romio/common/dataloop/romio_dataloop.c index 00731bc9ad..5dbc7a42a5 100644 --- a/ompi/mca/io/romio/romio/common/dataloop/romio_dataloop.c +++ b/ompi/mca/io/romio/romio/common/dataloop/romio_dataloop.c @@ -80,7 +80,7 @@ void MPIO_Datatype_init_dataloop(MPI_Datatype type) MPIO_Datatype_initialize(); } - mpi_errno = PMPI_Type_get_attr(type, MPIO_Datatype_keyval, &dtp, &attrflag); + mpi_errno = MPI_Type_get_attr(type, MPIO_Datatype_keyval, &dtp, &attrflag); DLOOP_Assert(mpi_errno == MPI_SUCCESS); if (!attrflag) { @@ -107,7 +107,7 @@ void MPIO_Datatype_get_size(MPI_Datatype type, MPI_Offset *size_p) MPIO_Datatype_initialize(); } - mpi_errno = PMPI_Type_get_attr(type, MPIO_Datatype_keyval, &dtp, &attrflag); + mpi_errno = MPI_Type_get_attr(type, MPIO_Datatype_keyval, &dtp, &attrflag); DLOOP_Assert(mpi_errno == MPI_SUCCESS); if (!attrflag) { @@ -131,7 +131,7 @@ void MPIO_Datatype_get_extent(MPI_Datatype type, MPI_Offset *extent_p) MPIO_Datatype_initialize(); } - mpi_errno = 
PMPI_Type_get_attr(type, MPIO_Datatype_keyval, &dtp, &attrflag); + mpi_errno = MPI_Type_get_attr(type, MPIO_Datatype_keyval, &dtp, &attrflag); DLOOP_Assert(mpi_errno == MPI_SUCCESS); if (!attrflag) { @@ -163,7 +163,7 @@ void MPIO_Datatype_get_block_info(MPI_Datatype type, int mpi_errno, attrflag; int nr_ints, nr_aints, nr_types, combiner; - mpi_errno = PMPI_Type_get_envelope(type, &nr_ints, &nr_aints, + mpi_errno = MPI_Type_get_envelope(type, &nr_ints, &nr_aints, &nr_types, &combiner); DLOOP_Assert(mpi_errno == MPI_SUCCESS); @@ -183,7 +183,7 @@ void MPIO_Datatype_get_block_info(MPI_Datatype type, MPIO_Segment *segp; MPI_Offset bytes; - mpi_errno = PMPI_Type_get_attr(type, MPIO_Datatype_keyval, &dtp, + mpi_errno = MPI_Type_get_attr(type, MPIO_Datatype_keyval, &dtp, &attrflag); DLOOP_Assert(mpi_errno == MPI_SUCCESS); if (!attrflag) { @@ -231,7 +231,7 @@ void MPIO_Datatype_get_el_type(MPI_Datatype type, int mpi_errno; int nr_ints, nr_aints, nr_types, combiner; - mpi_errno = PMPI_Type_get_envelope(type, &nr_ints, &nr_aints, + mpi_errno = MPI_Type_get_envelope(type, &nr_ints, &nr_aints, &nr_types, &combiner); DLOOP_Assert(mpi_errno == MPI_SUCCESS); @@ -273,7 +273,7 @@ void MPIO_Datatype_get_loopptr(MPI_Datatype type, MPIO_Datatype_initialize(); } - mpi_errno = PMPI_Type_get_attr(type, MPIO_Datatype_keyval, &dtp, &attrflag); + mpi_errno = MPI_Type_get_attr(type, MPIO_Datatype_keyval, &dtp, &attrflag); DLOOP_Assert(mpi_errno == MPI_SUCCESS); if (!(dtp->valid & MPIO_DATATYPE_VALID_DLOOP_PTR)) @@ -293,7 +293,7 @@ void MPIO_Datatype_get_loopsize(MPI_Datatype type, int *size_p, int flag) MPIO_Datatype_initialize(); } - mpi_errno = PMPI_Type_get_attr(type, MPIO_Datatype_keyval, &dtp, &attrflag); + mpi_errno = MPI_Type_get_attr(type, MPIO_Datatype_keyval, &dtp, &attrflag); DLOOP_Assert(mpi_errno == MPI_SUCCESS); if (!(dtp->valid & MPIO_DATATYPE_VALID_DLOOP_SIZE)) @@ -313,7 +313,7 @@ void MPIO_Datatype_get_loopdepth(MPI_Datatype type, int *depth_p, int flag) 
MPIO_Datatype_initialize(); } - mpi_errno = PMPI_Type_get_attr(type, MPIO_Datatype_keyval, &dtp, &attrflag); + mpi_errno = MPI_Type_get_attr(type, MPIO_Datatype_keyval, &dtp, &attrflag); DLOOP_Assert(mpi_errno == MPI_SUCCESS); if (!(dtp->valid & MPIO_DATATYPE_VALID_DLOOP_DEPTH)) @@ -333,7 +333,7 @@ void MPIO_Datatype_set_loopptr(MPI_Datatype type, MPIO_Dataloop *ptr, int flag) MPIO_Datatype_initialize(); } - mpi_errno = PMPI_Type_get_attr(type, MPIO_Datatype_keyval, &dtp, &attrflag); + mpi_errno = MPI_Type_get_attr(type, MPIO_Datatype_keyval, &dtp, &attrflag); DLOOP_Assert(mpi_errno == MPI_SUCCESS); if (!attrflag) { dtp = MPIO_Datatype_allocate(type); @@ -355,7 +355,7 @@ void MPIO_Datatype_set_loopsize(MPI_Datatype type, int size, int flag) MPIO_Datatype_initialize(); } - mpi_errno = PMPI_Type_get_attr(type, MPIO_Datatype_keyval, &dtp, &attrflag); + mpi_errno = MPI_Type_get_attr(type, MPIO_Datatype_keyval, &dtp, &attrflag); DLOOP_Assert(mpi_errno == MPI_SUCCESS); if (!attrflag) { dtp = MPIO_Datatype_allocate(type); @@ -375,7 +375,7 @@ void MPIO_Datatype_set_loopdepth(MPI_Datatype type, int depth, int flag) MPIO_Datatype_initialize(); } - mpi_errno = PMPI_Type_get_attr(type, MPIO_Datatype_keyval, &dtp, &attrflag); + mpi_errno = MPI_Type_get_attr(type, MPIO_Datatype_keyval, &dtp, &attrflag); DLOOP_Assert(mpi_errno == MPI_SUCCESS); if (!attrflag) { dtp = MPIO_Datatype_allocate(type); @@ -390,7 +390,7 @@ int MPIO_Datatype_is_nontrivial(MPI_Datatype type) { int nr_ints, nr_aints, nr_types, combiner; - PMPI_Type_get_envelope(type, &nr_ints, &nr_aints, &nr_types, &combiner); + MPI_Type_get_envelope(type, &nr_ints, &nr_aints, &nr_types, &combiner); if (combiner != MPI_COMBINER_NAMED || type == MPI_FLOAT_INT || type == MPI_DOUBLE_INT || @@ -409,20 +409,20 @@ static int MPIO_Datatype_initialize(void) DLOOP_Assert(MPIO_Datatype_keyval == MPI_KEYVAL_INVALID); /* create keyval for dataloop storage */ - mpi_errno = PMPI_Type_create_keyval(MPIO_Datatype_copy_attr_function, + 
mpi_errno = MPI_Type_create_keyval(MPIO_Datatype_copy_attr_function, MPIO_Datatype_delete_attr_function, &MPIO_Datatype_keyval, NULL); DLOOP_Assert(mpi_errno == MPI_SUCCESS); /* create keyval to hook to COMM_WORLD for finalize */ - mpi_errno = PMPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, + mpi_errno = MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, MPIO_Datatype_finalize, &MPIO_Datatype_finalize_keyval, NULL); DLOOP_Assert(mpi_errno == MPI_SUCCESS); - mpi_errno = PMPI_Comm_set_attr(MPI_COMM_WORLD, + mpi_errno = MPI_Comm_set_attr(MPI_COMM_WORLD, MPIO_Datatype_finalize_keyval, NULL); DLOOP_Assert(mpi_errno == MPI_SUCCESS); @@ -444,10 +444,10 @@ static int MPIO_Datatype_finalize(MPI_Comm comm, DLOOP_Assert(MPIO_Datatype_keyval != MPI_KEYVAL_INVALID); /* remove keyvals */ - mpi_errno = PMPI_Type_free_keyval(&MPIO_Datatype_keyval); + mpi_errno = MPI_Type_free_keyval(&MPIO_Datatype_keyval); DLOOP_Assert(mpi_errno == MPI_SUCCESS); - mpi_errno = PMPI_Type_free_keyval(&MPIO_Datatype_finalize_keyval); + mpi_errno = MPI_Type_free_keyval(&MPIO_Datatype_finalize_keyval); DLOOP_Assert(mpi_errno == MPI_SUCCESS); printf("freed keyvals\n"); @@ -468,7 +468,7 @@ static MPIO_Datatype *MPIO_Datatype_allocate(MPI_Datatype type) dtp->dloop_size = -1; dtp->dloop_depth = -1; - mpi_errno = PMPI_Type_set_attr(type, MPIO_Datatype_keyval, dtp); + mpi_errno = MPI_Type_set_attr(type, MPIO_Datatype_keyval, dtp); DLOOP_Assert(mpi_errno == MPI_SUCCESS); printf("allocated attr struct\n"); @@ -496,13 +496,13 @@ static void MPIO_Datatype_set_szext(MPI_Datatype type, MPIO_Datatype *dtp) int size; MPI_Aint lb, extent, true_lb, true_extent; - mpi_errno = PMPI_Type_size(type, &size); + mpi_errno = MPI_Type_size(type, &size); DLOOP_Assert(mpi_errno == MPI_SUCCESS); - mpi_errno = PMPI_Type_get_extent(type, &lb, &extent); + mpi_errno = MPI_Type_get_extent(type, &lb, &extent); DLOOP_Assert(mpi_errno == MPI_SUCCESS); - mpi_errno = PMPI_Type_get_true_extent(type, &true_lb, &true_extent); + mpi_errno = 
MPI_Type_get_true_extent(type, &true_lb, &true_extent); dtp->size = (MPI_Offset) size; dtp->extent = (MPI_Offset) extent; diff --git a/ompi/mca/io/romio/romio/common/dataloop/romio_dataloop.h b/ompi/mca/io/romio/romio/common/dataloop/romio_dataloop.h index 610fd4ac5c..5c9bbaed7b 100644 --- a/ompi/mca/io/romio/romio/common/dataloop/romio_dataloop.h +++ b/ompi/mca/io/romio/romio/common/dataloop/romio_dataloop.h @@ -12,8 +12,12 @@ #include /* romioconf.h must be included *before* mpi.h to avoid some redeclarations */ +#ifdef HAVE_MPITYPEDEFS_H #include "mpitypedefs.h" +#endif +#ifdef HAVE_MPICHCONF_H #include "mpichconf.h" +#endif #include "romioconf.h" #include diff --git a/ompi/mca/io/romio/romio/common/dataloop/segment_ops.c b/ompi/mca/io/romio/romio/common/dataloop/segment_ops.c index d90c4f5adf..04b1603db3 100644 --- a/ompi/mca/io/romio/romio/common/dataloop/segment_ops.c +++ b/ompi/mca/io/romio/romio/common/dataloop/segment_ops.c @@ -548,16 +548,7 @@ static int DLOOP_Segment_index_count_block(DLOOP_Offset *blocks_p, * DLOOP_Count i and DLOOP_Offset size would need to be * declared above. 
*/ -#if 0 - last_loc = rel_off * offsetarray[0] + blockarray[0] * el_size; - for (i=1; i < count; i++) { - if (last_loc == rel_off + offsetarray[i]) new_blk_count--; - - last_loc = rel_off + offsetarray[i] + blockarray[i] * el_size; - } -#else last_loc = rel_off + offsetarray[count-1] + blockarray[count-1] * el_size; -#endif paramp->last_loc = last_loc; paramp->count += new_blk_count; @@ -690,19 +681,6 @@ static int DLOOP_Segment_vector_mpi_flatten(DLOOP_Offset *blocks_p, DLOOP_Handle_get_size_macro(el_type, el_size); blocks_left = *blocks_p; -#if 0 - MPIU_DBG_MSG_FMT(DATATYPE,VERBOSE,(MPIU_DBG_FDEST, - "\t[vector to vec: do=%d, dp=%x, len=%d, ind=%d, ct=%d, blksz=%d, str=%d, blks=%d]\n", - (unsigned) rel_off, - (unsigned) (MPI_Aint)bufp, - paramp->u.pack_vector.length, - paramp->u.pack_vector.index, - count, - blksz, - stride, - (int) *blocks_p)); -#endif - for (i=0; i < count && blocks_left > 0; i++) { int last_idx; char *last_end = NULL; diff --git a/ompi/mca/io/romio/romio/common/dataloop/subarray_support.c b/ompi/mca/io/romio/romio/common/dataloop/subarray_support.c index becce18349..a29d3fbd07 100644 --- a/ompi/mca/io/romio/romio/common/dataloop/subarray_support.c +++ b/ompi/mca/io/romio/romio/common/dataloop/subarray_support.c @@ -21,23 +21,23 @@ int PREPEND_PREFIX(Type_convert_subarray)(int ndims, int i, blklens[3]; MPI_Datatype tmp1, tmp2, types[3]; - PMPI_Type_extent(oldtype, &extent); + MPI_Type_extent(oldtype, &extent); if (order == MPI_ORDER_FORTRAN) { /* dimension 0 changes fastest */ if (ndims == 1) { - PMPI_Type_contiguous(array_of_subsizes[0], oldtype, &tmp1); + MPI_Type_contiguous(array_of_subsizes[0], oldtype, &tmp1); } else { - PMPI_Type_vector(array_of_subsizes[1], + MPI_Type_vector(array_of_subsizes[1], array_of_subsizes[0], array_of_sizes[0], oldtype, &tmp1); size = array_of_sizes[0]*extent; for (i=2; i=0; i--) { size *= array_of_sizes[i+1]; - PMPI_Type_hvector(array_of_subsizes[i], 1, size, tmp1, &tmp2); - PMPI_Type_free(&tmp1); + 
MPI_Type_hvector(array_of_subsizes[i], 1, size, tmp1, &tmp2); + MPI_Type_free(&tmp1); tmp1 = tmp2; } } @@ -91,9 +91,9 @@ int PREPEND_PREFIX(Type_convert_subarray)(int ndims, types[1] = tmp1; types[2] = MPI_UB; - PMPI_Type_struct(3, blklens, disps, types, newtype); + MPI_Type_struct(3, blklens, disps, types, newtype); - PMPI_Type_free(&tmp1); + MPI_Type_free(&tmp1); return MPI_SUCCESS; } diff --git a/ompi/mca/io/romio/romio/common/dataloop/typesize_support.c b/ompi/mca/io/romio/romio/common/dataloop/typesize_support.c index 4a3dc60f73..b1b96c465f 100644 --- a/ompi/mca/io/romio/romio/common/dataloop/typesize_support.c +++ b/ompi/mca/io/romio/romio/common/dataloop/typesize_support.c @@ -162,7 +162,7 @@ void PREPEND_PREFIX(Type_calc_footprint)(MPI_Datatype type, int ndims; MPI_Datatype tmptype; - mpi_errno = PMPI_Type_get_envelope(type, &nr_ints, &nr_aints, + mpi_errno = MPI_Type_get_envelope(type, &nr_ints, &nr_aints, &nr_types, &combiner); DLOOP_Assert(mpi_errno == MPI_SUCCESS); @@ -170,8 +170,8 @@ void PREPEND_PREFIX(Type_calc_footprint)(MPI_Datatype type, int mpisize; MPI_Aint mpiextent; - PMPI_Type_size(type, &mpisize); - PMPI_Type_extent(type, &mpiextent); + MPI_Type_size(type, &mpisize); + MPI_Type_extent(type, &mpiextent); tfp->size = (DLOOP_Offset) mpisize; tfp->lb = 0; tfp->ub = (DLOOP_Offset) mpiextent; @@ -369,7 +369,7 @@ void PREPEND_PREFIX(Type_calc_footprint)(MPI_Datatype type, types[0], &tmptype); PREPEND_PREFIX(Type_calc_footprint)(tmptype, tfp); - PMPI_Type_free(&tmptype); + MPI_Type_free(&tmptype); break; case MPI_COMBINER_DARRAY: ndims = ints[2]; @@ -386,7 +386,7 @@ void PREPEND_PREFIX(Type_calc_footprint)(MPI_Datatype type, &tmptype); PREPEND_PREFIX(Type_calc_footprint)(tmptype, tfp); - PMPI_Type_free(&tmptype); + MPI_Type_free(&tmptype); break; case MPI_COMBINER_F90_REAL: case MPI_COMBINER_F90_COMPLEX: @@ -437,7 +437,7 @@ static void DLOOP_Type_calc_footprint_struct(MPI_Datatype type, /* skip zero blocklength elements */ if (ints[i+1] == 0) 
continue; - PMPI_Type_get_envelope(types[i], &nr_ints, &nr_aints, &nr_types, + MPI_Type_get_envelope(types[i], &nr_ints, &nr_aints, &nr_types, &combiner); /* opt: could just inline assignments for combiner == NAMED case */ @@ -530,10 +530,6 @@ static void DLOOP_Type_calc_footprint_struct(MPI_Datatype type, } } -#if 0 - printf("size = %d, extent = %d\n", (int) tmp_size, (int) tmp_extent); -#endif - tfp->size = tmp_size; tfp->lb = min_lb; tfp->ub = max_ub; @@ -576,7 +572,7 @@ static int DLOOP_Named_type_alignsize(MPI_Datatype type, MPI_Aint disp) if (type == MPI_LB || type == MPI_UB) return 0; - PMPI_Type_size(type, &alignsize); + MPI_Type_size(type, &alignsize); switch(type) { @@ -882,46 +878,3 @@ static int DLOOP_Structalign_llint_position() if (padding_varies_by_pos) return 1; else return 0; } - -#if 0 -/* from MPICH2 PAC_C_DOUBLE_ALIGNMENT_EXCEPTION test: - * - * Other tests assume that there is potentially a maximum alignment - * and that if there is no maximum alignment, or a type is smaller than - * that value, then we align on the size of the value, with the exception - * of the "position-based alignment" rules we test for separately. - * - * It turns out that these assumptions have fallen short in at least one - * case, on MacBook Pros, where doubles are aligned on 4-byte boundaries - * even when long doubles are aligned on 16-byte boundaries. So this test - * is here specifically to handle this case. - * - * Return value is 4 or 0. -*/ -static int double_align_exception() -{ - struct { char a; double b; } char_double; - struct { double b; char a; } double_char; - int extent1, extent2, align_4 = 0; - - extent1 = sizeof(char_double); - extent2 = sizeof(double_char); - - /* we're interested in the largest value, will let separate test - * deal with position-based issues. 
- */ - if (extent1 < extent2) extent1 = extent2; - if ((sizeof(double) == 8) && (extent1 % 8) != 0) { - if (extent1 % 4 == 0) { -#ifdef HAVE_MAX_FP_ALIGNMENT - if (HAVE_MAX_FP_ALIGNMENT >= 8) align_4 = 1; -#else - align_4 = 1; -#endif - } - } - - if (align_4) return 4; - else return 0; -} -#endif diff --git a/ompi/mca/io/romio/romio/confdb/aclocal_am.m4 b/ompi/mca/io/romio/romio/confdb/aclocal_am.m4 new file mode 100644 index 0000000000..4d3b0de225 --- /dev/null +++ b/ompi/mca/io/romio/romio/confdb/aclocal_am.m4 @@ -0,0 +1,6 @@ +dnl AM_IGNORE is an extension that tells (a patched) automake not to +dnl include the specified AC_SUBST variable in the Makefile.in that +dnl automake generates. We don't use AC_DEFUN, since aclocal will +dnl then complain that AM_IGNORE is a duplicate (if you are using the +dnl patched automake/aclocal). +m4_ifdef([AM_IGNORE],[],[m4_define([AM_IGNORE],[])]) diff --git a/ompi/mca/io/romio/romio/confdb/aclocal_atomic.m4 b/ompi/mca/io/romio/romio/confdb/aclocal_atomic.m4 new file mode 100644 index 0000000000..4e96cfbf36 --- /dev/null +++ b/ompi/mca/io/romio/romio/confdb/aclocal_atomic.m4 @@ -0,0 +1,227 @@ +dnl /*D PAC_C_MEMATOMIC - Try and determine how to implement memory-atomic +dnl operations with the selected C compiler +dnl +dnl Synopsis: +dnl PAC_C_MEMATOMIC +dnl +dnl Notes: +dnl Defines names of the following form +dnl + HAVE_GCC_ASM_AND_X86_{MFENCE,LFENCE,SFENCE} - gcc __asm__ will issue +dnl mfence, lfence, or sfence +dnl . HAVE___ASM_AND_X86_{MFENCE,LFENCE,SFENCE} - __asm _emit will issue +dnl mfence, lfence, or sfence +dnl . HAVE_ASM_AND_X86_{MFENCE,LFENCE,SFENCE} - asm("...") will issue +dnl mfence, lfence, or sfence +dnl . HAVE__INTERLOCKEDEXCHANGE - _InterlockedExchange intrinsic is available +dnl (IA64) +dnl . HAVE_GCC_ASM_SPARC_MEMBAR - gcc __asm__ will issue SPARC architecture +dnl memory barrier instruction +dnl . 
HAVE_SOLARIS_ASM_SPARC_MEMBAR - Solaris asm() will issue SPARC +dnl architecture memory barrier instruction +dnl . HAVE_GCC_ASM_SPARC_STBAR - gcc __asm__ will issue stbar +dnl - HAVE_SOLARIS_ASM_SPARC_STBAR - Solaris __asm() will issue stbar +dnl +dnl D*/ +AC_DEFUN([PAC_C_MEMATOMIC],[ +AC_CACHE_CHECK([for x86 mfence instruction using __asm__], + pac_cv_have_gcc_asm_and_x86_mfence,[ +AC_TRY_RUN([ +int main(int argc, char **argv) +{ + __asm__ __volatile__ ( ".byte 0x0f, 0xae, 0xf0" ::: "memory" ); + exit(0); +} +], +pac_cv_have_gcc_asm_and_x86_mfence=yes,pac_cv_have_gcc_asm_and_x86_mfence=no)]) + +if test "$pac_cv_have_gcc_asm_and_x86_mfence" = "yes" ; then + AC_DEFINE(HAVE_GCC_ASM_AND_X86_MFENCE, 1, [Define if using gcc on a x86 system with the mfence instruction]) +fi + +AC_CACHE_CHECK([for x86 sfence instruction using __asm__], + pac_cv_have_gcc_asm_and_x86_sfence,[ +AC_TRY_RUN([ +int main(int argc, char **argv) +{ + __asm__ __volatile__ ( ".byte 0x0f, 0xae, 0xf8" ::: "memory" ); + exit(0); +} +], +pac_cv_have_gcc_asm_and_x86_sfence=yes,pac_cv_have_gcc_asm_and_x86_sfence=no)]) + +if test "$pac_cv_have_gcc_asm_and_x86_sfence" = "yes" ; then + AC_DEFINE(HAVE_GCC_ASM_AND_X86_SFENCE, 1, [Define if using gcc on a x86 system with the sfence instruction]) +fi + +AC_CACHE_CHECK([for x86 lfence instruction using __asm__], + pac_cv_have_gcc_asm_and_x86_lfence,[ +AC_TRY_RUN([ +int main(int argc, char **argv) +{ + __asm__ __volatile__ ( ".byte 0x0f, 0xae, 0xe8" ::: "memory" ); + exit(0); +} +], +pac_cv_have_gcc_asm_and_x86_lfence=yes,pac_cv_have_gcc_asm_and_x86_lfence=no)]) + +if test "$pac_cv_have_gcc_asm_and_x86_lfence" = "yes" ; then + AC_DEFINE(HAVE_GCC_ASM_AND_X86_LFENCE, 1, [Define if using gcc on a x86 system with the lfence instruction]) +fi + +dnl Some compilers, like icc, may want __asm _emit +AC_CACHE_CHECK([for x86 mfence instruction using __asm], + pac_cv_have___asm_and_x86_mfence,[ +AC_TRY_RUN([ +int main(int argc, char **argv) +{ + __asm _emit 0x0f __asm _emit 
0xae __asm _emit 0xf0 ; + exit(0); +} +], +pac_cv_have___asm_and_x86_mfence=yes,pac_cv_have___asm_and_x86_mfence=no)]) + +if test "$pac_cv_have___asm_and_x86_mfence" = "yes" ; then + AC_DEFINE(HAVE___ASM_AND_X86_MFENCE, 1, [Define if using __asm on a x86 system with the mfence instruction]) +fi + +AC_CACHE_CHECK([for x86 sfence instruction using __asm], + pac_cv_have___asm_and_x86_sfence,[ +AC_TRY_RUN([ +int main(int argc, char **argv) +{ + __asm sfence ; + exit(0); +} +], +pac_cv_have___asm_and_x86_sfence=yes,pac_cv_have___asm_and_x86_sfence=no)]) + +if test "$pac_cv_have___asm_and_x86_sfence" = "yes" ; then + AC_DEFINE(HAVE___ASM_AND_X86_SFENCE, 1, [Define if using __asm on a x86 system with the sfence instruction]) +fi + +AC_CACHE_CHECK([for x86 lfence instruction using __asm], + pac_cv_have___asm_and_x86_lfence,[ +AC_TRY_RUN([ +int main(int argc, char **argv) +{ + __asm _emit 0x0f __asm _emit 0xae __asm _emit 0xe8 ; + exit(0); +} +], +pac_cv_have___asm_and_x86_lfence=yes,pac_cv_have___asm_and_x86_lfence=no)]) + +if test "$pac_cv_have___asm_and_x86_lfence" = "yes" ; then + AC_DEFINE(HAVE___ASM_AND_X86_LFENCE, 1, [Define if using __asm on a x86 system with the lfence instruction]) +fi + +dnl +dnl Some compilers, such as pgcc, may require additional arguments. +dnl pgcc may need -Masmkeyword flag. 
We may want to try this with and +dnl without adding -Masmkeyword to CFLAGS + +AC_CACHE_CHECK([for x86 mfence instruction using asm()], + pac_cv_have_asm_and_x86_mfence,[ +AC_TRY_RUN([ +int main(int argc, char **argv) +{ + asm("_emit 0x0f __asm _emit 0xae __asm _emit 0xf0"); + exit(0); +} +], +pac_cv_have_asm_and_x86_mfence=yes,pac_cv_have_asm_and_x86_mfence=no)]) + +if test "$pac_cv_have_asm_and_x86_mfence" = "yes" ; then + AC_DEFINE(HAVE_ASM_AND_X86_MFENCE, 1, [Define if using asm() on a x86 system with the mfence instruction]) +fi + +AC_CACHE_CHECK([for x86 sfence instruction using asm()], + pac_cv_have_asm_and_x86_sfence,[ +AC_TRY_RUN([ +int main(int argc, char **argv) +{ + asm("sfence"); + exit(0); +} +], +pac_cv_have_asm_and_x86_sfence=yes,pac_cv_have_asm_and_x86_sfence=no)]) + +if test "$pac_cv_have_asm_and_x86_sfence" = "yes" ; then + AC_DEFINE(HAVE_ASM_AND_X86_SFENCE, 1, [Define if using asm() on a x86 system with the sfence instruction]) +fi + +AC_CACHE_CHECK([for x86 lfence instruction using asm()], + pac_cv_have_asm_and_x86_lfence,[ +AC_TRY_RUN([ +int main(int argc, char **argv) +{ + asm("_emit 0x0f __asm _emit 0xae __asm _emit 0xe8"); + exit(0); +} +], +pac_cv_have_asm_and_x86_lfence=yes,pac_cv_have_asm_and_x86_lfence=no)]) + +if test "$pac_cv_have_asm_and_x86_lfence" = "yes" ; then + AC_DEFINE(HAVE_ASM_AND_X86_LFENCE, 1, [Define if using asm() on a x86 system with the lfence instruction]) +fi + +AC_CACHE_CHECK([for _InterlockedExchange intrinsic], + pac_cv_have__InterlockedExchange,[ +AC_TRY_RUN([ +int main(int argc, char **argv) +{ + unsigned long lock, *lock_ptr; + lock_ptr = &lock; + _InterlockedExchange(lock_ptr, 1); + exit(0); +} +], +pac_cv_have__InterlockedExchange=yes,pac_cv_have__InterlockedExchange=no)]) + +if test "$pac_cv_have__InterlockedExchange" = "yes" ; then + AC_DEFINE(HAVE__INTERLOCKEDEXCHANGE, 1, [Define if _InterlockedExchange intrinsic is available]) +fi + +AC_CACHE_CHECK([for SPARC membar instruction with gcc], + 
pac_cv_gcc_sparc_membar,[ +AC_TRY_RUN([ +int main(int argc, char **argv){ + __asm__ __volatile__ ( "membar #StoreLoad | #StoreStore" : : : "memory" ); + exit(0); +}],pac_cv_gcc_sparc_membar=yes,pac_cv_gcc_sparc_membar=no)]) +if test "$pac_cv_gcc_sparc_membar" = yes ; then + AC_DEFINE(HAVE_GCC_ASM_SPARC_MEMBAR,1,[Define if gcc asm membar supported]) +fi + +AC_CACHE_CHECK([for SPARC membar instruction with Solaris C], + pac_cv_solaris_sparc_membar,[ +AC_TRY_RUN([ +int main(int argc, char **argv){ + __asm ( "membar #StoreLoad | #StoreStore"); + exit(0); +}],pac_cv_solaris_sparc_membar=yes,pac_cv_solaris_sparc_membar=no)]) +if test "$pac_cv_solaris_sparc_membar" = yes ; then + AC_DEFINE(HAVE_SOLARIS_ASM_SPARC_MEMBAR,1,[Define if solaris asm membar supported]) +fi + +AC_CACHE_CHECK([for SPARC stbar instruction with gcc], + pac_cv_gcc_sparc_stbar,[ +AC_TRY_RUN([ +int main(int argc, char **argv){ + __asm__ __volatile__ ( "stbar" : : : "memory" ); + exit(0); +}],pac_cv_gcc_sparc_stbar=yes,pac_cv_gcc_sparc_stbar=no)]) +if test "$pac_cv_gcc_sparc_stbar" = yes ; then + AC_DEFINE(HAVE_GCC_ASM_SPARC_STBAR,1,[Define if gcc asm stbar supported]) +fi + +AC_CACHE_CHECK([for SPARC stbar instruction with Solaris C], + pac_cv_solaris_sparc_stbar,[ +AC_TRY_RUN([ +int main(int argc, char **argv){ + __asm ( "stbar" ); + exit(0); +}],pac_cv_solaris_sparc_stbar=yes,pac_cv_solaris_sparc_stbar=no)]) +if test "$pac_cv_solaris_sparc_stbar" = yes ; then + AC_DEFINE(HAVE_SOLARIS_ASM_SPARC_STBAR,1,[Define if solaris asm stbar supported]) +fi +]) \ No newline at end of file diff --git a/ompi/mca/io/romio/romio/confdb/aclocal_attr_alias.m4 b/ompi/mca/io/romio/romio/confdb/aclocal_attr_alias.m4 new file mode 100644 index 0000000000..da49328105 --- /dev/null +++ b/ompi/mca/io/romio/romio/confdb/aclocal_attr_alias.m4 @@ -0,0 +1,504 @@ +dnl +dnl Check for BSD or POSIZ style global symbol lister, nm. 
+dnl If found, pac_path_NM_G contains the absolute pathname of nm + options +dnl pac_path_NM_G_type will be either POSIX or BSD. NM_G will be +dnl pac_path_NM_G without the absolute path. Preference is BSD style. +dnl +dnl The test checks if nm accepts the known options and also if nm produces +dnl the expected BSD or POSIX output format. +dnl +AC_DEFUN([PAC_PATH_NM_G],[ +AC_MSG_CHECKING([for BSD/POSIX style global symbol lister]) +AC_LANG_PUSH(C) +AC_PATH_PROGS_FEATURE_CHECK(NM_G, nm, [ + # Check if nm accepts -g and BSD or POSIX compatible flag. + # Use the `sed 1q' to avoid HP-UX's unknown option message: + # nm: unknown option "B" ignored + # Tru64's nm complains that /dev/null is an invalid object file + # + # AIX's sed does not accept \+, 1) instead of doing 's|a\+||', do 's|aa*||' + # or 2) instead of 's|A \+B|AB|g', do 's|A *B|AB|g' + + # Check if nm accepts -g + case `${ac_path_NM_G} -g /dev/null 2>&1 | sed '1q'` in + */dev/null* | *'Invalid file or object type'*) + ac_path_NM_G="${ac_path_NM_G} -g" + # Check if nm accepts -B + case `${ac_path_NM_G} -B /dev/null 2>&1 | sed '1q'` in + */dev/null* | *'Invalid file or object type'*) + AC_COMPILE_IFELSE([ + AC_LANG_SOURCE([int iglobal;]) + ],[ + changequote(,) + case `${ac_path_NM_G} -B conftest.$OBJEXT | sed -e 's|[0-9][0-9]* *[A-Z] *iglobal|XXXX|g'` in + *XXXX*) + pac_path_NM_G="${ac_path_NM_G} -B" + pac_path_NM_G_type="BSD" + ;; + esac + changequote([,]) + ]) + ;; + *) + # Check if nm accepts -P + case `${ac_path_NM_G} -P /dev/null 2>&1 | sed '1q'` in + */dev/null* | *'Invalid file or object type'*) + AC_COMPILE_IFELSE([ + AC_LANG_SOURCE([int iglobal;]) + ],[ + changequote(,) + case `${ac_path_NM_G} -P conftest.$OBJEXT | sed -e 's|iglobal *[A-Z] *[0-9][0-9]*|XXXX|g'` in + *XXXX*) + pac_path_NM_G="${ac_path_NM_G} -P" + pac_path_NM_G_type="POSIX" + ;; + esac + changequote([,]) + ]) + ;; + esac # Endof case `${ac_path_NM_G} -P + ;; + esac # Endof case `${ac_path_NM_G} -B + ;; + esac # Endof case 
`${ac_path_NM_G} -g + if test "X$pac_path_NM_G" != "X" ; then + AC_MSG_RESULT([$pac_path_NM_G_type style,$pac_path_NM_G]) + NM_G="`echo $pac_path_NM_G | sed -e 's|^.*nm |nm |g'`" + else + AC_MSG_RESULT(no) + fi + ac_cv_path_NM_G=${ac_path_NM_G} + ac_path_NM_G_found=: +], [AC_MSG_RESULT(no)], +[$PATH$PATH_SEPARATOR/usr/ccs/bin/elf$PATH_SEPARATOR/usr/ccs/bin$PATH_SEPARATOR/usr/ucb$PATH_SEPARATOR/bin]) +AC_LANG_POP(C) +]) dnl Endof AC_DEFUN([PAC_PATH_NM_G] +dnl +dnl PAC_C_MULTI_ATTR_ALIAS() +dnl +dnl This checks if multiple __attribute__((alias)) is available +dnl If the multiple __attribute((alias)) support is found, +dnl pac_c_multi_attr_alias=yes is set. +dnl +dnl The default is to do a runtime test. When cross_compiling=yes, +dnl pac_path_NM_G will be used to determine the test result. +dnl If CFLAGS(or CPPFLAGS) contains ATTR_ALIAS_DEBUG, the runtime will print +dnl out addresses of struct(s) for debugging purpose. +dnl +dnl +AC_DEFUN([PAC_C_MULTI_ATTR_ALIAS],[ +AC_REQUIRE([PAC_PATH_NM_G]) +AC_LANG_PUSH(C) +AC_CHECK_HEADERS([stdio.h]) +AC_MSG_CHECKING([for multiple __attribute__((alias)) support]) + +#Compile the "other" __attribute__ object file. +AC_COMPILE_IFELSE([ + AC_LANG_SOURCE([ +#if defined(HAVE_STDIO_H) || defined(STDC_HEADERS) +#include <stdio.h> +#endif + +struct mpif_cmblk_t_ { int imember; }; +typedef struct mpif_cmblk_t_ mpif_cmblk_t; + +/* NOT initialize these structure so it appears in BSS or as COMMON symbols */ +mpif_cmblk_t mpifcmb; +mpif_cmblk_t MPIFCMB; + +/* + Do the test in this file instead in the file + where __attribute__((alias)) is used. + This is needed for pgcc since pgcc seems to + define aliased symbols if they are in the same file. +*/ +/* + We can't do the following comparison in one test: + + ilogical = (( &mpifcmb == ptr && &MPIFCMB == ptr ) ? TRUE : FALSE) ; + + because some compiler, like gcc 4.4.2's -O* optimizes the code + such that the ilogical expression is FALSE. 
The likely reason is that + mpifcmb and MPIFCMB are defined in the same scope in which C optimizer + may have treated them as different objects (with different addresses), + &mpifcmb != &MPIFCMB, before actually running the test and hence the + illogical expression is assumed to be always FALSE. The solution taken + here is to prevent the optimizer the opportunity to equate &mpifcmb and + &MPIFCMB (in same scope), e.g. using 2 separate tests and combine the + test results in a different scope. +*/ +int same_addrs1( void *ptr ); +int same_addrs1( void *ptr ) +{ +#if defined(ATTR_ALIAS_DEBUG) + printf( "others: addr(mpifcmb)=%p, addr(input ptr)=%p\n", &mpifcmb, ptr ); +#endif + return ( &mpifcmb == ptr ? 1 : 0 ); +} + +int same_addrs2( void *ptr ); +int same_addrs2( void *ptr ) +{ +#if defined(ATTR_ALIAS_DEBUG) + printf( "others: addr(MPIFCMB)=%p, addr(input ptr)=%p\n", &MPIFCMB, ptr ); +#endif + return ( &MPIFCMB == ptr ? 1 : 0 ); +} + + ]) +],[ + rm -f pac_conftest_other.$OBJEXT + PAC_RUNLOG([cp conftest.$OBJEXT pac_conftest_other.$OBJEXT]) + test -s pac_conftest_other.$OBJEXT && pac_c_attr_alias_other=yes +dnl cp conftest.$ac_ext pac_conftest_other.$ac_ext +dnl echo +dnl echo "pac_conftest_other.$OBJEXT" +dnl nm -P -g pac_conftest_other.$OBJEXT | grep -i "mpifcmb" +],[ + pac_c_attr_alias_other=no +]) dnl Endof AC_COMPILE_IFELSE + +pac_c_attr_alias_main=no +if test "$pac_c_attr_alias_other" = "yes" ; then + +# Save LIBS for later restoration. + saved_LIBS="$LIBS" + LIBS="pac_conftest_other.$OBJEXT $LIBS" + +# Link the "other" __attribute__ object file. 
+ AC_LINK_IFELSE([ + AC_LANG_PROGRAM([ +#if defined(HAVE_STDIO_H) || defined(STDC_HEADERS) +#include <stdio.h> +#endif + +struct mpif_cmblk_t_ { int imember; }; +typedef struct mpif_cmblk_t_ mpif_cmblk_t; + +mpif_cmblk_t mpifcmbr = {0}; +extern mpif_cmblk_t MPIFCMB __attribute__ ((alias("mpifcmbr"))); +extern mpif_cmblk_t mpifcmb __attribute__ ((alias("mpifcmbr"))); + +extern int same_addrs1( void *ptr ); +extern int same_addrs2( void *ptr ); + + ],[ + int iaddr = 0; +#if defined(ATTR_ALIAS_DEBUG) + printf( "main: addr(mpifcmbr) = %p\n", &mpifcmbr ); + printf( "main: addr(mpifcmb) = %p\n", &mpifcmb ); + printf( "main: addr(MPIFCMB) = %p\n", &MPIFCMB ); +#endif + iaddr = same_addrs1( &mpifcmbr ) && same_addrs2( &mpifcmbr ); + FILE *file = fopen( "pac_conftestval", "w" ); + if (!file) return 1; + fprintf( file, "%d\n", iaddr ); + ]) + ],[ + rm -f pac_conftest_main$EXEEXT + PAC_RUNLOG([cp conftest$EXEEXT pac_conftest_main$EXEEXT]) + test -x pac_conftest_main$EXEEXT && pac_c_attr_alias_main=yes +dnl cp conftest.$ac_ext pac_conftest_main.$ac_ext +dnl echo +dnl echo "pac_conftest_main$EXEEXT" +dnl nm -P -g pac_conftest_main$EXEEXT | grep -i "mpifcmb" + ],[ + pac_c_attr_alias_main=no +dnl cp conftest.$ac_ext pac_conftest_main.$ac_ext + ]) dnl Endof AC_LINK_IFELSE + +# Restore the previously saved LIBS + LIBS="$saved_LIBS" + rm -f pac_conftest_other.$OBJEXT +fi dnl Endof if test "$pac_c_attr_alias_other" = "yes" + +if test "$pac_c_attr_alias_main" = "yes" ; then + if test "$cross_compiling" = "yes" ; then + changequote(,) + # echo "PAC CROSS-COMPILING" dnl + # POSIX NM = nm -P format dnl + if test "$pac_path_NM_G_type" = "POSIX" ; then + addrs=`${pac_path_NM_G} ./pac_conftest_main$EXEEXT | grep -i mpifcmb | sed -e 's% *[a-zA-Z][a-zA-Z]* *[a-zA-Z] *\([0-9abcdef][0-9abcdef]*\).*%\1%g'` + fi + + # BSD NM = nm -B format dnl + if test "$pac_path_NM_G_type" = "BSD" ; then + addrs=`${pac_path_NM_G} -g ./pac_conftest_main$EXEEXT | grep -i mpifcmb | sed -e "s% *\([0-9abcdef][0-9abcdef]*\) 
*[a-zA-Z] *[a-zA-Z][a-zA-Z]*.*%\1%g"` + fi + changequote([,]) + + cmp_addr="" + diff_addrs=no + for addr in ${addrs} ; do + if test "X${cmp_addr}" != "X" ; then + if test "${cmp_addr}" != "${addr}" ; then + diff_addrs=yes + break + fi + else + cmp_addr=${addr} + fi + done + + if test "$diff_addrs" != "yes" ; then + dnl echo "Same addresses. Multiple aliases support" + AC_MSG_RESULT([${NM_G} says yes]) + pac_c_multi_attr_alias=yes + else + dnl echo "Different addresses. No multiple aliases support." + AC_MSG_RESULT([${NM_G} says no]) + pac_c_multi_attr_alias=no + fi + + else # if test "$cross_compiling" != "yes" + rm -f pac_conftestval + ac_try="./pac_conftest_main$EXEEXT" + if AC_TRY_EVAL(ac_try) ; then + pac_c_attr_alias_val=0 + if test -s pac_conftestval ; then + eval pac_c_attr_alias_val=`cat pac_conftestval` + fi + if test "$pac_c_attr_alias_val" -eq 1 ; then + AC_MSG_RESULT(yes) + pac_c_multi_attr_alias=yes + else + AC_MSG_RESULT(no) + pac_c_multi_attr_alias=no + fi + rm -f pac_conftestval + fi + fi + dnl Endof if test "$cross_compiling" = "yes" + rm -f pac_conftest_main$EXEEXT +else + AC_MSG_RESULT(no! link failure) + pac_c_multi_attr_alias=no +fi dnl Endof if test "$pac_c_attr_alias_main" = "yes" + +AC_LANG_POP(C) + +]) dnl Endof AC_DEFUN([PAC_C_MULTI_ATTR_ALIAS] +dnl +dnl PAC_C_ATTR_ALIGNED() +dnl +dnl Check if __attribute__((aligned)) support is there. +dnl If so, set pac_c_attr_aligned=yes. +dnl +dnl Do a link test instead of compile test to check if the linker +dnl would emit an error. +dnl +AC_DEFUN([PAC_C_ATTR_ALIGNED],[ +AC_LANG_PUSH(C) +AC_MSG_CHECKING([for __attribute__((aligned)) support]) +#Link the __attribute__ object file. 
+AC_LINK_IFELSE([ + AC_LANG_PROGRAM([ +struct mpif_cmblk_t_ { int imembers[5]; }; +typedef struct mpif_cmblk_t_ mpif_cmblk_t; +mpif_cmblk_t mpifcmbr __attribute__((aligned)) = {0}; + ],[]) +],[pac_c_attr_aligned=yes], [pac_c_attr_aligned=no]) +AC_MSG_RESULT([$pac_c_attr_aligned]) +AC_LANG_POP(C) +]) +dnl +dnl PAC_F2C_ATTR_ALIGNED_SIZE(ARRAY_SIZE, [OUTPUT_VAR], [MIN_ALIGNMENT]) +dnl +dnl ARRAY_SIZE : Size of the integer array within the fortran commmon block. +dnl OUTPUT_VAR : Optional variable to be set. +dnl if test succeeds, set OUTPUT_VAR=$pac_f2c_attr_aligned_str. +dnl if test fails, set OUTPUT_VAR="unknown". +dnl MIN_ALIGNMENT : Optional value. +dnl Minimum alignment size to be used in OUTPUT_VAR. +dnl pac_f2c_attr_aligned_str won't be modified. +dnl +dnl "pac_f2c_attr_aligned_str" will be set with +dnl 1) __attribute__((aligned(ALIGNMENT_SIZE))), +dnl 2) __attribute__((aligned)). +dnl 3) "", i.e. empty string. +dnl +dnl 2) means the test can't find a good alignment value, but both the Fortran +dnl and C compilers are OK with "aligned" which in principle means the C +dnl compiler will pick the maximum useful alignment supported by the +dnl architecture. +dnl 3) means that the test has failed to find the alignment. +dnl +AC_DEFUN([PAC_F2C_ATTR_ALIGNED_SIZE],[ +cmblksize=$1 +AC_MSG_CHECKING([the minimum alignment of Fortran common block of $cmblksize integers]) +dnl To find the minmium alignment of Fortran common block (of integer array) +dnl as seen by C object file, C object files of various (typical) alignments +dnl are linked to the Fortran code using the common block of integer array. +# +dnl Since the incorrect alignment results only a warning messages from the +dnl fortran compiler(or linker), so we use "diff" to compare the fortran +dnl compiler/linker output. We cannot use AC_LANG_WERROR, +dnl i.e. ac_fc_werror_flag=yes, because compiler like pgf77 at version 10.x) +dnl has non-zero stderr output if a fortran program is used in the linking. 
+dnl The stderr contains the name of fortran program even if the linking is +dnl successful. We could avoid the non-zero stderr output in pgf77 by +dnl compiling everthing into object files and linking all the object files +dnl with pgf77. Doing that would need us to use AC_TRY_EVAL instead of +dnl AC_LINK_IFELSE, so "diff" approach is used instead. +# +dnl Using diff of compiler(linker) output requires a reference output file +dnl as the base of diff. The process of creating this reference output file +dnl has to be exactly the same as the testing process, because pgf77 has +dnl the following weird behavour +dnl pgf77 -o ftest ftest.f => when $?=0 with zero stderr output +dnl pgf77 -o ftest ftest.f dummy.o => when $?=0 with non-zero stderr output. +dnl stderr has "ftest.f:". +dnl +# First create a fortran CONFTEST which will be used repeatedly. +AC_LANG_PUSH([Fortran]) dnl AC_LANG_PUSH([Fortran 77]) +AC_LANG_CONFTEST([ + AC_LANG_SOURCE([ + program fconftest + integer isize + parameter (isize=$cmblksize) + integer status_array(isize) + common /mpifcmb/ status_array + save /mpifcmb/ + end + ]) +]) +AC_LANG_POP([Fortran]) dnl AC_LANG_POP([Fortran 77]) +dnl +dnl +dnl +# Compile a C dummy.$OBJEXT and link with Fortran test program to create +# a reference linker output file, pac_align0.log, as the base of "diff". +AC_LANG_PUSH([C]) +AC_COMPILE_IFELSE([AC_LANG_SOURCE([])],[ + cp conftest.$ac_ext pac_conftest.c + PAC_RUNLOG([mv conftest.$OBJEXT pac_conftest.$OBJEXT]) + saved_LIBS="$LIBS" + LIBS="pac_conftest.$OBJEXT $LIBS" + AC_LANG_PUSH([Fortran]) dnl AC_LANG_PUSH([Fortran 77]) + saved_ac_link="$ac_link" + ac_link="`echo $saved_ac_link | sed -e 's|>.*$|> $pac_logfile 2>\&1|g'`" + pac_logfile="pac_align0.log" + rm -f $pac_logfile + AC_LINK_IFELSE([],[ + pac_f2c_alignedn_diffbase=yes + ],[ + pac_f2c_alignedn_diffbase=no + ]) + # Be sure NOT to remove the conftest.f which is still needed for later use. 
+ # rm -f conftest.$ac_ext + # Restore everything in autoconf that has been overwritten + ac_link="$saved_ac_link" + # restore previously saved LIBS + LIBS="$saved_LIBS" + AC_LANG_POP([Fortran]) dnl AC_LANG_POP([Fortran 77]) +],[ + pac_f2c_alignedn_diffbase=no +]) +AC_LANG_POP([C]) +dnl +dnl +if test "$pac_f2c_alignedn_diffbase" = "yes" ; then + # Initialize pac_result_str to empty string since part of the test + # depends on pac_result_str is empty or non-empty string. + pac_result_str="" + # Initialize pac_f2c_attr_aligned_str to empty string and + # it will remain as empty string if the following test fails. + pac_f2c_attr_aligned_str="" + for asize in 4 8 16 32 64 128 max ; do + if test "$asize" != "max" ; then + pac_attr_aligned_str="__attribute__((aligned($asize)))" + else + pac_attr_aligned_str="__attribute__((aligned))" + fi + AC_LANG_PUSH([C]) + #Compile the __attribute__ object file. + AC_COMPILE_IFELSE([ + AC_LANG_SOURCE([ +changequote(,) +struct mpif_cmblk_t_ { $MPI_FINT imembers[$cmblksize]; }; +changequote([,]) +typedef struct mpif_cmblk_t_ mpif_cmblk_t; +mpif_cmblk_t mpifcmbr $pac_attr_aligned_str = {0}; + +extern mpif_cmblk_t _CMPIFCMB __attribute__ ((alias("mpifcmbr"))); +extern mpif_cmblk_t MPIFCMB __attribute__ ((alias("mpifcmbr"))); +extern mpif_cmblk_t MPIFCMB_ __attribute__ ((alias("mpifcmbr"))); +extern mpif_cmblk_t _Cmpifcmb __attribute__ ((alias("mpifcmbr"))); +extern mpif_cmblk_t mpifcmb __attribute__ ((alias("mpifcmbr"))); +extern mpif_cmblk_t mpifcmb_ __attribute__ ((alias("mpifcmbr"))); + ]) + ],[ + cp conftest.$ac_ext pac_conftest.c + PAC_RUNLOG([mv conftest.$OBJEXT pac_conftest.$OBJEXT]) + saved_LIBS="$LIBS" + LIBS="pac_conftest.$OBJEXT $LIBS" + AC_LANG_PUSH([Fortran]) dnl AC_LANG_PUSH([Fortran 77]) + saved_ac_link="$ac_link" + ac_link="`echo $saved_ac_link | sed -e 's|>.*$|> $pac_logfile 2>\&1|g'`" + pac_logfile="pac_align1.log" + rm -f $pac_logfile + # Use conftest.f created in CONFTEST. 
+ AC_LINK_IFELSE([],[ + PAC_RUNLOG_IFELSE([diff -b pac_align0.log pac_align1.log],[ + pac_attr_alignedn=yes + ],[ + pac_attr_alignedn=no + cat $pac_logfile >&AS_MESSAGE_LOG_FD + echo "failed C program was:" >&AS_MESSAGE_LOG_FD + cat pac_conftest.c >&AS_MESSAGE_LOG_FD + echo "failed Fortran program was:" >&AS_MESSAGE_LOG_FD + cat conftest.$ac_ext >&AS_MESSAGE_LOG_FD + ]) + ],[ + pac_attr_alignedn=no + ]) + # Restore everything in autoconf that has been overwritten + ac_link="$saved_ac_link" + # restore previously saved LIBS + LIBS="$saved_LIBS" + AC_LANG_POP([Fortran]) dnl AC_LANG_POP([Fortran 77]) + # remove previously generated object file and C file. + rm -f pac_conftest.$OBJEXT pac_conftest.c + rm -f $pac_logfile + if test "$pac_attr_alignedn" = yes ; then + ifelse([$3],[],[ + pac_result_str="$asize" + pac_f2c_attr_aligned_str="$pac_attr_aligned_str" + break + ],[ + if test "$asize" != "max" -a "$asize" -lt "$3" ; then + if test "X$pac_result_str" = "X" ; then + pac_result_str="$asize" + pac_f2c_attr_aligned_str="$pac_attr_aligned_str" + fi + continue + else + pac_f2c_attr_aligned_str="$pac_attr_aligned_str" + if test "X$pac_result_str" != "X" ; then + pac_result_str="$pac_result_str, too small! reset to $asize" + else + pac_result_str="$asize" + fi + break + fi + ]) + fi + ], [ + pac_attr_alignedn=no + ]) + AC_LANG_POP([C]) + done + ifelse([$2],[],[],[$2="$pac_f2c_attr_aligned_str"]) +else + pac_result_str="" + # Since the test fails, set pac_f2c_attr_aligned_str to empty string. 
+ pac_f2c_attr_aligned_str="" +fi +if test "X$pac_result_str" != "X" ; then + AC_MSG_RESULT([$pac_result_str]) +else + AC_MSG_RESULT([unknown]) +fi +rm -f pac_align0.log +]) +dnl diff --git a/ompi/mca/io/romio/romio/confdb/aclocal_bugfix.m4 b/ompi/mca/io/romio/romio/confdb/aclocal_bugfix.m4 new file mode 100644 index 0000000000..f580d3919a --- /dev/null +++ b/ompi/mca/io/romio/romio/confdb/aclocal_bugfix.m4 @@ -0,0 +1,75 @@ +dnl This internal macro fails to work properly with OTHER internal macros. +dnl Basically, if the prologue is [], then no message should be generated. +dnl This macro is in autoconf 2.52 +m4_define([AC_LANG_PROGRAM(Fortran 77)], +[m4_if([$1],[[[]]],,[m4_ifval([$1], + [m4_warn([syntax], [$0: ignoring PROLOGUE: $1])])])dnl + program main +$2 + end]) + + +dnl/*D +dnl PAC_PROG_CHECK_INSTALL_WORKS - Check whether the install program in INSTALL +dnl works. +dnl +dnl Synopsis: +dnl PAC_PROG_CHECK_INSTALL_WORKS +dnl +dnl Output Effect: +dnl Sets the variable 'INSTALL' to the value of 'ac_install_sh' if +dnl a file cannot be installed into a local directory with the 'INSTALL' +dnl program +dnl +dnl Notes: +dnl The 'AC_PROG_INSTALL' script tries to avoid broken versions of +dnl install by avoiding directories such as '/usr/sbin' where some +dnl systems are known to have bad versions of 'install'. Unfortunately, +dnl this is exactly the sort of test-on-name instead of test-on-capability +dnl that 'autoconf' is meant to eliminate. The test in this script +dnl is very simple but has been adequate for working around problems +dnl on Solaris, where the '/usr/sbin/install' program (known by +dnl autoconf to be bad because it is in /usr/sbin) is also reached by a +dnl soft link through /bin, which autoconf believes is good. +dnl +dnl No variables are cached to ensure that we do not make a mistake in +dnl our choice of install program. 
+dnl +dnl The Solaris configure requires the directory name to immediately +dnl follow the '-c' argument, rather than the more common +dnl.vb +dnl args sourcefiles destination-dir +dnl.ve +dnl D*/ +AC_DEFUN([PAC_PROG_CHECK_INSTALL_WORKS],[ +if test -z "$INSTALL" ; then + AC_MSG_RESULT([No install program available]) +else + # first make any "confdb/install-sh -c" into an absolute path + # this is a hack, but it's still much cleaner than anything else I could + # come up with (see tt#1007) [goodell@] + AS_CASE(["$INSTALL"], + [/*],[:], + [*install-sh*],[INSTALL="$master_top_srcdir/$INSTALL"]) + + # Check that this install really works + rm -f conftest + echo "Test file" > conftest + if test ! -d .conftest ; then mkdir .conftest ; fi + AC_MSG_CHECKING([whether install works]) + if $INSTALL conftest .conftest >/dev/null 2>&1 ; then + installOk=yes + else + installOk=no + fi + rm -rf .conftest conftest + AC_MSG_RESULT($installOk) + if test "$installOk" = no ; then + if test -n "$ac_install_sh" ; then + INSTALL=$ac_install_sh + else + AC_MSG_ERROR([Unable to find working install]) + fi + fi +fi +]) diff --git a/ompi/mca/io/romio/romio/confdb/aclocal_cache.m4 b/ompi/mca/io/romio/romio/confdb/aclocal_cache.m4 new file mode 100644 index 0000000000..9d02c41638 --- /dev/null +++ b/ompi/mca/io/romio/romio/confdb/aclocal_cache.m4 @@ -0,0 +1,372 @@ +dnl +dnl/*D +dnl AC_CACHE_LOAD - Replacement for autoconf cache load +dnl +dnl Notes: +dnl Caching in autoconf is broken (at least through version 2.13). +dnl The problem is that the cache is read +dnl without any check for whether it makes any sense to read it. +dnl A common problem is a build on a shared file system; connecting to +dnl a different computer and then building within the same directory will +dnl lead to at best error messages from configure and at worse a build that +dnl is wrong but fails only at run time (e.g., wrong datatype sizes used). 
+dnl Later versions of autoconf do include some checks for changes in the +dnl environment that impact the choices, but still misses problems with +dnl multiple different systems. +dnl +dnl This fixes that by requiring the user to explicitly enable caching +dnl before the cache file will be loaded. +dnl +dnl To use this version of 'AC_CACHE_LOAD', you need to include +dnl 'aclocal_cache.m4' in your 'aclocal.m4' file. The sowing 'aclocal.m4' +dnl file includes this file. +dnl +dnl If no --enable-cache or --disable-cache option is selected, the +dnl command causes configure to keep track of the system being configured +dnl in a config.system file; if the current system matches the value stored +dnl in that file (or there is neither a config.cache nor config.system file), +dnl configure will enable caching. In order to ensure that the configure +dnl tests make sense, the values of CC, F77, F90, and CXX are also included +dnl in the config.system file. In addition, the value of PATH is included +dnl to ensure that changes in the PATH that might select a different version +dnl of a program with the same name (such as a native make versus gnumake) +dnl are detected. +dnl +dnl Bugs: +dnl This does not work with the Cygnus configure because the enable arguments +dnl are processed *after* AC_CACHE_LOAD (!). To address this, we avoid +dnl changing the value of enable_cache, and use real_enable_cache, duplicating +dnl the "notgiven" value. +dnl +dnl The environment variable CONFIGURE_DEBUG_CACHE, if set to yes, +dnl will cause additional data to be written out during the configure process. +dnl This can be helpful in debugging the cache file process. 
+dnl +dnl See Also: +dnl PAC_ARG_CACHING +dnl D*/ +define([AC_CACHE_LOAD], +[if test "$CONFIGURE_DEBUG_CACHE" = yes ; then + oldopts="$-" + clearMinusX=no + set -x + if test "$oldopts" != "$-" ; then + clearMinusX=yes + fi +fi +if test "X$cache_system" = "X" ; then + # A default file name, just in case + cache_system="config.system" + if test "$cache_file" != "/dev/null" ; then + # Get the directory for the cache file, if any + changequote(,) + dnl Be careful to ensure that there is no doubled slash + cache_system=`echo $cache_file | sed -e 's%^\(.*/\)[^/]*%\1config.system%'` + changequote([,]) + test "x$cache_system" = "x$cache_file" && cache_system="config.system" +# else +# We must *not* set enable_cache to no because we need to know if +# enable_cache was not set. +# enable_cache=no + fi +fi +dnl +dnl The "action-if-not-given" part of AC_ARG_ENABLE is not executed until +dnl after the AC_CACHE_LOAD is executed (!). Thus, the value of +dnl enable_cache if neither --enable-cache or --disable-cache is selected +dnl is null. Just in case autoconf ever fixes this, we test both cases. +dnl +dnl Include PATH in the cache.system file since changing the path can +dnl change which versions of programs are found (such as vendor make +dnl or GNU make). +dnl +# +# Get a test value and flag whether we should remove/replace the +# cache_system file (do so unless cache_system_ok is yes) +# FC and F77 should be synonyms. Save both in case +# We include the xxxFLAGS in case the user is using the flags to change +# the language (either input or output) of the compiler. E.g., +# using -xarch=v9 on Solaris to select 64 bit output or using -D_BSD_SOURCE +# with gcc to get different header files on input. 
+cleanargs=`echo "$CC $F77 $FC $CXX $F90 $CFLAGS $FFLAGS $CXXFLAGS $F90FLAGS $PATH" | tr '"' ' '` +if uname -srm >/dev/null 2>&1 ; then + cache_system_text="`uname -srm` $cleanargs" +else + cache_system_text="-no-uname- $cleanargs" +fi +cache_system_ok=no +# +if test -z "$real_enable_cache" ; then + real_enable_cache=$enable_cache + if test -z "$real_enable_cache" ; then real_enable_cache="notgiven" ; fi +fi +if test "X$real_enable_cache" = "Xnotgiven" ; then + # check for valid cache file + if test -z "$cache_system" ; then cache_system="config.system" ; fi + if uname -srm >/dev/null 2>&1 ; then + if test -f "$cache_system" -a -n "$cache_system_text" ; then + if test "$cache_system_text" = "`cat $cache_system`" ; then + real_enable_cache="yes" + cache_system_ok=yes + fi + elif test ! -f "$cache_system" -a -n "$cache_system_text" ; then + # remove the cache file because it may not correspond to our + # system + if test "$cache_file" != "/dev/null" ; then + rm -f $cache_file + fi + real_enable_cache="yes" + fi + fi +fi +if test "X$real_enable_cache" = "Xyes" -a "$cache_file" = "/dev/null" ; then + real_enable_cache=no +fi +if test "X$real_enable_cache" = "Xyes" ; then + if test -r "$cache_file" ; then + echo "loading cache $cache_file" + if test -w "$cache_file" ; then + # Clean the cache file (ergh) + PAC_CACHE_CLEAN + fi + . $cache_file + else + echo "Configure in `pwd` creating cache $cache_file" + > $cache_file + rm -f $cache_system + fi +else + cache_file="/dev/null" +fi +# Remember our location and the name of the cachefile +pac_cv_my_conf_dir=`pwd` +dnl do not include the cachefile name, since this may contain the process +dnl number and cause comparisons looking for changes to the cache file +dnl to detect a change that isn't real. 
+dnl pac_cv_my_cachefile=$cachefile +# +# Update the cache_system file if necessary +if test "$cache_system_ok" != yes ; then + if test -n "$cache_system" ; then + rm -f $cache_system + echo $cache_system_text > $cache_system + fi +fi +if test "$clearMinusX" = yes ; then + set +x +fi +]) +dnl +dnl/*D +dnl PAC_ARG_CACHING - Enable caching of results from a configure execution +dnl +dnl Synopsis: +dnl PAC_ARG_CACHING +dnl +dnl Output Effects: +dnl Adds '--enable-cache' and '--disable-cache' to the command line arguments +dnl accepted by 'configure'. +dnl +dnl See Also: +dnl AC_CACHE_LOAD +dnl D*/ +dnl Add this call to the other ARG_ENABLE calls. Note that the values +dnl set here are redundant; the LOAD_CACHE call relies on the way autoconf +dnl initially processes ARG_ENABLE commands. +AC_DEFUN([PAC_ARG_CACHING],[ +AC_ARG_ENABLE(cache, + AC_HELP_STRING([--enable-cache], [Turn on configure caching]),, + [enable_cache="notgiven"]) +]) + +dnl Clean the cache of extraneous quotes that AC_CACHE_SAVE may add +dnl +AC_DEFUN([PAC_CACHE_CLEAN],[ + rm -f confcache + sed -e "s/'\\\\''//g" -e "s/'\\\\/'/" -e "s/\\\\'/'/" \ + -e "s/'\\\\''//g" $cache_file > confcache + if cmp -s $cache_file confcache ; then + : + else + if test -w $cache_file ; then + echo "updating cache $cache_file" + cat confcache > $cache_file + else + echo "not updating unwritable cache $cache_file" + fi + fi + rm -f confcache + if test "$DEBUG_AUTOCONF_CACHE" = "yes" ; then + echo "Results of cleaned cache file:" + echo "--------------------------------------------------------" + cat $cache_file + echo "--------------------------------------------------------" + fi +]) + +dnl/*D +dnl PAC_SUBDIR_CACHE - Create a cache file before ac_output for subdirectory +dnl configures. +dnl +dnl Synopsis: +dnl PAC_SUBDIR_CACHE(when) +dnl +dnl Input Parameter: +dnl . when - Indicates when the cache should be created (optional) +dnl If 'always', create a new cache file. 
This option +dnl should be used if any of the cache parameters (such as +dnl CFLAGS or LDFLAGS) may have changed. +dnl +dnl Output Effects: +dnl +dnl Create a cache file before ac_output so that subdir configures don't +dnl make mistakes. +dnl We can't use OUTPUT_COMMANDS to remove the cache file, because those +dnl commands are executed *before* the subdir configures. +dnl +dnl D*/ +AC_DEFUN([PAC_SUBDIR_CACHE],[]) +AC_DEFUN([PAC_SUBDIR_CACHE_OLD],[ +if test "x$1" = "xalways" -o \( "$cache_file" = "/dev/null" -a "X$real_enable_cache" = "Xnotgiven" \) ; then + # Use an absolute directory to help keep the subdir configures from getting + # lost + cache_file=`pwd`/$$conf.cache + touch $cache_file + dnl + dnl For Autoconf 2.52+, we should ensure that the environment is set + dnl for the cache. Make sure that we get the values and set the + dnl xxx_set variables properly + ac_cv_env_CC_set=set + ac_cv_env_CC_value=$CC + ac_cv_env_CFLAGS_set=${CFLAGS+set} + ac_cv_env_CFLAGS_value=$CFLAGS + ac_cv_env_CPP_set=set + ac_cv_env_CPP_value=$CPP + ac_cv_env_CPPFLAGS_set=${CPPFLAGS+set} + ac_cv_env_CPPFLAGS_value=$CPPFLAGS + ac_cv_env_LDFLAGS_set=${LDFLAGS+set} + ac_cv_env_LDFLAGS_value=$LDFLAGS + ac_cv_env_LIBS_set=${LIBS+set} + ac_cv_env_LIBS_value=$LIBS + ac_cv_env_FC_set=${FC+set} + ac_cv_env_FC_value=$FC + ac_cv_env_F77_set=${F77+set} + ac_cv_env_F77_value=$F77 + ac_cv_env_F90_set=${F90+set} + ac_cv_env_F90_value=$F90 + ac_cv_env_FFLAGS_set=${FFLAGS+set} + ac_cv_env_FFLAGS_value=$FFLAGS + ac_cv_env_CXX_set=${CXX+set} + ac_cv_env_CXX_value=$CXX + + ac_env_CC_set=set + ac_env_CC_value=$CC + ac_env_CFLAGS_set=${CFLAGS+set} + ac_env_CFLAGS_value=$CFLAGS + ac_env_CPP_set=set + ac_env_CPP_value=$CPP + ac_env_CPPFLAGS_set=${CPPFLAGS+set} + ac_env_CPPFLAGS_value=$CPPFLAGS + ac_env_LDFLAGS_set=${LDFLAGS+set} + ac_env_LDFLAGS_value=$LDFLAGS + ac_env_LIBS_set=${LIBS+set} + ac_env_LIBS_value=$LIBS + ac_env_FC_set=${FC+set} + ac_env_FC_value=$FC + ac_env_F77_set=${F77+set} + 
ac_env_F77_value=$F77 + ac_env_F90_set=${F90+set} + ac_env_F90_value=$F90 + ac_env_FFLAGS_set=${FFLAGS+set} + ac_env_FFLAGS_value=$FFLAGS + ac_env_CXX_set=${CXX+set} + ac_env_CXX_value=$CXX + + dnl other parameters are + dnl build_alias, host_alias, target_alias + + # It turns out that A C CACHE_SAVE can't be invoked more than once + # with data that contains blanks. What happens is that the quotes + # that it adds get quoted and then added again. To avoid this, + # we strip off the outer quotes for all cached variables + dnl We add pac_cv_my_conf_dir to give the source of this cachefile, + dnl and pac_cv_my_cachefile to indicate how it chose the cachefile. + pac_cv_my_conf_dir=`pwd` + pac_cv_my_cachefile=$cachefile + AC_CACHE_SAVE + PAC_CACHE_CLEAN + ac_configure_args="$ac_configure_args -enable-cache" +fi +dnl Unconditionally export these values. Subdir configures break otherwise +export CC +export CFLAGS +export LDFLAGS +export LIBS +export CPPFLAGS +export CPP +export FC +export F77 +export F90 +export CXX +export FFLAGS +export CCFLAGS +]) +AC_DEFUN([PAC_SUBDIR_CACHE_CLEANUP],[]) +AC_DEFUN([PAC_SUBDIR_CACHE_CLEANUP_OLD],[ +if test "$cache_file" != "/dev/null" -a "X$real_enable_cache" = "Xnotgiven" ; then + rm -f $cache_file + cache_file=/dev/null +fi +]) +dnl +dnl The following three macros support the sharing of configure results +dnl by configure scripts, including ones that are not run with +dnl AC_CONFIG_SUBDIRS (the cachefiles managed by --enable-cache can +dnl only be used with AC_CONFIG_SUBDIRS; creating a autoconf-style +dnl cachefile before the the end of the autoconf process will often +dnl cause problems. 
+dnl +AC_DEFUN([PAC_CREATE_BASE_CACHE],[ +AC_ARG_ENABLE(base-cache, + AC_HELP_STRING([--enable-base-cache], + [Enable the use of a simple cache for the subsidiary + configure scripts]),,enable_base_cache=default) +# The default case is controlled by the environment variable CONF_USE_CACHEFILE +if test "$enable_base_cache" = "default" ; then + if test "$CONF_USE_CACHEFILE" = yes ; then + enable_base_cache=yes + else + enable_base_cache=no + fi +fi +if test "$enable_base_cache" != no ; then + if test "$enable_base_cache" = yes ; then + basecachefile=`pwd`/cache.base + else + basecachefile=`pwd`/$enable_base_cache + fi + set | grep ac_cv > $basecachefile + # Tell other configures to load this file + echo "Creating and exporting the base cache file $basecachefile" + CONF_BASE_CACHEFILE=$basecachefile + export CONF_BASE_CACHEFILE +fi +]) +AC_DEFUN([PAC_LOAD_BASE_CACHE],[ +if test -n "$CONF_BASE_CACHEFILE" -a -s "$CONF_BASE_CACHEFILE" ; then + echo "Loading base cachefile $CONF_BASE_CACHEFILE" + . 
$CONF_BASE_CACHEFILE + export CONF_BASE_CACHEFILE +fi +]) +AC_DEFUN([PAC_UPDATE_BASE_CACHE],[ +if test -n "$CONF_BASE_CACHEFILE" -a -s "$CONF_BASE_CACHEFILE" ; then + set | grep ac_cv > $CONF_BASE_CACHEFILE.new + if cmp -s $CONF_BASE_CACHEFILE.new $CONF_BASE_CACHEFILE ; then + : + else + echo "Replacing $CONF_BASE_CACHEFILE" + mv $CONF_BASE_CACHEFILE.new $CONF_BASE_CACHEFILE + fi +fi +]) diff --git a/ompi/mca/io/romio/romio/confdb/aclocal_cc.m4 b/ompi/mca/io/romio/romio/confdb/aclocal_cc.m4 new file mode 100644 index 0000000000..91b26de324 --- /dev/null +++ b/ompi/mca/io/romio/romio/confdb/aclocal_cc.m4 @@ -0,0 +1,1596 @@ +dnl AC_PROG_CC_GNU +ifdef([AC_PROG_CC_GNU],,[AC_DEFUN([AC_PROG_CC_GNU],)]) + +dnl PAC_PROG_CC - reprioritize the C compiler search order +AC_DEFUN([PAC_PROG_CC],[ + PAC_PUSH_FLAG([CFLAGS]) + AC_PROG_CC([gcc icc pgcc xlc xlC pathcc cc]) + PAC_POP_FLAG([CFLAGS]) +]) + +dnl +dnl/*D +dnl PAC_C_CHECK_COMPILER_OPTION - Check that a compiler option is accepted +dnl without warning messages +dnl +dnl Synopsis: +dnl PAC_C_CHECK_COMPILER_OPTION(optionname,action-if-ok,action-if-fail) +dnl +dnl Output Effects: +dnl +dnl If no actions are specified, a working value is added to 'COPTIONS' +dnl +dnl Notes: +dnl This is now careful to check that the output is different, since +dnl some compilers are noisy. +dnl +dnl We are extra careful to prototype the functions in case compiler options +dnl that complain about poor code are in effect. +dnl +dnl Because this is a long script, we have ensured that you can pass a +dnl variable containing the option name as the first argument. +dnl +dnl gcc 4.2.4 on 32-bit does not complain about the -Wno-type-limits option +dnl even though it doesn't support it. However, when another warning is +dnl triggered, it gives an error that the option is not recognized. 
So we +dnl need to test with a conftest file that will generate warnings +dnl D*/ +AC_DEFUN([PAC_C_CHECK_COMPILER_OPTION],[ +AC_MSG_CHECKING([whether C compiler accepts option $1]) +pccco_save_CFLAGS="$CFLAGS" +CFLAGS="$1 $CFLAGS" +rm -f conftest.out +pac_success=no +# conftest3.c has an invalid prototype to ensure we generate warnings +echo 'int main(){}' > conftest3.c +echo 'int foo(void);int foo(void){return 0;}' > conftest2.c +echo 'int main(void);int main(void){return 0;}' > conftest.c +if ${CC-cc} $CFLAGS $CPPFLAGS -o conftest conftest3.c $LDFLAGS >/dev/null 2>&1 && + ${CC-cc} $pccco_save_CFLAGS $CPPFLAGS -o conftest conftest.c $LDFLAGS >conftest.bas 2>&1 ; then + if ${CC-cc} $CFLAGS $CPPFLAGS -o conftest conftest.c $LDFLAGS >conftest.out 2>&1 ; then + if diff -b conftest.out conftest.bas >/dev/null 2>&1 ; then + AC_MSG_RESULT(yes) + AC_MSG_CHECKING([whether routines compiled with $1 can be linked with ones compiled without $1]) + rm -f conftest.out + rm -f conftest.bas + if ${CC-cc} -c $pccco_save_CFLAGS $CPPFLAGS conftest2.c >conftest2.out 2>&1 ; then + if ${CC-cc} $CFLAGS $CPPFLAGS -o conftest conftest2.o conftest.c $LDFLAGS >conftest.bas 2>&1 ; then + if ${CC-cc} $CFLAGS $CPPFLAGS -o conftest conftest2.o conftest.c $LDFLAGS >conftest.out 2>&1 ; then + if diff -b conftest.out conftest.bas >/dev/null 2>&1 ; then + pac_success=yes + else + : + fi + else + : + fi + else + # Could not link with the option! + : + fi + else + if test -s conftest2.out ; then + cat conftest2.out >&AC_FD_CC + fi + fi + else + : + fi + else + : + fi +else + # Could not compile without the option! 
+ : +fi +CFLAGS="$pccco_save_CFLAGS" +if test "$pac_success" = yes ; then + AC_MSG_RESULT(yes) + ifelse($2,,COPTIONS="$COPTIONS $1",$2) +else + AC_MSG_RESULT(no) + if test -s conftest.out ; then cat conftest.out >&AC_FD_CC ; fi + $3 +fi +# This is needed for Mac OSX 10.5 +rm -rf conftest.dSYM +rm -f conftest* +]) +dnl +dnl/*D +dnl PAC_C_OPTIMIZATION - Determine C options for producing optimized code +dnl +dnl Synopsis +dnl PAC_C_OPTIMIZATION([action if found]) +dnl +dnl Output Effect: +dnl Adds options to 'COPTIONS' if no other action is specified +dnl +dnl Notes: +dnl This is a temporary standin for compiler optimization. +dnl It should try to match known systems to known compilers (checking, of +dnl course), and then falling back to some common defaults. +dnl Note that many compilers will complain about -g and aggressive +dnl optimization. +dnl D*/ +AC_DEFUN([PAC_C_OPTIMIZATION],[ + for copt in "-O4 -Ofast" "-Ofast" "-fast" "-O3" "-xO3" "-O" ; do + PAC_C_CHECK_COMPILER_OPTION($copt,found_opt=yes,found_opt=no) + if test "$found_opt" = "yes" ; then + ifelse($1,,COPTIONS="$COPTIONS $copt",$1) + break + fi + done + if test "$ac_cv_prog_gcc" = "yes" ; then + for copt in "-fomit-frame-pointer" "-finline-functions" \ + "-funroll-loops" ; do + PAC_C_CHECK_COMPILER_OPTION($copt,found_opt=yes,found_opt=no) + if test "$found_opt" = "yes" ; then + ifelse($1,,COPTIONS="$COPTIONS $copt",$1) + # no break because we're trying to add them all + fi + done + # We could also look for architecture-specific gcc options + fi + +]) + +dnl/*D +dnl PAC_PROG_C_UNALIGNED_DOUBLES - Check that the C compiler allows unaligned +dnl doubles +dnl +dnl Synopsis: +dnl PAC_PROG_C_UNALIGNED_DOUBLES(action-if-true,action-if-false, +dnl action-if-unknown) +dnl +dnl Notes: +dnl 'action-if-unknown' is used in the case of cross-compilation. 
+dnl D*/
+AC_DEFUN([PAC_PROG_C_UNALIGNED_DOUBLES],[
+dnl The test program stores 1.0 through a double pointer aimed at two
+dnl consecutive int slots and reads each value back.  The cached result is
+dnl "yes" when both stores succeed, "no" when the program fails, and
+dnl "unknown" when cross-compiling.
+AC_CACHE_CHECK([whether C compiler allows unaligned doubles],
+pac_cv_prog_c_unaligned_doubles,[
+AC_TRY_RUN([
+void fetch_double( double *v )
+{
+    *v = 1.0;
+}
+int main( int argc, char **argv )
+{
+    int p[4];
+    double *p_val;
+    fetch_double( (double *)&(p[0]) );
+    p_val = (double *)&(p[0]);
+    if (*p_val != 1.0) return 1;
+    fetch_double( (double *)&(p[1]) );
+    p_val = (double *)&(p[1]);
+    if (*p_val != 1.0) return 1;
+    return 0;
+}
+],pac_cv_prog_c_unaligned_doubles="yes",pac_cv_prog_c_unaligned_doubles="no",
+pac_cv_prog_c_unaligned_doubles="unknown")])
+dnl Both sides of each comparison carry the X prefix.  The right-hand side
+dnl used to lack it, so the comparison was never true and no action ran.
+ifelse([$1],,,[if test "X$pac_cv_prog_c_unaligned_doubles" = "Xyes" ; then
+$1
+fi])
+ifelse([$2],,,[if test "X$pac_cv_prog_c_unaligned_doubles" = "Xno" ; then
+$2
+fi])
+ifelse([$3],,,[if test "X$pac_cv_prog_c_unaligned_doubles" = "Xunknown" ; then
+$3
+fi])
+])
+
+dnl/*D
+dnl PAC_PROG_C_WEAK_SYMBOLS - Test whether C supports weak alias symbols.
+dnl
+dnl Synopsis
+dnl PAC_PROG_C_WEAK_SYMBOLS(action-if-true,action-if-false)
+dnl
+dnl Output Effect:
+dnl Defines one of the following if a weak symbol pragma is found:
+dnl.vb
+dnl HAVE_PRAGMA_WEAK - #pragma weak
+dnl HAVE_PRAGMA_HP_SEC_DEF - #pragma _HP_SECONDARY_DEF
+dnl HAVE_PRAGMA_CRI_DUP - #pragma _CRI duplicate x as y
+dnl.ve
+dnl May also define
+dnl.vb
+dnl HAVE_WEAK_ATTRIBUTE
+dnl.ve
+dnl if functions can be declared as 'int foo(...) __attribute__ ((weak));'
+dnl sets the shell variable pac_cv_attr_weak to yes.
+dnl Also checks for __attribute__((weak_import)) which is supported by
+dnl Apple in Mac OSX (at least in Darwin). Note that this provides only
+dnl weak symbols, not weak aliases
+dnl
+dnl D*/
+AC_DEFUN([PAC_PROG_C_WEAK_SYMBOLS],[
+pragma_extra_message=""
+AC_CACHE_CHECK([for type of weak symbol alias support],
+pac_cv_prog_c_weak_symbols,[
+# Test for weak symbol support...
+# We can't put # in the message because it causes autoconf to generate +# incorrect code +AC_TRY_LINK([ +extern int PFoo(int); +#pragma weak PFoo = Foo +int Foo(int a) { return a; } +],[return PFoo(1);],has_pragma_weak=yes) +# +# Some systems (Linux ia64 and ecc, for example), support weak symbols +# only within a single object file! This tests that case. +# Note that there is an extern int PFoo declaration before the +# pragma. Some compilers require this in order to make the weak symbol +# extenally visible. +if test "$has_pragma_weak" = yes ; then + # This is needed for Mac OSX 10.5 + rm -rf conftest.dSYM + rm -f conftest* + cat >>conftest1.c <>conftest2.c <>conftest1.c <>conftest2.c <> config.log + echo "Failed program was" >> config.log + cat conftest1.c >>config.log + cat conftest2.c >>config.log + if test -s conftest.out ; then cat conftest.out >> config.log ; fi + has_pragma_weak=0 + pragma_extra_message="pragma weak accepted but does not work (probably creates two non-weak entries)" + fi + else + echo "$ac_link2" >>config.log + echo "Failed program was" >>config.log + cat conftest1.c >>config.log + cat conftest2.c >>config.log + if test -s conftest.out ; then cat conftest.out >> config.log ; fi + has_pragma_weak=0 + pragma_extra_message="pragma weak does not work outside of a file" + fi + # This is needed for Mac OSX 10.5 + rm -rf conftest.dSYM + rm -f conftest* +fi +dnl +if test -z "$pac_cv_prog_c_weak_symbols" ; then + AC_TRY_LINK([ +extern int PFoo(int); +#pragma _HP_SECONDARY_DEF Foo PFoo +int Foo(int a) { return a; } +],[return PFoo(1);],pac_cv_prog_c_weak_symbols="pragma _HP_SECONDARY_DEF") +fi +dnl +if test -z "$pac_cv_prog_c_weak_symbols" ; then + AC_TRY_LINK([ +extern int PFoo(int); +#pragma _CRI duplicate PFoo as Foo +int Foo(int a) { return a; } +],[return PFoo(1);],pac_cv_prog_c_weak_symbols="pragma _CRI duplicate x as y") +fi +dnl +if test -z "$pac_cv_prog_c_weak_symbols" ; then + pac_cv_prog_c_weak_symbols="no" +fi +dnl +dnl If there is an 
extra explanatory message, echo it now so that it
+dnl doesn't interfere with the cache result value
+if test -n "$pragma_extra_message" ; then
+    echo $pragma_extra_message
+fi
+dnl
+])
+if test "$pac_cv_prog_c_weak_symbols" = "no" ; then
+    ifelse([$2],,:,[$2])
+else
+dnl The cached value holds the full pragma text, e.g.
+dnl "pragma _HP_SECONDARY_DEF" or "pragma _CRI duplicate x as y", so the
+dnl HP and Cray arms match on the prefix.  An exact match on the short
+dnl prefix never succeeded and left those two symbols undefined.
+    case "$pac_cv_prog_c_weak_symbols" in
+        "pragma weak") AC_DEFINE(HAVE_PRAGMA_WEAK,1,[Supports weak pragma])
+        ;;
+        "pragma _HP"*) AC_DEFINE(HAVE_PRAGMA_HP_SEC_DEF,1,[HP style weak pragma])
+        ;;
+        "pragma _CRI"*) AC_DEFINE(HAVE_PRAGMA_CRI_DUP,1,[Cray style weak pragma])
+        ;;
+    esac
+    ifelse([$1],,:,[$1])
+fi
+AC_CACHE_CHECK([whether __attribute__ ((weak)) allowed],
+pac_cv_attr_weak,[
+AC_TRY_COMPILE([int foo(int) __attribute__ ((weak));],[int a;],
+pac_cv_attr_weak=yes,pac_cv_attr_weak=no)])
+# Note that being able to compile with weak_import doesn't mean that
+# it works.
+AC_CACHE_CHECK([whether __attribute__ ((weak_import)) allowed],
+pac_cv_attr_weak_import,[
+AC_TRY_COMPILE([int foo(int) __attribute__ ((weak_import));],[int a;],
+pac_cv_attr_weak_import=yes,pac_cv_attr_weak_import=no)])
+# Check if the alias option for weak attributes is allowed
+AC_CACHE_CHECK([whether __attribute__((weak,alias(...))) allowed],
+pac_cv_attr_weak_alias,[
+AC_TRY_COMPILE([int foo(int) __attribute__((weak,alias("__foo")));],[int a;],
+pac_cv_attr_weak_alias=yes,pac_cv_attr_weak_alias=no)])
+])
+
+#
+# This is a replacement that checks that FAILURES are signaled as well
+# (later configure macros look for the .o file, not just success from the
+# compiler, but they should not HAVE to
+#
+dnl --- insert 2.52 compatibility here ---
+dnl 2.52 does not have AC_PROG_CC_WORKS
+ifdef([AC_PROG_CC_WORKS],,[AC_DEFUN([AC_PROG_CC_WORKS],)])
+dnl
+AC_DEFUN([PAC_PROG_CC_WORKS],
+[AC_PROG_CC_WORKS
+AC_MSG_CHECKING([whether the C compiler sets its return status correctly])
+AC_LANG_SAVE
+AC_LANG_C
+AC_TRY_COMPILE(,[int a = bzzzt;],notbroken=no,notbroken=yes)
+AC_MSG_RESULT($notbroken)
+if test "$notbroken" = "no" ; then
+
AC_MSG_ERROR([installation or configuration problem: C compiler does not +correctly set error code when a fatal error occurs]) +fi +]) + +dnl/*D +dnl PAC_PROG_C_MULTIPLE_WEAK_SYMBOLS - Test whether C and the +dnl linker allow multiple weak symbols. +dnl +dnl Synopsis +dnl PAC_PROG_C_MULTIPLE_WEAK_SYMBOLS(action-if-true,action-if-false) +dnl +dnl +dnl D*/ +AC_DEFUN([PAC_PROG_C_MULTIPLE_WEAK_SYMBOLS],[ +AC_CACHE_CHECK([for multiple weak symbol support], +pac_cv_prog_c_multiple_weak_symbols,[ +# Test for multiple weak symbol support... +# +# This is needed for Mac OSX 10.5 +rm -rf conftest.dSYM +rm -f conftest* +cat >>conftest1.c <>conftest2.c <>config.log + echo "Failed program was" >>config.log + cat conftest1.c >>config.log + cat conftest2.c >>config.log + if test -s conftest.out ; then cat conftest.out >> config.log ; fi +fi +# This is needed for Mac OSX 10.5 +rm -rf conftest.dSYM +rm -f conftest* +dnl +]) +if test "$pac_cv_prog_c_multiple_weak_symbols" = "yes" ; then + ifelse([$1],,:,[$1]) +else + ifelse([$2],,:,[$2]) +fi +]) + +dnl Use the value of enable-strict to update CFLAGS +dnl pac_cc_strict_flags contains the strict flags. +dnl +dnl -std=c89 is used to select the C89 version of the ANSI/ISO C standard. +dnl As of this writing, many C compilers still accepted only this version, +dnl not the later C99 version. When all compilers accept C99, this +dnl should be changed to the appropriate standard level. Note that we've +dnl had trouble with gcc 2.95.3 accepting -std=c89 but then trying to +dnl compile program with a invalid set of options +dnl (-D __STRICT_ANSI__-trigraphs) +AC_DEFUN([PAC_CC_STRICT],[ +export enable_strict_done +if test "$enable_strict_done" != "yes" ; then + + # Some comments on strict warning options. + # These were added to reduce warnings: + # -Wno-missing-field-initializers -- We want to allow a struct to be + # initialized to zero using "struct x y = {0};" and not require + # each field to be initialized individually. 
+ # -Wno-unused-parameter -- For portability, some parameters go unused + # when we have different implementations of functions for + # different platforms + # -Wno-unused-label -- We add fn_exit: and fn_fail: on all functions, + # but fn_fail may not be used if the function doesn't return an + # error. + # -Wno-sign-compare -- read() and write() return bytes read/written + # as a signed value, but we often compare this to size_t (or + # msg_sz_t) variables. + # -Wno-format-zero-length -- this warning is irritating and useless, since + # a zero-length format string is very well defined + # -Wno-type-limits -- There are places where we compare an unsigned to + # a constant that happens to be zero e.g., if x is unsigned and + # MIN_VAL is zero, we'd like to do "MPIU_Assert(x >= MIN_VAL);". + # Note this option is not supported by gcc 4.2. This needs to be added + # after most other warning flags, so that we catch a gcc bug on 32-bit + # that doesn't give a warning that this is unsupported, unless another + # warning is triggered, and then if gives an error. + # These were removed to reduce warnings: + # -Wcast-qual -- Sometimes we need to cast "volatile char*" to + # "char*", e.g., for memcpy. + # -Wpadded -- We catch struct padding with asserts when we need to + # -Wredundant-decls -- Having redundant declarations is benign and the + # code already has some. + # -Waggregate-return -- This seems to be a performance-related warning + # aggregate return values are legal in ANSI C, but they may be returned + # in memory rather than through a register. We do use aggregate return + # values, but they are structs of a single basic type (used to enforce + # type checking for relative vs. absolute ptrs), and with optimization + # the aggregate value is converted to a scalar. 
+ # the embedded newlines in this string are safe because we evaluate each + # argument in the for-loop below and append them to the CFLAGS with a space + # as the separator instead + pac_common_strict_flags=" + -Wall + -Wextra + -Wno-missing-field-initializers + -Wstrict-prototypes + -Wmissing-prototypes + -DGCC_WALL + -Wno-unused-parameter + -Wno-unused-label + -Wshadow + -Wmissing-declarations + -Wno-long-long + -Wfloat-equal + -Wdeclaration-after-statement + -Wundef + -Wno-endif-labels + -Wpointer-arith + -Wbad-function-cast + -Wcast-align + -Wwrite-strings + -Wno-sign-compare + -Wold-style-definition + -Wno-multichar + -Wno-deprecated-declarations + -Wpacked + -Wnested-externs + -Winvalid-pch + -Wno-pointer-sign + -Wvariadic-macros + -Wno-format-zero-length + -Wno-type-limits + " + + enable_c89=yes + enable_c99=no + enable_posix=yes + enable_opt=yes + flags="`echo $1 | sed -e 's/:/ /g' -e 's/,/ /g'`" + for flag in ${flags}; do + case "$flag" in + c89) + enable_strict_done="yes" + enable_c89=yes + ;; + c99) + enable_strict_done="yes" + enable_c99=yes + ;; + posix) + enable_strict_done="yes" + enable_posix=yes + ;; + noposix) + enable_strict_done="yes" + enable_posix=no + ;; + opt) + enable_strict_done="yes" + enable_opt=yes + ;; + noopt) + enable_strict_done="yes" + enable_opt=no + ;; + all|yes) + enable_strict_done="yes" + enable_c89=yes + enable_posix=yes + enable_opt=yes + ;; + no) + # Accept and ignore this value + : + ;; + *) + if test -n "$flag" ; then + AC_MSG_WARN([Unrecognized value for enable-strict:$flag]) + fi + ;; + esac + done + + pac_cc_strict_flags="" + if test "${enable_strict_done}" = "yes" ; then + if test "${enable_opt}" = "yes" ; then + pac_cc_strict_flags="-O2" + fi + pac_cc_strict_flags="$pac_cc_strict_flags $pac_common_strict_flags" + if test "${enable_posix}" = "yes" ; then + PAC_APPEND_FLAG([-D_POSIX_C_SOURCE=199506L],[pac_cc_strict_flags]) + fi + # We only allow one of strict-C99 or strict-C89 to be + # enabled. 
If C99 is enabled, we automatically disable C89. + if test "${enable_c99}" = "yes" ; then + PAC_APPEND_FLAG([-std=c99],[pac_cc_strict_flags]) + elif test "${enable_c89}" = "yes" ; then + PAC_APPEND_FLAG([-std=c89],[pac_cc_strict_flags]) + fi + fi + + # See if the above options work with the compiler + accepted_flags="" + for flag in $pac_cc_strict_flags ; do + PAC_PUSH_FLAG([CFLAGS]) + CFLAGS="$CFLAGS $accepted_flags" + PAC_C_CHECK_COMPILER_OPTION([$flag],[accepted_flags="$accepted_flags $flag"],) + PAC_POP_FLAG([CFLAGS]) + done + pac_cc_strict_flags=$accepted_flags +fi +]) + +dnl/*D +dnl PAC_ARG_STRICT - Add --enable-strict to configure. +dnl +dnl Synopsis: +dnl PAC_ARG_STRICT +dnl +dnl Output effects: +dnl Adds '--enable-strict' to the command line. +dnl +dnl D*/ +AC_DEFUN([PAC_ARG_STRICT],[ +AC_ARG_ENABLE(strict, + AC_HELP_STRING([--enable-strict], [Turn on strict compilation testing])) +PAC_CC_STRICT($enable_strict) +CFLAGS="$CFLAGS $pac_cc_strict_flags" +export CFLAGS +]) + +dnl Return the integer structure alignment in pac_cv_c_max_integer_align +dnl Possible values include +dnl packed +dnl two +dnl four +dnl eight +dnl +dnl In addition, a "Could not determine alignment" and a "error!" +dnl return is possible. 
+AC_DEFUN([PAC_C_MAX_INTEGER_ALIGN],[
+dnl The test program compares the summed member sizes of several small
+dnl structs against their sizeof, writes one word describing the result to
+dnl ctest.out, and that word becomes the cached value.  A failed run gives
+dnl "unknown"; a cross-compile takes the value of CROSS_ALIGN_STRUCT_INT.
+AC_CACHE_CHECK([for max C struct integer alignment],
+pac_cv_c_max_integer_align,[
+AC_TRY_RUN([
+#include <stdio.h>
+#define DBG(a,b,c)
+int main( int argc, char *argv[] )
+{
+    FILE *cf;
+    int is_packed = 1;
+    int is_two = 1;
+    int is_four = 1;
+    int is_eight = 1;
+    struct { char a; int b; } char_int;
+    struct { char a; short b; } char_short;
+    struct { char a; long b; } char_long;
+    struct { char a; int b; char c; } char_int_char;
+    struct { char a; short b; char c; } char_short_char;
+#ifdef HAVE_LONG_LONG_INT
+    struct { long long int a; char b; } lli_c;
+    struct { char a; long long int b; } c_lli;
+#endif
+    int size, extent, extent2;
+
+    /* assume max integer alignment isn't 8 if we don't have
+     * an eight-byte value :)
+     */
+#ifdef HAVE_LONG_LONG_INT
+    if (sizeof(int) < 8 && sizeof(long) < 8 && sizeof(long long int) < 8)
+        is_eight = 0;
+#else
+    if (sizeof(int) < 8 && sizeof(long) < 8) is_eight = 0;
+#endif
+
+    size = sizeof(char) + sizeof(int);
+    extent = sizeof(char_int);
+    if (size != extent) is_packed = 0;
+    if ( (extent % 2) != 0) is_two = 0;
+    if ( (extent % 4) != 0) is_four = 0;
+    if (sizeof(int) == 8 && (extent % 8) != 0) is_eight = 0;
+    DBG("char_int",size,extent);
+
+    size = sizeof(char) + sizeof(short);
+    extent = sizeof(char_short);
+    if (size != extent) is_packed = 0;
+    if ( (extent % 2) != 0) is_two = 0;
+    if (sizeof(short) == 4 && (extent % 4) != 0) is_four = 0;
+    if (sizeof(short) == 8 && (extent % 8) != 0) is_eight = 0;
+    DBG("char_short",size,extent);
+
+    size = sizeof(char) + sizeof(long);
+    extent = sizeof(char_long);
+    if (size != extent) is_packed = 0;
+    if ( (extent % 2) != 0) is_two = 0;
+    if ( (extent % 4) != 0) is_four = 0;
+    if (sizeof(long) == 8 && (extent % 8) != 0) is_eight = 0;
+    DBG("char_long",size,extent);
+
+#ifdef HAVE_LONG_LONG_INT
+    size = sizeof(char) + sizeof(long long int);
+    extent = sizeof(lli_c);
+    extent2 = sizeof(c_lli);
+    if (size != extent) is_packed = 0;
+    if ( (extent % 2) != 0 && (extent2 % 2) != 0) is_two = 0;
+    if ( (extent % 4) != 0 && (extent2 % 4) != 0) is_four = 0;
+    if (sizeof(long long int) >= 8 && (extent % 8) != 0 && (extent2 % 8) != 0)
+        is_eight = 0;
+#endif
+
+    size = sizeof(char) + sizeof(int) + sizeof(char);
+    extent = sizeof(char_int_char);
+    if (size != extent) is_packed = 0;
+    if ( (extent % 2) != 0) is_two = 0;
+    if ( (extent % 4) != 0) is_four = 0;
+    if (sizeof(int) == 8 && (extent % 8) != 0) is_eight = 0;
+    DBG("char_int_char",size,extent);
+
+    size = sizeof(char) + sizeof(short) + sizeof(char);
+    extent = sizeof(char_short_char);
+    if (size != extent) is_packed = 0;
+    if ( (extent % 2) != 0) is_two = 0;
+    if (sizeof(short) == 4 && (extent % 4) != 0) is_four = 0;
+    if (sizeof(short) == 8 && (extent % 8) != 0) is_eight = 0;
+    DBG("char_short_char",size,extent);
+
+    /* If aligned mod 8, it will be aligned mod 4 */
+    if (is_eight) { is_four = 0; is_two = 0; }
+
+    if (is_four) is_two = 0;
+
+    /* Tabulate the results */
+    cf = fopen( "ctest.out", "w" );
+    /* A nonzero exit makes configure record "unknown" */
+    if (cf == NULL) return 1;
+    if (is_packed + is_two + is_four + is_eight == 0) {
+        fprintf( cf, "Could not determine alignment\n" );
+    }
+    else {
+        if (is_packed + is_two + is_four + is_eight != 1) {
+            fprintf( cf, "error!\n" );
+        }
+        else {
+            if (is_packed) fprintf( cf, "packed\n" );
+            if (is_two) fprintf( cf, "two\n" );
+            if (is_four) fprintf( cf, "four\n" );
+            if (is_eight) fprintf( cf, "eight\n" );
+        }
+    }
+    fclose( cf );
+    return 0;
+}],
+pac_cv_c_max_integer_align=`cat ctest.out`,
+pac_cv_c_max_integer_align="unknown",
+pac_cv_c_max_integer_align="$CROSS_ALIGN_STRUCT_INT")
+rm -f ctest.out
+])
+if test -z "$pac_cv_c_max_integer_align" ; then
+    pac_cv_c_max_integer_align="unknown"
+fi
+])
+
+dnl Return the floating point structure alignment in
+dnl pac_cv_c_max_fp_align.
+dnl
+dnl Possible values include:
+dnl packed
+dnl two
+dnl four
+dnl eight
+dnl sixteen
+dnl
+dnl In addition, a "Could not determine alignment" and a "error!"
+dnl return is possible.
+AC_DEFUN([PAC_C_MAX_FP_ALIGN],[
+AC_CACHE_CHECK([for max C struct floating point alignment],
+pac_cv_c_max_fp_align,[
+AC_TRY_RUN([
+#include <stdio.h>
+#define DBG(a,b,c)
+int main( int argc, char *argv[] )
+{
+    FILE *cf;
+    int is_packed  = 1;
+    int is_two     = 1;
+    int is_four    = 1;
+    int is_eight   = 1;
+    int is_sixteen = 1;
+    struct { char a; float b; } char_float;
+    struct { float b; char a; } float_char;
+    struct { char a; double b; } char_double;
+    struct { double b; char a; } double_char;
+#ifdef HAVE_LONG_DOUBLE
+    struct { char a; long double b; } char_long_double;
+    struct { long double b; char a; } long_double_char;
+    struct { long double a; int b; char c; } long_double_int_char;
+#endif
+    int size, extent1, extent2;
+
+    size = sizeof(char) + sizeof(float);
+    extent1 = sizeof(char_float);
+    extent2 = sizeof(float_char);
+    if (size != extent1) is_packed = 0;
+    if ( (extent1 % 2) != 0 && (extent2 % 2) != 0) is_two = 0;
+    if ( (extent1 % 4) != 0 && (extent2 % 4) != 0) is_four = 0;
+    if (sizeof(float) == 8 && (extent1 % 8) != 0 && (extent2 % 8) != 0)
+        is_eight = 0;
+    DBG("char_float",size,extent1);
+
+    size = sizeof(char) + sizeof(double);
+    extent1 = sizeof(char_double);
+    extent2 = sizeof(double_char);
+    if (size != extent1) is_packed = 0;
+    if ( (extent1 % 2) != 0 && (extent2 % 2) != 0) is_two = 0;
+    if ( (extent1 % 4) != 0 && (extent2 % 4) != 0) is_four = 0;
+    if (sizeof(double) == 8 && (extent1 % 8) != 0 && (extent2 % 8) != 0)
+        is_eight = 0;
+    DBG("char_double",size,extent1);
+
+#ifdef HAVE_LONG_DOUBLE
+    size = sizeof(char) + sizeof(long double);
+    extent1 = sizeof(char_long_double);
+    extent2 = sizeof(long_double_char);
+    if (size != extent1) is_packed = 0;
+    if ( (extent1 % 2) != 0 && (extent2 % 2) != 0) is_two = 0;
+    if ( (extent1 % 4) != 0 && (extent2 % 4) != 0) is_four = 0;
+    if (sizeof(long double) >= 8 && (extent1 % 8) != 0 && (extent2 % 8) != 0)
+        is_eight = 0;
+    if (sizeof(long double) > 8 && (extent1 % 16) != 0
+        && (extent2 % 16) != 0) is_sixteen = 0;
+    DBG("char_long-double",size,extent1);
+
+    extent1 = sizeof(long_double_int_char);
+    if ( (extent1 % 2) != 0) is_two = 0;
+    if ( (extent1 % 4) != 0) is_four = 0;
+    if (sizeof(long double) >= 8 && (extent1 % 8) != 0) is_eight = 0;
+    if (sizeof(long double) > 8 && (extent1 % 16) != 0) is_sixteen = 0;
+#else
+    is_sixteen = 0;
+#endif
+
+    if (is_sixteen) { is_eight = 0; is_four = 0; is_two = 0; } /* keep only the largest surviving alignment */
+
+    if (is_eight) { is_four = 0; is_two = 0; }
+
+    if (is_four) is_two = 0;
+
+    /* Tabulate the results */
+    cf = fopen( "ctest.out", "w" ); if (!cf) return 1; /* nonzero exit selects the "unknown" action */
+    if (is_packed + is_two + is_four + is_eight + is_sixteen == 0) {
+        fprintf( cf, "Could not determine alignment\n" );
+    }
+    else {
+        if (is_packed + is_two + is_four + is_eight + is_sixteen != 1) { /* more than one candidate survived */
+            fprintf( cf, "error!\n" );
+        }
+        else {
+            if (is_packed) fprintf( cf, "packed\n" );
+            if (is_two) fprintf( cf, "two\n" );
+            if (is_four) fprintf( cf, "four\n" );
+            if (is_eight) fprintf( cf, "eight\n" );
+            if (is_sixteen) fprintf( cf, "sixteen\n" );
+        }
+    }
+    fclose( cf );
+    return 0;
+}],
+pac_cv_c_max_fp_align=`cat ctest.out`,
+pac_cv_c_max_fp_align="unknown",
+pac_cv_c_max_fp_align="$CROSS_ALIGN_STRUCT_FP")
+rm -f ctest.out
+])
+if test -z "$pac_cv_c_max_fp_align" ; then
+    pac_cv_c_max_fp_align="unknown"
+fi
+])
+
+dnl Return the floating point structure alignment in
+dnl pac_cv_c_max_double_fp_align.
+dnl
+dnl Possible values include:
+dnl     packed
+dnl     two
+dnl     four
+dnl     eight
+dnl
+dnl In addition, a "Could not determine alignment" and an "error!"
+dnl return are possible.
+AC_DEFUN([PAC_C_MAX_DOUBLE_FP_ALIGN],[
+AC_CACHE_CHECK([for max C struct alignment of structs with doubles],
+pac_cv_c_max_double_fp_align,[
+AC_TRY_RUN([
+#include <stdio.h>
+#define DBG(a,b,c)
+int main( int argc, char *argv[] )
+{
+    FILE *cf;
+    int is_packed  = 1;
+    int is_two     = 1;
+    int is_four    = 1;
+    int is_eight   = 1;
+    struct { char a; float b; } char_float;
+    struct { float b; char a; } float_char;
+    struct { char a; double b; } char_double;
+    struct { double b; char a; } double_char;
+    int size, extent1, extent2;
+
+    size = sizeof(char) + sizeof(float);
+    extent1 = sizeof(char_float);
+    extent2 = sizeof(float_char);
+    if (size != extent1) is_packed = 0;
+    if ( (extent1 % 2) != 0 && (extent2 % 2) != 0) is_two = 0;
+    if ( (extent1 % 4) != 0 && (extent2 % 4) != 0) is_four = 0;
+    if (sizeof(float) == 8 && (extent1 % 8) != 0 && (extent2 % 8) != 0)
+        is_eight = 0;
+    DBG("char_float",size,extent1);
+
+    size = sizeof(char) + sizeof(double);
+    extent1 = sizeof(char_double);
+    extent2 = sizeof(double_char);
+    if (size != extent1) is_packed = 0;
+    if ( (extent1 % 2) != 0 && (extent2 % 2) != 0) is_two = 0;
+    if ( (extent1 % 4) != 0 && (extent2 % 4) != 0) is_four = 0;
+    if (sizeof(double) == 8 && (extent1 % 8) != 0 && (extent2 % 8) != 0)
+        is_eight = 0;
+    DBG("char_double",size,extent1);
+
+    if (is_eight) { is_four = 0; is_two = 0; }
+
+    if (is_four) is_two = 0;
+
+    /* Tabulate the results */
+    cf = fopen( "ctest.out", "w" ); if (!cf) return 1; /* nonzero exit selects the "unknown" action */
+    if (is_packed + is_two + is_four + is_eight == 0) {
+        fprintf( cf, "Could not determine alignment\n" );
+    }
+    else {
+        if (is_packed + is_two + is_four + is_eight != 1) {
+            fprintf( cf, "error!\n" );
+        }
+        else {
+            if (is_packed) fprintf( cf, "packed\n" );
+            if (is_two) fprintf( cf, "two\n" );
+            if (is_four) fprintf( cf, "four\n" );
+            if (is_eight) fprintf( cf, "eight\n" );
+        }
+    }
+    fclose( cf );
+    return 0;
+}],
+pac_cv_c_max_double_fp_align=`cat ctest.out`,
+pac_cv_c_max_double_fp_align="unknown",
+pac_cv_c_max_double_fp_align="$CROSS_ALIGN_STRUCT_DOUBLE_FP")
+rm -f ctest.out
+])
+if test -z "$pac_cv_c_max_double_fp_align" ; then
+    pac_cv_c_max_double_fp_align="unknown"
+fi
+])
+AC_DEFUN([PAC_C_MAX_LONGDOUBLE_FP_ALIGN],[dnl Result is cached in pac_cv_c_max_longdouble_fp_align; the program writes packed, two, four, eight or sixteen
+AC_CACHE_CHECK([for max C struct floating point alignment with long doubles],
+pac_cv_c_max_longdouble_fp_align,[
+AC_TRY_RUN([
+#include <stdio.h>
+#define DBG(a,b,c)
+int main( int argc, char *argv[] )
+{
+    FILE *cf;
+    int is_packed  = 1;
+    int is_two     = 1;
+    int is_four    = 1;
+    int is_eight   = 1;
+    int is_sixteen = 1;
+    struct { char a; long double b; } char_long_double;
+    struct { long double b; char a; } long_double_char;
+    struct { long double a; int b; char c; } long_double_int_char;
+    int size, extent1, extent2;
+
+    size = sizeof(char) + sizeof(long double);
+    extent1 = sizeof(char_long_double);
+    extent2 = sizeof(long_double_char);
+    if (size != extent1) is_packed = 0;
+    if ( (extent1 % 2) != 0 && (extent2 % 2) != 0) is_two = 0;
+    if ( (extent1 % 4) != 0 && (extent2 % 4) != 0) is_four = 0;
+    if (sizeof(long double) >= 8 && (extent1 % 8) != 0 && (extent2 % 8) != 0)
+        is_eight = 0;
+    if (sizeof(long double) > 8 && (extent1 % 16) != 0
+        && (extent2 % 16) != 0) is_sixteen = 0;
+    DBG("char_long-double",size,extent1);
+
+    extent1 = sizeof(long_double_int_char);
+    if ( (extent1 % 2) != 0) is_two = 0;
+    if ( (extent1 % 4) != 0) is_four = 0;
+    if (sizeof(long double) >= 8 && (extent1 % 8) != 0) is_eight = 0;
+    if (sizeof(long double) > 8 && (extent1 % 16) != 0) is_sixteen = 0;
+
+    if (is_sixteen) { is_eight = 0; is_four = 0; is_two = 0; }
+
+    if (is_eight) { is_four = 0; is_two = 0; }
+
+    if (is_four) is_two = 0;
+
+    /* Tabulate the results */
+    cf = fopen( "ctest.out", "w" ); if (!cf) return 1; /* nonzero exit selects the "unknown" action */
+    if (is_packed + is_two + is_four + is_eight + is_sixteen == 0) {
+        fprintf( cf, "Could not determine alignment\n" );
+    }
+    else {
+        if (is_packed + is_two + is_four + is_eight + is_sixteen != 1) {
+            fprintf( cf, "error!\n" );
+        }
+        else {
+            if (is_packed) fprintf( cf, "packed\n" );
+            if (is_two) fprintf( cf, "two\n" );
+            if (is_four) fprintf( cf, "four\n" );
+            if (is_eight) fprintf( cf, "eight\n" );
+            if (is_sixteen) fprintf( cf, "sixteen\n" );
+        }
+    }
+    fclose( cf );
+    return 0;
+}],
+pac_cv_c_max_longdouble_fp_align=`cat ctest.out`,
+pac_cv_c_max_longdouble_fp_align="unknown",
+pac_cv_c_max_longdouble_fp_align="$CROSS_ALIGN_STRUCT_LONGDOUBLE_FP")
+rm -f ctest.out
+])
+if test -z "$pac_cv_c_max_longdouble_fp_align" ; then
+    pac_cv_c_max_longdouble_fp_align="unknown"
+fi
+])
+
+dnl Other tests assume that there is potentially a maximum alignment
+dnl and that if there is no maximum alignment, or a type is smaller than
+dnl that value, then we align on the size of the value, with the exception
+dnl of the "position-based alignment" rules we test for separately.
+dnl
+dnl It turns out that these assumptions have fallen short in at least one
+dnl case, on MacBook Pros, where doubles are aligned on 4-byte boundaries
+dnl even when long doubles are aligned on 16-byte boundaries. So this test
+dnl is here specifically to handle this case.
+dnl
+dnl Puts result in pac_cv_c_double_alignment_exception.
+dnl
+dnl Possible values currently include no and four.
+dnl
+AC_DEFUN([PAC_C_DOUBLE_ALIGNMENT_EXCEPTION],[
+AC_CACHE_CHECK([if double alignment breaks rules, find actual alignment],
+pac_cv_c_double_alignment_exception,[
+AC_TRY_RUN([
+#include <stdio.h>
+#define DBG(a,b,c)
+int main( int argc, char *argv[] )
+{
+    FILE *cf;
+    struct { char a; double b; } char_double;
+    struct { double b; char a; } double_char;
+    int extent1, extent2, align_4 = 0;
+
+    extent1 = sizeof(char_double);
+    extent2 = sizeof(double_char);
+
+    /* we're interested in the largest value, will let separate test
+     * deal with position-based issues.
+     */
+    if (extent1 < extent2) extent1 = extent2;
+    if ((sizeof(double) == 8) && (extent1 % 8) != 0) {
+        if (extent1 % 4 == 0) {
+#ifdef HAVE_MAX_FP_ALIGNMENT
+            if (HAVE_MAX_FP_ALIGNMENT >= 8) align_4 = 1;
+#else
+            align_4 = 1;
+#endif
+        }
+    }
+
+    cf = fopen( "ctest.out", "w" ); if (!cf) return 1; /* nonzero exit selects the "unknown" action */
+
+    if (align_4) fprintf( cf, "four\n" );
+    else         fprintf( cf, "no\n" );
+
+    fclose( cf );
+    return 0;
+}],
+pac_cv_c_double_alignment_exception=`cat ctest.out`,
+pac_cv_c_double_alignment_exception="unknown",
+pac_cv_c_double_alignment_exception="$CROSS_ALIGN_DOUBLE_EXCEPTION")
+rm -f ctest.out
+])
+if test -z "$pac_cv_c_double_alignment_exception" ; then
+    pac_cv_c_double_alignment_exception="unknown"
+fi
+])
+
+dnl Test for odd struct alignment rule that only applies max.
+dnl padding when double value is at front of type.
+dnl Puts result in pac_cv_c_double_pos_align.
+dnl
+dnl Search for "Power alignment mode" for more details.
+dnl
+dnl Possible values include yes, no, and unknown.
+dnl
+AC_DEFUN([PAC_C_DOUBLE_POS_ALIGN],[
+AC_CACHE_CHECK([if alignment of structs with doubles is based on position],
+pac_cv_c_double_pos_align,[
+AC_TRY_RUN([
+#include <stdio.h>
+#define DBG(a,b,c)
+int main( int argc, char *argv[] )
+{
+    FILE *cf;
+    int padding_varies_by_pos = 0;
+    struct { char a; double b; } char_double;
+    struct { double b; char a; } double_char;
+    int extent1, extent2;
+
+    extent1 = sizeof(char_double);
+    extent2 = sizeof(double_char);
+    if (extent1 != extent2) padding_varies_by_pos = 1;
+
+    cf = fopen( "ctest.out", "w" ); if (!cf) return 1; /* nonzero exit selects the "unknown" action */
+    if (padding_varies_by_pos) fprintf( cf, "yes\n" );
+    else                       fprintf( cf, "no\n" );
+
+    fclose( cf );
+    return 0;
+}],
+pac_cv_c_double_pos_align=`cat ctest.out`,
+pac_cv_c_double_pos_align="unknown",
+pac_cv_c_double_pos_align="$CROSS_ALIGN_DOUBLE_POS")
+rm -f ctest.out
+])
+if test -z "$pac_cv_c_double_pos_align" ; then
+    pac_cv_c_double_pos_align="unknown"
+fi
+])
+
+dnl Test for odd struct alignment rule that only applies max.
+dnl padding when long long int value is at front of type.
+dnl Puts result in pac_cv_c_llint_pos_align.
+dnl
+dnl Search for "Power alignment mode" for more details.
+dnl
+dnl Possible values include yes, no, and unknown.
+dnl
+AC_DEFUN([PAC_C_LLINT_POS_ALIGN],[
+AC_CACHE_CHECK([if alignment of structs with long long ints is based on position],
+pac_cv_c_llint_pos_align,[
+AC_TRY_RUN([
+#include <stdio.h>
+#define DBG(a,b,c)
+int main( int argc, char *argv[] )
+{
+    FILE *cf;
+    int padding_varies_by_pos = 0;
+#ifdef HAVE_LONG_LONG_INT
+    struct { char a; long long int b; } char_llint;
+    struct { long long int b; char a; } llint_char;
+    int extent1, extent2;
+
+    extent1 = sizeof(char_llint);
+    extent2 = sizeof(llint_char);
+    if (extent1 != extent2) padding_varies_by_pos = 1;
+#endif
+
+    cf = fopen( "ctest.out", "w" ); if (!cf) return 1; /* nonzero exit selects the "unknown" action */
+    if (padding_varies_by_pos) fprintf( cf, "yes\n" );
+    else                       fprintf( cf, "no\n" );
+
+    fclose( cf );
+    return 0;
+}],
+pac_cv_c_llint_pos_align=`cat ctest.out`,
+pac_cv_c_llint_pos_align="unknown",
+pac_cv_c_llint_pos_align="$CROSS_ALIGN_LLINT_POS")
+rm -f ctest.out
+])
+if test -z "$pac_cv_c_llint_pos_align" ; then
+    pac_cv_c_llint_pos_align="unknown"
+fi
+])
+
+dnl/*D
+dnl PAC_FUNC_NEEDS_DECL - Set NEEDS_<funcname>_DECL if a declaration is needed
+dnl
+dnl Synopsis:
+dnl PAC_FUNC_NEEDS_DECL(headerfiles,funcname)
+dnl
+dnl Output Effect:
+dnl Sets 'NEEDS_<funcname>_DECL' if 'funcname' is not declared by the
+dnl headerfiles.
+dnl
+dnl Approach:
+dnl Try to compile a program with the function, but passed with an incorrect
+dnl calling sequence.  If the compilation fails, then the declaration
+dnl is provided within the header files.  If the compilation succeeds,
+dnl the declaration is required.
+dnl
+dnl We use a 'double' as the first argument to try and catch varargs
+dnl routines that may use an int or pointer as the first argument.
+dnl
+dnl There is one difficulty - if the compiler has been instructed to
+dnl fail on implicitly defined functions, then this test will always
+dnl fail.
+dnl
+dnl D*/
+AC_DEFUN([PAC_FUNC_NEEDS_DECL],[
+AC_CACHE_CHECK([whether $2 needs a declaration],
+pac_cv_func_decl_$2,[
+AC_TRY_COMPILE([$1
+int $2(double, int, double, const char *);],[int a=$2(1.0,27,1.0,"foo");],
+pac_cv_func_decl_$2=yes,pac_cv_func_decl_$2=no)])
+if test "$pac_cv_func_decl_$2" = "yes" ; then
+changequote(<<,>>)dnl
+define(<<PAC_FUNC_NAME>>, translit(NEEDS_$2_DECL, [a-z *], [A-Z__]))dnl
+changequote([, ])dnl
+    AC_DEFINE_UNQUOTED(PAC_FUNC_NAME,1,[Define if $2 needs a declaration])
+undefine([PAC_FUNC_NAME])
+fi
+])
+
+dnl PAC_C_GNU_ATTRIBUTE - See if the GCC __attribute__ specifier is allowed.
+dnl Use the following
+dnl #ifndef HAVE_GCC_ATTRIBUTE
+dnl #define __attribute__(a)
+dnl #endif
+dnl If *not*, define __attribute__(a) as null
+dnl
+dnl We start by requiring Gcc.  Some other compilers accept __attribute__
+dnl but generate warning messages, or have different interpretations
+dnl (which seems to make __attribute__ just as bad as #pragma)
+dnl For example, the Intel icc compiler accepts __attribute__ and
+dnl __attribute__((pure)) but generates warnings for __attribute__((format...))
+dnl
+AC_DEFUN([PAC_C_GNU_ATTRIBUTE],[
+AC_REQUIRE([AC_PROG_CC_GNU])
+if test "$ac_cv_prog_gcc" = "yes" ; then
+    AC_CACHE_CHECK([whether __attribute__ allowed],
+pac_cv_gnu_attr_pure,[
+AC_TRY_COMPILE([int foo(int) __attribute__ ((pure));],[int a;],
+pac_cv_gnu_attr_pure=yes,pac_cv_gnu_attr_pure=no)])
+AC_CACHE_CHECK([whether __attribute__((format)) allowed],
+pac_cv_gnu_attr_format,[
+AC_TRY_COMPILE([int foo(char *,...) __attribute__ ((format(printf,1,2)));],[int a;],
+pac_cv_gnu_attr_format=yes,pac_cv_gnu_attr_format=no)])
+    if test "$pac_cv_gnu_attr_pure" = "yes" -a "$pac_cv_gnu_attr_format" = "yes" ; then
+        AC_DEFINE(HAVE_GCC_ATTRIBUTE,1,[Define if GNU __attribute__ is supported])
+    fi
+fi
+])
+dnl
+dnl Check for a broken install (fails to preserve file modification times,
+dnl thus breaking libraries).
+dnl
+dnl Create a library, install it, and then try to link against it.
+AC_DEFUN([PAC_PROG_INSTALL_BREAKS_LIBS],[
+AC_CACHE_CHECK([whether install breaks libraries],
+ac_cv_prog_install_breaks_libs,[
+AC_REQUIRE([AC_PROG_RANLIB])
+AC_REQUIRE([AC_PROG_INSTALL])
+AC_REQUIRE([AC_PROG_CC])
+ac_cv_prog_install_breaks_libs=yes
+# This is needed for Mac OSX 10.5
+rm -rf conftest.dSYM
+rm -f libconftest* conftest*
+echo 'int foo(int);int foo(int a){return a;}' > conftest1.c
+echo 'extern int foo(int); int main( int argc, char **argv){ return foo(0); }' > conftest2.c
+if ${CC-cc} $CFLAGS -c conftest1.c >conftest.out 2>&1 ; then
+    if ${AR-ar} cr libconftest.a conftest1.o >/dev/null 2>&1 ; then
+        if ${RANLIB-:} libconftest.a >/dev/null 2>&1 ; then
+            # Anything less than sleep 10, and Mac OS/X (Darwin)
+            # will claim that install works because ranlib won't complain
+            sleep 10
+            libinstall="$INSTALL_DATA"
+            eval "libinstall=\"$libinstall\""
+            if ${libinstall} libconftest.a libconftest1.a >/dev/null 2>&1 ; then
+                if ${CC-cc} $CFLAGS -o conftest conftest2.c $LDFLAGS libconftest1.a >>conftest.out 2>&1 && test -x conftest ; then
+                    # Success! Install works
+                    ac_cv_prog_install_breaks_libs=no
+                else
+                    # Failure! Does install -p work?
+                    rm -f libconftest1.a
+                    if ${libinstall} -p libconftest.a libconftest1.a >/dev/null 2>&1 ; then
+                        if ${CC-cc} $CFLAGS -o conftest conftest2.c $LDFLAGS libconftest1.a >>conftest.out 2>&1 && test -x conftest ; then
+                            # Success! Install works
+                            ac_cv_prog_install_breaks_libs="no, with -p"
+                        fi
+                    fi
+                fi
+            fi
+        fi
+    fi
+fi
+# This is needed for Mac OSX 10.5
+rm -rf conftest.dSYM
+rm -f conftest* libconftest*])
+
+if test -z "$RANLIB_AFTER_INSTALL" ; then
+    RANLIB_AFTER_INSTALL=no
+fi
+case "$ac_cv_prog_install_breaks_libs" in
+    yes)
+        RANLIB_AFTER_INSTALL=yes
+    ;;
+    "no, with -p")
+        INSTALL_DATA="$INSTALL_DATA -p"
+    ;;
+    *)
+    # Do nothing
+    :
+    ;;
+esac
+AC_SUBST(RANLIB_AFTER_INSTALL)
+])
+
+#
+# determine if the compiler defines a symbol containing the function name
+# Inspired by checks within the src/mpid/globus/configure.in file in MPICH2
+#
+# These tests check not only that the compiler defines some symbol, such
+# as __FUNCTION__, but that the symbol correctly names the function.
+#
+# Defines
+#   HAVE__FUNC__      (if __func__ defined)
+#   HAVE_CAP__FUNC__  (if __FUNC__ defined)
+#   HAVE__FUNCTION__  (if __FUNCTION__ defined)
+#
+AC_DEFUN([PAC_CC_FUNCTION_NAME_SYMBOL],[
+AC_CACHE_CHECK([whether the compiler defines __func__],
+pac_cv_have__func__,[
+tmp_am_cross=no
+AC_RUN_IFELSE([
+AC_LANG_SOURCE([
+#include <string.h>
+int foo(void);
+int foo(void)
+{
+    return (strcmp(__func__, "foo") == 0);
+}
+int main(int argc, char ** argv)
+{
+    return (foo() ? 0 : 1);
+}
+])
+], pac_cv_have__func__=yes, pac_cv_have__func__=no,tmp_am_cross=yes)
+if test "$tmp_am_cross" = yes ; then # cross-compiling: fall back to a link-only test
+    AC_LINK_IFELSE([
+    AC_LANG_SOURCE([
+#include <string.h>
+int foo(void);
+int foo(void)
+{
+    return (strcmp(__func__, "foo") == 0);
+}
+int main(int argc, char ** argv)
+{
+    return (foo() ? 0 : 1);
+}
+    ])
+], pac_cv_have__func__=yes, pac_cv_have__func__=no)
+fi
+])
+
+if test "$pac_cv_have__func__" = "yes" ; then
+    AC_DEFINE(HAVE__FUNC__,,[define if the compiler defines __func__])
+fi
+
+AC_CACHE_CHECK([whether the compiler defines __FUNC__],
+pac_cv_have_cap__func__,[
+tmp_am_cross=no
+AC_RUN_IFELSE([
+AC_LANG_SOURCE([
+#include <string.h>
+int foo(void);
+int foo(void)
+{
+    return (strcmp(__FUNC__, "foo") == 0);
+}
+int main(int argc, char ** argv)
+{
+    return (foo() ? 0 : 1);
+}
+])
+], pac_cv_have_cap__func__=yes, pac_cv_have_cap__func__=no,tmp_am_cross=yes)
+if test "$tmp_am_cross" = yes ; then # cross-compiling: fall back to a link-only test
+    AC_LINK_IFELSE([
+    AC_LANG_SOURCE([
+#include <string.h>
+int foo(void);
+int foo(void)
+{
+    return (strcmp(__FUNC__, "foo") == 0);
+}
+int main(int argc, char ** argv)
+{
+    return (foo() ? 0 : 1);
+}
+    ])
+], pac_cv_have_cap__func__=yes, pac_cv_have_cap__func__=no)
+fi
+])
+
+if test "$pac_cv_have_cap__func__" = "yes" ; then
+    AC_DEFINE(HAVE_CAP__FUNC__,,[define if the compiler defines __FUNC__])
+fi
+
+AC_CACHE_CHECK([whether the compiler sets __FUNCTION__],
+pac_cv_have__function__,[
+tmp_am_cross=no
+AC_RUN_IFELSE([
+AC_LANG_SOURCE([
+#include <string.h>
+int foo(void);
+int foo(void)
+{
+    return (strcmp(__FUNCTION__, "foo") == 0);
+}
+int main(int argc, char ** argv)
+{
+    return (foo() ? 0 : 1);
+}
+])
+], pac_cv_have__function__=yes, pac_cv_have__function__=no,tmp_am_cross=yes)
+if test "$tmp_am_cross" = yes ; then # cross-compiling: fall back to a link-only test
+    AC_LINK_IFELSE([
+    AC_LANG_SOURCE([
+#include <string.h>
+int foo(void);
+int foo(void)
+{
+    return (strcmp(__FUNCTION__, "foo") == 0);
+}
+int main(int argc, char ** argv)
+{
+    return (foo() ? 0 : 1);
+}
+    ])
+], pac_cv_have__function__=yes, pac_cv_have__function__=no)
+fi
+])
+
+if test "$pac_cv_have__function__" = "yes" ; then
+    AC_DEFINE(HAVE__FUNCTION__,,[define if the compiler defines __FUNCTION__])
+fi
+
+])
+
+
+dnl Check structure alignment
+AC_DEFUN([PAC_STRUCT_ALIGNMENT],[
+    # Initialize alignment checks
+    is_packed=1
+    is_two=1
+    is_four=1
+    is_eight=1
+    is_largest=1
+
+    # See if long double exists
+    AC_TRY_COMPILE(,[long double a;],have_long_double=yes,have_long_double=no)
+
+    # Get sizes of regular types
+    AC_CHECK_SIZEOF(char)
+    AC_CHECK_SIZEOF(int)
+    AC_CHECK_SIZEOF(short)
+    AC_CHECK_SIZEOF(long)
+    AC_CHECK_SIZEOF(float)
+    AC_CHECK_SIZEOF(double)
+    AC_CHECK_SIZEOF(long double)
+
+    # char_int comparison
+    AC_CHECK_SIZEOF(char_int, 0, [typedef struct { char a; int b; } char_int; ])
+    size=`expr $ac_cv_sizeof_char + $ac_cv_sizeof_int`
+    extent=$ac_cv_sizeof_char_int
+    if test "$size" != "$extent" ; then is_packed=0 ; fi
+    if test "`expr $extent % $ac_cv_sizeof_int`" != "0" ; then is_largest=0 ; fi
+    if test "`expr $extent % 2`" != "0" ; then is_two=0 ; fi
+    if test "`expr $extent % 4`" != "0" ; then is_four=0 ; fi
+    if test "$ac_cv_sizeof_int" = "8" -a "`expr $extent % 8`" != "0" ; then
+        is_eight=0
+    fi
+
+    # char_short comparison
+    AC_CHECK_SIZEOF(char_short, 0, [typedef struct { char a; short b; } char_short; ])
+    size=`expr $ac_cv_sizeof_char + $ac_cv_sizeof_short`
+    extent=$ac_cv_sizeof_char_short
+    if test "$size" != "$extent" ; then is_packed=0 ; fi
+    if test "`expr $extent % $ac_cv_sizeof_short`" != "0" ; then is_largest=0 ; fi
+    if test "`expr $extent % 2`" != "0" ; then is_two=0 ; fi
+    if test "$ac_cv_sizeof_short" = "4" -a "`expr $extent % 4`" != "0" ; then
+        is_four=0
+    fi
+    if test "$ac_cv_sizeof_short" = "8" -a "`expr $extent % 8`" != "0" ; then
+        is_eight=0
+    fi
+
+    # char_long comparison
+    AC_CHECK_SIZEOF(char_long, 0, [typedef struct { char a; long b; } char_long; ])
+    size=`expr $ac_cv_sizeof_char + $ac_cv_sizeof_long`
+    extent=$ac_cv_sizeof_char_long
+    if test "$size" != "$extent" ; then is_packed=0 ; fi
+    if test "`expr $extent % $ac_cv_sizeof_long`" != "0" ; then is_largest=0 ; fi
+    if test "`expr $extent % 2`" != "0" ; then is_two=0 ; fi
+    if test "`expr $extent % 4`" != "0" ; then is_four=0 ; fi
+    if test "$ac_cv_sizeof_long" = "8" -a "`expr $extent % 8`" != "0" ; then
+        is_eight=0
+    fi
+
+    # char_float comparison
+    AC_CHECK_SIZEOF(char_float, 0, [typedef struct { char a; float b; } char_float; ])
+    size=`expr $ac_cv_sizeof_char + $ac_cv_sizeof_float`
+    extent=$ac_cv_sizeof_char_float
+    if test "$size" != "$extent" ; then is_packed=0 ; fi
+    if test "`expr $extent % $ac_cv_sizeof_float`" != "0" ; then is_largest=0 ; fi
+    if test "`expr $extent % 2`" != "0" ; then is_two=0 ; fi
+    if test "`expr $extent % 4`" != "0" ; then is_four=0 ; fi
+    if test "$ac_cv_sizeof_float" = "8" -a "`expr $extent % 8`" != "0" ; then
+        is_eight=0
+    fi
+
+    # char_double comparison
+    AC_CHECK_SIZEOF(char_double, 0, [typedef struct { char a; double b; } char_double; ])
+    size=`expr $ac_cv_sizeof_char + $ac_cv_sizeof_double`
+    extent=$ac_cv_sizeof_char_double
+    if test "$size" != "$extent" ; then is_packed=0 ; fi
+    if test "`expr $extent % $ac_cv_sizeof_double`" != "0" ; then is_largest=0 ; fi
+    if test "`expr $extent % 2`" != "0" ; then is_two=0 ; fi
+    if test "`expr $extent % 4`" != "0" ; then is_four=0 ; fi
+    if test "$ac_cv_sizeof_double" = "8" -a "`expr $extent % 8`" != "0" ; then
+        is_eight=0
+    fi
+
+    # char_long_double comparison
+    if test "$have_long_double" = "yes"; then
+    AC_CHECK_SIZEOF(char_long_double, 0, [
+        typedef struct {
+            char a;
+            long double b;
+        } char_long_double;
+        ])
+    size=`expr $ac_cv_sizeof_char + $ac_cv_sizeof_long_double`
+    extent=$ac_cv_sizeof_char_long_double
+    if test "$size" != "$extent" ; then is_packed=0 ; fi
+    if test "`expr $extent % $ac_cv_sizeof_long_double`" != "0" ; then is_largest=0 ; fi
+    if test "`expr $extent % 2`" != "0" ; then is_two=0 ; fi
+    if test "`expr $extent % 4`" != "0" ; then is_four=0 ; fi
+    if test "$ac_cv_sizeof_long_double" = "8" -a "`expr $extent % 8`" != "0" ; then
+        is_eight=0
+    fi
+    fi
+
+    # char_int_char comparison
+    AC_CHECK_SIZEOF(char_int_char, 0, [
+        typedef struct {
+            char a;
+            int b;
+            char c;
+        } char_int_char;
+        ])
+    size=`expr $ac_cv_sizeof_char + $ac_cv_sizeof_int + $ac_cv_sizeof_char`
+    extent=$ac_cv_sizeof_char_int_char
+    if test "$size" != "$extent" ; then is_packed=0 ; fi
+    if test "`expr $extent % $ac_cv_sizeof_int`" != "0" ; then is_largest=0 ; fi
+    if test "`expr $extent % 2`" != "0" ; then is_two=0 ; fi
+    if test "`expr $extent % 4`" != "0" ; then is_four=0 ; fi
+    if test "$ac_cv_sizeof_int" = "8" -a "`expr $extent % 8`" != "0" ; then
+        is_eight=0
+    fi
+
+    # char_short_char comparison
+    AC_CHECK_SIZEOF(char_short_char, 0, [
+        typedef struct {
+            char a;
+            short b;
+            char c;
+        } char_short_char;
+        ])
+    size=`expr $ac_cv_sizeof_char + $ac_cv_sizeof_short + $ac_cv_sizeof_char`
+    extent=$ac_cv_sizeof_char_short_char
+    if test "$size" != "$extent" ; then is_packed=0 ; fi
+    if test "`expr $extent % $ac_cv_sizeof_short`" != "0" ; then is_largest=0 ; fi
+    if test "`expr $extent % 2`" != "0" ; then is_two=0 ; fi
+    if test "$ac_cv_sizeof_short" = "4" -a "`expr $extent % 4`" != "0" ; then
+        is_four=0
+    fi
+    if test "$ac_cv_sizeof_short" = "8" -a "`expr $extent % 8`" != "0" ; then
+        is_eight=0
+    fi
+
+    # If aligned mod 8, it will be aligned mod 4
+    if test $is_eight = 1 ; then is_four=0 ; is_two=0 ; fi
+    if test $is_four = 1 ; then is_two=0 ; fi
+
+    # Largest supersedes 8
+    if test $is_largest = 1 ; then is_eight=0 ; fi
+
+    # Find the alignment
+    if test "`expr $is_packed + $is_largest + $is_two + $is_four + $is_eight`" = "0" ; then
+        pac_cv_struct_alignment="unknown"
+    elif test "`expr $is_packed + $is_largest + $is_two + $is_four + $is_eight`" != "1" ; then
+        pac_cv_struct_alignment="unknown"
+    elif test $is_packed = 1 ; then
+        pac_cv_struct_alignment="packed"
+    elif test $is_largest = 1 ; then
+        pac_cv_struct_alignment="largest"
+    elif test $is_two = 1 ; then
+        pac_cv_struct_alignment="two"
+    elif test $is_four = 1 ; then
+        pac_cv_struct_alignment="four"
+    elif test $is_eight = 1 ; then
+        pac_cv_struct_alignment="eight"
+    fi
+])
+dnl
+dnl PAC_C_MACRO_VA_ARGS
+dnl
+dnl will AC_DEFINE([HAVE_MACRO_VA_ARGS]) if the compiler supports C99 variable
+dnl length argument lists in macros (#define foo(...) bar(__VA_ARGS__))
+AC_DEFUN([PAC_C_MACRO_VA_ARGS],[
+    AC_MSG_CHECKING([for variable argument list macro functionality])
+    AC_LINK_IFELSE([AC_LANG_PROGRAM([
+        #include <stdio.h>
+        #define conftest_va_arg_macro(...) printf(__VA_ARGS__)
+        ],
+        [conftest_va_arg_macro("a test %d", 3);])],
+        [AC_DEFINE([HAVE_MACRO_VA_ARGS],[1],[Define if C99-style variable argument list macro functionality])
+         AC_MSG_RESULT([yes])],
+        [AC_MSG_RESULT([no])])
+])dnl
diff --git a/ompi/mca/io/romio/romio/confdb/aclocal_cxx.m4 b/ompi/mca/io/romio/romio/confdb/aclocal_cxx.m4
new file mode 100644
index 0000000000..c43b64f0ab
--- /dev/null
+++ b/ompi/mca/io/romio/romio/confdb/aclocal_cxx.m4
@@ -0,0 +1,174 @@
+dnl PAC_PROG_CXX - reprioritize the C++ compiler search order
+AC_DEFUN([PAC_PROG_CXX],[
+    PAC_PUSH_FLAG([CXXFLAGS])
+    AC_PROG_CXX([g++ icpc pgCC xlC pathCC cl])
+    PAC_POP_FLAG([CXXFLAGS])
+])
+
+dnl This is from crypt.to/autoconf-archive, slightly modified.
+dnl It defines bool as int if it is not available
+dnl
+AC_DEFUN([AC_CXX_BOOL],
+[AC_CACHE_CHECK(whether the compiler recognizes bool as a built-in type,
+ac_cv_cxx_bool,
+[AC_LANG_SAVE
+ AC_LANG_CPLUSPLUS
+ AC_TRY_COMPILE([
+int f(int x){return 1;}
+int f(char x){return 1;}
+int f(bool x){return 1;}
+],[bool b = true; return f(b);],
+ ac_cv_cxx_bool=yes, ac_cv_cxx_bool=no)
+ AC_LANG_RESTORE
+])
+if test "$ac_cv_cxx_bool" != yes; then
+  AC_DEFINE(bool,int,[Define as int if bool is not a built-in type])
+fi
+])
+
+dnl This is from crypt.to/autoconf-archive, slightly modified (name defined)
+dnl
+AC_DEFUN([AC_CXX_EXCEPTIONS],
+[AC_CACHE_CHECK(whether the compiler supports exceptions,
+ac_cv_cxx_exceptions,
+[AC_LANG_SAVE
+ AC_LANG_CPLUSPLUS
+ AC_TRY_COMPILE(,[try { throw 1; } catch (int i) { return i; }],
+ ac_cv_cxx_exceptions=yes, ac_cv_cxx_exceptions=no)
+ AC_LANG_RESTORE
+])
+if test "$ac_cv_cxx_exceptions" = yes; then
+  AC_DEFINE(HAVE_CXX_EXCEPTIONS,,[define if the compiler supports exceptions])
+fi
+])
+
+dnl This is from crypt.to/autoconf-archive
+dnl
+AC_DEFUN([AC_CXX_NAMESPACES],
+[AC_CACHE_CHECK(whether the compiler implements namespaces,
+ac_cv_cxx_namespaces,
+[AC_LANG_SAVE
+ AC_LANG_CPLUSPLUS
+ AC_TRY_COMPILE([namespace Outer { namespace Inner { int i = 0; }}],
+                [using namespace Outer::Inner; return i;],
+ ac_cv_cxx_namespaces=yes, ac_cv_cxx_namespaces=no)
+ AC_LANG_RESTORE
+])
+if test "$ac_cv_cxx_namespaces" = yes; then
+  AC_DEFINE(HAVE_NAMESPACES,,[define if the compiler implements namespaces])
+fi
+])
+
+dnl Some compilers support namespaces but don't know about std
+dnl
+AC_DEFUN([AC_CXX_NAMESPACE_STD],
+[AC_REQUIRE([AC_CXX_NAMESPACES])
+AC_CACHE_CHECK(whether the compiler implements the namespace std,
+ac_cv_cxx_namespace_std,
+[ac_cv_cxx_namespace_std=no
+if test "$ac_cv_cxx_namespaces" = yes ; then
+   AC_LANG_SAVE
+   AC_LANG_CPLUSPLUS
+   AC_TRY_COMPILE([
+#include <iostream>
+using namespace std;],
+                [cout << "message\n";],
+ ac_cv_cxx_namespace_std=yes, ac_cv_cxx_namespace_std=no)
+   AC_LANG_RESTORE
+fi
+])
+if test "$ac_cv_cxx_namespace_std" = yes; then
+  AC_DEFINE(HAVE_NAMESPACE_STD,,[define if the compiler implements namespace std])
+fi
+])
+
+dnl/*D
+dnl PAC_CXX_CHECK_COMPILER_OPTION - Check that a C++ compiler option is
+dnl accepted without warning messages
+dnl
+dnl Synopsis:
+dnl PAC_CXX_CHECK_COMPILER_OPTION(optionname,action-if-ok,action-if-fail)
+dnl
+dnl Output Effects:
+dnl
+dnl If no actions are specified, a working value is added to 'CXXOPTIONS'
+dnl
+dnl Notes:
+dnl This is now careful to check that the output is different, since
+dnl some compilers are noisy.
+dnl
+dnl We are extra careful to prototype the functions in case compiler options
+dnl that complain about poor code are in effect.
+dnl
+dnl Because this is a long script, we have ensured that you can pass a
+dnl variable containing the option name as the first argument.
+dnl D*/
+AC_DEFUN([PAC_CXX_CHECK_COMPILER_OPTION],[
+AC_MSG_CHECKING([whether C++ compiler accepts option $1])
+save_CXXFLAGS="$CXXFLAGS"
+CXXFLAGS="$1 $CXXFLAGS"
+rm -f conftest.out
+echo 'int foo(void);int foo(void){return 0;}' > conftest2.cpp
+echo 'int main(void);int main(void){return 0;}' > conftest.cpp
+if ${CXX-g++} $save_CXXFLAGS $CPPFLAGS -o conftest conftest.cpp $LDFLAGS >conftest.bas 2>&1 ; then
+   if ${CXX-g++} $CXXFLAGS $CPPFLAGS -o conftest conftest.cpp $LDFLAGS >conftest.out 2>&1 ; then
+      if diff -b conftest.out conftest.bas >/dev/null 2>&1 ; then
+         AC_MSG_RESULT(yes)
+         AC_MSG_CHECKING([whether routines compiled with $1 can be linked with ones compiled without $1])
+         rm -f conftest.out
+         rm -f conftest.bas
+         if ${CXX-g++} -c $save_CXXFLAGS $CPPFLAGS conftest2.cpp >conftest2.out 2>&1 ; then
+            if ${CXX-g++} $CXXFLAGS $CPPFLAGS -o conftest conftest2.o conftest.cpp $LDFLAGS >conftest.bas 2>&1 ; then
+               if ${CXX-g++} $CXXFLAGS $CPPFLAGS -o conftest conftest2.o conftest.cpp $LDFLAGS >conftest.out 2>&1 ; then
+                  if diff -b conftest.out conftest.bas >/dev/null 2>&1 ; then
+                     AC_MSG_RESULT(yes)
+                     CXXFLAGS="$save_CXXFLAGS"
+                     ifelse($2,,CXXOPTIONS="$CXXOPTIONS $1",$2)
+                  elif test -s conftest.out ; then
+                     cat conftest.out >&AC_FD_CC
+                     AC_MSG_RESULT(no)
+                     CXXFLAGS="$save_CXXFLAGS"
+                     $3
+                  else
+                     AC_MSG_RESULT(no)
+                     CXXFLAGS="$save_CXXFLAGS"
+                     $3
+                  fi
+               else
+                  if test -s conftest.out ; then
+                     cat conftest.out >&AC_FD_CC
+                  fi
+                  AC_MSG_RESULT(no)
+                  CXXFLAGS="$save_CXXFLAGS"
+                  $3
+               fi
+            else
+               # Could not link with the option!  Drop the trial flag and run the failure action.
+               AC_MSG_RESULT(no) ; CXXFLAGS="$save_CXXFLAGS" ; $3
+            fi
+         else
+            if test -s conftest2.out ; then
+               cat conftest2.out >&AC_FD_CC
+            fi
+            AC_MSG_RESULT(no)
+            CXXFLAGS="$save_CXXFLAGS"
+            $3
+         fi
+      else
+         cat conftest.out >&AC_FD_CC
+         AC_MSG_RESULT(no)
+         $3
+         CXXFLAGS="$save_CXXFLAGS"
+      fi
+   else
+      AC_MSG_RESULT(no)
+      $3
+      if test -s conftest.out ; then cat conftest.out >&AC_FD_CC ; fi
+      CXXFLAGS="$save_CXXFLAGS"
+   fi
+else
+    # Could not compile without the option!  Drop the trial flag and run the failure action.
+    AC_MSG_RESULT(no) ; CXXFLAGS="$save_CXXFLAGS" ; $3
+fi
+rm -f conftest*
+])
diff --git a/ompi/mca/io/romio/romio/confdb/aclocal_f77.m4 b/ompi/mca/io/romio/romio/confdb/aclocal_f77.m4
new file mode 100644
index 0000000000..46bd25e2e0
--- /dev/null
+++ b/ompi/mca/io/romio/romio/confdb/aclocal_f77.m4
@@ -0,0 +1,1640 @@
+dnl PAC_PROG_F77 - reprioritize the F77 compiler search order
+AC_DEFUN([PAC_PROG_F77],[
+PAC_PUSH_FLAG([FFLAGS])
+AC_PROG_F77([ifort pgf77 af77 xlf frt cf77 fort77 fl32 fort ifc efc ftn gfortran f77 g77])
+PAC_POP_FLAG([FFLAGS])
+])
+
+dnl
+dnl/*D
+dnl PAC_PROG_F77_NAME_MANGLE - Determine how the Fortran compiler mangles
+dnl names
+dnl
+dnl Synopsis:
+dnl PAC_PROG_F77_NAME_MANGLE([action])
+dnl
+dnl Output Effect:
+dnl If no action is specified, one of the following names is defined:
+dnl.vb
+dnl If fortran names are mapped:
+dnl lower -> lower F77_NAME_LOWER
+dnl lower -> lower_ F77_NAME_LOWER_USCORE
+dnl lower -> UPPER F77_NAME_UPPER
+dnl lower_lower -> lower__ F77_NAME_LOWER_2USCORE
+dnl mixed -> mixed F77_NAME_MIXED
+dnl mixed -> mixed_ F77_NAME_MIXED_USCORE
+dnl mixed -> UPPER@STACK_SIZE 
F77_NAME_UPPER_STDCALL +dnl.ve +dnl If an action is specified, it is executed instead. +dnl +dnl Notes: +dnl We assume that if lower -> lower (any underscore), upper -> upper with the +dnl same underscore behavior. Previous versions did this by +dnl compiling a Fortran program and running strings -a over it. Depending on +dnl strings is a bad idea, so instead we try compiling and linking with a +dnl C program, since that is why we are doing this anyway. A similar approach +dnl is used by FFTW, though without some of the cases we check (specifically, +dnl mixed name mangling). STD_CALL not only specifies a particular name +dnl mangling convention (adding the size of the calling stack into the function +dnl name, but also the stack management convention (callee cleans the stack, +dnl and arguments are pushed onto the stack from right to left) +dnl +dnl One additional problem is that some Fortran implementations include +dnl references to the runtime (like pgf90_compiled for the pgf90 compiler +dnl used as the "Fortran 77" compiler). This is not yet solved. +dnl +dnl D*/ +dnl +AC_DEFUN([PAC_PROG_F77_NAME_MANGLE],[ +AC_CACHE_CHECK([for Fortran 77 name mangling], +pac_cv_prog_f77_name_mangle, +[ + # Check for strange behavior of Fortran. For example, some FreeBSD + # systems use f2c to implement f77, and the version of f2c that they + # use generates TWO (!!!) trailing underscores + # Currently, WDEF is not used but could be... + # + # Eventually, we want to be able to override the choices here and + # force a particular form. This is particularly useful in systems + # where a Fortran compiler option is used to force a particular + # external name format (rs6000 xlf, for example). 
+ # This is needed for Mac OSX 10.5 + rm -rf conftest.dSYM + rm -f conftest* + cat > conftest.f <&AC_FD_CC + cat conftest.f >&AC_FD_CC + fi + + AC_LANG_SAVE + AC_LANG_C + save_LIBS="$LIBS" + dnl FLIBS comes from AC_F77_LIBRARY_LDFLAGS + LIBS="fconftestf.o $FLIBS $LIBS" + AC_TRY_LINK([extern void my_name(int);],my_name(0);,pac_cv_prog_f77_name_mangle="lower") + if test "X$pac_cv_prog_f77_name_mangle" = "X" ; then + AC_TRY_LINK([extern void my_name_(int);],my_name_(0);,pac_cv_prog_f77_name_mangle="lower underscore") + fi + if test "X$pac_cv_prog_f77_name_mangle" = "X" ; then + AC_TRY_LINK([void __stdcall MY_NAME(int);],MY_NAME(0);,pac_cv_prog_f77_name_mangle="upper stdcall") + fi + if test "X$pac_cv_prog_f77_name_mangle" = "X" ; then + AC_TRY_LINK([extern void MY_NAME(int);],MY_NAME(0);,pac_cv_prog_f77_name_mangle="upper") + fi + if test "X$pac_cv_prog_f77_name_mangle" = "X" ; then + AC_TRY_LINK([extern void my_name__(int);],my_name__(0);, + pac_cv_prog_f77_name_mangle="lower doubleunderscore") + fi + if test "X$pac_cv_prog_f77_name_mangle" = "X" ; then + AC_TRY_LINK([extern void MY_name(int);],MY_name(0);,pac_cv_prog_f77_name_mangle="mixed") + fi + if test "X$pac_cv_prog_f77_name_mangle" = "X" ; then + AC_TRY_LINK([extern void MY_name_(int);],MY_name_(0);,pac_cv_prog_f77_name_mangle="mixed underscore") + fi + LIBS="$save_LIBS" + AC_LANG_RESTORE + # If we got to this point, it may be that the programs have to be + # linked with the Fortran, not the C, compiler. 
Try reversing + # the language used for the test + dnl Note that the definition of AC_TRY_LINK and AC_LANG_PROGRAM + dnl is broken in autoconf and will generate spurious warning messages + dnl To fix this, we use + dnl AC _LINK_IFELSE([AC _LANG_PROGRAM(,[[body]])],action-if-true) + dnl instead of AC _TRY_LINK(,body,action-if-true) + if test "X$pac_cv_prog_f77_name_mangle" = "X" ; then + AC_LANG_SAVE + AC_LANG_FORTRAN77 + save_LIBS="$LIBS" + LIBS="conftestc.o $LIBS" + if test "X$ac_ccompile" = "X" ; then + ac_ccompile='${CC-cc} -c $CFLAGS conftest.c 1>&AC_FD_CC' + fi + # This is needed for Mac OSX 10.5 + rm -rf conftest.dSYM + rm -f conftest* + cat > conftest.c <&AC_FD_CC + cat conftest.c >&AC_FD_CC + fi + + AC_LINK_IFELSE([AC_LANG_PROGRAM(,[[ call my_name(0)]])], + pac_cv_prog_f77_name_mangle="lower") + + if test "X$pac_cv_prog_f77_name_mangle" = "X" ; then + # This is needed for Mac OSX 10.5 + rm -rf conftest.dSYM + rm -f conftest* + cat > conftest.c <&AC_FD_CC + cat conftest.c >&AC_FD_CC + fi + AC_LINK_IFELSE([AC_LANG_PROGRAM(,[[ call my_name(0)]])], + pac_cv_prog_f77_name_mangle="lower underscore") + fi + if test "X$pac_cv_prog_f77_name_mangle" = "X" ; then + # This is needed for Mac OSX 10.5 + rm -rf conftest.dSYM + rm -f conftest* + cat >conftest.c <&AC_FD_CC + cat conftest.c >&AC_FD_CC + fi + AC_LINK_IFELSE([AC_LANG_PROGRAM(,[[ call my_name(0)]])], + pac_cv_prog_f77_name_mangle="upper stdcall") + fi + if test "X$pac_cv_prog_f77_name_mangle" = "X" ; then + # This is needed for Mac OSX 10.5 + rm -rf conftest.dSYM + rm -f conftest* + cat >conftest.c <&AC_FD_CC + cat conftest.c >&AC_FD_CC + fi + AC_LINK_IFELSE([AC_LANG_PROGRAM(,[[ call MY_NAME(0)]])], + pac_cv_prog_f77_name_mangle="upper") + fi + if test "X$pac_cv_prog_f77_name_mangle" = "X" ; then + # This is needed for Mac OSX 10.5 + rm -rf conftest.dSYM + rm -f conftest* + cat >conftest.c <&AC_FD_CC + cat conftest.c >&AC_FD_CC + fi + AC_LINK_IFELSE([AC_LANG_PROGRAM(,[[ call my_name(0)]])], + 
pac_cv_prog_f77_name_mangle="lower doubleunderscore") + fi + if test "X$pac_cv_prog_f77_name_mangle" = "X" ; then + # This is needed for Mac OSX 10.5 + rm -rf conftest.dSYM + rm -f conftest* + cat >conftest.c <&AC_FD_CC + cat conftest.c >&AC_FD_CC + fi + AC_LINK_IFELSE([AC_LANG_PROGRAM(,[[ call MY_name(0)]])], + pac_cv_prog_f77_name_mangle="mixed") + fi + if test "X$pac_cv_prog_f77_name_mangle" = "X" ; then + # This is needed for Mac OSX 10.5 + rm -rf conftest.dSYM + rm -f conftest* + cat >conftest.c <&AC_FD_CC + cat conftest.c >&AC_FD_CC + fi + AC_LINK_IFELSE([AC_LANG_PROGRAM(,[[ call MY_name(0)]])], + pac_cv_prog_f77_name_mangle="mixed underscore") + fi + LIBS="$save_LIBS" + AC_LANG_RESTORE + fi + # This is needed for Mac OSX 10.5 + rm -rf conftest.dSYM + rm -f fconftest* +]) +# Make the actual definition +pac_namecheck=`echo X$pac_cv_prog_f77_name_mangle | sed 's/ /-/g'` +ifelse([$1],,[ +pac_cv_test_stdcall="" +case $pac_namecheck in + X) AC_MSG_WARN([Cannot determine Fortran naming scheme]) ;; + Xlower) AC_DEFINE(F77_NAME_LOWER,1,[Define if Fortran names are lowercase]) + F77_NAME_MANGLE="F77_NAME_LOWER" + ;; + Xlower-underscore) AC_DEFINE(F77_NAME_LOWER_USCORE,1,[Define if Fortran names are lowercase with a trailing underscore]) + F77_NAME_MANGLE="F77_NAME_LOWER_USCORE" + ;; + Xlower-doubleunderscore) AC_DEFINE(F77_NAME_LOWER_2USCORE,1,[Define if Fortran names containing an underscore have two trailing underscores]) + F77_NAME_MANGLE="F77_NAME_LOWER_2USCORE" + ;; + Xupper) AC_DEFINE(F77_NAME_UPPER,1,[Define if Fortran names are uppercase]) + F77_NAME_MANGLE="F77_NAME_UPPER" + ;; + Xmixed) AC_DEFINE(F77_NAME_MIXED,1,[Define if Fortran names preserve the original case]) + F77_NAME_MANGLE="F77_NAME_MIXED" + ;; + Xmixed-underscore) AC_DEFINE(F77_NAME_MIXED_USCORE,1,[Define if Fortran names preserve the original case and add a trailing underscore]) + F77_NAME_MANGLE="F77_NAME_MIXED_USCORE" + ;; + Xupper-stdcall) AC_DEFINE(F77_NAME_UPPER,1,[Define if Fortran names 
are uppercase]) + F77_NAME_MANGLE="F77_NAME_UPPER_STDCALL" + pac_cv_test_stdcall="__stdcall" + ;; + *) AC_MSG_WARN([Unknown Fortran naming scheme]) ;; +esac +AC_SUBST(F77_NAME_MANGLE) +# Get the standard call definition +# FIXME: This should use F77_STDCALL, not STDCALL (non-conforming name) +if test "X$pac_cv_test_stdcall" = "X" ; then + F77_STDCALL="" +else + F77_STDCALL="__stdcall" +fi +# +AC_DEFINE_UNQUOTED(STDCALL,$F77_STDCALL,[Define calling convention]) +],[$1]) +]) +dnl +dnl/*D +dnl PAC_PROG_F77_CHECK_SIZEOF - Determine the size in bytes of a Fortran +dnl type +dnl +dnl Synopsis: +dnl PAC_PROG_F77_CHECK_SIZEOF(type,[cross-size]) +dnl +dnl Output Effect: +dnl Sets SIZEOF_F77_uctype to the size in bytes of type. +dnl If type is unknown, the size is set to 0. +dnl If cross-compiling, the value cross-size is used (it may be a variable) +dnl For example 'PAC_PROG_F77_CHECK_SIZEOF(real)' defines +dnl 'SIZEOF_F77_REAL' to 4 on most systems. The variable +dnl 'pac_cv_f77_sizeof_<type>' (e.g., 'pac_cv_f77_sizeof_real') is also set to +dnl the size of the type. +dnl If the corresponding variable is already set, that value is used. +dnl If the name has an '*' in it (e.g., 'integer*4'), the defined name +dnl replaces that with an underscore (e.g., 'SIZEOF_F77_INTEGER_4'). +dnl +dnl Notes: +dnl If the 'cross-size' argument is not given, 'autoconf' will issue an error +dnl message. You can use '0' to specify undetermined. +dnl +dnl D*/ +AC_DEFUN([PAC_PROG_F77_CHECK_SIZEOF],[ +changequote(<<, >>)dnl +dnl The name to #define. +dnl If the arg value contains a variable, we need to update that +define(<>, translit(sizeof_f77_$1, [a-z *], [A-Z__]))dnl +dnl The cache variable name. 
+define(<>, translit(pac_cv_f77_sizeof_$1, [ *], [__]))dnl +changequote([, ])dnl +AC_CACHE_CHECK([for size of Fortran type $1],PAC_CV_NAME,[ +AC_REQUIRE([PAC_PROG_F77_NAME_MANGLE]) +# This is needed for Mac OSX 10.5 +rm -rf conftest.dSYM +rm -f conftest* +cat < conftest.f + subroutine isize( ) + $1 i(2) + call cisize( i(1), i(2) ) + end +EOF +if test "X$ac_fcompile" = "X" ; then + ac_fcompile='${F77-f77} -c $FFLAGS conftest.f 1>&AC_FD_CC' +fi +if AC_TRY_EVAL(ac_fcompile) && test -s conftest.o ; then + mv conftest.o conftestf.o + AC_LANG_SAVE + AC_LANG_C + save_LIBS="$LIBS" + dnl Add the Fortran linking libraries + LIBS="conftestf.o $FLIBS $LIBS" + AC_TRY_RUN([#include +#ifdef F77_NAME_UPPER +#define cisize_ CISIZE +#define isize_ ISIZE +#elif defined(F77_NAME_LOWER) || defined(F77_NAME_MIXED) +#define cisize_ cisize +#define isize_ isize +#endif +static int isize_val=0; +void cisize_(char *,char*); +void isize_(void); +void cisize_(char *i1p, char *i2p) +{ + isize_val = (int)(i2p - i1p); +} +int main(int argc, char **argv) +{ + FILE *f = fopen("conftestval", "w"); + if (!f) return 1; + isize_(); + fprintf(f,"%d\n", isize_val ); + return 0; +}], eval PAC_CV_NAME=`cat conftestval`,eval PAC_CV_NAME=0, +ifelse([$2],,,eval PAC_CV_NAME=$2)) + # Problem. If the process fails to run, then there won't be + # a good error message. For example, with one Portland Group + # installation, we had problems with finding the libpgc.so shared library + # The autoconf code for TRY_RUN doesn't capture the output from + # the test program (!) + + LIBS="$save_LIBS" + AC_LANG_RESTORE +else + echo "configure: failed program was:" >&AC_FD_CC + cat conftest.f >&AC_FD_CC + ifelse([$2],,eval PAC_CV_NAME=0,eval PAC_CV_NAME=$2) +fi +]) +AC_DEFINE_UNQUOTED(PAC_TYPE_NAME,$PAC_CV_NAME,[Define size of PAC_TYPE_NAME]) +undefine([PAC_TYPE_NAME]) +undefine([PAC_CV_NAME]) +]) +dnl +dnl This version uses a Fortran program to link programs. 
+dnl This is necessary because some compilers provide shared libraries +dnl that are not within the default linker paths (e.g., our installation +dnl of the Portland Group compilers) +dnl +AC_DEFUN([PAC_PROG_F77_CHECK_SIZEOF_EXT],[ +changequote(<<,>>)dnl +dnl The name to #define. +dnl If the arg value contains a variable, we need to update that +define(<>, translit(sizeof_f77_$1, [a-z *], [A-Z__]))dnl +dnl The cache variable name. +define(<>, translit(pac_cv_f77_sizeof_$1, [ *], [__]))dnl +changequote([,])dnl +AC_CACHE_CHECK([for size of Fortran type $1],PAC_CV_NAME,[ +AC_REQUIRE([PAC_PROG_F77_NAME_MANGLE]) +if test "$cross_compiling" = yes ; then + ifelse([$2],,[AC_MSG_WARN([No value provided for size of $1 when cross-compiling])] +,eval PAC_CV_NAME=$2) +else + # This is needed for Mac OSX 10.5 + rm -rf conftest.dSYM + rm -f conftest* + cat < conftestc.c +#include +#include "confdefs.h" +#ifdef F77_NAME_UPPER +#define cisize_ CISIZE +#define isize_ ISIZE +#elif defined(F77_NAME_LOWER) || defined(F77_NAME_MIXED) +#define cisize_ cisize +#define isize_ isize +#endif +int cisize_(char *,char*); +int cisize_(char *i1p, char *i2p) +{ + int isize_val=0; + FILE *f = fopen("conftestval", "w"); + if (!f) return 1; + isize_val = (int)(i2p - i1p); + fprintf(f,"%d\n", isize_val ); + fclose(f); + return 0; +} +EOF + pac_tmp_compile='$CC -c $CFLAGS $CPPFLAGS conftestc.c >&5' + if AC_TRY_EVAL(pac_tmp_compile) && test -s conftestc.o ; then + AC_LANG_SAVE + AC_LANG_FORTRAN77 + saveLIBS=$LIBS + LIBS="conftestc.o $LIBS" + dnl TRY_RUN does not work correctly for autoconf 2.13 (the + dnl macro includes C-preprocessor directives that are not + dnl valid in Fortran. 
Instead, we do this by hand + cat >conftest.f <conftest2.f <conftest.f </dev/null 2>&1 ; then + AC_MSG_RESULT(yes) + AC_MSG_CHECKING([whether routines compiled with $1 can be linked with ones compiled without $1]) + rm -f conftest2.out + rm -f conftest.bas + ac_fscompile3='${F77-f77} -c $save_FFLAGS conftest2.f >conftest2.out 2>&1' + ac_fscompilelink4='${F77-f77} $FFLAGS -o conftest conftest2.o conftest.f $LDFLAGS >conftest.bas 2>&1' + if AC_TRY_EVAL(ac_fscompile3) && test -s conftest2.o ; then + if AC_TRY_EVAL(ac_fscompilelink4) && test -x conftest ; then + if diff -b conftest.out conftest.bas >/dev/null 2>&1 ; then + ac_result="yes" + else + echo "configure: Compiler output differed in two cases" >&AC_FD_CC + diff -b conftest.out conftest.bas >&AC_FD_CC + fi + else + echo "configure: failed program was:" >&AC_FD_CC + cat conftest.f >&AC_FD_CC + fi + else + echo "configure: failed program was:" >&AC_FD_CC + cat conftest2.f >&AC_FD_CC + fi + else + # diff + echo "configure: Compiler output differed in two cases" >&AC_FD_CC + diff -b conftest.out conftest.bas >&AC_FD_CC + fi + else + # try_eval(fscompilelink2) + echo "configure: failed program was:" >&AC_FD_CC + cat conftest.f >&AC_FD_CC + fi + if test "$ac_result" != "yes" -a -s conftest.out ; then + cat conftest.out >&AC_FD_CC + fi +else + # Could not compile without the option! + echo "configure: Could not compile program" >&AC_FD_CC + cat conftest.f >&AC_FD_CC + cat conftest.bas >&AC_FD_CC +fi +# Restore FFLAGS before 2nd/3rd argument commands are executed, +# as 2nd/3rd argument command could be modifying FFLAGS. 
+FFLAGS="$save_FFLAGS" +if test "$ac_result" = "yes" ; then + AC_MSG_RESULT(yes) + ifelse($2,,FOPTIONS="$FOPTIONS $1",$2) +else + AC_MSG_RESULT(no) + $3 +fi +# This is needed for Mac OSX 10.5 +rm -rf conftest.dSYM +rm -f conftest* +]) + + +dnl/*D +dnl PAC_PROG_F77_CMDARGS - Determine how to access the command line from +dnl Fortran 77 +dnl +dnl Output Effects: +dnl The following variables are set: +dnl.vb +dnl F77_GETARG - Statement to get an argument i into string s +dnl F77_IARGC - Routine to return the number of arguments +dnl FXX_MODULE - Module command when using Fortran 90 compiler +dnl F77_GETARGDECL - Declaration of routine used for F77_GETARG +dnl F77_GETARG_FFLAGS - Flags needed when compiling/linking +dnl F77_GETARG_LDFLAGS - Flags needed when linking +dnl.ve +dnl If 'F77_GETARG' has a value, then that value and the values for these +dnl other symbols will be used instead. If no approach is found, all of these +dnl variables will have empty values. +dnl If no other approach works and a file 'f77argdef' is in the directory, +dnl that file will be sourced for the values of the above four variables. +dnl +dnl In most cases, you should add F77_GETARG_FFLAGS to the FFLAGS variable +dnl and F77_GETARG_LDFLAGS to the LDFLAGS variable, to ensure that tests are +dnl performed on the compiler version that will be used. +dnl +dnl 'AC_SUBST' is called for all six variables. +dnl +dnl One complication is that on systems with multiple Fortran compilers, +dnl some libraries used by one Fortran compiler may have been (mis)placed +dnl in a common location. We have had trouble with libg2c in particular. +dnl To work around this, we test whether iargc etc. work first. This +dnl will catch most systems and will speed up the tests. +dnl +dnl Next, the libraries are only added if they are needed to complete a +dnl link; they aren''t added just because they exist. 
+dnl +dnl f77argdef +dnl D*/ +dnl +dnl Random notes +dnl You can export the command line arguments from C to the g77 compiler +dnl using +dnl extern char **__libc_argv; +dnl extern int __libc_argc; +dnl f_setarg( __libc_argc, __libc_argv ); +dnl +AC_DEFUN([PAC_PROG_F77_CMDARGS],[ +found_cached="yes" +AC_MSG_CHECKING([for routines to access the command line from Fortran 77]) +AC_CACHE_VAL(pac_cv_prog_f77_cmdarg, +[ + AC_MSG_RESULT([searching...]) + found_cached="no" + # First, we perform a quick check. Does iargc and getarg work? + fxx_module="${FXX_MODULE:-}" + f77_getargdecl="${F77_GETARGDECL:-external getarg}" + f77_getarg="${F77_GETARG:-call GETARG(i,s)}" + f77_iargc="${F77_IARGC:-IARGC()}" + # + # Grumble. The Absoft Fortran compiler computes i - i as 0 and then + # 1.0 / 0 at compile time, even though the code may never be executed. + # What we need is a way to generate an error, so the second usage of i + # was replaced with f77_iargc. + cat > conftest.f </dev/null 2>&1 ; then + found_answer="yes" + FXX_MODULE="$fxx_module" + F77_GETARGDECL="$f77_getargdecl" + F77_GETARG="$f77_getarg" + F77_IARGC="$f77_iargc" + AC_MSG_RESULT(yes) + fi + fi + fi + if test $found_answer = "no" ; then + AC_MSG_RESULT(no) + # Grumph. 
Here are a bunch of different approaches + # We have several axes to check: + # Library to link with (none, -lU77 (HPUX), -lg2c (LINUX f77)) + # PEPCF90 (Intel ifc) + # The first line is a dummy + # (we experimented with using a , but this caused other + # problems because we need in the IFS) + trial_LIBS="0 -lU77 -lPEPCF90" + if test "$NOG2C" != "1" ; then + trial_LIBS="$trial_LIBS -lg2c" + fi + # Discard libs that are not available: + save_IFS="$IFS" + # Make sure that IFS includes a space, or the tests that run programs + # may fail + IFS=" "" +" + save_trial_LIBS="$trial_LIBS" + trial_LIBS="" + cat > conftest.f <, the space is important + # To make the Absoft f77 and f90 work together, we need to prefer the + # upper case versions of the arguments. They also require libU77. + # -YCFRL=1 causes Absoft f90 to work with g77 and similar (f2c-based) + # Fortran compilers + # + # Problem: The Intel efc compiler hangs when presented with -N109 . + # The only real fix for this is to detect this compiler and exclude + # the test. We may want to reorganize these tests so that if we + # can compile code without special options, we never look for them. 
+ # + using_intel_efc="no" + pac_test_msg=`$F77 -V 2>&1 | grep 'Intel(R) Fortran Itanium'` + if test "$pac_test_msg" != "" ; then + using_intel_efc="yes" + fi + if test "$using_intel_efc" = "yes" ; then + trial_FLAGS="000" + else + trial_FLAGS="000 +-N109 +-f +-YEXT_NAMES=UCS +-YEXT_NAMES=LCS +-YCFRL=1 ++U77" + fi + # Discard options that are not available: + # (IFS already saved above) + IFS=" "" +" + save_trial_FLAGS="$trial_FLAGS" + trial_FLAGS="" + for flag in $save_trial_FLAGS ; do + if test "$flag" = " " -o "$flag" = "000" ; then + opt_ok="yes" + else + PAC_F77_CHECK_COMPILER_OPTION($flag,opt_ok=yes,opt_ok=no) + fi + if test "$opt_ok" = "yes" ; then + if test "$flag" = " " -o "$flag" = "000" ; then + fflag="" + else + fflag="$flag" + fi + # discard options that don't allow mixed-case name matching + cat > conftest.f < conftest.f </dev/null 2>&1 ; then + found_answer="yes" + fi + else + found_answer="yes" + fi + fi + IFS=" "" +" + if test "$found_answer" = "yes" ; then + AC_MSG_RESULT([yes]) + pac_cv_prog_f77_cmdarg="$MSG" + pac_cv_prog_f77_cmdarg_fflags="$flags" + pac_cv_prog_f77_cmdarg_ldflags="$libs" + break + else + AC_MSG_RESULT([no]) + echo "configure: failed program was:" >&AC_FD_CC + cat conftest.f >&AC_FD_CC + fi + done + done + IFS="$save_IFS" + rm -f conftest.* + trial=`expr $trial + 1` + done +fi +pac_cv_F77_GETARGDECL="$F77_GETARGDECL" +pac_cv_F77_IARGC="$F77_IARGC" +pac_cv_F77_GETARG="$F77_GETARG" +pac_cv_FXX_MODULE="$FXX_MODULE" +]) +if test "$found_cached" = "yes" ; then + AC_MSG_RESULT([$pac_cv_prog_f77_cmdarg]) +elif test -z "$pac_cv_F77_IARGC" ; then + AC_MSG_WARN([Could not find a way to access the command line from Fortran 77]) +fi +# Set the variable values based on pac_cv_prog_xxx +F77_GETARGDECL="$pac_cv_F77_GETARGDECL" +F77_IARGC="$pac_cv_F77_IARGC" +F77_GETARG="$pac_cv_F77_GETARG" +FXX_MODULE="$pac_cv_FXX_MODULE" +F77_GETARG_FFLAGS="$pac_cv_prog_f77_cmdarg_fflags" +F77_GETARG_LDFLAGS="$pac_cv_prog_f77_cmdarg_ldflags" 
+AC_SUBST(F77_GETARGDECL) +AC_SUBST(F77_IARGC) +AC_SUBST(F77_GETARG) +AC_SUBST(FXX_MODULE) +AC_SUBST(F77_GETARG_FFLAGS) +AC_SUBST(F77_GETARG_LDFLAGS) +]) + + +dnl/*D +dnl PAC_PROG_F77_LIBRARY_DIR_FLAG - Determine the flag used to indicate +dnl the directories to find libraries in +dnl +dnl Notes: +dnl Many compilers accept '-Ldir' just like most C compilers. +dnl Unfortunately, some (such as some HPUX Fortran compilers) do not, +dnl and require instead either '-Wl,-L,dir' or something else. This +dnl command attempts to determine what is accepted. The flag is +dnl placed into 'F77_LIBDIR_LEADER'. +dnl +dnl D*/ +dnl +dnl An earlier version of this only tried the arguments without using +dnl a library. This failed when the HP compiler complained about the +dnl arguments, but produced an executable anyway. +AC_DEFUN([PAC_PROG_F77_LIBRARY_DIR_FLAG],[ +if test "X$F77_LIBDIR_LEADER" = "X" ; then +AC_CACHE_CHECK([for Fortran 77 flag for library directories], +pac_cv_prog_f77_library_dir_flag, +[ + + rm -f conftest.* conftest1.* + cat > conftest.f < conftest1.f <&AC_FD_CC + cat conftest1.f >&AC_FD_CC + fi + # This is needed for Mac OSX 10.5 + rm -rf conftest.dSYM + rm -f conftest* +]) + AC_SUBST(F77_LIBDIR_LEADER) + if test "X$pac_cv_prog_f77_library_dir_flag" != "X" ; then + F77_LIBDIR_LEADER="$pac_cv_prog_f77_library_dir_flag" + fi +fi +]) + +dnl/*D +dnl PAC_PROG_F77_HAS_INCDIR - Check whether Fortran accepts -Idir flag +dnl +dnl Syntax: +dnl PAC_PROG_F77_HAS_INCDIR(directory,action-if-true,action-if-false) +dnl +dnl Output Effect: +dnl Sets 'F77_INCDIR' to the flag used to choose the directory. +dnl +dnl Notes: +dnl This refers to the handling of the common Fortran include extension, +dnl not to the use of '#include' with the C preprocessor. +dnl If directory does not exist, it will be created. In that case, the +dnl directory should be a direct descendant of the current directory. 
+dnl +dnl D*/ +AC_DEFUN([PAC_PROG_F77_HAS_INCDIR],[ +checkdir=$1 +AC_CACHE_CHECK([for include directory flag for Fortran], +pac_cv_prog_f77_has_incdir,[ +if test ! -d $checkdir ; then mkdir $checkdir ; fi +cat >$checkdir/conftestf.h <conftest.f < conftest.$ac_ext <&AC_FD_CC + cat conftest.$ac_ext >&AC_FD_CC + # This is needed for Mac OSX 10.5 + rm -rf conftest.dSYM + rm -rf conftest* + pac_cv_prog_f77_allows_unused_externals="no" + $4 +fi +# This is needed for Mac OSX 10.5 +rm -rf conftest.dSYM +rm -f conftest* +# +AC_LANG_RESTORE +]) +if test "X$pac_cv_prog_f77_allows_unused_externals" = "Xyes" ; then + ifelse([$1],,:,[$1]) +else + ifelse([$2],,:,[$2]) +fi +]) + + +dnl /*D +dnl PAC_PROG_F77_HAS_POINTER - Determine if Fortran allows pointer type +dnl +dnl Synopsis: +dnl PAC_PROG_F77_HAS_POINTER(action-if-true,action-if-false) +dnl D*/ +AC_DEFUN([PAC_PROG_F77_HAS_POINTER],[ +AC_CACHE_CHECK([whether Fortran has pointer declaration], +pac_cv_prog_f77_has_pointer,[ +AC_LANG_SAVE +AC_LANG_FORTRAN77 +AC_COMPILE_IFELSE([AC_LANG_PROGRAM(,[ + integer M + pointer (MPTR,M) + data MPTR/0/ +])], + pac_cv_prog_f77_has_pointer="yes", + pac_cv_prog_f77_has_pointer="no") +AC_LANG_RESTORE +]) +if test "$pac_cv_prog_f77_has_pointer" = "yes" ; then + ifelse([$1],,:,[$1]) +else + ifelse([$2],,:,[$2]) +fi +]) + + +dnl pac_prog_f77_run_proc_from_c( c main program, fortran routine, +dnl action-if-works, action-if-fails, +dnl cross-action ) +dnl Fortran routine MUST be named ftest unless you include code +dnl to select the appropriate Fortran name. 
+dnl +AC_DEFUN([PAC_PROG_F77_RUN_PROC_FROM_C],[ +# This is needed for Mac OSX 10.5 +rm -rf conftest.dSYM +rm -f conftest* +cat < conftest.f +$2 +EOF +dnl +if test "X$ac_fcompile" = "X" ; then + ac_fcompile='${F77-f77} -c $FFLAGS conftest.f 1>&AC_FD_CC' +fi +if AC_TRY_EVAL(ac_fcompile) && test -s conftest.o ; then + mv conftest.o conftestf.o + AC_LANG_SAVE + AC_LANG_C + save_LIBS="$LIBS" + LIBS="conftestf.o $FLIBS $LIBS" + AC_TRY_RUN([#include +#ifdef F77_NAME_UPPER +#define ftest_ FTEST +#elif defined(F77_NAME_LOWER) || defined(F77_NAME_MIXED) +#define ftest_ ftest +#endif +$1 +], [$3], [$4], [$5] ) + LIBS="$save_LIBS" + AC_LANG_RESTORE +else + echo "configure: failed program was:" >&AC_FD_CC + cat conftest.f >&AC_FD_CC +fi +# This is needed for Mac OSX 10.5 +rm -rf conftest.dSYM +rm -f conftest* +]) + + +dnl PAC_PROG_F77_IN_C_LIBS +dnl +dnl Find the essential libraries that are needed to use the C linker to +dnl create a program that includes trivial Fortran code. +dnl +dnl For example, all pgf90 compiled objects include a reference to the +dnl symbol pgf90_compiled, found in libpgf90 . +dnl +dnl There is an additional problem. To *run* programs, we may need +dnl additional arguments; e.g., if shared libraries are used. Even +dnl with autoconf 2.52, the autoconf macro to find the library arguments +dnl doesn't handle this, either by detecting the use of -rpath or +dnl by trying to *run* a trivial program. It only checks for *linking*. 
+dnl +dnl +AC_DEFUN([PAC_PROG_F77_IN_C_LIBS],[ +AC_MSG_CHECKING([for which Fortran libraries are needed to link C with Fortran]) +F77_IN_C_LIBS="$FLIBS" +# This is needed for Mac OSX 10.5 +rm -rf conftest.dSYM +rm -f conftest* +cat < conftest.f + subroutine ftest + end +EOF +dnl +if test "X$ac_fcompile" = "X" ; then + ac_fcompile='${F77-f77} -c $FFLAGS conftest.f 1>&AC_FD_CC' +fi +if AC_TRY_EVAL(ac_fcompile) && test -s conftest.o ; then + mv conftest.o mconftestf.o + AC_LANG_SAVE + AC_LANG_C + save_LIBS="$LIBS" + dnl First try with no libraries + LIBS="mconftestf.o $save_LIBS" + AC_TRY_LINK([#include ],[ +#ifdef F77_NAME_UPPER +#define ftest_ FTEST +#elif defined(F77_NAME_LOWER) || defined(F77_NAME_MIXED) +#define ftest_ ftest +#endif +extern void ftest_(void); +ftest_(); +], [link_worked=yes], [link_worked=no] ) + if test "$link_worked" = "no" ; then + flibdirs=`echo $FLIBS | tr ' ' '\012' | grep '\-L' | tr '\012' ' '` + fliblibs=`echo $FLIBS | tr ' ' '\012' | grep -v '\-L' | tr '\012' ' '` + for flibs in $fliblibs ; do + LIBS="mconftestf.o $flibdirs $flibs $save_LIBS" + AC_TRY_LINK([#include ],[ +#ifdef F77_NAME_UPPER +#define ftest_ FTEST +#elif defined(F77_NAME_LOWER) || defined(F77_NAME_MIXED) +#define ftest_ ftest +#endif +extern void ftest_(void); +ftest_(); +], [link_worked=yes], [link_worked=no] ) + if test "$link_worked" = "yes" ; then + F77_IN_C_LIBS="$flibdirs $flibs" + break + fi + done + if test "$link_worked" = "no" ; then + # try to add libraries until it works... 
+ flibscat="" + for flibs in $fliblibs ; do + flibscat="$flibscat $flibs" + LIBS="mconftestf.o $flibdirs $flibscat $save_LIBS" + AC_TRY_LINK([#include ],[ +#ifdef F77_NAME_UPPER +#define ftest_ FTEST +#elif defined(F77_NAME_LOWER) || defined(F77_NAME_MIXED) +#define ftest_ ftest +#endif +extern void ftest_(void); +ftest_(); +], [link_worked=yes], [link_worked=no] ) + if test "$link_worked" = "yes" ; then + F77_IN_C_LIBS="$flibdirs $flibscat" + break + fi + done + fi + else + # No libraries needed + F77_IN_C_LIBS="" + fi + LIBS="$save_LIBS" + AC_LANG_RESTORE +else + echo "configure: failed program was:" >&AC_FD_CC + cat conftest.f >&AC_FD_CC +fi +# This is needed for Mac OSX 10.5 +rm -rf conftest.dSYM +rm -f conftest* mconftest* +if test -z "$F77_IN_C_LIBS" ; then + AC_MSG_RESULT(none) +else + AC_MSG_RESULT($F77_IN_C_LIBS) +fi +]) + +dnl Test to see if we should use C or Fortran to link programs whose +dnl main program is in Fortran. We may find that neither work because +dnl we need special libraries in each case. 
+dnl +AC_DEFUN([PAC_PROG_F77_LINKER_WITH_C],[ +AC_LANG_SAVE +AC_LANG_C +AC_TRY_COMPILE(, +long long a;,AC_DEFINE(HAVE_LONG_LONG,1,[Define if long long allowed])) +AC_MSG_CHECKING([for linker for Fortran main programs]) +dnl +dnl Create a program that uses multiplication and division in case +dnl that requires special libraries +cat > conftest.c < conftest.f <&AC_FD_CC ; then + AC_MSG_RESULT([Use Fortran to link programs]) + elif ${CC} ${CFLAGS} -o conftest conftest.o conftest1.o $LDFLAGS $FLIBS 2>&AC_FD_CC ; then + AC_MSG_RESULT([Use C with FLIBS to link programs]) + F77LINKER="$CC" + F77_LDFLAGS="$F77_LDFLAGS $FLIBS" + else + AC_MSG_RESULT([Unable to determine how to link Fortran programs with C]) + fi +else + AC_MSG_ERROR([Could not compile Fortran test program]) +fi +AC_LANG_RESTORE +]) + +dnl Check to see if a C program can be linked when using the libraries +dnl needed by C programs +dnl +AC_DEFUN([PAC_PROG_F77_CHECK_FLIBS], +[AC_MSG_CHECKING([whether C can link with $FLIBS]) +# Try to link a C program with all of these libraries +save_LIBS="$LIBS" +LIBS="$LIBS $FLIBS" +AC_TRY_LINK(,[int a;],runs=yes,runs=no) +LIBS="$save_LIBS" +AC_MSG_RESULT($runs) +if test "$runs" = "no" ; then + AC_MSG_CHECKING([for which libraries can be used]) + pac_ldirs="" + pac_libs="" + pac_other="" + for name in $FLIBS ; do + case $name in + -l*) pac_libs="$pac_libs $name" ;; + -L*) pac_ldirs="$pac_ldirs $name" ;; + *) pac_other="$pac_other $name" ;; + esac + done + save_LIBS="$LIBS" + keep_libs="" + for name in $pac_libs ; do + LIBS="$save_LIBS $pac_ldirs $pac_other $name" + AC_TRY_LINK(,[int a;],runs=yes,runs=no) + if test $runs = "yes" ; then keep_libs="$keep_libs $name" ; fi + done + AC_MSG_RESULT($keep_libs) + LIBS="$save_LIBS" + FLIBS="$pac_ldirs $pac_other $keep_libs" +fi +]) + +dnl Test for extra libraries needed when linking C routines that use +dnl stdio with Fortran. This test was created for OSX, which +dnl sometimes requires -lSystemStubs. 
If another library is needed, +dnl add it to F77_OTHER_LIBS +AC_DEFUN([PAC_PROG_F77_AND_C_STDIO_LIBS],[ + # To simplify the code in the cache_check macro, choose the routine name + # first, in case we need it + confname=conf1_ + case "$pac_cv_prog_f77_name_mangle" in + "lower underscore") confname=conf1_ ;; + "upper stdcall") confname=CONF1 ;; + upper) confname=CONF1 ;; + "lower doubleunderscore") confname=conf1_ ;; + lower) confname=conf1 ;; + "mixed underscore") confname=conf1_ ;; + mixed) confname=conf1 ;; + esac + + AC_CACHE_CHECK([what libraries are needed to link Fortran programs with C routines that use stdio],pac_cv_prog_f77_and_c_stdio_libs,[ + pac_cv_prog_f77_and_c_stdio_libs=unknown + # This is needed for Mac OSX 10.5 + rm -rf conftest.dSYM + rm -f conftest* + cat >conftest.f <conftestc.c < +int $confname( int a ) +{ printf( "The answer is %d\n", a ); fflush(stdout); return 0; } +EOF + tmpcmd='${CC-cc} -c $CFLAGS conftestc.c 1>&AC_FD_CC' + if AC_TRY_EVAL(tmpcmd) && test -s conftestc.o ; then + : + else + echo "configure: failed program was:" >&AC_FD_CC + cat conftestc.c >&AC_FD_CC + fi + + tmpcmd='${F77-f77} $FFLAGS -o conftest conftest.f conftestc.o 1>&AC_FD_CC' + if AC_TRY_EVAL(tmpcmd) && test -x conftest ; then + pac_cv_prog_f77_and_c_stdio_libs=none + else + # Try again with -lSystemStubs + tmpcmd='${F77-f77} $FFLAGS -o conftest conftest.f conftestc.o -lSystemStubs 1>&AC_FD_CC' + if AC_TRY_EVAL(tmpcmd) && test -x conftest ; then + pac_cv_prog_f77_and_c_stdio_libs="-lSystemStubs" + else + echo "configure: failed program was:" >&AC_FD_CC + cat conftestc.c >&AC_FD_CC + echo "configure: with Fortran 77 program:" >&AC_FD_CC + cat conftest.f >&AC_FD_CC + fi + fi + + # This is needed for Mac OSX 10.5 + rm -rf conftest.dSYM + rm -f conftest* +]) +if test "$pac_cv_prog_f77_and_c_stdio_libs" != none -a \ + "$pac_cv_prog_f77_and_c_stdio_libs" != unknown ; then + F77_OTHER_LIBS="$F77_OTHER_LIBS $pac_cv_prog_f77_and_c_stdio_libs" +fi +]) + +dnl Check that the FLIBS 
determined by AC_F77_LIBRARY_LDFLAGS is valid. +dnl That macro (at least as of autoconf 2.59) attempted to parse the output +dnl of the compiler when asked to be verbose; in the case of the Fujitsu +dnl frt Fortran compiler, it included files that frt looked for and then +dnl discarded because they did not exist. +AC_DEFUN([PAC_PROG_F77_FLIBS_VALID],[ + pac_cv_f77_flibs_valid=unknown + AC_MSG_CHECKING([whether $F77 accepts the FLIBS found by autoconf]) + AC_LANG_SAVE + AC_LANG_FORTRAN77 +dnl We can't use TRY_LINK, because it wants a routine name, not a +dnl declaration. The following is the body of TRY_LINK, slightly modified. +cat > conftest.$ac_ext <&AC_FD_CC + cat conftest.$ac_ext >&AC_FD_CC + pac_cv_f77_flibs_valid=no + fi +AC_MSG_RESULT($pac_cv_f77_flibs_valid) +if test $pac_cv_f77_flibs_valid = no ; then + # See which ones *are* valid + AC_MSG_CHECKING([for valid entries in FLIBS]) + goodFLIBS="" + saveFLIBS=$FLIBS + FLIBS="" + for arg in $saveFLIBS ; do + FLIBS="$goodFLIBS $arg" + if AC_TRY_EVAL(ac_link) && test -s conftest${ac_exeext}; then + goodFLIBS=$FLIBS + else + echo "configure: failed program was:" >&AC_FD_CC + cat conftest.$ac_ext >&AC_FD_CC + fi + done + FLIBS=$goodFLIBS + AC_MSG_RESULT($FLIBS) +fi +# +# This is needed for Mac OSX 10.5 +rm -rf conftest.dSYM +rm -f conftest* +AC_LANG_RESTORE +]) + + +AC_DEFUN([PAC_PROG_F77_OBJ_LINKS_WITH_C],[ +AC_MSG_CHECKING([whether Fortran 77 and C objects are compatible]) +dnl +rm -rf conftestc.dSYM +rm -f conftestc* +dnl construct with a C function with all possible F77 name mangling schemes. 
+cat <<_EOF > conftestc.c +/* lower */ +void c_subpgm( int *rc ); +void c_subpgm( int *rc ) { *rc = 1; } + +/* lower underscore */ +void c_subpgm_( int *rc ); +void c_subpgm_( int *rc ) { *rc = 2; } + +/* upper */ +void C_SUBPGM( int *rc ); +void C_SUBPGM( int *rc ) { *rc = 3; } + +/* lower doubleunderscore */ +void c_subpgm__( int *rc ); +void c_subpgm__( int *rc ) { *rc = 4; } + +/* mixed */ +void C_subpgm( int *rc ); +void C_subpgm( int *rc ) { *rc = 5; } + +/* mixed underscore */ +void C_subpgm_( int *rc ); +void C_subpgm_( int *rc ) { *rc = 6; } +_EOF +dnl +dnl Compile the C function into object file. +dnl +pac_Ccompile='${CC-cc} -c $CFLAGS conftestc.c 1>&AC_FD_CC' +if AC_TRY_EVAL(pac_Ccompile) && test -s conftestc.${ac_objext} ; then + pac_c_working=yes +else + pac_c_working=no + echo "configure: failed C program was:" >&AC_FD_CC + cat conftestc.c >&AC_FD_CC +fi +dnl +rm -rf conftestf.dSYM +rm -f conftestf* +cat <<_EOF > conftestf.f + program test + integer rc + rc = -1 + call c_subpgm( rc ) + write(6,*) "rc=", rc + end +_EOF +dnl - compile the fortran program into object file +pac_Fcompile='${F77-f77} -c $FFLAGS conftestf.f 1>&AC_FD_CC' +if AC_TRY_EVAL(pac_Fcompile) && test -s conftestf.${ac_objext} ; then + pac_f77_working=yes +else + pac_f77_working=no + echo "configure: failed F77 program was:" >&AC_FD_CC + cat conftestf.f >&AC_FD_CC +fi +dnl +if test "$pac_c_working" = "yes" -a "$pac_f77_working" = "yes" ; then +dnl Try linking with F77 compiler + rm -f conftest${ac_exeext} + pac_link='$F77 $FFLAGS -o conftest${ac_exeext} conftestf.${ac_objext} conftestc.${ac_objext} $LDFLAGS >&AC_FD_CC' + if AC_TRY_EVAL(pac_link) && test -s conftest${ac_exeext} ; then + AC_MSG_RESULT(yes) + rm -fr conftestf.dSYM conftestc.dSYM conftest.dSYM + rm -f conftest* + else +dnl Try linking with C compiler + rm -f conftest${ac_exeext} + pac_link='$CC $CFLAGS -o conftest${ac_exeext} conftestf.${ac_objext} conftestc.${ac_objext} $LDFLAGS $FLIBS >&AC_FD_CC' + if 
AC_TRY_EVAL(pac_link) && test -s conftest${ac_exeext} ; then + AC_MSG_RESULT(yes) + rm -fr conftestf.dSYM conftestc.dSYM conftest.dSYM + rm -f conftest* + else + AC_MSG_RESULT(no) + AC_CHECK_PROG(FILE, file, file, []) + if test "X$FILE" != "X" ; then + fobjtype="`${FILE} conftestf.${ac_objext} | sed -e \"s|conftestf\.${ac_objext}||g\"`" + cobjtype="`${FILE} conftestc.${ac_objext} | sed -e \"s|conftestc\.${ac_objext}||g\"`" + if test "$fobjtype" != "$cobjtype" ; then + AC_MSG_ERROR([**** Incompatible Fortran and C Object File Types! **** +F77 Object File Type produced by \"${F77} ${FFLAGS}\" is : ${fobjtype}. + C Object File Type produced by \"${CC} ${CFLAGS}\" is : ${cobjtype}.]) + fi + fi + fi + fi +else + AC_MSG_RESULT([failed compilation]) +fi +]) diff --git a/ompi/mca/io/romio/romio/confdb/aclocal_f77new.m4 b/ompi/mca/io/romio/romio/confdb/aclocal_f77new.m4 new file mode 100644 index 0000000000..7da5057b97 --- /dev/null +++ b/ompi/mca/io/romio/romio/confdb/aclocal_f77new.m4 @@ -0,0 +1,54 @@ +dnl /*D +dnl PAC_F77_WORKS_WITH_CPP +dnl +dnl Checks if Fortran 77 compiler works with C preprocessor +dnl +dnl Most systems allow the Fortran compiler to process .F and .F90 files +dnl using the C preprocessor. However, some systems either do not +dnl allow this or have serious bugs (OSF Fortran compilers have a bug +dnl that generates an error message from cpp). 
The following test +dnl checks to see if .F works, and if not, whether "cpp -P -C" can be used +dnl D*/ +AC_DEFUN([PAC_F77_WORKS_WITH_CPP],[ +AC_REQUIRE([AC_PROG_CPP]) +AC_MSG_CHECKING([whether Fortran 77 compiler processes .F files with C preprocessor]) +AC_LANG_PUSH([Fortran 77]) +saved_f77_ext=${ac_ext} +ac_ext="F" +saved_FFLAGS="$FFLAGS" +FFLAGS="$FFLAGS $CPPFLAGS" +AC_LANG_CONFTEST([ + AC_LANG_SOURCE([ + program main +#define ASIZE 10 + integer a(ASIZE) + end + ]) +]) +AC_COMPILE_IFELSE([],[ + pac_cv_f77_accepts_F=yes + ifelse([$1],[],[],[$1=""]) +],[ + pac_cv_f77_accepts_F=no + ifelse([$1],[],[:],[$1="false"]) +]) +# Restore Fortran 77's ac_ext but not FFLAGS +ac_ext="$saved_f77_ext" + +if test "$pac_cv_f77_accepts_F" != "yes" ; then + pac_cpp_f77="$ac_cpp -C -P conftest.F > conftest.$ac_ext" + PAC_RUNLOG_IFELSE([$pac_cpp_f77],[ + if test -s conftest.${ac_ext} ; then + AC_COMPILE_IFELSE([],[ + pac_cv_f77_accepts_F="no, use cpp" + ifelse([$1],[],[],[$1="$CPP -C -P"]) + ],[]) + rm -f conftest.${ac_ext} + fi + ],[]) +fi +FFLAGS="$saved_FFLAGS" +rm -f conftest.F +AC_LANG_POP([Fortran 77]) +AC_MSG_RESULT([$pac_cv_f77_accepts_F]) +]) diff --git a/ompi/mca/io/romio/romio/confdb/aclocal_fc.m4 b/ompi/mca/io/romio/romio/confdb/aclocal_fc.m4 new file mode 100644 index 0000000000..f5e598dc05 --- /dev/null +++ b/ompi/mca/io/romio/romio/confdb/aclocal_fc.m4 @@ -0,0 +1,963 @@ +dnl /*D +dnl PAC_PROG_FC([COMPILERS]) +dnl +dnl COMPILERS is a space separated list of Fortran 77 compilers to search +dnl for. Fortran 95 isn't strictly backwards-compatible with Fortran 77, +dnl but `f95' is worth trying. +dnl +dnl Compilers are ordered by +dnl 1. F77, F90, F95 +dnl 2. Good/tested native compilers, bad/untested native compilers +dnl 3. Wrappers around f2c go last. +dnl +dnl `fort77' is a wrapper around `f2c'. +dnl It is believed that under HP-UX `fort77' is the name of the native +dnl compiler. On some Cray systems, fort77 is a native compiler. +dnl frt is the Fujitsu F77 compiler. 
+dnl pgf77 and pgf90 are the Portland Group F77 and F90 compilers. +dnl xlf/xlf90/xlf95 are IBM (AIX) F77/F90/F95 compilers. +dnl lf95 is the Lahey-Fujitsu compiler. +dnl fl32 is the Microsoft Fortran "PowerStation" compiler. +dnl af77 is the Apogee F77 compiler for Intergraph hardware running CLIX. +dnl epcf90 is the "Edinburgh Portable Compiler" F90. +dnl fort is the Compaq Fortran 90 (now 95) compiler for Tru64 and Linux/Alpha. +dnl pathf90 is the Pathscale Fortran 90 compiler +dnl ifort is another name for the Intel f90 compiler +dnl efc - An older Intel compiler (?) +dnl ifc - An older Intel compiler +dnl fc - A compiler on some unknown system. This has been removed because +dnl it may also be the name of a command for something other than +dnl the Fortran compiler (e.g., fc=file system check!) +dnl D*/ +AC_DEFUN([PAC_PROG_FC],[ +PAC_PUSH_FLAG([FCFLAGS]) +AC_PROG_FC([m4_default([$1], + [ifort pgf90 pathf90 pathf95 xlf90 xlf95 f90 epcf90 f95 fort lf95 \ + gfortran g95 ifc efc])]) +PAC_POP_FLAG([FCFLAGS]) +]) +dnl +dnl PAC_FC_EXT checks for the default Fortran 90 program extension, f90 then f. +dnl This could be replaced by AC_FC_SRCEXT but since AC_FC_SRCEXT +dnl adds FCFLAGS_ext, which is used to modify FCFLAGS or Makefile.in. +dnl So will do this later. 
+dnl +AC_DEFUN([PAC_FC_EXT],[ +AC_MSG_CHECKING([for extension for Fortran 90 programs]) +ac_fc_srcext="f90" +AC_LANG_PUSH(Fortran) +AC_COMPILE_IFELSE([ + AC_LANG_PROGRAM() +],[ + AC_MSG_RESULT([f90]) +],[ + ac_fc_srcext="f" + AC_COMPILE_IFELSE([ + AC_LANG_PROGRAM() + ],[ + AC_MSG_RESULT([f]) + ],[ + AC_MSG_RESULT([unknown!]) + ]) +]) +AC_LANG_POP(Fortran) +]) +dnl +dnl Internal routine for testing F90 +dnl PAC_PROG_FC_WORKS() +dnl +AC_DEFUN([PAC_PROG_FC_WORKS],[ +AC_REQUIRE([PAC_FC_EXT]) +AC_LANG_PUSH(Fortran) +AC_MSG_CHECKING([whether the Fortran 90 compiler ($FC $FCFLAGS $LDFLAGS) works]) +AC_LINK_IFELSE([ + AC_LANG_SOURCE([ + program conftest + integer, dimension(10) :: n + end + ]) +],[ + pac_cv_prog_fc_works="yes" + AC_MSG_RESULT([$pac_cv_prog_fc_works]) + AC_MSG_CHECKING([whether the Fortran 90 compiler ($FC $FCFLAGS $LDFLAGS) is a cross-compiler]) + AC_RUN_IFELSE([], + [pac_cv_prog_fc_cross="no"], + [pac_cv_prog_fc_cross="yes"], + [pac_cv_prog_fc_cross="$cross_compiling"] + ) + AC_MSG_RESULT($pac_cv_prog_fc_cross) +],[ + pac_cv_prog_fc_works="no" + AC_MSG_WARN([installation or configuration problem: Fortran 90 compiler cannot create executables.]) +]) +# The intel compiler sometimes generates these work.pc and .pcl files +rm -f work.pc work.pcl +AC_LANG_POP(Fortran) +dnl cross_compiling no longer maintained by autoconf as part of the +dnl AC_LANG changes. If we set it here, a later AC_LANG may not +dnl restore it (in the case where one compiler claims to be a cross compiler +dnl and another does not) +dnl cross_compiling=$pac_cv_prog_f90_cross +]) +dnl/*D +dnl PAC_PROG_FC_INT_KIND - Determine kind parameter for an integer with +dnl the specified number of bytes. 
+dnl +dnl Synopsis: +dnl PAC_PROG_FC_INT_KIND(variable-to-set,number-of-bytes,[cross-size]) +dnl +dnl D*/ +AC_DEFUN([PAC_PROG_FC_INT_KIND],[ +# Set the default +$1=-1 +if test "$pac_cv_prog_fc_cross" = "yes" ; then + $1="$3" +else + AC_LANG_PUSH(Fortran) + AC_MSG_CHECKING([for Fortran 90 integer kind for $2-byte integers]) + # Convert bytes to digits + case $2 in + 1) sellen=2 ;; + 2) sellen=4 ;; + 4) sellen=8 ;; + 8) sellen=16 ;; + 16) sellen=30 ;; + *) sellen=8 ;; + esac + # Check for cached value + eval testval=\$"pac_cv_prog_fc_int_kind_$sellen" + if test -n "$testval" ; then + AC_MSG_RESULT([$testval (cached)]) + $1=$testval + else + KINDVAL="unavailable" + eval "pac_cv_prog_fc_int_kind_$sellen"=-1 + AC_RUN_IFELSE([ + AC_LANG_SOURCE([ + program main + integer ii + ii = selected_int_kind($sellen) + open(8, file="conftest1.out", form="formatted") + write (8,*) ii + close(8) + stop + end + ]) + ],[pac_run_ok=yes],[pac_run_ok=no]) + if test "$pac_run_ok" = "yes" ; then + if test -s conftest1.out ; then + # Because of write, there may be a leading blank. + KINDVAL=`cat conftest1.out | sed 's/ //g'` + eval "pac_cv_prog_fc_int_kind_$sellen"=$KINDVAL + $1=$KINDVAL + fi + fi + AC_MSG_RESULT([$KINDVAL]) + fi # not cached + AC_LANG_POP(Fortran) +fi # is not cross compiling +])dnl +dnl +dnl ------------------------------------------------------------------------ +dnl Special characteristics that have no autoconf counterpart but that +dnl we need as part of the Fortran 90 support. To distinquish these, they +dnl have a [PAC] prefix. +dnl +dnl +dnl PAC_FC_MODULE_EXT(action if found,action if not found) +dnl +AC_DEFUN([PAC_FC_MODULE_EXT], +[AC_CACHE_CHECK([for Fortran 90 module extension], +pac_cv_fc_module_ext,[ +pac_cv_fc_module_case="unknown" +AC_LANG_PUSH(Fortran) +AC_COMPILE_IFELSE([ + AC_LANG_SOURCE([ + module conftest + integer n + parameter (n=1) + end module conftest + ]) +],[ + dnl Look for module name + dnl First, try to find known names. 
This avoids confusion caused by + dnl additional files (like .stb created by some versions of pgf90) + dnl Early versions of the Intel compiler used "d" as the module extension; + dnl we include that just to finish the test as early as possible. + for name in conftest CONFTEST ; do + for ext in mod MOD d ; do + if test -s $name.$ext ; then + if test $name = conftest ; then + pac_cv_fc_module_case=lower + else + pac_cv_fc_module_case=upper + fi + pac_cv_fc_module_ext=$ext + pac_MOD=$ext + break + fi + done + if test -n "$pac_cv_fc_module_ext" ; then break ; fi + done + if test -z "$pac_MOD" ; then + pac_MOD=`ls conftest.* 2>&1 | grep -v conftest.${ac_fc_srcext} | grep -v conftest.o` + pac_MOD=`echo $pac_MOD | sed -e 's/conftest\.//g'` + pac_cv_fc_module_case="lower" + if test "X$pac_MOD" = "X" ; then + pac_MOD=`ls CONFTEST* 2>&1 | grep -v CONFTEST.f | grep -v CONFTEST.o` + pac_MOD=`echo $pac_MOD | sed -e 's/CONFTEST\.//g'` + if test -n "$pac_MOD" -a -s "CONFTEST.$pac_MOD" ; then + testname="CONFTEST" + pac_cv_fc_module_case="upper" + else + # Clear because we must have gotten an error message + pac_MOD="" + fi + fi + if test -z "$pac_MOD" ; then + pac_cv_fc_module_ext="unknown" + else + pac_cv_fc_module_ext=$pac_MOD + fi + fi +],[ + pac_cv_fc_module_ext="unknown" +]) +AC_LANG_POP(Fortran) +]) +dnl +dnl +dnl +AC_SUBST(FCMODEXT) +if test "$pac_cv_fc_module_ext" = "unknown" ; then + ifelse($2,,:,[$2]) +else + ifelse($1,,FCMODEXT=$pac_MOD,[$1]) +fi +]) +dnl +dnl +dnl PAC_FC_MODULE_INCFLAG +AC_DEFUN([PAC_FC_MODULE_INCFLAG],[ +AC_CACHE_CHECK([for Fortran 90 module include flag], +pac_cv_fc_module_incflag,[ +AC_REQUIRE([PAC_FC_MODULE_EXT]) +AC_LANG_PUSH(Fortran) +AC_LANG_CONFTEST([ + AC_LANG_SOURCE([ + module conf + integer n + parameter (n=1) + end module conf + ]) +]) +pac_madedir="no" +if test ! 
-d conftestdir ; then mkdir conftestdir ; pac_madedir="yes"; fi # create the scratch module directory only if it is absent and remember that we own it +if test "$pac_cv_fc_module_case" = "upper" ; then + pac_module="CONF.$pac_cv_fc_module_ext" +else + pac_module="conf.$pac_cv_fc_module_ext" +fi +AC_COMPILE_IFELSE([],[ + if test -s "$pac_module" ; then + mv $pac_module conftestdir + # Remove any temporary files, and hide the work.pc file + # (if the compiler generates them) + if test -f work.pc ; then + mv -f work.pc conftest.pc + fi + rm -f work.pcl + else + AC_MSG_WARN([Unable to build a simple Fortran 90 module]) + # echo "configure: failed program was:" >&AS_MESSAGE_LOG_FD + # cat conftest.$ac_ext >&AS_MESSAGE_LOG_FD + _AC_MSG_LOG_CONFTEST + fi + ], + [] +) +# Remove the conftest* after AC_LANG_CONFTEST +rm -rf conftest.dSYM +rm -f conftest.$ac_ext + +dnl Create the conftest here so the test isn't created everytime inside loop. +AC_LANG_CONFTEST([AC_LANG_PROGRAM([],[use conf])]) + +# Save the original FCFLAGS +saved_FCFLAGS="$FCFLAGS" +pac_cv_fc_module_incflag="" +for inchdr in '-I' '-M' '-p' ; do + FCFLAGS="$saved_FCFLAGS ${inchdr}conftestdir" + AC_COMPILE_IFELSE([],[pac_cv_fc_module_incflag="$inchdr" ; break]) +done +if test "X$pac_cv_fc_module_incflag" = "X" ; then + if test -s conftest.pc ; then + mv conftest.pc conftestdir/mpimod.pc + echo "mpimod.pc" > conftestdir/mpimod.pcl + echo "`pwd`/conftestdir/mpimod.pc" >> conftestdir/mpimod.pcl + inchdr='-cl,' + FCFLAGS="$saved_FCFLAGS ${inchdr}conftestdir" # rebuild from the flags saved before the probe loop + AC_COMPILE_IFELSE([], [pac_fcompile_ok=yes], [pac_fcompile_ok=no]) + if test "$pac_fcompile_ok" = "yes" ; then + pac_cv_fc_module_incflag="$inchdr" + # Not quite right; see the comments that follow + AC_MSG_RESULT([-cl,filename where filename contains a list of files and directories]) + FC_WORK_FILES_ARG="-cl,mpimod.pcl" + FCMODINCSPEC="-cl,<dir>/<file>mod.pcl" + else + # The version of the Intel compiler that I have refuses to let + # you put the "work catalog" list anywhere but the current directory. + pac_cv_fc_module_incflag="Unavailable!" 
+ fi + else + # Early versions of the Intel ifc compiler required a *file* + # containing the names of files that contained the names of the + # + # -cl,filename.pcl + # filename.pcl contains + # fullpathname.pc + # The "fullpathname.pc" is generated, I believe, when a module is + # compiled. + # Intel compilers use a wierd system: -cl,filename.pcl . If no file is + # specified, work.pcl and work.pc are created. However, if you specify + # a file, it must contain a the name of a file ending in .pc . Ugh! + pac_cv_fc_module_incflag="unknown" + fi +fi +# Restore the original FCFLAGS +FCFLAGS="$saved_FCFLAGS" +if test "$pac_madedir" = "yes" ; then rm -rf conftestdir ; fi +# Remove the conftest* after AC_LANG_CONFTEST +# This is needed for Mac OSX 10.5 +rm -rf conftest.dSYM +rm -f conftest* +AC_LANG_POP(Fortran) +]) +AC_SUBST(FC_WORK_FILES_ARG) +AC_SUBST(FCMODINCFLAG) +FCMODINCFLAG=$pac_cv_fc_module_incflag +]) +dnl +dnl +dnl +AC_DEFUN([PAC_FC_MODULE],[ +PAC_FC_MODULE_EXT +PAC_FC_MODULE_INCFLAG +]) +dnl +dnl PAC_FC_AND_F77_COMPATIBLE([action-if-true],[action-if-false]) +dnl +dnl Determine whether object files compiled with Fortran 77 can be +dnl linked to Fortran 90 main programs. +dnl +dnl The test uses a name that includes an underscore unless the 3rd +dnl argument provides another routine name. +dnl +AC_DEFUN([PAC_FC_AND_F77_COMPATIBLE],[ +AC_REQUIRE([PAC_PROG_FC_WORKS]) +AC_CACHE_CHECK([whether Fortran 90 compiler works with Fortran 77 compiler], +pac_cv_fc_and_f77,[ +pacTestRoutine=foo_abc +ifelse([$3],,,[eval pacTestRoutine=$3]) +pac_cv_fc_and_f77="unknown" +# compile the f77 program and link with the f90 program +# The reverse may not work because the Fortran 90 environment may +# expect to be in control (and to provide library files unknown to any other +# environment, even Fortran 77!) 
+AC_LANG_PUSH(Fortran 77) +AC_COMPILE_IFELSE([ + AC_LANG_SOURCE([ + subroutine ${pacTestRoutine}(b) + integer b + b = b + 1 + end + ]) +],[ + # pac_f77compile_ok=yes + mv conftest.$OBJEXT pac_f77conftest.$OBJEXT + # Save original LIBS, prepend previously generated object file to LIBS + saved_LIBS="$LIBS" + LIBS="pac_f77conftest.$OBJEXT $LIBS" + AC_LANG_PUSH(Fortran) + AC_LINK_IFELSE([ + AC_LANG_SOURCE([ + program main + integer a + a = 1 + call ${pacTestRoutine}(a) + end + ]) + ],[pac_cv_fc_and_f77=yes],[pac_cv_fc_and_f77=no]) + # Some versions of the Intel compiler produce these two files + rm -f work.pc work.pcl + # Restore LIBS + LIBS="$saved_LIBS" + AC_LANG_POP(Fortran) + # remove previously generated object file. + rm -f pac_f77conftest.$OBJEXT +], [ + # pac_f77compile_ok=no + pac_cv_fc_and_f77=no +]) +AC_LANG_POP(Fortran 77) +# Perform the requested action based on whether the test succeeded +if test "$pac_cv_fc_and_f77" = yes ; then + ifelse($1,,:,[$1]) +else + ifelse($2,,:,[$2]) + AC_MSG_WARN([See config.log for the failed test program and its output.]) +fi +]) +dnl +]) +dnl +dnl +dnl /*D +dnl PAC_PROG_FC_HAS_POINTER - Determine if Fortran allows pointer type +dnl +dnl Synopsis: +dnl PAC_PROG_FC_HAS_POINTER(action-if-true,action-if-false) +dnl D*/ +AC_DEFUN([PAC_PROG_FC_HAS_POINTER],[ +AC_CACHE_CHECK([whether Fortran 90 has Cray-style pointer declaration], +pac_cv_prog_fc_has_pointer,[ +AC_LANG_PUSH(Fortran) +AC_COMPILE_IFELSE([ + AC_LANG_PROGRAM([],[ + integer M + pointer (MPTR,M) + data MPTR/0/ + ]) +],[ + pac_cv_prog_fc_has_pointer="yes" +],[ + pac_cv_prog_fc_has_pointer="no" +]) dnl Endof AC_COMPILE_IFELSE +AC_LANG_POP(Fortran) +]) +if test "$pac_cv_prog_fc_has_pointer" = "yes" ; then + ifelse([$1],,:,[$1]) +else + ifelse([$2],,:,[$2]) +fi +]) +dnl +dnl +dnl +AC_DEFUN([PAC_PROG_FC_AND_C_STDIO_LIBS],[ +# To simply the code in the cache_check macro, chose the routine name +# first, in case we need it +confname=conf1_ +case "$pac_cv_prog_f77_name_mangle" 
in + "lower underscore") confname=conf1_ ;; + "upper stdcall") confname=CONF1 ;; + upper) confname=CONF1 ;; + "lower doubleunderscore") confname=conf1_ ;; + lower) confname=conf1 ;; + "mixed underscore") confname=conf1_ ;; + mixed) confname=conf1 ;; +esac + +AC_CACHE_CHECK([what libraries are needed to link Fortran90 programs with C routines that use stdio],pac_cv_prog_fc_and_c_stdio_libs,[ +pac_cv_prog_fc_and_c_stdio_libs=unknown + +AC_LANG_PUSH(C) +AC_COMPILE_IFELSE([ + AC_LANG_SOURCE([ +#include <stdio.h> +int $confname( int a ) +{ printf( "The answer is %d\n", a ); fflush(stdout); return 0; } + ]) +],[ + pac_compile_ok=yes + mv conftest.$OBJEXT pac_conftest.$OBJEXT + # Save LIBS and prepend object file to LIBS + saved_LIBS="$LIBS" + LIBS="pac_conftest.$OBJEXT $LIBS" + AC_LANG_PUSH(Fortran) + AC_LINK_IFELSE([ + AC_LANG_PROGRAM([],[call conf1(0)]) + ],[ + pac_cv_prog_fc_and_c_stdio_libs=none + ],[ + # Try again with -lSystemStubs + LIBS="$LIBS -lSystemStubs" + AC_LINK_IFELSE([],[ + pac_cv_prog_fc_and_c_stdio_libs="-lSystemStubs" + ],[]) + ]) + LIBS="$saved_LIBS" + AC_LANG_POP(Fortran) + rm -f pac_conftest.$OBJEXT +]) +AC_LANG_POP(C) +dnl +if test "$pac_cv_prog_fc_and_c_stdio_libs" != none -a \ + "$pac_cv_prog_fc_and_c_stdio_libs" != unknown ; then + FC_OTHER_LIBS="$FC_OTHER_LIBS $pac_cv_prog_fc_and_c_stdio_libs" +fi +]) +dnl +]) +dnl +dnl/*D +dnl PAC_FC_CHECK_COMPILER_OPTION - Check that a FC compiler option is +dnl accepted without warning messages +dnl +dnl Synopsis: +dnl PAC_FC_CHECK_COMPILER_OPTION(optionname,action-if-ok,action-if-fail) +dnl +dnl Output Effects: +dnl +dnl If no actions are specified, a working value is added to 'FCOPTIONS' +dnl +dnl Notes: +dnl This is now careful to check that the output is different, since +dnl some compilers are noisy. +dnl +dnl We are extra careful to prototype the functions in case compiler options +dnl that complain about poor code are in effect. 
+dnl +dnl Because this is a long script, we have ensured that you can pass a +dnl variable containing the option name as the first argument. +dnl D*/ +AC_DEFUN([PAC_FC_CHECK_COMPILER_OPTION],[ +AC_MSG_CHECKING([whether Fortran 90 compiler accepts option $1]) +pac_opt="$1" +AC_LANG_PUSH(Fortran) +dnl Instead of defining our own ac_link and ac_compile and do AC_TRY_EVAL +dnl on these variables. We modify ac_link and ac_compile used by AC_*_IFELSE +dnl by piping the output of the command to a logfile. The reason is that +dnl 1) AC_TRY_EVAL is discouraged by Autoconf. 2) defining our ac_link and +dnl ac_compile could mess up the usage and order of FCFLAGS, LDFLAGS +dnl and LIBS in these commands, i.e. deviate from how GNU standard uses +dnl these variables. +dnl +dnl Replace " >&AS_MESSAGE_LOG_FD" by "> file 2>&1" in ac_link and ac_compile +pac_link="`echo $ac_link | sed -e 's|>.*$|> $pac_logfile 2>\&1|g'`" +dnl echo "ac_link=\"$ac_link\"" +dnl echo "pac_link=\"$pac_link\"" +saved_ac_link="$ac_link" +ac_link="$pac_link" +dnl echo "ac_link=\"$ac_link\"" + +pac_compile="`echo $ac_compile | sed -e 's|>.*$|> $pac_logfile 2>\&1|g'`" +dnl echo "ac_compile=\"$ac_compile\"" +dnl echo "pac_compile=\"$pac_compile\"" +saved_ac_compile="$ac_compile" +ac_compile="$pac_compile" +dnl echo "ac_compile=\"$ac_compile\"" + +FCFLAGS_orig="$FCFLAGS" +FCFLAGS_opt="$pac_opt $FCFLAGS" +pac_result="unknown" +AC_LANG_CONFTEST([AC_LANG_PROGRAM()]) +FCFLAGS="$FCFLAGS_orig" +pac_logfile="pac_test1.log" +rm -f $pac_logfile +AC_LINK_IFELSE([], [ + FCFLAGS="$FCFLAGS_opt" + pac_logfile="pac_test2.log" + rm -f $pac_logfile + AC_LINK_IFELSE([], [ + PAC_RUNLOG_IFELSE([diff -b pac_test1.log pac_test2.log], + [pac_result=yes], [pac_result=no]) + ],[ + pac_result=no + ]) +], [ + pac_result=no +]) +AC_MSG_RESULT([$pac_result]) +dnl Delete the conftest created by AC_LANG_CONFTEST. 
+rm -f conftest.$ac_ext +# +if test "$pac_result" = "yes" ; then + AC_MSG_CHECKING([whether routines compiled with $pac_opt can be linked with ones compiled without $pac_opt]) + pac_result=unknown + FCFLAGS="$FCFLAGS_orig" + pac_logfile="pac_test3.log" + rm -f $pac_logfile + AC_COMPILE_IFELSE([ + AC_LANG_SOURCE([ + subroutine try() + end + ]) + ],[ + mv conftest.$OBJEXT pac_conftest.$OBJEXT + saved_LIBS="$LIBS" + LIBS="pac_conftest.$OBJEXT $LIBS" + + FCFLAGS="$FCFLAGS_opt" + pac_logfile="pac_test4.log" + rm -f $pac_logfile + AC_LINK_IFELSE([AC_LANG_PROGRAM()], [ + diffcmd='diff -b pac_test3.log pac_test4.log' + PAC_RUNLOG_IFELSE([diff -b pac_test2.log pac_test4.log], + [pac_result=yes], [pac_result=no]) + ],[ + pac_result=no + ]) + LIBS="$saved_LIBS" + rm -f pac_conftest.$OBJEXT + ],[ + pac_result=no + ]) + AC_MSG_RESULT([$pac_result]) + rm -f pac_test3.log pac_test4.log +fi +rm -f pac_test1.log pac_test2.log + +dnl Restore everything in AC that has been overwritten +ac_link="$saved_ac_link" +ac_compile="$saved_ac_compile" +dnl echo "ac_link=\"$ac_link\"" +dnl echo "ac_compile=\"$ac_compile\"" +dnl Restore FCFLAGS before 2nd/3rd argument commands are executed, +dnl as 2nd/3rd argument command could be modifying FCFLAGS. +FCFLAGS="$FCFLAGS_orig" +if test "$pac_result" = "yes" ; then + ifelse([$2],[],[FCOPTIONS="$FCOPTIONS $1"],[$2]) +else + ifelse([$3],[],[:],[$3]) +fi + +# This is needed for Mac OSX 10.5 +rm -rf conftest.dSYM +rm -f conftest* +AC_LANG_POP(Fortran) +]) +dnl /*D +dnl PAC_FC_WORKS_WITH_CPP +dnl +dnl Checks if Fortran 90 compiler works with C preprocessor +dnl +dnl Most systems allow the Fortran compiler to process .F and .F90 files +dnl using the C preprocessor. However, some systems either do not +dnl allow this or have serious bugs (OSF Fortran compilers have a bug +dnl that generates an error message from cpp). 
The following test +dnl checks to see if .F works, and if not, whether "cpp -P -C" can be used +dnl D*/ +AC_DEFUN([PAC_FC_WORKS_WITH_CPP],[ +AC_REQUIRE([AC_PROG_CPP]) +AC_MSG_CHECKING([whether Fortran 90 compiler processes .F90 files with C preprocessor]) +AC_LANG_PUSH([Fortran]) +saved_fc_ext=${ac_ext} +ac_ext="F90" +saved_FCFLAGS="$FCFLAGS" +FCFLAGS="$FCFLAGS $CPPFLAGS" +AC_LANG_CONFTEST([ + AC_LANG_SOURCE([ + program main +#define ASIZE 10 + integer a(ASIZE) + end + ]) +]) +AC_COMPILE_IFELSE([],[ + pac_cv_fc_accepts_F90=yes + ifelse([$1],[],[],[$1=""]) +],[ + pac_cv_fc_accepts_F90=no + ifelse([$1],[],[:],[$1="false"]) +]) +# Restore Fortran's ac_ext but not FCFLAGS +ac_ext="$saved_fc_ext" + +if test "$pac_cv_fc_accepts_F90" != "yes" ; then + pac_cpp_fc="$ac_cpp -C -P conftest.F90 > conftest.$ac_ext" + PAC_RUNLOG_IFELSE([$pac_cpp_fc],[ + if test -s conftest.${ac_ext} ; then + AC_COMPILE_IFELSE([],[ + pac_cv_fc_accepts_F90="no, use cpp" + ifelse([$1],[],[],[$1="$CPP -C -P"]) + ],[]) + rm -f conftest.${ac_ext} + fi + ],[]) +fi +FCFLAGS="$saved_FCFLAGS" +rm -f conftest.F90 +AC_LANG_POP([Fortran]) +AC_MSG_RESULT([$pac_cv_fc_accepts_F90]) +]) +dnl +dnl PAC_FC_VENDOR: +dnl Try to get a version string for the F90 compiler. We may +dnl need this to find likely command-line arguments for accessing +dnl shared libraries +dnl +AC_DEFUN([PAC_FC_VENDOR],[ +AC_MSG_CHECKING([for Fortran 90 compiler vendor]) +# This is complicated by some compilers (such as the Intel 8.1 ifort) +# that return a non-zero status even when they accept the -V option +# (a zero status is returned only if there is a file). 
+pac_cv_fc_vendor="unknown" +for arg in --version -V -v ; do + rm -f conftest.txt + PAC_RUNLOG([$FC $arg </dev/null >conftest.txt 2>&1]) # capture the version banner so the grep tests below have something to read + # Ignore the return code, because some compilers set the + # return code to zero on invalid arguments and some to + # non-zero on success (with no files to compile) + if test -f conftest.txt ; then + if grep 'Portland Group' conftest.txt >/dev/null 2>&1 ; then + pac_cv_fc_vendor=pgi + elif grep 'Sun Workshop' conftest.txt >/dev/null 2>&1 ; then + pac_cv_fc_vendor=sun + elif grep 'Sun Fortran 9' conftest.txt >/dev/null 2>&1 ; then + pac_cv_fc_vendor=sun + elif grep 'Absoft' conftest.txt >/dev/null 2>&1 ; then + pac_cv_fc_vendor=absoft + elif grep 'G95' conftest.txt >/dev/null 2>&1 ; then + pac_cv_fc_vendor=gnu + elif grep 'GNU Fortran' conftest.txt >/dev/null 2>&1 ; then + # This is gfortran + pac_cv_fc_vendor=gnu + elif grep Intel conftest.txt >/dev/null 2>&1 ; then + pac_cv_fc_vendor=intel + fi + fi + if test "$pac_cv_fc_vendor" != "unknown" ; then break ; fi +done +if test "$pac_cv_fc_vendor" = "unknown" ; then + # Try to use the compiler name + if test "$FC" = "ifort" -o "$FC" = "ifc" ; then + pac_cv_fc_vendor=intel + elif test "$FC" = "pgf90" ; then + pac_cv_fc_vendor=pgi + elif test "$FC" = "xlf90" -o "$FC" = "xlf90_r" ; then + pac_cv_fc_vendor=ibm + elif test "$FC" = "xlf95" -o "$FC" = "xlf95_r" ; then + pac_cv_fc_vendor=ibm + fi +fi +AC_MSG_RESULT([$pac_cv_fc_vendor]) +rm -f conftest.txt +# End of checking for F90 compiler vendor +]) +dnl +dnl PAC_F77_IS_FC([ACTION_IF_TRUE],[ACTION_IF_FALSE]) +dnl Check if F77 is a Fortran 90 compiler. 
+dnl +AC_DEFUN([PAC_F77_IS_FC],[ +AC_MSG_CHECKING([whether $F77 is a Fortran 90 compiler]) +AC_LANG_PUSH([Fortran 77]) +saved_ac_ext=$ac_ext +ac_ext="f90" +AC_LINK_IFELSE([ + AC_LANG_SOURCE([ + program main + integer, dimension(10) :: n + integer k + print *, range(k) + end + ]) +],[ + pac_cv_prog_f77_is_fc=yes + ifelse([$1],[],[],[$1]) +],[ + pac_cv_prog_f77_is_fc=no + ifelse([$2],[],[],[$2]) +]) +AC_MSG_RESULT([$pac_cv_prog_f77_is_fc]) +AC_LANG_POP([Fortran 77]) +]) +dnl +dnl PAC_FC_FLOAT_MODEL(float_type, [variable-set-if-successful-test]) +dnl variable-set-if-successful-test is optional variable. +dnl +dnl This is a runtime test. +dnl +AC_DEFUN([PAC_FC_FLOAT_MODEL],[ +type="$1" +AC_MSG_CHECKING([for precision and range of $type]) +AC_LANG_PUSH([Fortran]) +AC_LINK_IFELSE([ + AC_LANG_SOURCE([ + program main + $type aa + print *, precision(aa), ",", range(aa) + end + ]) +],[ + rm -f pac_conftest.out + PAC_RUNLOG([./conftest$EXEEXT > pac_conftest.out]) + if test -s pac_conftest.out ; then + pac_fc_num_model="`cat pac_conftest.out | sed -e 's/ */ /g'`" + AC_MSG_RESULT([$pac_fc_num_model]) + ifelse([$2],[],[],[$2=$pac_fc_num_model]) + else + AC_MSG_WARN([No output from test program!]) + fi + rm -f pac_conftest.out +],[ + AC_MSG_WARN([Failed to build program to determine the precision and range of $type]) +]) +AC_LANG_POP([Fortran]) +]) +dnl +dnl PAC_FC_SIMPLE_NUMBER_MODEL(message,test-fc-code, +dnl [variable-set-if-successful-test]) +dnl message : message of what test-fc-code is checking +dnl test-fc-code : Fortran 90 code to check a float or integer type's data model +dnl variable-set-if-successful-test : +dnl The optional variable to be set if the test-fc-code +dnl is successful in returning the simple data model. +dnl +dnl This is a runtime test. 
+dnl +AC_DEFUN([PAC_FC_SIMPLE_NUMBER_MODEL],[ +pac_msg="$1" +AC_MSG_CHECKING([for $pac_msg]) +AC_LANG_PUSH([Fortran]) +AC_LINK_IFELSE([ + AC_LANG_PROGRAM([],[ + $2 + ]) +],[ + rm -f pac_conftest.out + PAC_RUNLOG([./conftest$EXEEXT > pac_conftest.out]) + if test -s pac_conftest.out ; then + pac_fc_num_model="`cat pac_conftest.out | sed -e 's/ */ /g'`" + AC_MSG_RESULT([$pac_fc_num_model]) + ifelse([$3],[],[],[$3=$pac_fc_num_model]) + else + AC_MSG_WARN([No output from test program!]) + fi + rm -f pac_conftest.out +],[ + AC_MSG_WARN([Failed to build program to determine $pac_msg]) +]) +AC_LANG_POP([Fortran]) +]) +dnl +dnl PAC_FC_AVAIL_INTEGER_MODELS([INTEGER-MODELS-FLAG]) +dnl Both INTEGER-MODELS-FLAG is an optional variable to be set if provided. +dnl If it isn't provided, PAC_FC_ALL_INTEGER_MODELS will be set. +dnl +dnl This is a runtime test. +dnl +AC_DEFUN([PAC_FC_AVAIL_INTEGER_MODELS],[ +AC_MSG_CHECKING([for available integer kinds]) +AC_LANG_PUSH([Fortran]) +AC_LINK_IFELSE([ + AC_LANG_SOURCE([ + program main + integer r, lastkind + lastkind=selected_int_kind(1) + do r=2,30 + k = selected_int_kind(r) + if (k .ne. lastkind) then + print *, r-1,",",lastkind + lastkind = k + endif + if (k .le. 
0) then + exit + endif + enddo + if (k.ne.lastkind) then + print *, 31, ",", k + endif + end + ]) +],[ + rm -f pac_conftest.out + PAC_RUNLOG([./conftest$EXEEXT > pac_conftest.out]) + if test -s pac_conftest.out ; then + pac_flag=`cat pac_conftest.out | sed -e 's/ */ /g'| tr '\012' ','` + AC_MSG_RESULT([$pac_flag]) + pac_validKinds="`sed -e 's/ */ /g' pac_conftest.out | tr '\012' ':'`" + ifelse([$1],[],[PAC_FC_ALL_INTEGER_MODELS=$pac_flag],[$1=$pac_flag]) + else + AC_MSG_WARN([No output from test program!]) + fi + rm -f pac_conftest.out +],[ + AC_MSG_WARN([Failed to build program to determine available integer models]) +]) +AC_LANG_POP([Fortran]) +]) +dnl +dnl PAC_FC_INTEGER_MODEL_MAP([INTEGER-MODEL-MAP-FLAG]) +dnl Both INTEGER-MODEL-MAP-FLAG is an optional variable to be set if provided. +dnl If it isn't provided, PAC_FC_INTEGER_MODEL_MAP will be set. +dnl +dnl This test expect pac_validKinds set by PAC_FC_ALL_INTEGER_MODELS. +dnl +dnl This is a runtime test. +dnl +dnl Compile the C subroutine as pac_conftest.o and Link it with a Fortran main. 
+AC_DEFUN([PAC_FC_INTEGER_MODEL_MAP],[ +AC_REQUIRE([PAC_FC_AVAIL_INTEGER_MODELS]) +AC_MSG_CHECKING([for available integer ranges]) +AC_LANG_PUSH([C]) +AC_COMPILE_IFELSE([ + AC_LANG_SOURCE([ +#include <stdio.h> +#include "confdefs.h" +#ifdef F77_NAME_UPPER +#define cisize_ CISIZE +#define isize_ ISIZE +#elif defined(F77_NAME_LOWER) || defined(F77_NAME_MIXED) +#define cisize_ cisize +#define isize_ isize +#endif +int cisize_(char *,char*); +int cisize_(char *i1p, char *i2p) +{ + int isize_val=0; + isize_val = (int)(i2p - i1p); + return isize_val; +} + ]) +],[ + PAC_RUNLOG([mv conftest.$OBJEXT pac_conftest.$OBJEXT]) + pac_ccompile_ok=yes +],[ + pac_ccompile_ok=no +]) +AC_LANG_POP([C]) +dnl +if test "$pac_ccompile_ok" = "yes" ; then + saved_LIBS="$LIBS" + LIBS="pac_conftest.$OBJEXT $LIBS" + saved_IFS=$IFS + IFS=: + AC_LANG_PUSH([Fortran]) + pac_flag="" + for rangekind in $pac_validKinds ; do + kind="`echo $rangekind | sed -e 's/.*,//'`" + range="`echo $rangekind | sed -e 's/,.*//'`" + AC_LANG_CONFTEST([ + AC_LANG_SOURCE([ + program main + integer (kind=$kind) a(2) + integer cisize + print *, $range, ",", $kind, ",", cisize( a(1), a(2) ) + end + ]) + ]) + IFS=$saved_IFS + AC_LINK_IFELSE([],[ + rm -f pac_conftest.out + PAC_RUNLOG([./conftest$EXEEXT > pac_conftest.out]) + if test -s pac_conftest.out ; then + sizes="`cat pac_conftest.out | sed -e 's/ */ /g'`" + pac_flag="$pac_flag { $sizes }," + else + AC_MSG_WARN([No output from test program!]) + fi + rm -f pac_conftest.out + ],[ + AC_MSG_WARN([Fortran program fails to build!]) + ]) + IFS=: + done + IFS=$saved_IFS + AC_MSG_RESULT([$pac_flag]) + ifelse([$1],[],[PAC_FC_INTEGER_MODEL_MAP=$pac_flag],[$1=$pac_flag]) + AC_LANG_POP([Fortran]) + LIBS="$saved_LIBS" + rm -f pac_conftest.$OBJEXT +fi +]) diff --git a/ompi/mca/io/romio/romio/confdb/aclocal_libs.m4 b/ompi/mca/io/romio/romio/confdb/aclocal_libs.m4 new file mode 100644 index 0000000000..b06d1d332e --- /dev/null +++ b/ompi/mca/io/romio/romio/confdb/aclocal_libs.m4 @@ -0,0 +1,57 @@ 
+ +dnl PAC_SET_HEADER_LIB_PATH(with_option) +dnl This macro looks for the --with-xxx=, --with-xxx-include and --with-xxx-lib= +dnl options and sets the library and include paths. +AC_DEFUN([PAC_SET_HEADER_LIB_PATH],[ + AC_ARG_WITH($1, + AC_HELP_STRING([--with-$1=path], + [specify path where $1 include directory and lib directory can be found]), + if test "${with_$1}" != "yes" -a "${with_$1}" != "no" ; then + # is adding lib64 by default really the right thing to do? What if + # we are on a 32-bit host that happens to have both lib dirs available? + LDFLAGS="$LDFLAGS -L${with_$1}/lib64 -L${with_$1}/lib" + CPPFLAGS="$CPPFLAGS -I${with_$1}/include" + WRAPPER_CFLAGS="$WRAPPER_CFLAGS -I${with_$1}/include" + fi, + ) + AC_ARG_WITH($1-include, + AC_HELP_STRING([--with-$1-include=path], + [specify path where $1 include directory can be found]), + if test "${with_$1_include}" != "yes" -a "${with_$1_include}" != "no" ; then + CPPFLAGS="$CPPFLAGS -I${with_$1_include}" + WRAPPER_CFLAGS="$WRAPPER_CFLAGS -I${with_$1_include}" + fi, + ) + AC_ARG_WITH($1-lib, + AC_HELP_STRING([--with-$1-lib=path], + [specify path where $1 lib directory can be found]), + if test "${with_$1_lib}" != "yes" -a "${with_$1_lib}" != "no" ; then + LDFLAGS="$LDFLAGS -L${with_$1_lib}" + fi, + ) +]) + + +dnl PAC_CHECK_HEADER_LIB(with_option, header.h, libname, function, action-if-yes, action-if-no) +dnl This macro checks for a header and lib. It is assumed that the +dnl user can specify a path to the includes and libs using --with-xxx=. +dnl The xxx is specified in the "with_option" parameter. 
+AC_DEFUN([PAC_CHECK_HEADER_LIB],[ + failure=no + AC_CHECK_HEADER([$2],,failure=yes) + AC_CHECK_LIB($3,$4,,failure=yes) + if test "$failure" = "no" ; then + $5 + else + $6 + fi +]) + +dnl PAC_CHECK_HEADER_LIB_FATAL(with_option, header.h, libname, function) +dnl Similar to PAC_CHECK_HEADER_LIB, but errors out on failure +AC_DEFUN([PAC_CHECK_HEADER_LIB_FATAL],[ + PAC_CHECK_HEADER_LIB($1,$2,$3,$4,success=yes,success=no) + if test "$success" = "no" ; then + AC_MSG_ERROR(['$2 or lib$3 library not found. Did you specify --with-$1= or --with-$1-include= or --with-$1-lib=?']) + fi +]) diff --git a/ompi/mca/io/romio/romio/confdb/aclocal_make.m4 b/ompi/mca/io/romio/romio/confdb/aclocal_make.m4 new file mode 100644 index 0000000000..147e92e9c2 --- /dev/null +++ b/ompi/mca/io/romio/romio/confdb/aclocal_make.m4 @@ -0,0 +1,315 @@ +dnl +dnl We need routines to check that make works. Possible problems with +dnl make include +dnl +dnl It is really gnumake, and contrary to the documentation on gnumake, +dnl it insists on screaming everytime a directory is changed. The fix +dnl is to add the argument --no-print-directory to the make +dnl +dnl It is really BSD 4.4 make, and can't handle 'include'. For some +dnl systems, this can be fatal; there is no fix (other than removing this +dnl alleged make). +dnl +dnl It is the OSF V3 make, and can't handle a comment in a block of target +dnl code. There is no acceptable fix. +dnl +dnl +dnl +dnl +dnl Find a make program if none is defined. +AC_DEFUN([PAC_PROG_MAKE_PROGRAM],[true +if test "X$MAKE" = "X" ; then + AC_CHECK_PROGS(MAKE,make gnumake nmake pmake smake) +fi +])dnl + +dnl/*D +dnl PAC_PROG_MAKE_INCLUDE - Check whether make supports include +dnl +dnl Synopsis: +dnl PAC_PROG_MAKE_INCLUDE([action if true],[action if false]) +dnl +dnl Output Effect: +dnl None +dnl +dnl Notes: +dnl This checks for makes that do not support 'include filename'. 
Some +dnl versions of BSD 4.4 make required '#include' instead; some versions of +dnl 'pmake' have the same syntax. +dnl +dnl See Also: +dnl PAC_PROG_MAKE +dnl +dnl D*/ +AC_DEFUN([PAC_PROG_MAKE_INCLUDE],[ +AC_CACHE_CHECK([whether make supports include],pac_cv_prog_make_include,[ +AC_REQUIRE([PAC_PROG_MAKE_PROGRAM]) +# This is needed for Mac OSX 10.5 +rm -rf conftest.dSYM +rm -f conftest +cat > conftest <<. +ALL: + @echo "success" +. +cat > conftest1 <<. +include conftest +. +pac_str=`$MAKE -f conftest1 2>&1` +# This is needed for Mac OSX 10.5 +rm -rf conftest.dSYM +rm -f conftest conftest1 +if test "$pac_str" != "success" ; then + pac_cv_prog_make_include="no" +else + pac_cv_prog_make_include="yes" +fi +]) +if test "$pac_cv_prog_make_include" = "no" ; then + ifelse([$2],,:,[$2]) +else + ifelse([$1],,:,[$1]) +fi +])dnl + +dnl/*D +dnl PAC_PROG_MAKE_ALLOWS_COMMENTS - Check whether comments are allowed in +dnl shell commands in a makefile +dnl +dnl Synopsis: +dnl PAC_PROG_MAKE_ALLOWS_COMMENTS([false text]) +dnl +dnl Output Effect: +dnl Issues a warning message if comments are not allowed in a makefile. +dnl Executes the argument if one is given. +dnl +dnl Notes: +dnl Some versions of OSF V3 make do not allow comments in action commands. +dnl +dnl See Also: +dnl PAC_PROG_MAKE +dnl D*/ +dnl +AC_DEFUN([PAC_PROG_MAKE_ALLOWS_COMMENTS],[ +AC_CACHE_CHECK([whether make allows comments in actions], +pac_cv_prog_make_allows_comments,[ +AC_REQUIRE([PAC_PROG_MAKE_PROGRAM]) +# This is needed for Mac OSX 10.5 +rm -rf conftest.dSYM +rm -f conftest +cat > conftest <<. +SHELL=/bin/sh +ALL: + @# This is a valid comment! + @echo "success" +.
+pac_str=`$MAKE -f conftest 2>&1` +# This is needed for Mac OSX 10.5 +rm -rf conftest.dSYM +rm -f conftest +if test "$pac_str" != "success" ; then + pac_cv_prog_make_allows_comments="no" +else + pac_cv_prog_make_allows_comments="yes" +fi +]) +if test "$pac_cv_prog_make_allows_comments" = "no" ; then + AC_MSG_WARN([Your make does not allow comments in target code. +Using this make may cause problems when building programs. +You should consider using gnumake instead.]) + ifelse([$1],,:,[$1]) +fi +])dnl + +dnl/*D +dnl PAC_PROG_MAKE_VPATH - Check whether make supports source-code paths. +dnl +dnl Synopsis: +dnl PAC_PROG_MAKE_VPATH +dnl +dnl Output Effect: +dnl Sets the variable 'VPATH' to either +dnl.vb +dnl VPATH = .:${srcdir} +dnl.ve +dnl or +dnl.vb +dnl .PATH: . ${srcdir} +dnl.ve +dnl +dnl Notes: +dnl The test checks that the path works with implicit targets (some makes +dnl support only explicit targets with 'VPATH' or 'PATH'). +dnl +dnl NEED TO DO: Check that $< works on explicit targets. +dnl +dnl See Also: +dnl PAC_PROG_MAKE +dnl +dnl D*/ +AC_DEFUN([PAC_PROG_MAKE_VPATH],[ +AC_SUBST(VPATH) +dnl AM_IGNORE(VPATH) +AC_CACHE_CHECK([for virtual path format], +pac_cv_prog_make_vpath,[ +AC_REQUIRE([PAC_PROG_MAKE_PROGRAM]) +# This is needed for Mac OSX 10.5 +rm -rf conftest.dSYM +rm -rf conftest* +mkdir conftestdir +cat >conftestdir/a.c < conftest <&1 | grep 'conftestdir/a.c'` +if test -n "$ac_out" ; then + pac_cv_prog_make_vpath="VPATH" +else + rm -f conftest + cat > conftest <&1 | grep 'conftestdir/a.c'` + if test -n "$ac_out" ; then + pac_cv_prog_make_vpath=".PATH" + else + pac_cv_prog_make_vpath="neither VPATH nor .PATH works" + fi +fi +# This is needed for Mac OSX 10.5 +rm -rf conftest.dSYM +rm -rf conftest* +]) +if test "$pac_cv_prog_make_vpath" = "VPATH" ; then + VPATH='VPATH=.:${srcdir}' +elif test "$pac_cv_prog_make_vpath" = ".PATH" ; then + VPATH='.PATH: .
${srcdir}' +fi +])dnl + +dnl/*D +dnl PAC_PROG_MAKE_SET_CFLAGS - Check whether make sets CFLAGS +dnl +dnl Synopsis: +dnl PAC_PROG_MAKE_SET_CFLAGS([action if true],[action if false]) +dnl +dnl Output Effects: +dnl Executes the first argument if 'CFLAGS' is set by 'make'; executes +dnl the second argument if 'CFLAGS' is not set by 'make'. +dnl +dnl Notes: +dnl If 'CFLAGS' is set by make, you may wish to override that choice in your +dnl makefile. +dnl +dnl See Also: +dnl PAC_PROG_MAKE +dnl D*/ +AC_DEFUN([PAC_PROG_MAKE_SET_CFLAGS],[ +AC_CACHE_CHECK([whether make sets CFLAGS], +pac_cv_prog_make_set_cflags,[ +AC_REQUIRE([PAC_PROG_MAKE_PROGRAM]) +# This is needed for Mac OSX 10.5 +rm -rf conftest.dSYM +rm -f conftest +cat > conftest <&1` +# This is needed for Mac OSX 10.5 +rm -rf conftest.dSYM +rm -f conftest +if test "$pac_str" = "XX" ; then + pac_cv_prog_make_set_cflags="no" +else + pac_cv_prog_make_set_cflags="yes" +fi +]) +if test "$pac_cv_prog_make_set_cflags" = "no" ; then + ifelse([$2],,:,[$2]) +else + ifelse([$1],,:,[$1]) +fi +])dnl + +dnl/*D +dnl PAC_PROG_MAKE_CLOCK_SKEW - Check whether there is a problem with +dnl clock skew in using make. +dnl +dnl Effect: +dnl Sets the cache variable 'pac_cv_prog_make_found_clock_skew' to yes or no +dnl D*/ +AC_DEFUN([PAC_PROG_MAKE_CLOCK_SKEW],[ +AC_CACHE_CHECK([whether clock skew breaks make], +pac_cv_prog_make_found_clock_skew,[ +AC_REQUIRE([PAC_PROG_MAKE_PROGRAM]) +# This is needed for Mac OSX 10.5 +rm -rf conftest.dSYM +rm -f conftest* +cat > conftest < conftest.out 2>&1 +if grep -i skew conftest.out >/dev/null 2>&1 ; then + pac_cv_prog_make_found_clock_skew=yes +else + pac_cv_prog_make_found_clock_skew=no +fi +# This is needed for Mac OSX 10.5 +rm -rf conftest.dSYM +rm -f conftest* +]) +dnl We should really do something if we detect clock skew. The question is, +dnl what? +if test "$pac_cv_prog_make_found_clock_skew" = "yes" ; then + AC_MSG_WARN([Clock skew found by make. The configure and build may fail.
+Consider building in a local instead of NFS filesystem.]) +fi +]) + +dnl/*D +dnl PAC_PROG_MAKE - Checks for the varieties of MAKE, including support for +dnl VPATH +dnl +dnl Synopsis: +dnl PAC_PROG_MAKE +dnl +dnl Output Effect: +dnl Sets 'MAKE' to the make program to use if 'MAKE' is not already set. +dnl Sets the variable 'SET_CFLAGS' to 'CFLAGS =' if make sets 'CFLAGS'. +dnl +dnl Notes: +dnl This macro uses 'PAC_PROG_MAKE_INCLUDE', +dnl 'PAC_PROG_MAKE_ALLOWS_COMMENTS', 'PAC_PROG_MAKE_VPATH', and +dnl 'PAC_PROG_MAKE_SET_CFLAGS'. See those commands for details about their +dnl actions. +dnl +dnl It may call 'AC_PROG_MAKE_SET', which sets 'SET_MAKE' to 'MAKE = @MAKE@' +dnl if the make program does not set the value of make, otherwise 'SET_MAKE' +dnl is set to empty; if the make program echos the directory name, then +dnl 'SET_MAKE' is set to 'MAKE = $MAKE'. +dnl D*/ +AC_DEFUN([PAC_PROG_MAKE],[ +PAC_PROG_MAKE_PROGRAM +PAC_PROG_MAKE_CLOCK_SKEW +PAC_PROG_MAKE_INCLUDE +PAC_PROG_MAKE_ALLOWS_COMMENTS +PAC_PROG_MAKE_VPATH +AC_SUBST(SET_CFLAGS) +dnl AM_IGNORE(SET_CFLAGS) +PAC_PROG_MAKE_SET_CFLAGS([SET_CFLAGS='CFLAGS=']) +if test "$pac_cv_prog_make_echos_dir" = "no" ; then + AC_PROG_MAKE_SET +else + SET_MAKE="MAKE=${MAKE-make}" +fi +]) diff --git a/ompi/mca/io/romio/romio/confdb/aclocal_mpi.m4 b/ompi/mca/io/romio/romio/confdb/aclocal_mpi.m4 new file mode 100644 index 0000000000..79f47ec28b --- /dev/null +++ b/ompi/mca/io/romio/romio/confdb/aclocal_mpi.m4 @@ -0,0 +1,526 @@ +dnl +dnl/*D +dnl PAC_LIB_MPI - Check for MPI library +dnl +dnl Synopsis: +dnl PAC_LIB_MPI([action if found],[action if not found]) +dnl +dnl Output Effect: +dnl +dnl Notes: +dnl Currently, only checks for lib mpi and mpi.h. Later, we will add +dnl MPI_Pcontrol prototype (const int or not?). +dnl +dnl Prerequisites: +dnl autoconf version 2.13 (for AC_SEARCH_LIBS) +dnl D*/ +dnl Other tests to add: +dnl Version of MPI +dnl MPI-2 I/O? +dnl MPI-2 Spawn? +dnl MPI-2 RMA? 
+dnl PAC_LIB_MPI([found text],[not found text]) +AC_DEFUN([PAC_LIB_MPI],[ +dnl Set the prereq to 2.50 to avoid having +AC_PREREQ(2.50) +if test "X$pac_lib_mpi_is_building" != "Xyes" ; then + # Use CC if TESTCC is defined + if test "X$pac_save_level" != "X" ; then + pac_save_TESTCC="${TESTCC}" + pac_save_TESTCPP="${TESTCPP}" + CC="$pac_save_CC" + if test "X$pac_save_CPP" != "X" ; then + CPP="$pac_save_CPP" + fi + fi + # Look for MPILIB first if it is defined + AC_SEARCH_LIBS(MPI_Init,$MPILIB mpi mpich mpich2) + if test "$ac_cv_search_MPI_Init" = "no" ; then + ifelse($2,, + AC_MSG_ERROR([Could not find MPI library]),[$2]) + fi + AC_CHECK_HEADER(mpi.h,pac_have_mpi_h="yes",pac_have_mpi_h="no") + if test $pac_have_mpi_h = "no" ; then + ifelse($2,, + AC_MSG_ERROR([Could not find mpi.h include file]),[$2]) + fi + if test "X$pac_save_level" != "X" ; then + CC="$pac_save_TESTCC" + CPP="$pac_save_TESTCPP" + fi +fi +ifelse($1,,,[$1]) +]) + +dnl This should also set MPIRUN. +dnl +dnl/*D +dnl PAC_ARG_MPI_TYPES - Add command-line switches for different MPI +dnl environments +dnl +dnl Synopsis: +dnl PAC_ARG_MPI_TYPES([default]) +dnl +dnl Output Effects: +dnl Adds the following command line options to configure +dnl+ \-\-with\-mpich[=path] - MPICH. 'path' is the location of MPICH commands +dnl. \-\-with\-ibmmpi - IBM MPI +dnl. \-\-with\-lammpi[=path] - LAM/MPI +dnl. \-\-with\-mpichnt - MPICH NT +dnl- \-\-with\-sgimpi - SGI MPI +dnl If no type is selected, and a default ("mpich", "ibmmpi", or "sgimpi") +dnl is given, that type is used as if '--with-' was given. +dnl +dnl Sets 'CC', 'F77', 'TESTCC', 'TESTF77', and 'MPILIBNAME'. Does `not` +dnl perform an AC_SUBST for these values. +dnl Also sets 'MPIBOOT' and 'MPIUNBOOT'. These are used to specify +dnl programs that may need to be run before and after running MPI programs. +dnl For example, 'MPIBOOT' may start demons necessary to run MPI programs and +dnl 'MPIUNBOOT' will stop those demons. 
+dnl +dnl The two forms of the compilers are to allow for tests of the compiler +dnl when the MPI version of the compiler creates executables that cannot +dnl be run on the local system (for example, the IBM SP, where executables +dnl created with mpcc will not run locally, but executables created +dnl with xlc may be used to discover properties of the compiler, such as +dnl the size of data types). +dnl +dnl Historical note: +dnl Some common autoconf tests, such as AC_CHECK_SIZEOF, used to require +dnl running a program. But some MPI compilers (often really compilation +dnl scripts) produced programs that could only be run with special commands, +dnl such as a batch submission system. To allow these test programs to be +dnl run, a separate set of compiler variables, TESTCC, TESTF77, etc., +dnl were defined. However, in later versions of autoconf, it both became +dnl unnecessary to run programs for tests such as AC_CHECK_SIZEOF and +dnl it became necessary to define CC etc. before invoking AC_PROG_CC (and +dnl the other language compilers), because those commands now do much, much +dnl more than just determining the compiler. +dnl +dnl To address the change, we still define the TESTCC etc. compilers where +dnl possible to allow the use of AC_TRY_RUN when required, but we define +dnl the CC etc. variables and do not define ac_cv_prog_CC etc., as these +dnl cause autoconf to skip all of the other initialization code that +dnl AC_PROG_CC etc. runs. Note also that this command must occur before +dnl AC_PROG_CC (or anything that might cause AC_PROG_CC to be invoked).
+dnl +dnl See also: +dnl PAC_LANG_PUSH_COMPILERS, PAC_LIB_MPI +dnl D*/ +AC_DEFUN([PAC_ARG_MPI_TYPES],[ +PAC_ARG_MPI_KNOWN_TYPES +PAC_MPI_FIND_COMPILERS +PAC_MPI_CHECK_MPI_LIB +]) +dnl +dnl To keep autoconf from prematurely invoking the compiler check scripts, +dnl we need a command that first sets the compilers and a separate one +dnl that makes any necessary checks for libraries +dnl +AC_DEFUN([PAC_ARG_MPI_KNOWN_TYPES],[ +AC_ARG_WITH(mpich, +[--with-mpich=path - Assume that we are building with MPICH], +ac_mpi_type=mpich) +# Allow MPICH2 as well as MPICH +AC_ARG_WITH(mpich2, +[--with-mpich2=path - Assume that we are building with MPICH2], +ac_mpi_type=mpich) +AC_ARG_WITH(lammpi, +[--with-lammpi=path - Assume that we are building with LAM/MPI], +ac_mpi_type=lammpi) +AC_ARG_WITH(ibmmpi, +[--with-ibmmpi - Use the IBM SP implementation of MPI], +ac_mpi_type=ibmmpi) +AC_ARG_WITH(sgimpi, +[--with-sgimpi - Use the SGI implementation of MPI], +ac_mpi_type=sgimpi) +AC_ARG_WITH(mpichnt, +[--with-mpichnt - Use MPICH for Windows NT ], +ac_mpi_type=mpichnt) +AC_ARG_WITH(mpi, +[--with-mpi=path - Use an MPI implementation with compile scripts mpicc + and mpif77 in path/bin],ac_mpi_type=generic) + +if test "X$ac_mpi_type" = "X" ; then + if test "X$1" != "X" ; then + ac_mpi_type=$1 + else + ac_mpi_type=unknown + fi +fi +if test "$ac_mpi_type" = "unknown" -a "$pac_lib_mpi_is_building" = "yes" ; then + ac_mpi_type="mpich" +fi +]) +AC_DEFUN([PAC_MPI_FIND_COMPILERS],[ +# Set defaults +MPIRUN_NP="-np " +MPIEXEC_N="-n " +AC_SUBST(MPIRUN_NP) +AC_SUBST(MPIEXEC_N) +dnl +AC_ARG_VAR([MPIEXEC],[Name and path of mpiexec program]) +AC_ARG_VAR([MPIRUN],[Name and path of mpirun program]) +AC_ARG_VAR([MPIBOOT],[Name and path of program to run before mpirun]) +AC_ARG_VAR([MPIUNBOOT],[Name and path of program to run after all mpirun]) +AC_ARG_VAR([MPICC],[Name and absolute path of program used to compile MPI programs in C]) +AC_ARG_VAR([MPIF77],[Name and absolute path of program used to compile MPI
programs in F77]) +AC_ARG_VAR([MPICXX],[Name and absolute path of program used to compile MPI programs in C++]) +AC_ARG_VAR([MPIF90],[Name and absolute path of program used to compile MPI programs in F90]) +# +# Check for things that will cause trouble. For example, +# if MPICC is defined but does not contain a / or \, then PATH_PROG will +# ignore the value +if test -n "$MPICC" ; then + case $MPICC in +changequote(<<,>>) + [\\/]* | ?:[\\/]*) +changequote([,]) + # Ok, PATH_PROG will figure it out + ;; + *) + AC_MSG_ERROR([MPICC must be set to an absolute path if it is set]) + esac +fi +if test -n "$MPICXX" ; then + case $MPICXX in +changequote(<<,>>) + [\\/]* | ?:[\\/]*) +changequote([,]) + # Ok, PATH_PROG will figure it out + ;; + *) + AC_MSG_ERROR([MPICXX must be set to an absolute path if it is set]) + esac +fi +if test -n "$MPIF77" ; then + case $MPIF77 in +changequote(<<,>>) + [\\/]* | ?:[\\/]*) +changequote([,]) + # Ok, PATH_PROG will figure it out + ;; + *) + AC_MSG_ERROR([MPIF77 must be set to an absolute path if it is set]) + esac +fi +if test -n "$MPIF90" ; then + case $MPIF90 in +changequote(<<,>>) + [\\/]* | ?:[\\/]*) +changequote([,]) + # Ok, PATH_PROG will figure it out + ;; + *) + AC_MSG_ERROR([MPIF90 must be set to an absolute path if it is set]) + esac +fi + +case $ac_mpi_type in + mpich) + dnl + dnl This isn't correct. It should try to get the underlying compiler + dnl from the mpicc and mpif77 scripts or mpireconfig + if test "X$pac_lib_mpi_is_building" != "Xyes" ; then + save_PATH="$PATH" + if test "$with_mpich" != "yes" -a "$with_mpich" != "no" ; then + # Look for commands; if not found, try adding bin to the + # path + if test ! -x $with_mpich/mpicc -a -x $with_mpich/bin/mpicc ; then + with_mpich="$with_mpich/bin" + fi + PATH=$with_mpich:${PATH} + fi + AC_PATH_PROG(MPICC,mpicc) + if test -z "$TESTCC" ; then TESTCC=${CC-cc} ; fi + CC="$MPICC" + # Note that autoconf may unconditionally change the value of + # CC (!) in some other command. 
Thus, we define CCMASTER + CCMASTER=$CC + # to permit configure codes to recover the correct CC. This + # is an ugly not-quite-correct workaround for the fact that + # does not want you to change the C compiler once you have set it + # (But since it does so unconditionally, it silently creates + # bogus output files.) + AC_PATH_PROG(MPIF77,mpif77) + if test -z "$TESTF77" ; then TESTF77=${F77-f77} ; fi + F77="$MPIF77" + AC_PATH_PROG(MPIFC,mpif90) + if test -z "$TESTFC" ; then TESTFC=${FC-f90} ; fi + FC="$MPIFC" + AC_PATH_PROG(MPICXX,mpiCC) + if test -z "$TESTCXX" ; then TESTCXX=${CXX-CC} ; fi + CXX="$MPICXX" + # We may want to restrict this to the path containing mpirun + AC_PATH_PROG(MPIEXEC,mpiexec) + AC_PATH_PROG(MPIRUN,mpirun) + AC_PATH_PROG(MPIBOOT,mpichboot) + AC_PATH_PROG(MPIUNBOOT,mpichstop) + PATH="$save_PATH" + MPILIBNAME="mpich" + else + # All of the above should have been passed in the environment! + : + fi + ;; + + mpichnt) + ;; + + lammpi) + dnl + dnl This isn't correct. It should try to get the underlying compiler + dnl from the mpicc and mpif77 scripts or mpireconfig + save_PATH="$PATH" + if test "$with_lammpi" != "yes" -a "$with_lammpi" != "no" ; then + # Look for commands; if not found, try adding bin to the path + if test !
-x $with_lammpi/mpicc -a -x $with_lammpi/bin/mpicc ; then + with_lammpi="$with_lammpi/bin" + fi + PATH=$with_lammpi:${PATH} + fi + AC_PATH_PROG(MPICC,mpicc) + if test -z "$TESTCC" ; then TESTCC=${CC-cc} ; fi + CC="$MPICC" + AC_PATH_PROG(MPIF77,mpif77) + if test -z "$TESTF77" ; then TESTF77=${F77-f77} ; fi + F77="$MPIF77" + AC_PATH_PROG(MPIFC,mpif90) + # Keep any TESTFC supplied by the caller; default it only when unset + if test -z "$TESTFC" ; then TESTFC=${FC-f90} ; fi + FC="$MPIFC" + AC_PATH_PROG(MPICXX,mpiCC) + if test -z "$TESTCXX" ; then TESTCXX=${CXX-CC} ; fi + CXX="$MPICXX" + PATH="$save_PATH" + MPILIBNAME="lammpi" + MPIBOOT="lamboot" + MPIUNBOOT="wipe" + MPIRUN="mpirun" + ;; + + ibmmpi) + AC_CHECK_PROGS(MPCC,mpcc) + AC_CHECK_PROGS(MPXLF,mpxlf mpfort) + if test -z "$MPCC" -o -z "$MPXLF" ; then + AC_MSG_ERROR([Could not find IBM MPI compilation scripts. Either mpcc or mpxlf/mpfort is missing]) + fi + if test -z "$TESTCC" ; then TESTCC=${CC-xlC} ; fi + if test -z "$TESTF77" ; then TESTF77=${F77-xlf}; fi + CC=mpcc; F77=$MPXLF + # There is no mpxlf90, but the options langlvl and free can + # select the Fortran 90 version of xlf + if test "$enable_f90" != no ; then + AC_CHECK_PROGS(MPIXLF90,mpxlf90 mpfort) + if test -z "$TESTFC" ; then TESTFC=${FC-xlf90}; fi + if test "X$MPIXLF90" != "X" ; then + FC="$MPIXLF90" + else + FC="$MPXLF -qlanglvl=90ext -qfree=f90" + fi + fi + MPILIBNAME="" + cross_compiling=yes + # Turn off the autoconf version 3 warning message + ac_tool_warned=yes + ;; + + sgimpi) + if test -z "$TESTCC" ; then TESTCC=${CC:=cc} ; fi + if test -z "$TESTF77" ; then TESTF77=${F77:=f77} ; fi + if test -z "$TESTCXX" ; then TESTCXX=${CXX:=CC} ; fi + if test -z "$TESTFC" ; then TESTFC=${FC:=f90} ; fi + AC_CHECK_LIB(mpi,MPI_Init) + if test "$ac_cv_lib_mpi_MPI_Init" = "yes" ; then + MPILIBNAME="mpi" + fi + MPIRUN=mpirun + MPIBOOT="" + MPIUNBOOT="" + ;; + + generic) + # Find the compilers.
Expect the compilers to be mpicc and mpif77 + # in $with_mpi/bin + PAC_PROG_CC + # We only look for the other compilers if there is no + # disable for them + if test "$enable_f77" != no -a "$enable_fortran" != no ; then + AC_PROG_F77 + fi + if test "$enable_cxx" != no ; then + AC_PROG_CXX + fi + if test "$enable_f90" != no ; then + PAC_PROG_FC + fi + # Set defaults for the TEST versions if not already set + if test -z "$TESTCC" ; then + TESTCC=${CC:=cc} + fi + if test -z "$TESTF77" ; then + TESTF77=${F77:=f77} + fi + if test -z "$TESTCXX" ; then + TESTCXX=${CXX:=CC} + fi + if test -z "$TESTFC" ; then + TESTFC=${FC:=f90} + fi + # in $with_mpi/bin or $with_mpi + if test "X$MPICC" = "X" ; then + if test -x "$with_mpi/bin/mpicc" ; then + MPICC=$with_mpi/bin/mpicc + elif test -x "$with_mpi/mpicc" ; then + MPICC=$with_mpi/mpicc + fi + fi + if test "X$MPICXX" = "X" ; then + if test -x "$with_mpi/bin/mpicxx" ; then + MPICXX=$with_mpi/bin/mpicxx + elif test -x "$with_mpi/mpicxx" ; then + MPICXX=$with_mpi/mpicxx + fi + fi + if test "X$MPIF77" = "X" ; then + if test -x "$with_mpi/bin/mpif77" ; then + MPIF77=$with_mpi/bin/mpif77 + elif test -x "$with_mpi/mpif77" ; then + MPIF77=$with_mpi/mpif77 + fi + fi + if test "X$MPIF90" = "X" ; then + if test -x "$with_mpi/bin/mpif90" ; then + MPIF90=$with_mpi/bin/mpif90 + elif test -x "$with_mpi/mpif90" ; then + MPIF90=$with_mpi/mpif90 + fi + fi + if test "X$MPIEXEC" = "X" ; then + if test -x "$with_mpi/bin/mpiexec" ; then + MPIEXEC=$with_mpi/bin/mpiexec + elif test -x "$with_mpi/mpiexec" ; then + MPIEXEC=$with_mpi/mpiexec + fi + fi + CC=$MPICC + F77=$MPIF77 + if test "X$MPICXX" != "X" ; then CXX=$MPICXX ; fi + if test "X$MPIF90" != "X" ; then F90=$MPIF90 ; fi + ;; + + *) + # Use the default choices for the compilers + ;; +esac +# Tell autoconf to determine properties of the compilers (these are the +# compilers for MPI programs) +AC_PROG_CC +if test "$enable_f77" != no -a "$enable_fortran" != no ; then + AC_PROG_F77 +fi +if test
"$enable_cxx" != no ; then + AC_PROG_CXX +fi +if test "$enable_f90" != no ; then + PAC_PROG_FC +fi +]) + +dnl +dnl This uses the selected CC etc to check for include paths and libraries +AC_DEFUN([PAC_MPI_CHECK_MPI_LIB],[ +case $ac_mpi_type in + mpich) + ;; + + mpichnt) + dnl + dnl This isn't adequate, but it helps with using MPICH-NT/SDK.gcc + save_CFLAGS="$CFLAGS" + CFLAGS="$save_CFLAGS -I$with_mpichnt/include" + save_CPPFLAGS="$CPPFLAGS" + CPPFLAGS="$save_CPPFLAGS -I$with_mpichnt/include" + save_LDFLAGS="$LDFLAGS" + LDFLAGS="$save_LDFLAGS -L$with_mpichnt/lib" + AC_CHECK_LIB(mpich,MPI_Init,found="yes",found="no") + if test "$found" = "no" ; then + AC_CHECK_LIB(mpich2,MPI_Init,found="yes",found="no") + fi + if test "$enable_cxx" != no ; then + AC_PROG_CXX + fi + if test "$enable_f90" != no ; then + PAC_PROG_FC + fi + # Set defaults for the TEST versions if not already set + if test -z "$TESTCC" ; then TESTCC=${CC:=cc} ; fi + if test -z "$TESTF77" ; then TESTF77=${F77:=f77} ; fi + if test -z "$TESTCXX" ; then TESTCXX=${CXX:=CC} ; fi + if test -z "$TESTFC" ; then TESTFC=${FC:=f90} ; fi + if test "$found" = "no" ; then + CFLAGS=$save_CFLAGS + CPPFLAGS=$save_CPPFLAGS + LDFLAGS=$save_LDFLAGS + fi + ;; + + lammpi) + ;; + + ibmmpi) + ;; + + sgimpi) + AC_CHECK_LIB(mpi,MPI_Init) + if test "$ac_cv_lib_mpi_MPI_Init" = "yes" ; then + MPILIBNAME="mpi" + fi + ;; + + generic) + AC_SEARCH_LIBS(MPI_Init,mpi mpich2 mpich) + if test "$ac_cv_search_MPI_Init" = "-lmpi" ; then + MPILIBNAME="mpi" + fi + ;; + + *) + ;; +esac +]) + +dnl +dnl/*D +dnl PAC_MPI_F2C - Determine if MPI has the MPI-2 functions MPI_xxx_f2c and +dnl MPI_xxx_c2f +dnl +dnl Output Effect: +dnl Define 'HAVE_MPI_F2C' if the routines are found. +dnl +dnl Notes: +dnl Looks only for 'MPI_Request_c2f'.
+dnl D*/ +AC_DEFUN([PAC_MPI_F2C],[ +AC_CACHE_CHECK([for MPI F2C and C2F routines], +pac_cv_mpi_f2c, +[ +AC_TRY_LINK([#include "mpi.h"], +[MPI_Request request;MPI_Fint a;a = MPI_Request_c2f(request);], +pac_cv_mpi_f2c="yes",pac_cv_mpi_f2c="no") +]) +if test "$pac_cv_mpi_f2c" = "yes" ; then + AC_DEFINE(HAVE_MPI_F2C,1,[Define if MPI has F2C]) +fi +]) +dnl +dnl/*D +dnl PAC_HAVE_ROMIO - make mpi.h include mpio.h if romio enabled +dnl +dnl Output Effect: +dnl expands @HAVE_ROMIO@ in mpi.h into #include "mpio.h" +dnl D*/ +AC_DEFUN([PAC_HAVE_ROMIO],[ +if test "$enable_romio" = "yes" ; then HAVE_ROMIO='#include "mpio.h"'; fi +AC_SUBST(HAVE_ROMIO) +]) diff --git a/ompi/mca/io/romio/romio/acinclude.m4 b/ompi/mca/io/romio/romio/confdb/aclocal_romio.m4 similarity index 81% rename from ompi/mca/io/romio/romio/acinclude.m4 rename to ompi/mca/io/romio/romio/confdb/aclocal_romio.m4 index d7b0f4035b..9cc31130d0 100644 --- a/ompi/mca/io/romio/romio/acinclude.m4 +++ b/ompi/mca/io/romio/romio/confdb/aclocal_romio.m4 @@ -34,10 +34,10 @@ program failed to produce an object file]) allstrings="-a" fi - nameform1=`strings $allstrings confftest.o | grep mpir_init_fop_ | head -n 1` - nameform2=`strings $allstrings confftest.o | grep MPIR_INIT_FOP | head -n 1` - nameform3=`strings $allstrings confftest.o | grep mpir_init_fop | head -n 1` - nameform4=`strings $allstrings confftest.o | grep mpir_init_fop__ | head -n 1` + nameform1=`strings $allstrings confftest.o | grep mpir_init_fop_ | head -1` + nameform2=`strings $allstrings confftest.o | grep MPIR_INIT_FOP | head -1` + nameform3=`strings $allstrings confftest.o | grep mpir_init_fop | head -1` + nameform4=`strings $allstrings confftest.o | grep mpir_init_fop__ | head -1` rm -f confftest.f confftest.o if test -n "$nameform4" ; then echo "Fortran externals are lower case and have two trailing underscores" @@ -62,7 +62,9 @@ Turning off Fortran (-nof77 being assumed)]) if test -n "$FORTRANNAMES" ; then WDEF="-D$FORTRANNAMES" fi - rm -f 
confftest.f confftest.o + # Delete confftest files with any extension. This catches the case + # where auxillary files, such as coverage files, are removed. + rm -f confftest.* ])dnl dnl define(PAC_GET_SPECIAL_SYSTEM_INFO,[ @@ -174,8 +176,12 @@ define(PAC_TEST_MPI,[ } EOF rm -f conftest - $CC $USER_CFLAGS -I$MPI_INCLUDE_DIR -o conftest mpitest.c $MPI_LIB > /dev/null 2>&1 + cmd="$CC $USER_CFLAGS -I$MPI_INCLUDE_DIR -o conftest mpitest.c $MPI_LIB" + echo "$as_me:$LINENO: $cmd" >&5 + $cmd >&5 2>&5 if test ! -x conftest ; then + echo "$as_me:$LINENO: failed program was:" >&5 + sed 's/^/| /' mpitest.c >&5 rm -f conftest mpitest.c AC_MSG_ERROR([Unable to compile a simple MPI program. Use environment variables to provide the location of MPI libraries and @@ -521,7 +527,10 @@ dnl dnl tries to determine the Fortran 90 kind parameter for 8-byte integers dnl define(PAC_MPI_OFFSET_KIND, -[rm -f conftest* +[ +# This is needed for Mac OSX 10.5 +rm -rf conftest.dSYM +rm -f conftest* # Determine the extension for Fortran 90 files (not all compilers accept # .f and not all accept .f90) if test -z "$ac_f90ext" ; then @@ -538,6 +547,8 @@ EOF if AC_TRY_EVAL(ac_f90compile) ; then AC_MSG_RESULT([f90]) else + # This is needed for Mac OSX 10.5 + rm -rf conftest.dSYM rm -f conftest* ac_f90ext="f" cat > conftest.$ac_f90ext </dev/null 2>&1 ; then KINDVAL=`cat conftest.out` fi fi +# This is needed for Mac OSX 10.5 +rm -rf conftest.dSYM rm -f conftest* if test -n "$KINDVAL" -a "$KINDVAL" != "-1" ; then AC_MSG_RESULT($KINDVAL) @@ -613,8 +626,8 @@ dnl dnl define(PAC_GET_XFS_MEMALIGN, [AC_MSG_CHECKING([for memory alignment needed for direct I/O]) -/bin/rm -f memalignval -/bin/rm -f /tmp/romio_tmp.bin +rm -f memalignval +rm -f /tmp/romio_tmp.bin AC_TEST_PROGRAM([#include #include #include @@ -629,8 +642,8 @@ main() { fprintf( f, "%u\n", st.d_mem); exit(0); }],Pac_CV_NAME=`cat memalignval`,Pac_CV_NAME="") -/bin/rm -f memalignval -/bin/rm -f /tmp/romio_tmp.bin +rm -f memalignval +rm -f 
/tmp/romio_tmp.bin if test -n "$Pac_CV_NAME" -a "$Pac_CV_NAME" != 0 ; then AC_MSG_RESULT($Pac_CV_NAME) CFLAGS="$CFLAGS -DXFS_MEMALIGN=$Pac_CV_NAME" @@ -653,6 +666,8 @@ dnl define(PAC_MAKE_VPATH,[ AC_SUBST(VPATH) AC_MSG_CHECKING(for virtual path format) +# This is needed for Mac OSX 10.5 +rm -rf conftest.dSYM rm -rf conftest* mkdir conftestdir cat >conftestdir/a.c < mpitest.c < /dev/null 2>&1 + if test -x conftest ; then + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_MPIU_FUNCS,1,[Define if MPICH2 memory tracing macros defined]) + else + AC_MSG_RESULT(no) + fi + rm -f conftest mpitest.c +])dnl +dnl define(PAC_TEST_MPI_GREQUEST,[ AC_MSG_CHECKING(support for generalized requests) rm -f mpitest.c @@ -835,154 +874,10 @@ EOF $CC $USER_CFLAGS -I$MPI_INCLUDE_DIR -o conftest mpitest.c $MPI_LIB > /dev/null 2>&1 if test -x conftest ; then AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_MPI_GREQUEST,,[Define if generalized requests avaliable]) + AC_DEFINE(HAVE_MPI_GREQUEST,1,[Define if generalized requests avaliable]) + DEFINE_HAVE_MPI_GREQUEST="#define HAVE_MPI_GREQUEST 1" else AC_MSG_RESULT(no) fi rm -f conftest mpitest.c ])dnl -dnl -dnl/*D -dnl PAC_FUNC_NEEDS_DECL - Set NEEDS__DECL if a declaration is needed -dnl -dnl Synopsis: -dnl PAC_FUNC_NEEDS_DECL(headerfiles,funcname) -dnl -dnl Output Effect: -dnl Sets 'NEEDS__DECL' if 'funcname' is not declared by the -dnl headerfiles. -dnl -dnl Approach: -dnl Try to compile a program with the function, but passed with an incorrect -dnl calling sequence. If the compilation fails, then the declaration -dnl is provided within the header files. If the compilation succeeds, -dnl the declaration is required. -dnl -dnl We use a 'double' as the first argument to try and catch varargs -dnl routines that may use an int or pointer as the first argument. 
-dnl -dnl D*/ -AC_DEFUN([PAC_FUNC_NEEDS_DECL],[ -AC_CACHE_CHECK([whether $2 needs a declaration], -[pac_cv_func_decl_$2],[ -AC_TRY_COMPILE([$1],[int a=$2(1.0,27,1.0,"foo");], -pac_cv_func_decl_$2=yes,pac_cv_func_decl_$2=no)]) -if test "$pac_cv_func_decl_$2" = "yes" ; then -changequote(<<,>>)dnl -define(<>, translit(NEEDS_$2_DECL, [a-z *], [A-Z__]))dnl -changequote([, ])dnl - AC_DEFINE_UNQUOTED(PAC_FUNC_NAME,1,[Define if $2 needs a declaration]) -undefine([PAC_FUNC_NAME]) -fi -])dnl -dnl -dnl/*D -dnl PAC_C_RESTRICT - Check if C supports restrict -dnl -dnl Synopsis: -dnl PAC_C_RESTRICT -dnl -dnl Output Effect: -dnl Defines 'restrict' if some version of restrict is supported; otherwise -dnl defines 'restrict' as empty. This allows you to include 'restrict' in -dnl declarations in the same way that 'AC_C_CONST' allows you to use 'const' -dnl in declarations even when the C compiler does not support 'const' -dnl -dnl Note that some compilers accept restrict only with additional options. -dnl DEC/Compaq/HP Alpha Unix (Tru64 etc.) -accept restrict_keyword -dnl -dnl D*/ -AC_DEFUN([PAC_C_RESTRICT],[ -AC_CACHE_CHECK([for restrict], -[pac_cv_c_restrict],[ -AC_TRY_COMPILE(,[int * restrict a;],pac_cv_c_restrict="restrict", -pac_cv_c_restrict="no") -if test "$pac_cv_c_restrict" = "no" ; then - AC_TRY_COMPILE(,[int * _Restrict a;],pac_cv_c_restrict="_Restrict", - pac_cv_c_restrict="no") -fi -if test "$pac_cv_c_restrict" = "no" ; then - AC_TRY_COMPILE(,[int * __restrict a;],pac_cv_c_restrict="__restrict", - pac_cv_c_restrict="no") -fi -]) -if test "$pac_cv_c_restrict" = "no" ; then - restrict_val="" -elif test "$pac_cv_c_restrict" != "restrict" ; then - restrict_val=$pac_cv_c_restrict -fi -if test "$restrict_val" != "restrict" ; then - AC_DEFINE_UNQUOTED(restrict,$restrict_val,[if C does not support restrict]) -fi -])dnl -dnl -dnl -dnl -dnl This is a replacement for AC_PROG_CC that does not prefer gcc and -dnl that does not mess with CFLAGS. 
See acspecific.m4 for the original defn. -dnl -dnl/*D -dnl PAC_PROG_CC - Find a working C compiler -dnl -dnl Synopsis: -dnl PAC_PROG_CC -dnl -dnl Output Effect: -dnl Sets the variable CC if it is not already set -dnl -dnl Notes: -dnl Unlike AC_PROG_CC, this does not prefer gcc and does not set CFLAGS. -dnl It does check that the compiler can compile a simple C program. -dnl It also sets the variable GCC to yes if the compiler is gcc. It does -dnl not yet check for some special options needed in particular for -dnl parallel computers, such as -Tcray-t3e, or special options to get -dnl full ANSI/ISO C, such as -Aa for HP. -dnl -dnl D*/ -dnl 2.52 doesn't have AC_PROG_CC_GNU -ifdef([AC_PROG_CC_GNU],,[AC_DEFUN([AC_PROG_CC_GNU],)]) -AC_DEFUN([PAC_PROG_CC],[ -AC_PROVIDE([AC_PROG_CC]) -AC_CHECK_PROGS([CC, cc xlC xlc pgcc icc gcc]) -test -z "$CC" && AC_MSG_ERROR([no acceptable cc found in \$PATH]) -PAC_PROG_CC_WORKS -AC_PROG_CC_GNU -if test "$ac_cv_prog_gcc" = yes; then - GCC=yes -else - GCC= -fi -]) -dnl -dnl -dnl PAC_C_GNU_ATTRIBUTE - See if the GCC __attribute__ specifier is allow. -dnl Use the following -dnl #ifndef HAVE_GCC_ATTRIBUTE -dnl #define __attribute__(a) -dnl #endif -dnl If *not*, define __attribute__(a) as null -dnl -dnl We start by requiring Gcc. 
Some other compilers accept __attribute__ -dnl but generate warning messages, or have different interpretations -dnl (which seems to make __attribute__ just as bad as #pragma) -dnl For example, the Intel icc compiler accepts __attribute__ and -dnl __attribute__((pure)) but generates warnings for __attribute__((format...)) -dnl -AC_DEFUN([PAC_C_GNU_ATTRIBUTE],[ -AC_REQUIRE([AC_PROG_CC]) -if test "$ac_cv_prog_gcc" = "yes" ; then - AC_CACHE_CHECK([whether __attribute__ allowed], -pac_cv_gnu_attr_pure,[ -AC_TRY_COMPILE([int foo(int) __attribute__ ((pure));],[int a;], -pac_cv_gnu_attr_pure=yes,pac_cv_gnu_attr_pure=no)]) -AC_CACHE_CHECK([whether __attribute__((format)) allowed], -pac_cv_gnu_attr_format,[ -AC_TRY_COMPILE([int foo(char *,...) __attribute__ ((format(printf,1,2)));],[int a;], -pac_cv_gnu_attr_format=yes,pac_cv_gnu_attr_format=no)]) - if test "$pac_cv_gnu_attr_pure" = "yes" -a "$pac_cv_gnu_attr_format" = "yes" ; then - AC_DEFINE(HAVE_GCC_ATTRIBUTE,1,[Define if GNU __attribute__ is supported]) - fi -fi -]) -dnl - diff --git a/ompi/mca/io/romio/romio/confdb/aclocal_runlog.m4 b/ompi/mca/io/romio/romio/confdb/aclocal_runlog.m4 new file mode 100644 index 0000000000..66533a2e4a --- /dev/null +++ b/ompi/mca/io/romio/romio/confdb/aclocal_runlog.m4 @@ -0,0 +1,64 @@ +dnl +dnl PAC_RUN_LOG mimics _AC_RUN_LOG which is autoconf internal routine. +dnl We also make sure PAC_RUN_LOG can be used in AS_IF, so the last +dnl test command should have terminating ]), i.e. without newline before ]). +dnl +AC_DEFUN([PAC_RUNLOG],[ +{ AS_ECHO(["$as_me:$LINENO: $1"]) >&AS_MESSAGE_LOG_FD + (eval $1) 2>&AS_MESSAGE_LOG_FD + ac_status=$? + AS_ECHO(["$as_me:$LINENO: \$? = $ac_status"]) >&AS_MESSAGE_LOG_FD + test $ac_status = 0; }]) +dnl +dnl PAC_COMMAND_IFELSE is written to replace AC_TRY_EVAL with added logging +dnl to config.log, i.e. AC_TRY_EVAL does not log anything to config.log. 
+dnl If autoconf provides AC_COMMAND_IFELSE or AC_EVAL_IFELSE, +dnl AC_COMMAND_IFELSE dnl should be replaced by the official autoconf macros. +dnl +dnl PAC_COMMAND_IFELSE(COMMMAND,[ACTION-IF-RUN-OK],[ACTION-IF-RUN-FAIL]) +dnl +AC_DEFUN([PAC_COMMAND_IFELSE],[ +dnl Should use _AC_DO_TOKENS but use AC_RUN_LOG instead +dnl because _AC_XX is autoconf's undocumented macro. +AS_IF([PAC_RUNLOG([$1])],[ + $2 +],[ + AS_ECHO(["$as_me: program exited with status $ac_status"]) >&AS_MESSAGE_LOG_FD + m4_ifvaln([$3],[ + (exit $ac_status) + $3 + ]) +]) +]) +dnl +dnl +dnl +AC_DEFUN([PAC_EVAL_IFELSE],[ +dnl Should use _AC_DO_TOKENS but use AC_RUN_LOG instead +dnl because _AC_XX is autoconf's undocumented macro. +AS_IF([PAC_RUNLOG([$$1])],[ + $2 +],[ + AS_ECHO(["$as_me: program exited with status $ac_status"]) >&AS_MESSAGE_LOG_FD + m4_ifvaln([$3],[ + (exit $ac_status) + $3 + ]) +]) +]) +dnl +dnl +dnl +AC_DEFUN([PAC_RUNLOG_IFELSE],[ +dnl pac_TESTLOG is the internal temporary logfile for this macro. +pac_TESTLOG="pac_test.log" +rm -f $pac_TESTLOG +PAC_COMMAND_IFELSE([$1 > $pac_TESTLOG],[ + ifelse([$2],[],[],[$2]) +],[ + AS_ECHO(["*** $1 :"]) >&AS_MESSAGE_LOG_FD + cat $pac_TESTLOG >&AS_MESSAGE_LOG_FD + ifelse([$3],[],[],[$3]) +]) +rm -f $pac_TESTLOG +]) diff --git a/ompi/mca/io/romio/romio/confdb/aclocal_shl.m4 b/ompi/mca/io/romio/romio/confdb/aclocal_shl.m4 new file mode 100644 index 0000000000..c9c40d852e --- /dev/null +++ b/ompi/mca/io/romio/romio/confdb/aclocal_shl.m4 @@ -0,0 +1,407 @@ +dnl +dnl Definitions for creating shared libraries +dnl +dnl The purpose of these definitions is to provide common support for +dnl shared libraries, with *or without* the use of the GNU Libtool package. +dnl For many of our important platforms, the Libtool approach is overkill, +dnl and can be particularly painful for developers. 
+dnl +dnl To use libtool, you need macros that are defined by libtool for libtool +dnl Don't even think about the consequences of this for updating and for +dnl using user-versions of libtool :( +dnl +dnl !!!!!!!!!!!!!!!!!!!!! +dnl libtool requires ac 2.50 !!!!!!!!!!!!!!!!! +dnl +dnl builtin(include,libtool.m4) +dnl +dnl/*D +dnl PAC_ARG_SHAREDLIBS - Add --enable-sharedlibs=kind to configure. +dnl +dnl Synopsis: +dnl PAC_ARG_SHAREDLIBS +dnl +dnl Output effects: +dnl Adds '--enable-sharedlibs=kind' to the command line. If this is enabled, +dnl then based on the value of 'kind', programs are selected for the +dnl names 'CC_SHL' and 'CC_LINK_SHL' that configure will substitute for in +dnl 'Makefile.in's. These symbols are generated by 'simplemake' when +dnl shared library support is selected. +dnl The variable 'C_LINKPATH_SHL' is set to the option to specify the +dnl path to search at runtime for libraries (-rpath in gcc/GNU ld). +dnl This can be turned off with --disable-rpath , which is appropriate +dnl for libraries and for executables that may be installed in different +dnl locations. +dnl The variable 'SHLIB_EXT' is set to the extension used by shared +dnl libraries; under most forms of Unix, this is 'so'; under Mac OS/X, this +dnl is 'dylib', and under Windows (including cygwin), this is 'dll'. +dnl +dnl Supported values of 'kind' include \: +dnl+ gcc - Use gcc to create both shared objects and libraries +dnl. osx-gcc - Use gcc on Mac OS/X to create both shared objects and +dnl libraries +dnl. solaris-cc - Use native Solaris cc to create shared objects and +dnl libraries +dnl. cygwin-gcc - Use gcc on Cygwin to create shared objects and libraries +dnl- none - The same as '--disable-sharedlibs' +dnl +dnl Others will be added as experience dictates. Likely names are +dnl + libtool - For general GNU libtool +dnl - linux-pgcc - For Portland group under Linux +dnl +dnl Notes: +dnl Shared libraries are only partially implemented. 
Additional symbols +dnl will probably be defined, including symbols to specify how shared library +dnl search paths are specified and how shared library names are set. +dnl D*/ +AC_DEFUN([PAC_ARG_SHAREDLIBS],[ + +AC_ARG_ENABLE(shared, + AC_HELP_STRING([--enable-shared], [Enable shared library builds]),, + enable_shared=no) + +AC_ARG_ENABLE(rpath, + AC_HELP_STRING([--enable-rpath], + [Determine whether the rpath is set when programs are + compiled and linked when shared libraries are built. + The default is yes; use --disable-rpath to turn this + feature off; in that case, shared libraries will be + found according to the rules for your system (e.g., in + LD_LIBRARY_PATH)]),,enable_rpath=yes) + +AC_ARG_ENABLE(sharedlibs, +[ --enable-sharedlibs=kind - Enable shared libraries. kind may be + gcc - Standard gcc and GNU ld options for creating shared libraries + osx-gcc - Special options for gcc needed only on OS/X + solaris-cc - Solaris native (SPARC) compilers for 32 bit systems + cygwin-gcc - Special options for gcc needed only for cygwin + none - same as --disable-sharedlibs + Only gcc, osx-gcc, and solaris-cc are currently supported +],,enable_sharedlibs=default) + +if test "$enable_sharedlibs" = "default" ; then + if test "$enable_shared" = "yes" ; then + AS_CASE([$host], + [*-*-darwin*], [enable_sharedlibs=gcc-osx], + [*-*-cygwin*|*-*-mingw*|*-*-pw32*|*-*-cegcc*], [enable_sharedlibs=cygwin-gcc], + [*-*-sunos*], [enable_sharedlibs=solaris-gcc], + [enable_sharedlibs=gcc]) + else + enable_sharedlibs=none + fi +fi + +# If --enable-sharedlibs is given, but --enable-shared is not, throw +# an error +if test "$enable_sharedlibs" != "no" -a "$enable_sharedlibs" != "none" ; then + if test "$enable_shared" = "no" ; then + AC_MSG_ERROR([--enable-sharedlibs cannot be used without --enable-shared]) + fi +fi + +CC_SHL=true +C_LINK_SHL=true +C_LINKPATH_SHL="" +SHLIB_EXT=unknown +SHLIB_FROM_LO=no +SHLIB_INSTALL='$(INSTALL_PROGRAM)' +case "$enable_sharedlibs" in + no|none) + ;; + 
gcc-osx|osx-gcc) + AC_MSG_RESULT([Creating shared libraries using GNU for Mac OSX]) + C_LINK_SHL='${CC} -dynamiclib -undefined suppress -single_module -flat_namespace' + CC_SHL='${CC} -fPIC' + # No way in osx to specify the location of the shared libraries at link + # time (see the code in createshlib in mpich2/src/util) + # As of 10.5, -Wl,-rpath,dirname should work . The dirname + # must be a single directory, not a colon-separated list (use multiple + # -Wl,-rpath,path for each of the paths in the list). However, os x + # apparently records the library full path, so rpath isn't as useful + # as it is on other systems + C_LINKPATH_SHL="" + SHLIB_EXT="dylib" + enable_sharedlibs="osx-gcc" + ;; + gcc) + AC_MSG_RESULT([Creating shared libraries using GNU]) + # Not quite right yet. See mpich/util/makesharedlib + # Use syntax that works in both Make and the shell + #C_LINK_SHL='${CC} -shared -Wl,-r' + C_LINK_SHL='${CC} -shared' + # For example, include the libname as ${LIBNAME_SHL} + #C_LINK_SHL='${CC} -shared -Wl,-h,' + # May need -fPIC . Test to see which one works. + for sh_arg in "-fPIC" "-fpic" "-KPIC" ; do + PAC_C_CHECK_COMPILER_OPTION($sh_arg,works=yes,works=no) + if test "$works" = "yes" ; then + CC_SHL="${CC} ${sh_arg}" + break + fi + done + if test "$works" != "yes"; then + AC_MSG_ERROR([Cannot build shared libraries with this compiler]) + fi + # This used to have -Wl,-rpath earlier, but that causes problems + # on many systems. + if test $enable_rpath = "yes" ; then + C_LINKPATH_SHL="-Wl,-rpath," + fi + SHLIB_EXT=so + # We need to test that this isn't osx. 
The following is a + # simple hack + osname=`uname -s` + case $osname in + *Darwin*|*darwin*) + AC_MSG_ERROR([You must specify --enable-sharedlibs=osx-gcc for Mac OS/X]) + ;; + *CYGWIN*|*cygwin*) + AC_MSG_ERROR([You must specify --enable-sharedlibs=cygwin-gcc for Cygwin]) + ;; + *SunOS*) + AC_MSG_ERROR([You must specify --enable-sharedlibs=solaris-gcc for Solaris with gcc]) + ;; + esac + ;; + + cygwin|cygwin-gcc|gcc-cygwin) + AC_MSG_RESULT([Creating shared libraries using GNU under CYGWIN]) + C_LINK_SHL='${CC} -shared' + CC_SHL='${CC}' + # DLL Libraries need to be in the user's path (!) + C_LINKPATH_SHL="" + SHLIB_EXT="dll" + enable_sharedlibs="cygwin-gcc" + ;; + + libtool) + # set TRY_LIBTOOL to yes to experiment with libtool. You are on your + # own - only send fixes, not bug reports. + if test "$TRY_LIBTOOL" != yes ; then + AC_MSG_ERROR([Creating shared libraries using libtool not yet supported]) + else + # Using libtool requires a heavy-weight process to test for + # various stuff that libtool needs. Without this, you'll get a + # bizarre error message about libtool being unable to find + # configure.in or configure.ac (!) + # Libtool expects to see at least enable-shared. + if test "X$enable_shared" = "X" ; then enable_shared=yes ; fi + # Initialize libtool + # This works, but libtool version 2 places the object files + # in a different place, making it harder to integrate with + # our base approach. 
Disabling for now + dnl LT_PREREQ([2.2.6]) + dnl LT_INIT([disable-shared]) + AC_MSG_ERROR([To use this test verison, edit aclocal_shl.m4]) + # Likely to be + # either CC or CC_SHL is libtool $cc + CC_SHL='${LIBTOOL} --mode=compile ${CC}' + # CC_LINK_SHL includes the final installation path + # For many systems, the link may need to include *all* libraries + # (since many systems don't allow any unsatisfied dependencies) + # We need to give libtool the .lo file, not the .o files + SHLIB_FROM_LO=yes + # We also need to add -no-undefined when the compiler is gcc and + # we are building under cygwin + sysname=`uname -s | tr abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ` + isCygwin=no + case "$sysname" in + *CYGWIN*) isCygwin=yes ;; + esac + if test "$isCygwin" = yes ; then + C_LINK_SHL='${LIBTOOL} --mode=link ${CC} -no-undefined -rpath ${libdir}' + else + C_LINK_SHL='${LIBTOOL} --mode=link ${CC} -rpath ${libdir}' + fi + if test $enable_rpath = "yes" ; then + C_LINKPATH_SHL="-rpath " + fi + # We also need a special install process with libtool. 
Note that this + # will also install the static libraries + SHLIB_INSTALL='$(LIBTOOL) --mode=install $(INSTALL_PROGRAM)' + # Note we may still need to add + #'$(LIBTOOL) --mode=finish $(libdir)' + fi + ;; +dnl +dnl Other, such as solaris-cc + solaris|solaris-cc) + AC_MSG_RESULT([Creating shared libraries using Solaris]) + # pic32 is appropriate for both 32 and 64 bit Solaris + C_LINK_SHL='${CC} -G -xcode=pic32' + CC_SHL='${CC} -xcode=pic32' + if test $enable_rpath = "yes" ; then + C_LINKPATH_SHL="-R" + fi + SHLIB_EXT=so + enable_sharedlibs="solaris-cc" + ;; + + solaris-gcc) + # This is the same as gcc, except for the C_LINKPATH_SHL + AC_MSG_RESULT([Creating shared libraries using Solaris with gcc]) + C_LINK_SHL='${CC} -shared' + CC_SHL='${CC} -fPIC' + if test $enable_rpath = "yes" ; then + C_LINKPATH_SHL="-R" + fi + SHLIB_EXT=so + enable_sharedlibs="solaris-gcc" + ;; + + linuxppc-xlc) + # This is only the beginning of xlc support, thanks to andy@vpac.org + CC_SHL='${CC} -qmkshrobj' + # More recent versions allow multiple args, separated by commas + if test $enable_rpath = "yes" ; then + C_LINKPATH_SHL="-Wl,-rpath," + fi + #C_LINKPATH_SHL="-Wl,-rpath -Wl," + C_LINK_SHL='${CC} -shared -qmkshrobj' + SHLIB_EXT=so + # Note that the full line should be more like + # $CLINKER -shared -qmkshrobj -Wl,-h,$libbase.$slsuffix -o ../shared/$libbase.$slsuffix *.o $OtherLibs + # for the appropriate values of $libbase and $slsuffix + # The -h name sets the name of the object; this is necessary to + # ensure that the dynamic linker can find the proper shared library. + ;; + + *) + AC_MSG_ERROR([Unknown value $enable_sharedlibs for enable-sharedlibs. 
Values should be gcc or osx-gcc]) + enable_sharedlibs=no + ;; +esac +# Check for the shared-library extension +PAC_CC_SHLIB_EXT +dnl +AC_SUBST(CC_SHL) +AC_SUBST(C_LINK_SHL) +AC_SUBST(C_LINKPATH_SHL) +AC_SUBST(SHLIB_EXT) +AC_SUBST(SHLIB_FROM_LO) +AC_SUBST(SHLIB_INSTALL) +]) + +dnl /*D +dnl PAC_xx_SHAREDLIBS - Get compiler and linker for shared libraries +dnl These routines may be used to determine the compiler and the +dnl linker to be used in creating shared libraries +dnl Rather than set predefined variable names, they set an argument +dnl (if provided) +dnl +dnl Synopsis +dnl PAC_CC_SHAREDLIBS(type,CCvar,CLINKvar) +dnl D*/ +AC_DEFUN([PAC_CC_SHAREDLIBS], +[ +pac_kinds=$1 +ifelse($1,,[ + pac_prog="" + AC_CHECK_PROG(pac_prog,gcc,yes,no) + # If we are gcc but OS X, set the special type + # We need a similar setting for cygwin + if test "$pac_prog" = yes ; then + osname=`uname -s` + case $osname in + *Darwin*|*darwin*) pac_kinds=gcc-osx + ;; + *) pac_kinds=gcc + ;; + esac + fi + pac_prog="" + AC_CHECK_PROG(pac_prog,libtool,yes,no) + if test "$pac_prog" = yes ; then pac_kinds="$pac_kinds libtool" ; fi +]) +for pac_arg in $pac_kinds ; do + case $pac_arg in + gcc) + # For example, include the libname as ${LIBNAME_SHL} + #C_LINK_SHL='${CC} -shared -Wl,-h,' + pac_cc_sharedlibs='gcc -shared' + # Make sure we select the correct fpic option + PAC_C_CHECK_COMPILER_OPTION(-fPIC,fPIC_OK=yes,fPIC_OK=no) + if test "$fPIC_OK" != yes ; then + PAC_C_CHECK_COMPILER_OPTION(-fpic,fpic_ok=yes,fpic_ok=no) + if test "$fpic_ok" != yes ; then + AC_MSG_ERROR([Neither -fpic nor -fPIC accepted by $CC]) + else + pac_cc_sharedlibs="$pac_cc_sharedlibs -fpic" + fi + else + pac_cc_sharedlibs="$pac_cc_sharedlibs -fPIC" + fi + pac_clink_sharedlibs='gcc -shared' + pac_type_sharedlibs=gcc + ;; + gcc-osx|osx-gcc) + pac_clink_sharedlibs='${CC} -dynamiclib -undefined suppress -single_module -flat_namespace' + pac_cc_sharedlibs='${CC} -fPIC' + pac_type_sharedlibs=gcc-osx + ;; + libtool) + 
AC_CHECK_PROGS(LIBTOOL,libtool,false) + if test "$LIBTOOL" = "false" ; then + AC_MSG_WARN([Could not find libtool]) + else + # Likely to be + # either CC or CC_SHL is libtool $cc + pac_cc_sharedlibs='${LIBTOOL} --mode=compile ${CC}' + pac_clink_sharedlibs='${LIBTOOL} --mode=link ${CC} -rpath ${libdir}' + pac_type_sharedlibs=libtool + fi + ;; + *) + ;; + esac + if test -n "$pac_cc_sharedlibs" ; then break ; fi +done +if test -z "$pac_cc_sharedlibs" ; then pac_cc_sharedlibs=true ; fi +if test -z "$pac_clink_sharedlibs" ; then pac_clink_sharedlibs=true ; fi +ifelse($2,,CC_SHL=$pac_cc_sharedlibs,$2=$pac_cc_sharedlibs) +ifelse($3,,C_LINK_SHL=$pac_clink_sharedlibs,$3=$pac_clink_sharedlibs) +ifelse($4,,SHAREDLIB_TYPE=$pac_type_sharedlibs,$4=$pac_type_sharedlibs) +]) + +dnl This macro ensures that all of the necessary substitutions are +dnl made by any subdirectory configure (which may simply SUBST the +dnl necessary values rather than trying to determine them from scratch) +dnl This is a more robust (and, in the case of libtool, only +dnl manageable) method. +AC_DEFUN([PAC_CC_SUBDIR_SHLIBS],[ + AC_SUBST(CC_SHL) + AC_SUBST(C_LINK_SHL) + AC_SUBST(LIBTOOL) + AC_SUBST(ENABLE_SHLIB) + AC_SUBST(SHLIB_EXT) + if test "$ENABLE_SHLIB" = "libtool" ; then + if test -z "$LIBTOOL" ; then + AC_MSG_WARN([libtool selected for shared library support but LIBTOOL is not defined]) + fi + # Libtool needs master_top_builddir + if test "X$master_top_builddir" = "X" ; then + AC_MSG_ERROR([Libtool requires master_top_builddir - check configure.in sources]) + fi + AC_SUBST(master_top_builddir) + fi +]) + +dnl PAC_CC_SHLIB_EXT - get the extension for shared libraries +dnl Set the variable SHLIB_EXT if it is other than unknown. +AC_DEFUN([PAC_CC_SHLIB_EXT],[ +# Not all systems use .so as the extension for shared libraries (cygwin +# and OSX are two important examples). If we did not set the SHLIB_EXT, +# then try and determine it.
We need this to properly implement +# clean steps that look for libfoo.$SHLIB_EXT . +if test "$SHLIB_EXT" = "unknown" ; then + osname=`uname -s` + case $osname in + *Darwin*|*darwin*) SHLIB_EXT=dylib + ;; + *CYGWIN*|*cygwin*) SHLIB_EXT=dll + ;; + *Linux*|*LINUX*|*SunOS*) SHLIB_EXT=so + ;; + esac +fi +]) diff --git a/ompi/mca/io/romio/romio/confdb/aclocal_subcfg.m4 b/ompi/mca/io/romio/romio/confdb/aclocal_subcfg.m4 new file mode 100644 index 0000000000..9f8851ed92 --- /dev/null +++ b/ompi/mca/io/romio/romio/confdb/aclocal_subcfg.m4 @@ -0,0 +1,137 @@ +dnl PAC_RESET_ALL_FLAGS - Reset precious flags to those set by the user +AC_DEFUN([PAC_RESET_ALL_FLAGS],[ + if test "$FROM_MPICH2" = "yes" ; then + CFLAGS="$USER_CFLAGS" + CPPFLAGS="$USER_CPPFLAGS" + CXXFLAGS="$USER_CXXFLAGS" + FFLAGS="$USER_FFLAGS" + FCFLAGS="$USER_FCFLAGS" + LDFLAGS="$USER_LDFLAGS" + LIBS="$USER_LIBS" + fi +]) + +dnl PAC_RESET_LINK_FLAGS - Reset precious link flags to those set by the user +AC_DEFUN([PAC_RESET_LINK_FLAGS],[ + if test "$FROM_MPICH2" = "yes" ; then + LDFLAGS="$USER_LDFLAGS" + LIBS="$USER_LIBS" + fi +]) + +dnl Sandbox configure with additional arguments +dnl Usage: PAC_CONFIG_SUBDIR_ARGS(subdir,configure-args,action-if-success,action-if-failure) +dnl +dnl The subconfigure argument list is created based on "ac_precious_vars" +dnl instead of explicitly use of well-known Makefile variables, like +dnl CC/CFLAGS/CPPFLAGS..., this generalization is effective as long as +dnl calling configure.in declares the needed variables to be passed down +dnl to subconfigure as "precious" appropriately. The precious variable +dnl can be created in the following ways: +dnl 1) implicit declaration through use of autoconf macros, like +dnl AC_PROG_CC (declares CC/CFLAGS/CPPFLAGS/LIBS/LDFLAGS), or +dnl AC_PROG_F77 (declares F77/FFLAGS/FLIBS) ... +dnl which are in turns invoked by other subconfigure. +dnl When in doubt, check "ac_precious_var" in the calling configure. 
+dnl 2) explicit "precious" declaration through AC_ARG_VAR. +dnl Without correct "precious" declaration in the calling configure.in, +dnl there would be variables not being included in the subconfigure +dnl argument list. +dnl +dnl Note: I suspect this DEFUN body is underquoted in places, but it does not +dnl seem to cause problems in practice yet. [goodell@ 2010-05-18] +AC_DEFUN([PAC_CONFIG_SUBDIR_ARGS],[ + AC_MSG_NOTICE([===== configuring $1 =====]) + + PAC_MKDIRS($1) + pac_abs_srcdir=`(cd $srcdir && pwd)` + + if test -f $pac_abs_srcdir/$1/setup ; then + . $pac_abs_srcdir/$1/setup + fi + + pac_subconfigure_file="$pac_abs_srcdir/$1/configure" + if test -x $pac_subconfigure_file ; then + pac_subconfig_args="$2" + + # Set IFS so ac_configure_args can be tokenized + # with extra " " tokens being skipped. + saved_IFS="$IFS" + IFS="'" + for pac_arg in $ac_configure_args ; do + case "$pac_arg" in + # Ignore any null and leading blank strings. + ""|" "*) + ;; + *) + pac_pval="" + # Restore saved IFS so ac_precious_vars which has + # " " as separator can be correctly tokenized + IFS="$saved_IFS" + for pac_pvar in $ac_precious_vars ; do + # check if configure argument token contains the + # precious variable, i.e. "name_of_prec_var=". + pvar_in_arg=`echo $pac_arg | grep "$pac_pvar="` + if test "X$pvar_in_arg" != "X" ; then + # check if current precious variable is set in env + eval pvar_set=\${$pac_pvar+set} + if test "$pvar_set" = "set" ; then + # Append 'name_of_prec_var=value_of_prec_var' + # to the subconfigure arguments list, where + # value_of_prec_var is fetched from the env. + eval pac_pval=\${$pac_pvar} + pac_subconfig_args="$pac_subconfig_args '$pac_pvar=$pac_pval'" + break + fi + fi + done + # since the precious variable is not set in the env., + # append the corresponding configure argument token + # to the subconfigure argument list. 
+ if test "X$pac_pval" = "X" ; then + pac_subconfig_args="$pac_subconfig_args '$pac_arg'" + fi + # reset "'" as IFS to process ac_configure_args + saved_IFS="$IFS" + IFS="'" + ;; + esac + done + # Restore IFS. + IFS="$saved_IFS" + dnl echo "pac_subconfig_args = |$pac_subconfig_args|" + + dnl Add option to disable configure options checking + if test "$enable_option_checking" = no ; then + pac_subconfig_args="$pac_subconfig_args --disable-option-checking" + fi + + AC_MSG_NOTICE([executing: $pac_subconfigure_file $pac_subconfig_args]) + if (cd $1 && eval $pac_subconfigure_file $pac_subconfig_args) ; then + ifelse([$3],[],[:],[$3]) + else + ifelse([$4],[],[:],[$4]) + fi + else + if test -e $pac_subconfigure_file ; then + AC_MSG_WARN([$pac_subconfigure_file exists but is not executable]) + else + AC_MSG_WARN([$pac_subconfigure_file does not exist]) + fi + fi + + AC_MSG_NOTICE([===== done with $1 configure =====]) + + # Check for any localdefs files. These may be created, so we + # look in the local directory first. + if test -f $1/localdefs ; then + . $1/localdefs + elif test -f $pac_abs_srcdir/$1/localdefs ; then + . 
$pac_abs_srcdir/$1/localdefs + fi +]) + +dnl Sandbox configure +dnl Usage: PAC_CONFIG_SUBDIR(subdir,action-if-success,action-if-failure) +AC_DEFUN([PAC_CONFIG_SUBDIR],[PAC_CONFIG_SUBDIR_ARGS([$1],[],[$2],[$3])]) + diff --git a/ompi/mca/io/romio/romio/confdb/aclocal_util.m4 b/ompi/mca/io/romio/romio/confdb/aclocal_util.m4 new file mode 100644 index 0000000000..3eecb22772 --- /dev/null +++ b/ompi/mca/io/romio/romio/confdb/aclocal_util.m4 @@ -0,0 +1,191 @@ +dnl Nesting safe macros for saving variables +dnl Usage: PAC_PUSH_FLAG(CFLAGS) +AC_DEFUN([PAC_PUSH_FLAG],[ + if test -z "${pac_save_$1_nesting}" ; then + pac_save_$1_nesting=0 + fi + eval pac_save_$1_${pac_save_$1_nesting}='"$$1"' + pac_save_$1_nesting=`expr ${pac_save_$1_nesting} + 1` +]) + +dnl Usage: PAC_POP_FLAG(CFLAGS) +AC_DEFUN([PAC_POP_FLAG],[ + pac_save_$1_nesting=`expr ${pac_save_$1_nesting} - 1` + eval $1="\$pac_save_$1_${pac_save_$1_nesting}" + eval pac_save_$1_${pac_save_$1_nesting}="" +]) + +dnl Usage: PAC_PUSH_ALL_FLAGS +AC_DEFUN([PAC_PUSH_ALL_FLAGS],[ + PAC_PUSH_FLAG(CFLAGS) + PAC_PUSH_FLAG(CPPFLAGS) + PAC_PUSH_FLAG(CXXFLAGS) + PAC_PUSH_FLAG(FFLAGS) + PAC_PUSH_FLAG(FCFLAGS) + PAC_PUSH_FLAG(LDFLAGS) + PAC_PUSH_FLAG(LIBS) +]) + +dnl Usage: PAC_POP_ALL_FLAGS +AC_DEFUN([PAC_POP_ALL_FLAGS],[ + PAC_POP_FLAG(CFLAGS) + PAC_POP_FLAG(CPPFLAGS) + PAC_POP_FLAG(CXXFLAGS) + PAC_POP_FLAG(FFLAGS) + PAC_POP_FLAG(FCFLAGS) + PAC_POP_FLAG(LDFLAGS) + PAC_POP_FLAG(LIBS) +]) + +dnl PAC_PREFIX_FLAG - Save flag with a prefix +dnl Usage: PAC_PREFIX_FLAG(PREFIX, FLAG) +AC_DEFUN([PAC_PREFIX_FLAG],[ + $1_$2=$$2 + export $1_$2 + AC_SUBST($1_$2) +]) + +dnl PAC_PREFIX_ALL_FLAGS - Save flags with a prefix +dnl Usage: PAC_PREFIX_ALL_FLAGS(PREFIX) +AC_DEFUN([PAC_PREFIX_ALL_FLAGS],[ + PAC_PREFIX_FLAG($1, CFLAGS) + PAC_PREFIX_FLAG($1, CPPFLAGS) + PAC_PREFIX_FLAG($1, CXXFLAGS) + PAC_PREFIX_FLAG($1, FFLAGS) + PAC_PREFIX_FLAG($1, FCFLAGS) + PAC_PREFIX_FLAG($1, LDFLAGS) + PAC_PREFIX_FLAG($1, LIBS) +]) + +dnl Usage: PAC_APPEND_FLAG([-02], 
[CFLAGS]) +dnl appends the given argument to the specified shell variable unless the +dnl argument is already present in the variable +AC_DEFUN([PAC_APPEND_FLAG],[ + AC_REQUIRE([AC_PROG_FGREP]) + AS_IF( + [echo "$$2" | $FGREP -e '$1' >/dev/null 2>&1], + [echo "$2(='$$2') contains '$1', not appending" >&AS_MESSAGE_LOG_FD], + [echo "$2(='$$2') does not contain '$1', appending" >&AS_MESSAGE_LOG_FD + $2="$$2 $1"] + ) +]) + +dnl Usage: PAC_PREPEND_FLAG([-lpthread], [LIBS]) +dnl Prepends the given argument to the specified shell variable unless the +dnl argument is already present in the variable. +dnl +dnl This is typically used for LIBS and similar variables because libraries +dnl should be added in reverse order. +AC_DEFUN([PAC_PREPEND_FLAG],[ + AC_REQUIRE([AC_PROG_FGREP]) + AS_IF( + [echo "$$2" | $FGREP -e '$1' >/dev/null 2>&1], + [echo "$2(='$$2') contains '$1', not prepending" >&AS_MESSAGE_LOG_FD], + [echo "$2(='$$2') does not contain '$1', prepending" >&AS_MESSAGE_LOG_FD + $2="$1 $$2"] + ) +]) + + +dnl PAC_MKDIRS(path) +dnl Create any missing directories in the path +AC_DEFUN([PAC_MKDIRS],[ +# Build any intermediate directories +for dir in $1 ; do + saveIFS="$IFS" + IFS="/" + tmp_curdir="" + for tmp_subdir in $dir ; do + tmp_curdir="${tmp_curdir}$tmp_subdir" + if test ! -d "$tmp_curdir" ; then mkdir "$tmp_curdir" ; fi + tmp_curdir="${tmp_curdir}/" + done + IFS="$saveIFS" +done +]) + +# Find something to use for mkdir -p. Eventually, this will have a +# script for backup. As of autoconf-2.63, AC_PROG_MKDIR_P was broken; +# it was checking to see if it recognized the "version" of mkdir and +# was deciding based on that. This should always be a feature test. 
+AC_DEFUN([PAC_PROG_MKDIR_P],[ +AC_CACHE_CHECK([whether mkdir -p works], +pac_cv_mkdir_p,[ +pac_cv_mkdir_p=no +rm -rf .tmp +if mkdir -p .tmp/.foo 1>/dev/null 2>&1 ; then + if test -d .tmp/.foo ; then + pac_cv_mkdir_p=yes + fi +fi +rm -rf .tmp +]) +if test "$pac_cv_mkdir_p" = "yes" ; then + MKDIR_P="mkdir -p" + export MKDIR_P +else + AC_MSG_WARN([mkdir -p does not work; the install step may fail]) +fi +AC_SUBST(MKDIR_P) +]) + +dnl Test for a clean VPATH directory. Provide this command with the names +dnl of all of the generated files that might cause problems +dnl (Makefiles won't cause problems because there's no VPATH usage for them) +dnl +dnl Synopsis +dnl PAC_VPATH_CHECK([file-names],[directory-names]) +dnl file-names should be files other than config.status and any header (e.g., +dnl fooconf.h) file that should be removed. It is optional +AC_DEFUN([PAC_VPATH_CHECK],[ +# This is needed for Mac OSX 10.5 +rm -rf conftest.dSYM +rm -f conftest* +date >conftest$$ +# If creating a file in the current directory does not show up in the srcdir +# then we're doing a VPATH build (or something is very wrong) +if test ! 
-s $srcdir/conftest$$ ; then + pac_dirtyfiles="" + pac_dirtydirs="" + pac_header="" + ifdef([AC_LIST_HEADER],[pac_header=AC_LIST_HEADER]) + for file in config.status $pac_header $1 ; do + if test -f $srcdir/$file ; then + pac_dirtyfiles="$pac_dirtyfiles $file" + fi + done + ifelse($2,,,[ + for dir in $2 ; do + if test -d $srcdir/$dir ; then + pac_dirtydirs="$pac_dirtydirs $dir" + fi + done + ]) + + if test -n "$pac_dirtyfiles" -o -n "$pac_dirtydirs" ; then + # Create a nice message about what to remove + rmmsg="" + if test -n "$pac_dirtyfiles" ; then + rmmsg="files $pac_dirtyfiles" + fi + if test -n "$pac_dirtydirs" ; then + if test -n "$rmmsg" ; then + rmmsg="$rmmsg and directories $pac_dirtydirs" + else + rmmsg="directories $pac_dirtydirs" + fi + fi + if test -f $srcdir/Makefile ; then + AC_MSG_ERROR([You cannot do a VPATH build if the source directory has been + configured. Run "make distclean" in $srcdir first and make sure that the + $rmmsg have been removed.]) + else + AC_MSG_ERROR([You cannot do a VPATH build if the source directory has been + configured. Remove the $rmmsg in $srcdir.]) + fi + fi +fi +# This is needed for Mac OSX 10.5 +rm -rf conftest.dSYM +rm -f conftest* +]) diff --git a/ompi/mca/io/romio/romio/confdb/ax_prefix_config_h.m4 b/ompi/mca/io/romio/romio/confdb/ax_prefix_config_h.m4 new file mode 100644 index 0000000000..83f8df6f4b --- /dev/null +++ b/ompi/mca/io/romio/romio/confdb/ax_prefix_config_h.m4 @@ -0,0 +1,219 @@ +# =========================================================================== +# http://autoconf-archive.cryp.to/ax_prefix_config_h.html +# =========================================================================== +# +# SYNOPSIS +# +# AX_PREFIX_CONFIG_H [(OUTPUT-HEADER [,PREFIX [,ORIG-HEADER]])] +# +# DESCRIPTION +# +# This is a new variant from ac_prefix_config_ this one will use a +# lowercase-prefix if the config-define was starting with a +# lowercase-char, e.g. 
"#define const", "#define restrict", or "#define +# off_t", (and this one can live in another directory, e.g. +# testpkg/config.h therefore I decided to move the output-header to be the +# first arg) +# +# takes the usual config.h generated header file; looks for each of the +# generated "#define SOMEDEF" lines, and prefixes the defined name (ie. +# makes it "#define PREFIX_SOMEDEF". The result is written to the output +# config.header file. The PREFIX is converted to uppercase for the +# conversions. +# +# Defaults: +# +# OUTPUT-HEADER = $PACKAGE-config.h +# PREFIX = $PACKAGE +# ORIG-HEADER, from AM_CONFIG_HEADER(config.h) +# +# Your configure.ac script should contain both macros in this order, and +# unlike the earlier variations of this prefix-macro it is okay to place +# the AX_PREFIX_CONFIG_H call before the AC_OUTPUT invokation. +# +# Example: +# +# AC_INIT(config.h.in) # config.h.in as created by "autoheader" +# AM_INIT_AUTOMAKE(testpkg, 0.1.1) # makes #undef VERSION and PACKAGE +# AM_CONFIG_HEADER(config.h) # prep config.h from config.h.in +# AX_PREFIX_CONFIG_H(mylib/_config.h) # prep mylib/_config.h from it.. 
+# AC_MEMORY_H # makes "#undef NEED_MEMORY_H" +# AC_C_CONST_H # makes "#undef const" +# AC_OUTPUT(Makefile) # creates the "config.h" now +# # and also mylib/_config.h +# +# if the argument to AX_PREFIX_CONFIG_H would have been omitted then the +# default outputfile would have been called simply "testpkg-config.h", but +# even under the name "mylib/_config.h" it contains prefix-defines like +# +# #ifndef TESTPKG_VERSION +# #define TESTPKG_VERSION "0.1.1" +# #endif +# #ifndef TESTPKG_NEED_MEMORY_H +# #define TESTPKG_NEED_MEMORY_H 1 +# #endif +# #ifndef _testpkg_const +# #define _testpkg_const _const +# #endif +# +# and this "mylib/_config.h" can be installed along with other +# header-files, which is most convenient when creating a shared library +# (that has some headers) where some functionality is dependent on the +# OS-features detected at compile-time. No need to invent some +# "mylib-confdefs.h.in" manually. :-) +# +# Note that some AC_DEFINEs that end up in the config.h file are actually +# self-referential - e.g. AC_C_INLINE, AC_C_CONST, and the AC_TYPE_OFF_T +# say that they "will define inline|const|off_t if the system does not do +# it by itself". You might want to clean up about these - consider an +# extra mylib/conf.h that reads something like: +# +# #include +# #ifndef _testpkg_const +# #define _testpkg_const const +# #endif +# +# and then start using _testpkg_const in the header files. That is also a +# good thing to differentiate whether some library-user has starting to +# take up with a different compiler, so perhaps it could read something +# like this: +# +# #ifdef _MSC_VER +# #include +# #else +# #include +# #endif +# #ifndef _testpkg_const +# #define _testpkg_const const +# #endif +# +# LAST MODIFICATION +# +# 2008-04-12 +# +# COPYLEFT +# +# Copyright (c) 2008 Guido U. 
Draheim +# Copyright (c) 2008 Marten Svantesson +# Copyright (c) 2008 Gerald Point +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation; either version 2 of the License, or (at your +# option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +# Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program. If not, see . +# +# As a special exception, the respective Autoconf Macro's copyright owner +# gives unlimited permission to copy, distribute and modify the configure +# scripts that are the output of Autoconf when processing the Macro. You +# need not follow the terms of the GNU General Public License when using +# or distributing such scripts, even though portions of the text of the +# Macro appear in them. The GNU General Public License (GPL) does govern +# all other use of the material that constitutes the Autoconf Macro. +# +# This special exception to the GPL applies to versions of the Autoconf +# Macro released by the Autoconf Macro Archive. When you make and +# distribute a modified version of the Autoconf Macro, you may extend this +# special exception to the GPL to apply to your modified version as well. 
+ +AC_DEFUN([AX_PREFIX_CONFIG_H],[dnl +AC_BEFORE([AC_CONFIG_HEADERS],[$0])dnl +AC_CONFIG_COMMANDS([ifelse($1,,$PACKAGE-config.h,$1)],[dnl +AS_VAR_PUSHDEF([_OUT],[ac_prefix_conf_OUT])dnl +AS_VAR_PUSHDEF([_DEF],[ac_prefix_conf_DEF])dnl +AS_VAR_PUSHDEF([_PKG],[ac_prefix_conf_PKG])dnl +AS_VAR_PUSHDEF([_LOW],[ac_prefix_conf_LOW])dnl +AS_VAR_PUSHDEF([_UPP],[ac_prefix_conf_UPP])dnl +AS_VAR_PUSHDEF([_INP],[ac_prefix_conf_INP])dnl +m4_pushdef([_script],[conftest.prefix])dnl +m4_pushdef([_symbol],[m4_cr_Letters[]m4_cr_digits[]_])dnl +_OUT=`echo ifelse($1, , $PACKAGE-config.h, $1)` +_DEF=`echo _$_OUT | sed -e "y:m4_cr_letters:m4_cr_LETTERS[]:" -e "s/@<:@^m4_cr_Letters@:>@/_/g"` +_PKG=`echo ifelse($2, , $PACKAGE, $2)` +_LOW=`echo _$_PKG | sed -e "y:m4_cr_LETTERS-:m4_cr_letters[]_:"` +_UPP=`echo $_PKG | sed -e "y:m4_cr_letters-:m4_cr_LETTERS[]_:" -e "/^@<:@m4_cr_digits@:>@/s/^/_/"` +_INP=`echo "ifelse($3,,,$3)" | sed -e 's/ *//'` +if test ".$_INP" = "."; then + for ac_file in : $CONFIG_HEADERS; do test "_$ac_file" = _: && continue + case "$ac_file" in + *.h) _INP=$ac_file ;; + *) + esac + test ".$_INP" != "." && break + done +fi +if test ".$_INP" = "."; then + case "$_OUT" in + */*) _INP=`basename "$_OUT"` + ;; + *-*) _INP=`echo "$_OUT" | sed -e "s/@<:@_symbol@:>@*-//"` + ;; + *) _INP=config.h + ;; + esac +fi +if test -z "$_PKG" ; then + AC_MSG_ERROR([no prefix for _PREFIX_PKG_CONFIG_H]) +else + if test ! 
-f "$_INP" ; then if test -f "$srcdir/$_INP" ; then + _INP="$srcdir/$_INP" + fi fi + AC_MSG_NOTICE(creating $_OUT - prefix $_UPP for $_INP defines) + if test -f $_INP ; then + echo "s/^@%:@undef *\\(@<:@m4_cr_LETTERS[]_@:>@\\)/@%:@undef $_UPP""_\\1/" > _script + echo "s/^@%:@undef *\\(@<:@m4_cr_letters@:>@\\)/@%:@undef $_LOW""_\\1/" >> _script + echo "s/^@%:@def[]ine *\\(@<:@m4_cr_LETTERS[]_@:>@@<:@_symbol@:>@*\\)\\(.*\\)/@%:@ifndef $_UPP""_\\1 \\" >> _script + echo "@%:@def[]ine $_UPP""_\\1 \\2 \\" >> _script + echo "@%:@endif/" >>_script + echo "s/^@%:@def[]ine *\\(@<:@m4_cr_letters@:>@@<:@_symbol@:>@*\\)\\(.*\\)/@%:@ifndef $_LOW""_\\1 \\" >> _script + echo "@%:@define $_LOW""_\\1 \\2 \\" >> _script + echo "@%:@endif/" >> _script + # now executing _script on _DEF input to create _OUT output file + echo "@%:@ifndef $_DEF" >$tmp/pconfig.h + echo "@%:@def[]ine $_DEF 1" >>$tmp/pconfig.h + echo ' ' >>$tmp/pconfig.h + echo /'*' $_OUT. Generated automatically at end of configure. '*'/ >>$tmp/pconfig.h + + sed -f _script $_INP >>$tmp/pconfig.h + echo ' ' >>$tmp/pconfig.h + echo '/* once:' $_DEF '*/' >>$tmp/pconfig.h + echo "@%:@endif" >>$tmp/pconfig.h + if cmp -s $_OUT $tmp/pconfig.h 2>/dev/null; then + AC_MSG_NOTICE([$_OUT is unchanged]) + else + ac_dir=`AS_DIRNAME(["$_OUT"])` + AS_MKDIR_P(["$ac_dir"]) + rm -f "$_OUT" + mv $tmp/pconfig.h "$_OUT" + fi + cp _script _configs.sed + else + AC_MSG_ERROR([input file $_INP does not exist - skip generating $_OUT]) + fi + rm -f conftest.* +fi +m4_popdef([_symbol])dnl +m4_popdef([_script])dnl +AS_VAR_POPDEF([_INP])dnl +AS_VAR_POPDEF([_UPP])dnl +AS_VAR_POPDEF([_LOW])dnl +AS_VAR_POPDEF([_PKG])dnl +AS_VAR_POPDEF([_DEF])dnl +AS_VAR_POPDEF([_OUT])dnl +],[PACKAGE="$PACKAGE"])]) + +dnl implementation note: a bug report (31.5.2005) from Marten Svantesson points +dnl out a problem where `echo "\1"` results in a Control-A. 
The unix standard +dnl http://www.opengroup.org/onlinepubs/000095399/utilities/echo.html +dnl defines all backslash-sequences to be inherently non-portable asking +dnl for replacement with printf. Some old systems had problems with that +dnl one either. However, the latest libtool (!) release does export an $ECHO +dnl (and $echo) that does the right thing - just one question is left: what +dnl was the first version to have it? Is it greater than 2.58? diff --git a/ompi/mca/io/romio/romio/confdb/ax_tls.m4 b/ompi/mca/io/romio/romio/confdb/ax_tls.m4 new file mode 100644 index 0000000000..f7fc68840b --- /dev/null +++ b/ompi/mca/io/romio/romio/confdb/ax_tls.m4 @@ -0,0 +1,74 @@ +# =========================================================================== +# http://www.nongnu.org/autoconf-archive/ax_tls.html +# =========================================================================== +# +# SYNOPSIS +# +# AX_TLS +# +# DESCRIPTION +# +# Provides a test for the compiler support of thread local storage (TLS) +# extensions. Defines TLS if it is found. Currently only knows about GCC +# and MSVC. I think SunPro uses the same as GCC, and Borland apparently +# supports either. +# +# LICENSE +# +# Copyright (c) 2008 Alan Woodland +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +# Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program. If not, see . 
+# +# As a special exception, the respective Autoconf Macro's copyright owner +# gives unlimited permission to copy, distribute and modify the configure +# scripts that are the output of Autoconf when processing the Macro. You +# need not follow the terms of the GNU General Public License when using +# or distributing such scripts, even though portions of the text of the +# Macro appear in them. The GNU General Public License (GPL) does govern +# all other use of the material that constitutes the Autoconf Macro. +# +# This special exception to the GPL applies to versions of the Autoconf +# Macro released by the Autoconf Archive. When you make and distribute a +# modified version of the Autoconf Macro, you may extend this special +# exception to the GPL to apply to your modified version as well. + +AC_DEFUN([AX_TLS], [ + AC_MSG_CHECKING(for thread local storage specifier) + AC_CACHE_VAL(ac_cv_tls, [ + ax_tls_keywords="__thread __declspec(thread) none" + for ax_tls_keyword in $ax_tls_keywords; do + case $ax_tls_keyword in + none) ac_cv_tls=none ; break ;; + *) + # MPICH2 modification: This was an AC_TRY_COMPILE before, but + # Darwin with non-standard compilers will accept __thread at + # compile time but fail to link due to an undefined + # "__emutls_get_address" symbol unless -lgcc_eh is added to the + # link line. + AC_LINK_IFELSE( + [AC_LANG_PROGRAM([$ax_tls_keyword int bar = 5;],[++bar;])], + [ac_cv_tls=$ax_tls_keyword ; break], + [ac_cv_tls=none]) + esac + done +]) + + if test "$ac_cv_tls" != "none"; then + # MPICH2 modification: this was "TLS" before instead of + # "MPIU_TLS_SPECIFIER", but TLS had a reasonably high chance of conflicting + # with a system library. 
+ AC_DEFINE_UNQUOTED([MPIU_TLS_SPECIFIER], $ac_cv_tls, [If the compiler supports a TLS storage class define it to that here]) + fi + AC_MSG_RESULT($ac_cv_tls) +]) diff --git a/ompi/mca/io/romio/romio/configure.in b/ompi/mca/io/romio/romio/configure.in index 637688eb0f..ff22bb7769 100644 --- a/ompi/mca/io/romio/romio/configure.in +++ b/ompi/mca/io/romio/romio/configure.in @@ -3,6 +3,9 @@ # autoconf --localdir=../confdb configure.in # (or wherever the confdb is) # +# irrelevant / unnecessary in an Open MPI environment, but are +# harmless and are left here solely for the sake of ease of future +# patching/importing. # Open MPI: Modifications to this file were done on an "let's do the # minimum possible" basis, not so that we can skip on the work or @@ -33,7 +36,6 @@ if test "$FROM_MPICH2" = "yes" ; then fi AC_CONFIG_HEADER(adio/include/romioconf.h) - # Open MPI: added AH_TOP AH_TOP([#include "romioconf-undefs.h"]) @@ -106,7 +108,6 @@ MPI_FARRAY6="" MPI_FARRAY7="" DEFS="" ROMIO_LFLAGS="" -ROMIO_LIBLIST="" ROMIO_TCFLAGS="" ROMIO_TCPPFLAGS="" ROMIO_TFFLAGS="" @@ -143,7 +144,7 @@ MPIO_REQ_REAL_POBJECTS="_iotest.o _iowait.o _iowaitall.o _iowaitany.o _iotestall # have_aio=no # -known_filesystems="nfs ufs pfs pvfs pvfs2 testfs xfs panfs gridftp lustre bgl bglockless" +known_filesystems="nfs ufs pfs pvfs pvfs2 testfs xfs panfs gridftp lustre bgl bglockless zoidfs" # Open MPI: added "open_mpi_mpi" known_mpi_impls="mpich2_mpi mpich_mpi sgi_mpi hp_mpi cray_mpi lam_mpi open_mpi_mpi" # @@ -202,8 +203,11 @@ dnl the moment to allow ROMIO to be separatedly distributed. dnl scripts. AC_CONFIG_AUX_DIR(confdb) +# Check if Make is working +PAC_PROG_MAKE + # Open MPI: Init automake -AM_INIT_AUTOMAKE(io-romio, 1.0.0, 'no') +AM_INIT_AUTOMAKE(io-romio, 1.2.6, 'no') # Open MPI: If Automake supports silent rules, enable them. m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])]) @@ -479,13 +483,13 @@ case $ARCH in ;; esac -AC_PROG_CC +PAC_PROG_CC if test "$NOF77" != 1 ; then # Grrr. 
The autoconf test for F77 will abort the configure # if no compiler is found. We'd prefer to simply turn off # support for Fortran, and/or give a more informative message. - AC_PROG_F77 + PAC_PROG_F77 fi if test "$CC" = "gcc" -a -z "$C_DEBUG_FLAG" ; then C_DEBUG_FLAG="-g -O -Wall -Wstrict-prototypes -Wmissing-prototypes" @@ -523,7 +527,7 @@ if test -n "$arch_rs6000"; then fi AC_DEFINE(AIX,1,[Define for AIX]) # assume long long exists. - longlongsize=${longlongsize:-8} + ac_cv_sizeof_long_long=${ac_cv_sizeof_long_long:-8} MPI_OFFSET_KIND1=" INTEGER MPI_OFFSET_KIND" MPI_OFFSET_KIND2=" PARAMETER (MPI_OFFSET_KIND=8)" MPI_OFFSET_KIND_VAL=8 @@ -547,7 +551,7 @@ fi # if test -n "$arch_freebsd" || test -n "$arch_LINUX" || test -n "$arch_LINUX_ALPHA" || test -n "$arch_netbsd" || test -n "$arch_openbsd" ; then if test -n "$arch_freebsd" || test -n "$arch_netbsd" || test -n "$arch_openbsd"; then - longlongsize=${longlongsize:-0} + ac_cv_sizeof_long_long=${ac_cv_sizeof_long_long:-0} # printf doesn't work properly and no integer*8 as far as I can tell fi # Find the CPP before the header check @@ -634,7 +638,7 @@ if test -n "$arch_alpha" || test -n "$arch_ALPHA" ; then MPI_OFFSET_KIND1=" INTEGER MPI_OFFSET_KIND" MPI_OFFSET_KIND2=" PARAMETER (MPI_OFFSET_KIND=8)" MPI_OFFSET_KIND_VAL=8 - ROMIO_LIBLIST="$ROMIO_LIBLIST -laio" + LIBS="$LIBS -laio" fi # if test -n "$arch_CRAY" ; then @@ -719,7 +723,7 @@ if test -n "$arch_IRIX"; then mpi_mpich=1 fi fi - #OMPI: Bad for automake: RANLIB=":" + #OMPI: Bad for automake: RANLIB=":" AC_DEFINE(AIO_SIGNOTIFY_NONE,1,[Define if no signotify]) if test $cputype -ge 5000 ; then MIPS=4 @@ -783,12 +787,29 @@ dnl AC_C_INLINE # Header files # Find the CPP before the header check AC_PROG_CPP -AC_CHECK_HEADERS(unistd.h fcntl.h malloc.h stddef.h) +AC_CHECK_HEADERS([unistd.h fcntl.h malloc.h stddef.h sys/types.h]) # -CROSS_SIZEOF_INT=${CROSS_SIZEOF_INT:-0} -CROSS_SIZEOF_VOID_P=${CROSS_SIZEOF_VOID_P:-0} -AC_CHECK_SIZEOF(int,$CROSS_SIZEOF_INT) 
-AC_CHECK_SIZEOF(void *,$CROSS_SIZEOF_VOID_P) + +# When compiling ROMIO on Darwin with _POSIX_C_SOURCE defined (such as when +# using --enable-strict in MPICH2), sys/types.h does not define u_short and +# friends unless _DARWIN_C_SOURCE is also defined (see compat(5) on a Darwin +# box). This would normally be fine, except sys/stat.h defines struct stat to +# use u_long, so strict compiles fail. One option is to also compile with +# _DARWIN_C_SOURCE, but this disables much of the strictness that is intended +# by _POSIX_C_SOURCE. Instead we just define our own types if they are not +# provided by the system. This isn't quite as safe as typedef'ing the +# replacement types, but it will apply to later configure tests, which is +# important. +AC_CHECK_TYPE([u_char],[],[AC_DEFINE_UNQUOTED([u_char],[unsigned char],[Define to "unsigned char" if sys/types.h does not define.])]) +AC_CHECK_TYPE([u_short],[],[AC_DEFINE_UNQUOTED([u_short],[unsigned short],[Define to "unsigned short" if sys/types.h does not define.])]) +AC_CHECK_TYPE([u_int],[],[AC_DEFINE_UNQUOTED([u_int],[unsigned int],[Define to "unsigned int" if sys/types.h does not define.])]) +AC_CHECK_TYPE([u_long],[],[AC_DEFINE_UNQUOTED([u_long],[unsigned long],[Define to "unsigned long" if sys/types.h does not define.])]) + +# must come _after_ the above checks for u_char/u_short/u_int/u_long +AC_CHECK_HEADERS([sys/attr.h]) + +AC_CHECK_SIZEOF(int) +AC_CHECK_SIZEOF(void *) AC_CACHE_CHECK([for int large enough for pointers], pac_cv_int_hold_pointer,[ if test "$ac_cv_sizeof_int" = "0" -o \ @@ -805,14 +826,10 @@ if test "$pac_cv_int_hold_pointer" != yes ; then dnl Switch to a conforming name (start with HAVE or USE) AC_DEFINE(HAVE_INT_LT_POINTER,1,[Define if int smaller than pointer]) fi -# -dnl The original ROMIO configure used a set of complex tests here; this -dnl is a partial reworking using the autoconf2 sizeof macros, and allowing -dnl for the standardized CROSS_xxx varaibles for cross-compilation environments + # LL 
is the printf-style format name for output of a MPI_Offset. # We have to match this to the type that we use for MPI_Offset. -CROSS_SIZEOF_LONG_LONG=${CROSS_SIZEOF_LONG_LONG:-0} -AC_CHECK_SIZEOF(long long,$CROSS_SIZEOF_LONG_LONG) +AC_CHECK_SIZEOF(long long) if test "$ac_cv_sizeof_long_long" != 0 ; then if test "$ac_cv_sizeof_long_long" = "8" ; then AC_DEFINE(HAVE_LONG_LONG_64,1,[Define if long long is 64 bits]) @@ -849,7 +866,7 @@ fi # -if test -n "$longlongsize"; then +if test -n "$ac_cv_sizeof_long_long"; then if test $WITHIN_KNOWN_MPI_IMPL = no ; then PAC_MPI_LONG_LONG_INT else @@ -1132,7 +1149,6 @@ if test $PVFS2_CONFIG != "notfound" ; then # --with-file-system (or both) CFLAGS="$CFLAGS $( $PVFS2_CONFIG --cflags)" LIBS="$LIBS $( $PVFS2_CONFIG --libs)" - ROMIO_LIBLIST="$ROMIO_LIBLIST $LIBS" FILE_SYSTEM="pvfs2 $FILE_SYSTEM" file_system_pvfs2=1 fi @@ -1185,8 +1201,20 @@ fi if test -n "$file_system_testfs"; then AC_DEFINE(ROMIO_TESTFS,1,[Define for ROMIO with TESTFS]) fi +# +# Verify presence of lustre/lustre_user.h +# if test -n "$file_system_lustre"; then - AC_DEFINE(ROMIO_LUSTRE,1,[Define for ROMIO with LUSTRE]) + AC_CHECK_HEADERS(lustre/lustre_user.h, + AC_DEFINE(ROMIO_LUSTRE,1,[Define for ROMIO with LUSTRE]), + AC_MSG_ERROR([LUSTRE support requested but cannot find lustre/lustre_user.h header file]), + [ + #include + #ifdef __linux__ + #include + #endif + ] + ) fi if test -n "$file_system_xfs"; then @@ -1271,6 +1299,13 @@ fi +if test -n "$file_system_zoidfs"; then + AC_CHECK_HEADERS(zoidfs.h, + AC_DEFINE(ROMIO_ZOIDFS,1,[Define for ROMIO with ZoidFD]), + AC_MSG_ERROR([ZoidFS support requested but cannot find zoidfs.h header file]) + ) +fi + # # Verify presence of pvfs2.h # @@ -1285,8 +1320,8 @@ fi # layout change after pvfs-2.6.3: if test -n "$file_system_pvfs2"; then - AC_COMPILE_IFELSE( - [ + AC_COMPILE_IFELSE([ + AC_LANG_SOURCE([ #include #include "pvfs2.h" int main(int argc, char **argv) { @@ -1294,7 +1329,7 @@ if test -n "$file_system_pvfs2"; then 
PVFS_sys_attr attr; PVFS_sys_create(NULL, ref, attr, NULL, NULL, NULL, NULL); return 0; } - ], + ])], , AC_DEFINE(HAVE_PVFS2_CREATE_WITHOUT_LAYOUT, 1, [Define if PVFS_sys_create does not have layout parameter]) ) @@ -1309,15 +1344,10 @@ if test -n "$file_system_bgl"; then SYSDEP_INC=-I${prefix}/include else SYSDEP_INC= -# + # Check for presence and characteristics of async. I/O calls if # not disabled. -# -# Q: Do we need to list some "other libs" to get things to link? -# -save_libs=$LIBS -LIBS= -# + # Some systems need pthreads to get AIO to work. However, we don't want # to add pthreads just because it is there, as that can cause problems # with some implementations of pthreads and compilers (e.g., gcc version 3 @@ -1326,59 +1356,16 @@ LIBS= # *not linked* with pthreads. # if test "x$disable_aio" = "xno" ; then - foundPTHREAD=no - - # Do we have aio_write in aio or rt? - saveLIBS=$LIBS - LIBS= - AC_SEARCH_LIBS(aio_write,aio rt,foundAIO=yes,foundAIO=no) - if test "$foundAIO" = yes ; then - AIOLIBS=$LIBS - LIBS="$saveLIBS $LIBS" - else - LIBS="$saveLIBS" - fi - - # If not, try finding pthread_create first, and if found, try the - # test again. - if test "$foundAIO" = no ; then - saveLIBS=$LIBS - LIBS= - AC_SEARCH_LIBS(pthread_create,pthread,foundPTHREAD=yes, - foundPTHREAD=no) + AC_SEARCH_LIBS(aio_write,aio rt,aio_write_found=yes,aio_write_found=no) + if test "$aio_write_found" = no ; then + # If not found, try finding pthread_create first, and if + # found, try the test again. 
+ AC_SEARCH_LIBS(pthread_create,pthread,foundPTHREAD=yes,foundPTHREAD=no) if test "$foundPTHREAD" = yes ; then - AC_SEARCH_LIBS(aio_write,aio rt,foundAIO=yes,foundAIO=no) - - if test "$foundAIO" = yes ; then - AIO_LIBS=$LIBS - LIBS="$saveLIBS $LIBS" - else - LIBS=$saveLIBS - fi - else - LIBS=$saveLIBS + AC_SEARCH_LIBS(aio_write,aio rt,aio_write_found=yes,aio_write_found=no) fi fi - - - if test "$foundAIO" = yes ; then - ROMIO_LIBLIST="$ROMIO_LIBLIST $AIOLIBS" - MPI_LIB="$MPI_LIB $AIOLIBS" - aio_write_found=yes - fi - -dnl AC_SEARCH_LIBS(pthread_create,pthread, -dnl ROMIO_LIBLIST="$ROMIO_LIBLIST $LIBS" -dnl MPI_LIB="$MPI_LIB $LIBS" -dnl ) -dnl LIBS= -dnl AC_SEARCH_LIBS(aio_write,aio rt, -dnl ROMIO_LIBLIST="$ROMIO_LIBLIST $LIBS" -dnl MPI_LIB="$MPI_LIB $LIBS" -dnl aio_write_found=yes -dnl ) fi -LIBS=$save_libs if test "x$disable_aio" = "xno" -a -n "$aio_write_found" ; then AC_CHECK_HEADERS(signal.h) @@ -1415,14 +1402,13 @@ if test "$have_aio_h" = "yes" -o "$have_sys_aio_h" = "yes" -o "x$disable_aio" = # The test is the following: if not cross compiling, try to run a # program that includes a *reference* to aio_write but does not call it # If the libraries are not set up correctly, then this will fail. - save_libs=$LIBS - LIBS="$LIBS $ROMIO_LIBLIST" + AC_MSG_CHECKING([whether aio routines can be used]) # Include aio.h and the aiocb struct (since we'll need these to # actually use the aio_write interface). 
Note that this will # fail for some pre-POSIX implementations of the aio interface # (an old IBM interface needs an fd argument as well) - AC_COMPILE_IFELSE([AC_LANG_PROGRAM([ + AC_TRY_RUN([ #include #ifdef HAVE_SIGNAL_H #include @@ -1433,22 +1419,24 @@ if test "$have_aio_h" = "yes" -o "$have_sys_aio_h" = "yes" -o "x$disable_aio" = #ifdef HAVE_SYS_AIO_H #include #endif -],[ + int main(int argc, char **argv) + { struct aiocb *aiocbp; - aio_write(aiocbp); + if (argc > 10) aio_write(aiocbp); return 0; -])], - [aio_runs=yes - AC_MSG_RESULT(yes)], - [aio_runs=no - AC_MSG_RESULT(no)] - [aio_runs=no - AC_MSG_RESULT(no: aio routines disabled when cross compiling)] + } + ], + aio_runs=yes + AC_MSG_RESULT(yes), + aio_runs=no + AC_MSG_RESULT(no), + aio_runs=no + AC_MSG_RESULT(no: aio routines disabled when cross compiling) ) if test "$aio_runs" != "no" ; then AC_DEFINE(ROMIO_HAVE_WORKING_AIO, 1, Define if AIO calls seem to work) fi - + # now about that old IBM interface... # modern AIO interfaces have the file descriptor in the aiocb structure, # and will set ROMIO_HAVE_STRUCT_AIOCB_WITH_AIO_FILDES. 
Old IBM @@ -1460,7 +1448,7 @@ if test "$have_aio_h" = "yes" -o "$have_sys_aio_h" = "yes" -o "x$disable_aio" = # aio_read correctly AC_MSG_CHECKING([for obsolete two-argument aio_write]) - AC_COMPILE_IFELSE([AC_LANG_PROGRAM([ + AC_TRY_RUN([ #include #ifdef HAVE_SIGNAL_H #include @@ -1471,18 +1459,20 @@ if test "$have_aio_h" = "yes" -o "$have_sys_aio_h" = "yes" -o "x$disable_aio" = #ifdef HAVE_SYS_AIO_H #include #endif -],[ + int main(int argc, char **argv) + { int fd; struct aiocb *aiocbp; - aio_write(fd, aiocbp); + if (argc > 10) aio_write(fd, aiocbp); return 0; -])], - [aio_two_arg_write=yes - AC_MSG_RESULT(yes)], - [aio_two_arg_write=no - AC_MSG_RESULT(no)] - [aio_two_arg_write=no - AC_MSG_RESULT(no: cannot test when cross-compiling)] + } + ], + aio_two_arg_write=yes + AC_MSG_RESULT(yes), + aio_two_arg_write=no + AC_MSG_RESULT(no), + aio_two_arg_write=no + AC_MSG_RESULT(no: cannot test when cross-compiling) ) if test "$aio_two_arg_write" != "no" -a "$aio_runs" != "yes" ; then @@ -1491,7 +1481,7 @@ if test "$have_aio_h" = "yes" -o "$have_sys_aio_h" = "yes" -o "x$disable_aio" = fi AC_MSG_CHECKING([for obsolete two-argument aio_suspend]) - AC_COMPILE_IFELSE([AC_LANG_PROGRAM([ + AC_TRY_RUN([ #include #ifdef HAVE_SIGNAL_H #include @@ -1502,25 +1492,25 @@ if test "$have_aio_h" = "yes" -o "$have_sys_aio_h" = "yes" -o "x$disable_aio" = #ifdef HAVE_SYS_AIO_H #include #endif -],[ + int main(int argc, char **argv) + { struct aiocb *aiocbp; - aio_suspend(1, &aiocbp); + if (argc > 10) aio_suspend(1, &aiocbp); return 0; -])], - [aio_two_arg_suspend=yes - AC_MSG_RESULT(yes)], - [aio_two_arg_suspend=no - AC_MSG_RESULT(no)] - [aio_two_arg_suspend=no - AC_MSG_RESULT(no: cannot test when cross compiling)] + } + ], + aio_two_arg_suspend=yes + AC_MSG_RESULT(yes), + aio_two_arg_suspend=no + AC_MSG_RESULT(no), + aio_two_arg_suspend=no + AC_MSG_RESULT(no: cannot test when cross compiling) ) if test "$aio_two_arg_suspend" != "no" -a "$aio_runs" != "yes" ; then 
AC_DEFINE(ROMIO_HAVE_AIO_SUSPEND_TWO_ARGS, 1, Define if aio_suspend needs two arguments) fi - LIBS=$save_libs - AC_MSG_CHECKING([for aio_fildes member of aiocb structure]) AC_TRY_COMPILE([ #ifdef HAVE_SIGNAL_H @@ -1646,7 +1636,6 @@ fi # Check for statfs (many) and specifically f_fstypename field (BSD) # AC_CHECK_HEADERS(sys/vfs.h sys/param.h sys/mount.h sys/statvfs.h) -AC_CHECK_FUNCS([statfs]) AC_MSG_CHECKING([whether struct statfs properly defined]) AC_TRY_COMPILE([ #ifdef HAVE_SYS_VFS_H @@ -1698,7 +1687,7 @@ fi # AC_CHECK_HEADERS(sys/stat.h sys/types.h unistd.h) AC_CHECK_FUNCS(stat, - [AC_DEFINE(HAVE_STAT, 1, Define if stat function is present) + AC_DEFINE(HAVE_STAT, 1, Define if stat function is present) AC_MSG_CHECKING([for st_fstype member of stat structure]) AC_TRY_COMPILE([ #ifdef HAVE_SYS_TYPES_H @@ -1719,14 +1708,14 @@ AC_CHECK_FUNCS(stat, AC_DEFINE(ROMIO_HAVE_STRUCT_STAT_WITH_ST_FSTYPE, 1, Define if struct stat has a st_fstype member), AC_MSG_RESULT(no) ) -]) +) # # Check for statvfs and f_basetype field (Solaris, Irix, AIX, etc.) # AC_CHECK_HEADERS(sys/types.h sys/statvfs.h sys/vfs.h) AC_CHECK_FUNCS(statvfs, - [AC_DEFINE(HAVE_STATVFS, 1, Define if statvfs function is present) + AC_DEFINE(HAVE_STATVFS, 1, Define if statvfs function is present) AC_MSG_CHECKING([for f_basetype member of statvfs structure]) AC_TRY_COMPILE([ #ifdef HAVE_SYS_TYPES_H @@ -1747,7 +1736,7 @@ AC_CHECK_FUNCS(statvfs, AC_DEFINE(ROMIO_HAVE_STRUCT_STATVFS_WITH_F_BASETYPE, 1, defined if struct statvfs has a f_basetype member), AC_MSG_RESULT(no) ) -]) +) # # Check for large file support. 
Make sure that we can use the off64_t @@ -2002,9 +1991,9 @@ elif test $FROM_MPICH2 = yes ; then # Turn off the building of the Fortran interface and the Info routines EXTRA_DIRS="" AC_DEFINE(HAVE_STATUS_SET_BYTES,1,[Define if status_set_bytes available]) - DEFINE_HAVE_MPI_GREQUEST="#define HAVE_MPI_GREQUEST" - # Add the MPICH2_INCLUDE_FLAGS to CPPFLAGS - CPPFLAGS="$CPPFLAGS $MPICH2_INCLUDE_FLAGS" + DEFINE_HAVE_MPI_GREQUEST="#define HAVE_MPI_GREQUEST 1" +# Open MPI does not have the MPIU functions +# AC_DEFINE(HAVE_MPIU_FUNCS,1,[Define if MPICH2 memory tracing macros defined]) fi # # @@ -2014,6 +2003,7 @@ fi if test $WITHIN_KNOWN_MPI_IMPL = no ; then PAC_TEST_MPIR_STATUS_SET_BYTES PAC_TEST_MPI_GREQUEST + PAC_TEST_MPIU_FUNCS AC_DEFINE(PRINT_ERR_MSG,1,[Define for printing error messages]) fi # @@ -2054,6 +2044,11 @@ if test "$ac_cv_func_ftruncate" = "yes" ; then # Do we need to declare ftruncate? PAC_FUNC_NEEDS_DECL([#include ],ftruncate) fi + +AC_CHECK_FUNCS(lseek64) +if test "$ac_cv_func_lseek64" = "yes" ; then + PAC_FUNC_NEEDS_DECL([#include ],lseek64) +fi # # Create the directory lists for the Makefile FILE_SYS_DIRS="" @@ -2149,7 +2144,7 @@ CFLAGS="$CFLAGS $OMPI_CFLAGS "'-I$(top_builddir)/include' # Open MPI - AM doesn't want the following: # VPATH, CC, CPPFLAGS, CFLAGS, AR, RANLIB, F77, MAKE #VPATH='VPATH = .:${srcdir}' -#AC_SUBST(VPATH) +#AC_SUBST(VPATH)# AC_SUBST(ARCH) AC_SUBST(FILE_SYSTEM) #AC_SUBST(CC) @@ -2208,7 +2203,6 @@ AC_SUBST(TEST_CC) AC_SUBST(TEST_F77) AC_SUBST(ROMIO_INCLUDE) AC_SUBST(ROMIO_LFLAGS) -AC_SUBST(ROMIO_LIBLIST) AC_SUBST(ROMIO_TCFLAGS) AC_SUBST(ROMIO_TCPPFLAGS) AC_SUBST(ROMIO_TFFLAGS) @@ -2268,6 +2262,7 @@ AC_CONFIG_FILES([ adio/ad_testfs/Makefile adio/ad_ufs/Makefile adio/ad_xfs/Makefile + adio/ad_zoidfs/Makefile adio/common/Makefile adio/include/Makefile @@ -2309,6 +2304,7 @@ AC_CONFIG_FILES([ # adio/ad_piofs/Makefile: old and no longer used AC_OUTPUT # +# # Open MPI - don't need to remove this... 
# rm -f *.o diff --git a/ompi/mca/io/romio/romio/doc/makepubpage.sh b/ompi/mca/io/romio/romio/doc/makepubpage.sh new file mode 100644 index 0000000000..390519e580 --- /dev/null +++ b/ompi/mca/io/romio/romio/doc/makepubpage.sh @@ -0,0 +1,28 @@ +#!/bin/sh + +# if you have bibtex2html installed (http://www.lri.fr/~filliatr/bibtex2html/ +# but I know there are other packages by that name), then you can re-generate +# the "ROMIO publication page" +# (http://www.mcs.anl.gov/research/projects/romio/pubs.html) + +# If you update the command below, please be sure to retain the link to the +# older papers + +WEB_HOST=login3.mcs.anl.gov +WEB_DIR=/mcs/web/research/projects/romio + +bibtex2html -t "Papers using ROMIO" \ + --header "Please help us keep this list up to date. Contact mpich-discuss@mcs.anl.gov for any corrections or additions.

Last updated at $(date).

Recent publications

" \ + --footer "

Other publications

+

    +
  • Northwestern University CUCIS group +
  • Weikuan Yu +
  • Phillip Dickens +
  • Older ROMIO papers" \ + -r -d -both pubs.bib + +if [ $? -eq 0 ] ; then + scp pubs* ${WEB_HOST}:${WEB_DIR} +else + echo "error running bibtex2html. website not updated" +fi diff --git a/ompi/mca/io/romio/romio/doc/pubs.bib b/ompi/mca/io/romio/romio/doc/pubs.bib new file mode 100644 index 0000000000..5799bc305c --- /dev/null +++ b/ompi/mca/io/romio/romio/doc/pubs.bib @@ -0,0 +1,465 @@ +@InProceedings{gropp:selfconsist-io, + author={William Gropp and Dries Kimpe and Robert B. Ross and Rajeev Thakur and Jesper Larsson Tr\"aff}, + title={Self-Consistent {MPI-IO} Performance Requirements and Expectations}, + booktitle={Proc. of the 15th European PVM/MPI Users' Group Meeting (Euro + PVM/MPI 2008)}, + month={September}, + year={2008} +} + +@InProceedings{kimpe:log-based, + author={D. Kimpe and R. Ross and S. Vandewalle and S. Poedts}, + title={Transparent Log-Based Data Storage in {MPI-IO} Applications}, + booktitle={Proc. of the 14th European PVM/MPI Users' Group Meeting (Euro PVM/MPI 2007)}, + month={September}, + year={2007} +} + +@InProceedings{latham:gen-req, + author={Robert Latham and William Gropp and Robert B. Ross and Rajeev Thakur}, + title={Extending the {MPI-2} Generalized Request Interface}, + booktitle={Proc. of the 14th European PVM/MPI Users' Group Meeting (Euro PVM/MPI 2007)}, + month={September}, + year={2007}, + pages={223--232} +} + +@InProceedings{lee:mpiio-l, + author={Jonghyun Lee and Robert B. 
Ross and Scott Atchley and Micah Beck + and Rajeev Thakur}, + title={{MPI-IO/L}: Efficient Remote {I/O} for {MPI-IO} via Logistical Networking}, + booktitle={Proceedings of the 20th IEEE International Parallel and Distributed + Processing Symposium (IPDPS 2006)}, + address={Rhodes Island, Greece}, + month={April}, + year={2006} +} + +@InProceedings{ross:atomic, + author = {Robert Ross and Robert Latham and William Gropp and Rajeev Thakur + and Brian Toonen}, + title = {Implementing {MPI-IO} Atomic Mode Without File System Support}, + booktitle = {Proceedings of CCGrid 2005}, + month = {May}, + year = {2005}, +} + +@InProceedings{latham:sharedfp, + author = {Robert Latham and Robert Ross and Rajeev Thakur}, + title = {Implementing {MPI-IO} Shared File Pointers Without File + System Support}, + booktitle = {Proceedings of EuroPVM/MPI 2005}, + month={September}, + year = {2005}, +} + +@article{latham:rma-ops, + author = {Latham, Robert and Ross, Robert and Thakur, Rajeev}, + title = {{Implementing MPI-IO Atomic Mode and Shared File Pointers Using MPI + One-Sided Communication}}, + journal = {International Journal of High Performance Computing Applications}, + volume = {21}, + number = {2}, + pages = {132-143}, + doi = {10.1177/1094342007077859}, + year = {2007}, + abstract = {The ROMIO implementation of the MPI-IO standard provides a + portable infrastructure for use on top of a variety of underlying storage + targets. These targets vary widely in their capabilities, and in some cases + additional effort is needed within ROMIO to support all MPI-IO semantics. Two + aspects of the interface that can be problematic to implement are MPI-IO + atomic mode and the shared file pointer access routines. Atomic mode requires + enforcing strict consistency semantics, and shared file pointer routines + require communication and coordination in order to atomically update a shared + resource.
For some file systems, native locks may be used to implement these + features, but not all file systems have lock support. In this work, we + describe algorithms for implementing efficient mutex locks using MPI-1 and + the one-sided capabilities from MPI-2. We then show how these algorithms may + be used to implement both MPI-IO atomic mode and shared file pointer + methods for ROMIO without requiring any features from the underlying file + system. We show that these algorithms can outperform traditional file system + lock approaches. Because of the portable nature of these algorithms, they are + likely useful in a variety of situations where distributed locking or + coordination is needed in the MPI-2 environment. }, + URL = {http://hpc.sagepub.com/cgi/content/abstract/21/2/132}, + pdf = {papers/latham_rmaops.pdf}, + eprint = {http://hpc.sagepub.com/cgi/reprint/21/2/132.pdf} +} + +@Article{latham:mpi-io-scalability, + author = {Rob Latham and Rob Ross and Rajeev Thakur}, + title = {The impact of file systems on {MPI-IO} scalability}, + journal = {Lecture Notes in Computer Science}, + booktitle = {11th European Parallel Virtual Machine and Message Passing + Interface Users Group Meeting; September 19-22, 2004; Budapest, HUNGARY}, + editor = {Kranzlmuller, D; Kacsuk, P; Dongarra, J}, + year = {2004}, + month = {September}, + volume = {3241}, + pages = {87--96}, + institution = {Argonne Natl Lab, 9700 S Cass Ave, Argonne, IL 60439 USA; + Argonne Natl Lab, Argonne, IL 60439 USA}, + publisher = {Springer-Verlag Heidelberg}, + copyright = {(c)2004 Institute for Scientific Information, Inc.}, + URL = {http://www.springerlink.com/link.asp?id=m31px2lt90296b62}, + pdf = {papers/latham:scalable_ops.pdf}, + keywords = {scalability analysis, MPI-IO, pario-bib}, + abstract = {As the number of nodes in cluster systems continues to grow, + leveraging scalable algorithms in all aspects of such systems becomes key to + maintaining performance. 
While scalable algorithms have been applied + successfully in some areas of parallel I/O, many operations are still + performed in an uncoordinated manner. In this work we consider, in three file + system scenarios, the possibilities for applying scalable algorithms to the + many operations that make up the MPI-IO interface. From this evaluation we + extract a set of file system characteristics that aid in developing scalable + MPI-IO implementations.} +} + + + +@InProceedings{thakur:byte-range, + author = {Rajeev Thakur and Robert Ross and Robert Latham}, + title = {Implementing Byte-Range Locks Using {MPI} One-Sided Communication}, + booktitle = {Proceedings of the 12th European PVM/MPI Users' Group Meeting (Euro PVM/MPI 2005), Recent Advances in Parallel Virtual Machine and Message Passing Interface, Lecture Notes in Computer Science, LNCS 3666, Springer}, + month = {September}, + year = {2005}, + pages = {119-128}, +} + +@InProceedings{gropp:io-redundancy, + author={William Gropp and Robert B. Ross and Neill Miller}, + title={Providing Efficient {I/O} Redundancy in {MPI} Environments}, + booktitle={Proceedings of EuroPVM/MPI 2004}, + month={September}, + year={2004} +} + +@InProceedings{lee:rfs, + author = {Jonghyun Lee and Xiaosong Ma and Robert B. Ross and Rajeev Thakur + and Marianne Winslett}, + title={{RFS}: Implementing Efficient and Flexible Remote File Access for + {MPI-IO}}, + booktitle={Proceedings of Cluster 2004}, + month={September}, + year={2004} +} + +@InProceedings{yu:bgl-io, + author = {Hao Yu and R. K. Sahoo and C. Howson and George. Almasi and + J. G. Castanos and M. Gupta and Jose. E. Moreira and J. J. Parker and + T. E. Engelsiepen and Robert Ross and Rajeev Thakur and Robert Latham + and W. D. 
Gropp}, + title = {High Performance File {I/O} for the {BlueGene/L} Supercomputer}, + booktitle = {Proceedings of the 12th International Symposium on High-Performance Computer Architecture (HPCA-12)}, + month = {February}, + year = {2006}, + url = {http://www.mcs.anl.gov/~thakur/papers/bgl-io.pdf}, +} + +@Article{Hastings:pio-shmem, + author = {Andrew B. Hastings and Alok Choudhary}, + title = {Exploiting Shared Memory to Improve Parallel {I/O} Performance}, + journal = {Lecture Notes in Computer Science}, + booktitle = {13th European PVM/MPI User's Group Meeting; September 17-20, 2006; Bonn, Germany}, + editor = {Mohr, B; Worringen, J; Dongarra, J}, + year = {2006}, + month = {September}, + volume = {4192}, + pages = {212-221}, + publisher = {Springer-Verlag Heidelberg}, +} + +@Article{seidel:memfs, + author = {Jan Seidel and Rudolf Berrendorf and Marcel Birkner and Marc-Andre Hermanns}, + title = {High-Bandwidth Remote Parallel {I/O} with the Distributed Memory Filesystem {MEMFS}}, + journal = {Lecture Notes in Computer Science}, + booktitle = {13th European PVM/MPI User's Group Meeting; September 17-20, 2006; Bonn, Germany}, + editor = {Mohr, B; Worringen, J; Dongarra, J}, + year = {2006}, + month = {September}, + volume = {4192}, + pages = {222-229}, + publisher = {Springer-Verlag Heidelberg}, +} + +@InProceedings{calderon:ft-mpiio, + author={A. Calderon and F. Garcia-Carballeira and Florin Isaila and Rainer Keller and Alexander Schulz}, + title={Fault Tolerant File Models for {MPI-IO} Parallel File Systems}, + booktitle={Proc. of the 14th European PVM/MPI Users' Group Meeting (Euro PVM/MPI 2007)}, + month={September}, + year={2007}, + pages={153-160} +} +@InProceedings{Blas:romio-gpfs, + author={Francisco Javier Garc{\'i}a Blas and Florin Isaila and Jes{\'u}s Carretero + and Thomas Grossmann}, + title={Implementation and Evaluation of an {MPI-IO} Interface for + {GPFS} in {ROMIO}}, + booktitle={Proc.
of the 15th European PVM/MPI Users' Group Meeting (Euro + PVM/MPI 2008)}, + month={September}, + year={2008} +} + +@InProceedings{dickens:y-lib, + author = {Dickens, Phillip M. and Logan, Jeremy}, + title = {Y-lib: a user level library to increase the performance of {MPI-IO} in a lustre file system environment}, + booktitle = {HPDC '09: Proceedings of the 18th ACM international symposium on High performance distributed computing}, + year = {2009}, + isbn = {978-1-60558-587-1}, + pages = {31--38}, + location = {Garching, Germany}, + doi = {http://doi.acm.org/10.1145/1551609.1551617}, + publisher = {ACM}, + address = {New York, NY, USA}, + } + + +% these ROMIO-related papers won't be published until EuroPVM/MPI 2009 + +@InProceedings{buettner:nonblocking-io, + author = {David Buettner and Julian Kunkel and Thomas Ludwig}, + title = {Using Non-Blocking {I/O} Operations in High Performance Computing to Reduce Execution Times}, + booktitle = {To be Published in Proc. of the 16th European PVM/MPI User's Group Meeting (Euro PVM/MPI 2009)}, + month={September}, + year={2009} + } + +@InProceedings{sehrish:conflicts, + author = {Saba Sehrish and Jun Wang and Rajeev Thakur}, + title = {Conflict Detection Algorithm to Minimize Locking for {MPI-IO} Atomicity}, + booktitle = {To be Published in Proc. of the 16th European PVM/MPI User's Group Meeting (Euro PVM/MPI 2009)}, + month={September}, + year={2009} +} + +@InProceedings{blas:bg-writeback, + author = {Javier Garc{\'i}a Blas and Florin Isaila and Jes{\'u}s Carretero and Robert Latham and Robert Ross}, + title = {Multiple-level {MPI} file write-back and prefetching for {Blue Gene} systems}, + booktitle = {To be Published in Proc. 
of the 16th European PVM/MPI User's Group Meeting (Euro PVM/MPI 2009)}, + month={September}, + year={2009} +} + +@InProceedings{isaila:ahpios, + author = {Florin Isaila and Javier Garc{\'i}a Blas and Jes{\'u}s Carretero and Wei-keng Liao and Alok Choudhary}, + title = {{AHPIOS}: An {MPI-based} ad-hoc parallel {I/O} system}, + booktitle = {14th Intl Conference on Parallel and Distributed Systems. Melbourne, Victoria, Australia}, + year={2008} +} + +@InProceedings{blas:viewio, + author = {Javier Garc{\'i}a Blas and Florin Isaila and David E. Singh and Jes{\'u}s Carretero}, + title ={View-based collective {I/O} for {MPI-IO}}, + booktitle={8th IEEE International Symposium on Cluster Computing and the Grid}, + year={2008} +} + +@InProceedings{yu:lustre-joining, + author = {Weikuan Yu and Jeffrey Vetter and R. Shane Canon and Song Jiang}, + title = {Exploiting {Lustre} File Joining for Effective Collective {IO}}, + booktitle = {Seventh IEEE International Symposium on Cluster Computing and the Grid (CCGrid 2007)}, + month = {May}, + year = {2007}, +} + +@InProceedings{yu:opal, + author = {Weikuan Yu and Jeffrey Vetter and Shane Canon}, + title = {{OPAL}: An Open-Source {MPI-IO} Library over {Cray XT}}, + booktitle = {International Workshop on Storage Network Architecture and Parallel I/O (SNAPI'07)}, + month={September}, + year = {2007} +} + +@InProceedings{dickens:lustre-mpi, + title = {Towards a High Performance Implementation of {MPI-IO} on the {Lustre} File System}, + author = { Phillip M. Dickens and Jeremy Logan}, + booktitle = {Proceedings of GADA'08: Grid computing, high-performAnce and Distributed Applications. Monterrey, Mexico}, + month = {November}, + year = {2008} +} + +@article{dickens:lustre-mpi-journal, + author = {Phillip Dickens and J.
Logan}, + title = {A High Performance Implementation of {MPI-IO} for a {Lustre} File System Environment}, + journal = {To appear: Concurrency and Computation: Practice and Experience}, + year = {2009} +} + +@InProceedings{logan:objectio, + author = {J. Logan and Phillip Dickens}, + title = {Using Object Based Files for High Performance Parallel {I/O}}, + booktitle = {Proceedings of the IEEE 4th International Workshop on Intelligent Data acquisition and Advanced Computing Systems: Technology and Applications. Dortmund, Germany}, + month = {September}, + year = {2007} + } + +@InProceedings{logan:dynamic-object-io, + author = {J. Logan and Phillip Dickens}, + title = {Dynamic Object Based {I/O}. (Invited Paper)}, + booktitle = {To appear: IEEE 5th International Workshop on Intelligent Data + acquisition and Advanced Computing Systems: Technology and Applications}, + year = {2009}, + month = {August} +} + + +@InProceedings{liao:partitioning, + author = {Wei-keng Liao and Alok Choudhary}, + title = {Dynamically Adapting File Domain Partitioning Methods for Collective {I/O} Based on Underlying Parallel File System Locking Protocols}, + booktitle = {Proceedings of International Conference for High Performance Computing, Networking, Storage and Analysis (SC08), Austin, Texas}, + month={November}, + year = {2008} +} + +@InProceedings{nisar:io-delegation, + author = {Arifa Nisar and Wei-keng Liao and Alok Choudhary}, + title = {Scaling Parallel {I/O} Performance through {I/O} Delegate and Caching System}, + booktitle = {Proceedings of International Conference for High Performance Computing, Networking, Storage and Analysis (SC08), Austin, Texas}, + month={November}, + year ={2008} +} + +@InProceedings{liao:caching-large-scale, + author = { Wei-keng Liao and Avery Ching and Kenin Coloma and Arifa Nisar and Alok Choudhary and Jackie Chen and Ramanan Sankaran and Scott Klasky}, + title = {Using {MPI} File Caching to Improve Parallel Write Performance for Large-Scale Scientific 
Applications}, + booktitle = {Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis (SC 2007), Reno, Nevada}, + month = {November}, + year = {2007} +} + +@article{liao:cooperative_caching_mpi_journal, + author = {Wei-keng Liao and Kenin Coloma and Alok Choudhary and Lee Ward}, + title = {Cooperative Client-side File Caching for {MPI} Applications}, + journal = {International Journal of High Performance Computing Applications}, + volume = {21}, + number = {2}, + pages = {144-154}, + month = {May}, + year = {2007} +} + +@InProceedings{liao:client_cache_eval, + author = {Wei-keng Liao and Avery Ching and Kenin Coloma and Alok Choudhary and Lee Ward}, + title = {An Implementation and Evaluation of Client-side File Caching for {MPI-IO}}, + booktitle = {Proceedings of the 21st International Parallel and Distributed +Processing Symposium (IPDPS), Long Beach, California}, + month = {March}, + year = {2007} +} + +@article{liao:overlapping_io, + author = {Wei-keng Liao and Kenin Coloma and Alok Choudhary and Lee Ward and Eric Russell and Neil Pundit}, + title = {Scalable Design and Implementations for MPI Parallel Overlapping {I/O}}, + journal = {{IEEE} Transactions on Parallel and Distributed Systems}, + volume = {17}, + number = {11}, + pages = {1264-1276}, + month = {November}, + year = {2006} +} + + +@InProceedings{coloma:new_collective_io, + author = {Kenin Coloma and Avery Ching and Alok Choudhary and Wei-keng Liao and Robert Ross and Rajeev Thakur and Lee Ward}, + title = {A New Flexible {MPI} Collective {I/O} Implementation}, + booktitle = {Proceedings of the IEEE Conference on Cluster Computing (Cluster 2006), Barcelona, Spain}, + month = {September}, + year = {2006} +} + +@article{memik:multicollective_io, + author = {Gokhan Memik and Mahmut T.
Kandemir and Wei-Keng Liao and Alok Choudhary}, + title = {Multicollective {I/O}: A Technique for Exploiting Inter-file Access Patterns}, + journal = {{ACM} Transactions on Storage (TOS)}, + volume = {2}, + number = {3}, + pages = {349-360}, + month = {August}, + year = {2006} +} + +@Inbook{liao:atomicity_overlap, + author = {Wei-keng Liao and Alok Choudhary and Kenin Coloma and Lee Ward and Eric Russell and Neil Pundit}, + title = {{MPI} Atomicity and Concurrent Overlapping {I/O}}, + booktitle = {High Performance Computing: Paradigm and Infrastructure}, + pages = {203-218}, + chapter = {10}, + month = {November}, + year = {2005}, + publisher = {John Wiley \& Sons Inc}, + note = {ISBN: 978-0-471-65471-1} +} + +@InProceedings{liao:cooperative_write-behind, + author = {Wei-keng Liao and Kenin Coloma and Alok Choudhary and Lee Ward}, + title = {{Cooperative Write-Behind Data Buffering for MPI I/O}}, + booktitle = {Proceedings of the 12th European Parallel Virtual Machine and Message Passing Interface Conference (EURO PVM/MPI), Sorrento (Naples), Italy}, + month = {September}, + year = {2005} + } + +@InProceedings{liao:app_aware_caching, + author = {Wei-keng Liao and Kenin Coloma and Alok Choudhary and Lee Ward and Eric Russell and Sonja Tideman}, + title = {{Collective Caching: Application-aware Client-side File Caching}}, + booktitle = {Proceedings of the 14th IEEE International Symposium on High Performance Distributed Computing (HPDC-14), Research Triangle Park, NC}, + month = {July}, + year = {2005} + } + +@InProceedings{coloma:dache, + author = {Kenin Coloma and Alok Choudhary and Wei-keng Liao and Lee Ward and Sonja Tideman}, + title = {{DAChe: Direct Access Cache System for Parallel I/O}}, + booktitle = {Proceedings of the International Supercomputer Conference, Heidelberg, Germany}, + month = {June}, + year = {2005} + } + +@InProceedings{coloma:scalable_caching, + author = {Kenin Coloma and Alok Choudhary and Wei-keng Liao and Lee Ward and Eric Russell and Neil
Pundit}, + title = {{Scalable High-level Caching for Parallel I/O}}, + booktitle = {Proceedings of the International Parallel and Distributed Parallel Processing Symposium (IPDPS), New Mexico}, + month = {April}, + year = {2004} +} + +@InProceedings{Liao:scalable_atomicity, + author = {Wei-keng Liao and Alok Choudhary and Kenin Coloma and George K. Thiruvathukal and Lee Ward and Eric Russell and Neil Pundit}, + title = {{Scalable Implementations of MPI Atomicity for Concurrent Overlapping I/O}}, + booktitle = {Proceedings of the International Conference on Parallel Processing (ICPP), Kaohsiung, Taiwan}, + month = {October}, + year = {2003} + } + +@InProceedings{ching:noncontig, + author = {Avery Ching and Alok Choudhary and Kenin Coloma and Wei-keng Liao and Robert Ross and William Gropp}, + title = {{Noncontiguous I/O Accesses Through MPI-IO}}, + booktitle = {Proceedings of the International Symposium on Cluster Computing and the Grid (CCGrid), Tokyo, Japan}, + month = {May}, + year = {2003} +} + +@Article{chaarawi:collective_writes, + author = {Mohamad Chaarawi and Suneet Chandok and Edgar Gabriel}, + title = {{Performance Evaluation of Collective Write Algorithms in MPI I/O}}, + journal = {Lecture Notes in Computer Science}, + booktitle = {International Conference on Computer Science (ICCS) 2009}, + month = {May}, + year = {2009}, + volume = {5544}, + pages = {185-194}, + publisher = {Springer Berlin / Heidelberg}, + doi = {10.1007/978-3-642-01970-8_19} +} +@Article{kulkarni::shared_file_pointers, + author = {Ketan Kulkarni and Edgar Gabriel}, + title = {{Evaluating Algorithms for Shared File Pointer Operations in MPI I/O}}, + journal = {Lecture Notes in Computer Science}, + booktitle = {International Conference on Computer Science (ICCS) 2009}, + month = {May}, + year = {2009}, + volume = {5544}, + pages = {280-289}, + publisher = {Springer Berlin / Heidelberg}, + doi = {10.1007/978-3-642-01970-8_28} +} diff --git a/ompi/mca/io/romio/romio/doc/users-guide.tex 
b/ompi/mca/io/romio/romio/doc/users-guide.tex index 8706239bef..b19c61ad65 100644 --- a/ompi/mca/io/romio/romio/doc/users-guide.tex +++ b/ompi/mca/io/romio/romio/doc/users-guide.tex @@ -21,11 +21,11 @@ ANL/MCS-TM-234 \\ \rule{1.75in}{.01in} \\ -\vskip 1.3 in +\vskip 1.3in {\Large\bf Users Guide for ROMIO: A High-Performance, \\ [1ex] Portable MPI-IO Implementation} \\ [4ex] by \\ [2ex] -{\large\it Rajeev Thakur, Robert Ross, Ewing Lusk, and William Gropp} +{\large\it Rajeev Thakur, Robert Ross, Ewing Lusk, William Gropp, Robert Latham} \vspace{1in} Mathematics and Computer Science Division @@ -36,7 +36,7 @@ Technical Memorandum No.\ 234 \vspace{1.4in} -Revised May 2004 +Revised May 2004, November 2007, April 2010 \end{center} @@ -215,8 +215,13 @@ Subsection~\ref{sec:hints}. \subsection{Hints} \label{sec:hints} +If ROMIO doesn't understand a hint, or if the value is invalid, the hint +will be ignored. The values of hints being used by ROMIO for a file +can be obtained at any time via {\tt MPI\_File\_get\_info}. + The following hints control the data sieving optimization and are applicable to all file system types: + \begin{itemize} \item \texttt{ind\_rd\_buffer\_size} -- Controls the size (in bytes) of the intermediate buffer used by ROMIO when performing data sieving during @@ -370,12 +375,15 @@ first be written to. This is a number in the range of 0 ... striping\_factor - 1. \end{itemize} -Also for PFS: +\subsubsection{Hints for PFS} +\label{sec:hints_pfs} \begin{itemize} \item \texttt{pfs\_svr\_buf} -- Turns on PFS server buffering. Valid values are \texttt{true} and \texttt{false}. Default is \texttt{false}. \end{itemize} +\subsubsection{Hints for XFS} +\label{sec:hints_xfs} For XFS control is provided for the direct I/O optimization: \begin{itemize} \item \texttt{direct\_read} -- Controls direct I/O for reads. Valid @@ -384,6 +392,9 @@ values are \texttt{true} and \texttt{false}. Default is \texttt{false}. values are \texttt{true} and \texttt{false}. 
Default is \texttt{false}. \end{itemize} +\subsubsection{Hints for PVFS (v1)} +\label{sec:hints_oldpvfs} + For PVFS control is provided for the use of the listio interface. This interface to PVFS allows for a collection of noncontiguous regions to be requested (for reading or writing) with a single operation. This can result @@ -400,9 +411,126 @@ Valid values are \texttt{enable}, \texttt{disable}, and \texttt{automatic}. Default is \texttt{disable}. \end{itemize} -If ROMIO doesn't understand a hint, or if the value is invalid, the hint -will be ignored. The values of hints being used by ROMIO for a file -can be obtained at any time via {\tt MPI\_File\_get\_info}. +\subsubsection{Hints for PVFS (v2)} +\label{sec:hints_pvfs} + +The PVFS v2 file system has many tuning parameters. +\begin{itemize} +\item dtype i/o +\end{itemize} + +\subsubsection{Hints for Lustre} + +\begin{itemize} +\item \texttt{romio\_lustre\_co\_ratio} + +In stripe-contiguous IO pattern, each OST will be accessed by a group of +IO clients. CO means \emph{C}lient/\emph{O}ST ratio, or the max. number of IO clients +for each OST. +CO=1 by default. + +\item \texttt{romio\_lustre\_coll\_threshold} + +We won't do collective I/O if this hint is set and the IO request size is +bigger than this value. That's because when the request size is big, the +collective communication overhead increases and the benefits from collective +I/O become limited. A value of 0 means always perform collective I/O. + +\item \texttt{romio\_lustre\_cb\_ds\_threshold} + +ROMIO can optimize collective I/O with a version of data sieving. If the I/O +request is smaller than this hint's value, though, ROMIO will not try to apply +the data sieving optimization. + +\item \texttt{romio\_lustre\_ds\_in\_coll} + +Collective IO will apply read-modify-write to deal with non-contiguous +data by default. However, it will introduce some overhead (IO operation and +locking).
The Lustre developers have run tests where data sieving showed bad +collective write performance for some kinds of workloads. So, to avoid this, +we define the \texttt{romio\_lustre\_ds\_in\_coll} hint to disable the read-modify-write +step in collective I/O. This optimization is distinct from the one in +independent I/O (controlled by \texttt{romio\_ds\_read} and +\texttt{romio\_ds\_write}). + +\end{itemize} + +\subsubsection{Hints for PANFS (Panasas)} + +PanFS allows users to specify the layout of a file at file-creation time. +Layout information includes the number of StorageBlades (SB) across which the +data is stored, the number of SBs across which a parity stripe is written, and +the number of consecutive stripes that are placed on the same set of SBs. The +\texttt{panfs\_layout\_*} hints are only used if supplied at file-creation +time. +\begin{itemize} + +\item \texttt{panfs\_layout\_type} Specifies the layout of a file: 2 = RAID0 +3 = RAID5 Parity Stripes + +\item \texttt{panfs\_layout\_stripe\_unit} The size of the stripe unit +in bytes + +\item \texttt{panfs\_layout\_total\_num\_comps} The total number of +StorageBlades a file is striped across. + +\item \texttt{ panfs\_layout\_parity\_stripe\_width} If the layout type is +RAID5 Parity Stripes, this hint specifies the number of StorageBlades in a +parity stripe. + +\item \texttt{panfs\_layout\_parity\_stripe\_depth} If the layout type is RAID5 +Parity Stripes, this hint specifies the number of contiguous parity stripes +written across the same set of SBs. + +\item \texttt{panfs\_layout\_visit\_policy} If the layout type is RAID5 Parity +Stripes, the policy used to determine the parity stripe a given file offset is +written to: 1 = Round Robin +\end{itemize} + +PanFS supports the ``concurrent write'' (CW) mode, where groups of +cooperating clients can disable the PanFS consistency mechanisms and use +their own consistency protocol. 
Clients participating in concurrent +write mode use application specific information to improve performance +while maintaining file consistency. All clients accessing the file(s) +must enable concurrent write mode. If any client does not enable +concurrent write mode, then the PanFS consistency protocol will be +invoked. Once a file is opened in CW mode on a machine, attempts to +open a file in non-CW mode will fail with EACCES. If a file is already +opened in non-CW mode, attempts to open the file in CW mode will fail +with EACCES. The following hint is used to enable concurrent write +mode. + +\begin{itemize} +\item \texttt{panfs\_concurrent\_write} If set to 1 at file open time, +the file is opened using the PanFS concurrent write mode flag. +Concurrent write mode is not a persistent attribute of the file. +\end{itemize} + +Below is an example PanFS layout using the following parameters: +\begin{verbatim} + + - panfs_layout_type = 3 + - panfs_layout_total_num_comps = 100 + - panfs_layout_parity_stripe_width = 10 + - panfs_layout_parity_stripe_depth = 8 + - panfs_layout_visit_policy = 1 + + Parity Stripe Group 1 Parity Stripe Group 2 . . . Parity Stripe Group 10 + ---------------------- ---------------------- -------------------- + SB1 SB2 ... SB10 SB11 SB12 ... SB20 ... SB91 SB92 ... SB100 + ----------------------- ----------------------- --------------------- + D1 D2 ... D10 D91 D92 ... D100 D181 D182 ... D190 + D11 D12 D20 D101 D102 D110 D191 D192 D193 + D21 D22 D30 . . . . . . + D31 D32 D40 + D41 D42 D50 + D51 D52 D60 + D61 D62 D70 + D71 D72 D80 + D81 D82 D90 D171 D172 D180 D261 D262 D270 + D271 D272 D273 . . . . . . + ... 
+\end{verbatim} \subsubsection{Systemwide Hints} \label{sec:system_hints} diff --git a/ompi/mca/io/romio/romio/include/mpio.h.in b/ompi/mca/io/romio/romio/include/mpio.h.in index a9e1ddbe60..3f94402ade 100644 --- a/ompi/mca/io/romio/romio/include/mpio.h.in +++ b/ompi/mca/io/romio/romio/include/mpio.h.in @@ -11,7 +11,6 @@ #define MPIO_INCLUDE #include "mpi.h" - /* Open MPI: We need to rename almost all of these functions, as well a the types to be names that conform to the prefix rule */ #include "io_romio_conv.h" @@ -266,10 +265,10 @@ MPI_File MPI_File_f2c(MPI_Fint); MPI_Fint MPI_File_c2f(MPI_File); #endif - -/* The foll. functions are required, because an MPIO_Request object - is currently used for nonblocking I/O. These functions will go away - after generalized requests are implemented. */ +#ifndef HAVE_MPI_GREQUEST +/* The following functions are required if generalized requests are not + available, because in that case, an MPIO_Request object + is currently used for nonblocking I/O. */ int MPIO_Test(MPIO_Request *, int *, MPI_Status *); int MPIO_Wait(MPIO_Request *, MPI_Status *); int MPIO_Testall(int, MPIO_Request *, int *, MPI_Status *); @@ -281,7 +280,7 @@ int MPIO_Testsome(int, MPIO_Request *, int *, int *, MPI_Status *); MPI_Fint MPIO_Request_c2f(MPIO_Request); MPIO_Request MPIO_Request_f2c(MPI_Fint); - +#endif /* HAVE_MPI_GREQUEST */ /* info functions if not defined in the MPI implementation */ #ifndef HAVE_MPI_INFO @@ -441,9 +440,10 @@ int PMPI_Type_create_darray(int, int, int, int *, int *, MPI_File PMPI_File_f2c(MPI_Fint); MPI_Fint PMPI_File_c2f(MPI_File); -/* The foll. functions are required, because an MPIO_Request object - is currently used for nonblocking I/O. These functions will go away - after generalized requests are implemented. */ +#ifndef HAVE_MPI_GREQUEST +/* The following functions are required if generalized requests are not + available, because in that case, an MPIO_Request object + is currently used for nonblocking I/O. 
*/ int PMPIO_Test(MPIO_Request *, int *, MPI_Status *); int PMPIO_Wait(MPIO_Request *, MPI_Status *); int PMPIO_Testall(int, MPIO_Request *, int *, MPI_Status *); @@ -454,7 +454,7 @@ int PMPIO_Waitsome(int, MPIO_Request *, int *, int *, MPI_Status *); int PMPIO_Testsome(int, MPIO_Request *, int *, int *, MPI_Status *); MPI_Fint PMPIO_Request_c2f(MPIO_Request); MPIO_Request PMPIO_Request_f2c(MPI_Fint); - +#endif /* HAVE_MPI_GREQUEST */ /* info functions if not defined in the MPI implementation */ #ifndef HAVE_MPI_INFO diff --git a/ompi/mca/io/romio/romio/include/mpiof.h.in b/ompi/mca/io/romio/romio/include/mpiof.h.in index 71515ce4e5..fa945807bd 100644 --- a/ompi/mca/io/romio/romio/include/mpiof.h.in +++ b/ompi/mca/io/romio/romio/include/mpiof.h.in @@ -1,6 +1,4 @@ ! -! $Id: mpiof.h.in,v 1.3 1999/08/06 18:33:09 thakur Exp $ -! ! Copyright (C) 1997 University of Chicago. ! See COPYRIGHT notice in top-level directory. ! diff --git a/ompi/mca/io/romio/romio/localdefs.in b/ompi/mca/io/romio/romio/localdefs.in index d26086d53e..5b3d6706fb 100644 --- a/ompi/mca/io/romio/romio/localdefs.in +++ b/ompi/mca/io/romio/romio/localdefs.in @@ -1,4 +1,4 @@ #! 
/bin/sh -LIBS="$LIBS @ROMIO_LIBLIST@" +LIBS="@LIBS@" MPI_OFFSET_TYPE="@MPI_OFFSET_TYPE@" FORTRAN_MPI_OFFSET="@FORTRAN_MPI_OFFSET@" diff --git a/ompi/mca/io/romio/romio/man/man3/MPIO_Request_c2f.3 b/ompi/mca/io/romio/romio/man/man3/MPIO_Request_c2f.3 deleted file mode 100644 index 96ee72baa9..0000000000 --- a/ompi/mca/io/romio/romio/man/man3/MPIO_Request_c2f.3 +++ /dev/null @@ -1,17 +0,0 @@ -.TH MPIO_Request_c2f 3 "11/3/1998" " " "MPI-2" -.SH NAME -MPIO_Request_c2f \- Translates a C I/O-request handle to a Fortran I/O-request handle -.SH SYNOPSIS -.nf -#include "mpi.h" -MPI_Fint MPIO_Request_c2f(MPIO_Request request) -.fi -.SH INPUT PARAMETERS -.PD 0 -.TP -.B request -- C I/O-request handle (handle) -.PD 1 - -.SH RETURN VALUE -Fortran I/O-request handle (integer) diff --git a/ompi/mca/io/romio/romio/man/man3/MPIO_Request_f2c.3 b/ompi/mca/io/romio/romio/man/man3/MPIO_Request_f2c.3 deleted file mode 100644 index 4c24c3dde9..0000000000 --- a/ompi/mca/io/romio/romio/man/man3/MPIO_Request_f2c.3 +++ /dev/null @@ -1,17 +0,0 @@ -.TH MPIO_Request_f2c 3 "11/3/1998" " " "MPI-2" -.SH NAME -MPIO_Request_f2c \- Translates a Fortran I/O-request handle to a C I/O-request handle -.SH SYNOPSIS -.nf -#include "mpi.h" -MPIO_Request MPIO_Request_f2c(MPI_Fint request) -.fi -.SH INPUT PARAMETERS -.PD 0 -.TP -.B request -- Fortran I/O-request handle (integer) -.PD 1 - -.SH RETURN VALUE -C I/O-request handle (handle) diff --git a/ompi/mca/io/romio/romio/man/man3/MPIO_Test.3 b/ompi/mca/io/romio/romio/man/man3/MPIO_Test.3 deleted file mode 100644 index 1021bee7ba..0000000000 --- a/ompi/mca/io/romio/romio/man/man3/MPIO_Test.3 +++ /dev/null @@ -1,36 +0,0 @@ -.TH MPIO_Test 3 "2/5/1998" " " "MPI-2" -.SH NAME -MPIO_Test \- Test the completion of a nonblocking read or write -.SH SYNOPSIS -.nf -#include "mpi.h" -int MPIO_Test(MPIO_Request *request, int *flag, MPI_Status *status) -.fi -.SH INPUT PARAMETERS -.PD 0 -.TP -.B request -- request object (handle) -.PD 1 - -.SH OUTPUT PARAMETERS -.PD 0 -.TP 
-.B flag -- true if operation completed (logical) -.PD 1 -.PD 0 -.TP -.B status -- status object (Status) -.PD 1 - -.SH NOTES FOR FORTRAN -All MPI routines in Fortran (except for 'MPI_WTIME' and 'MPI_WTICK') -have an additional argument 'ierr' at the end of the argument list. -'ierr' is an integer and has the same meaning as the return value of -the routine in C. In Fortran, MPI routines are subroutines and are -invoked with the 'call' statement. - -All MPI objects (e.g., 'MPI_Datatype', 'MPI_Comm', 'MPI_File') are of -type 'INTEGER' in Fortran. diff --git a/ompi/mca/io/romio/romio/man/man3/MPIO_Wait.3 b/ompi/mca/io/romio/romio/man/man3/MPIO_Wait.3 deleted file mode 100644 index c2bcb80438..0000000000 --- a/ompi/mca/io/romio/romio/man/man3/MPIO_Wait.3 +++ /dev/null @@ -1,31 +0,0 @@ -.TH MPIO_Wait 3 "2/5/1998" " " "MPI-2" -.SH NAME -MPIO_Wait \- Waits for the completion of a nonblocking read or write -.SH SYNOPSIS -.nf -#include "mpi.h" -int MPIO_Wait(MPIO_Request *request, MPI_Status *status) -.fi -.SH INPUT PARAMETERS -.PD 0 -.TP -.B request -- request object (handle) -.PD 1 - -.SH OUTPUT PARAMETERS -.PD 0 -.TP -.B status -- status object (Status) -.PD 1 - -.SH NOTES FOR FORTRAN -All MPI routines in Fortran (except for 'MPI_WTIME' and 'MPI_WTICK') -have an additional argument 'ierr' at the end of the argument list. -'ierr' is an integer and has the same meaning as the return value of -the routine in C. In Fortran, MPI routines are subroutines and are -invoked with the 'call' statement. - -All MPI objects (e.g., 'MPI_Datatype', 'MPI_Comm', 'MPI_File') are of -type 'INTEGER' in Fortran. 
diff --git a/ompi/mca/io/romio/romio/man/man3/MPI_File_c2f.3 b/ompi/mca/io/romio/romio/man/man3/MPI_File_c2f.3 deleted file mode 100644 index c10f218da1..0000000000 --- a/ompi/mca/io/romio/romio/man/man3/MPI_File_c2f.3 +++ /dev/null @@ -1,17 +0,0 @@ -.TH MPI_File_c2f 3 "11/3/1998" " " "MPI-2" -.SH NAME -MPI_File_c2f \- Translates a C file handle to a Fortran file handle -.SH SYNOPSIS -.nf -#include "mpi.h" -MPI_Fint MPI_File_c2f(MPI_File fh) -.fi -.SH INPUT PARAMETERS -.PD 0 -.TP -.B fh -- C file handle (handle) -.PD 1 - -.SH RETURN VALUE -Fortran file handle (integer) diff --git a/ompi/mca/io/romio/romio/man/man3/MPI_File_close.3 b/ompi/mca/io/romio/romio/man/man3/MPI_File_close.3 deleted file mode 100644 index de9504a2d7..0000000000 --- a/ompi/mca/io/romio/romio/man/man3/MPI_File_close.3 +++ /dev/null @@ -1,24 +0,0 @@ -.TH MPI_File_close 3 "8/31/1998" " " "MPI-2" -.SH NAME -MPI_File_close \- Closes a file -.SH SYNOPSIS -.nf -#include "mpi.h" -int MPI_File_close(MPI_File *fh) -.fi -.SH INPUT PARAMETERS -.PD 0 -.TP -.B fh -- file handle (handle) -.PD 1 - -.SH NOTES FOR FORTRAN -All MPI routines in Fortran (except for 'MPI_WTIME' and 'MPI_WTICK') -have an additional argument 'ierr' at the end of the argument list. -'ierr' is an integer and has the same meaning as the return value of -the routine in C. In Fortran, MPI routines are subroutines and are -invoked with the 'call' statement. - -All MPI objects (e.g., 'MPI_Datatype', 'MPI_Comm', 'MPI_File') are of -type 'INTEGER' in Fortran. 
diff --git a/ompi/mca/io/romio/romio/man/man3/MPI_File_delete.3 b/ompi/mca/io/romio/romio/man/man3/MPI_File_delete.3 deleted file mode 100644 index e3cceec68e..0000000000 --- a/ompi/mca/io/romio/romio/man/man3/MPI_File_delete.3 +++ /dev/null @@ -1,29 +0,0 @@ -.TH MPI_File_delete 3 "2/12/1998" " " "MPI-2" -.SH NAME -MPI_File_delete \- Deletes a file -.SH SYNOPSIS -.nf -#include "mpi.h" -int MPI_File_delete(char *filename, MPI_Info info) -.fi -.SH INPUT PARAMETERS -.PD 0 -.TP -.B filename -- name of file to delete (string) -.PD 1 -.PD 0 -.TP -.B info -- info object (handle) -.PD 1 - -.SH NOTES FOR FORTRAN -All MPI routines in Fortran (except for 'MPI_WTIME' and 'MPI_WTICK') -have an additional argument 'ierr' at the end of the argument list. -'ierr' is an integer and has the same meaning as the return value of -the routine in C. In Fortran, MPI routines are subroutines and are -invoked with the 'call' statement. - -All MPI objects (e.g., 'MPI_Datatype', 'MPI_Comm', 'MPI_File') are of -type 'INTEGER' in Fortran. 
diff --git a/ompi/mca/io/romio/romio/man/man3/MPI_File_f2c.3 b/ompi/mca/io/romio/romio/man/man3/MPI_File_f2c.3 deleted file mode 100644 index dbeb597df7..0000000000 --- a/ompi/mca/io/romio/romio/man/man3/MPI_File_f2c.3 +++ /dev/null @@ -1,17 +0,0 @@ -.TH MPI_File_f2c 3 "11/3/1998" " " "MPI-2" -.SH NAME -MPI_File_f2c \- Translates a Fortran file handle to a C file handle -.SH SYNOPSIS -.nf -#include "mpi.h" -MPI_File MPI_File_f2c(MPI_Fint fh) -.fi -.SH INPUT PARAMETERS -.PD 0 -.TP -.B fh -- Fortran file handle (integer) -.PD 1 - -.SH RETURN VALUE -C file handle (handle) diff --git a/ompi/mca/io/romio/romio/man/man3/MPI_File_get_amode.3 b/ompi/mca/io/romio/romio/man/man3/MPI_File_get_amode.3 deleted file mode 100644 index cd888a32b0..0000000000 --- a/ompi/mca/io/romio/romio/man/man3/MPI_File_get_amode.3 +++ /dev/null @@ -1,31 +0,0 @@ -.TH MPI_File_get_amode 3 "2/5/1998" " " "MPI-2" -.SH NAME -MPI_File_get_amode \- Returns the file access mode -.SH SYNOPSIS -.nf -#include "mpi.h" -int MPI_File_get_amode(MPI_File fh, int *amode) -.fi -.SH INPUT PARAMETERS -.PD 0 -.TP -.B fh -- file handle (handle) -.PD 1 - -.SH OUTPUT PARAMETERS -.PD 0 -.TP -.B amode -- access mode (integer) -.PD 1 - -.SH NOTES FOR FORTRAN -All MPI routines in Fortran (except for 'MPI_WTIME' and 'MPI_WTICK') -have an additional argument 'ierr' at the end of the argument list. -'ierr' is an integer and has the same meaning as the return value of -the routine in C. In Fortran, MPI routines are subroutines and are -invoked with the 'call' statement. - -All MPI objects (e.g., 'MPI_Datatype', 'MPI_Comm', 'MPI_File') are of -type 'INTEGER' in Fortran. 
diff --git a/ompi/mca/io/romio/romio/man/man3/MPI_File_get_atomicity.3 b/ompi/mca/io/romio/romio/man/man3/MPI_File_get_atomicity.3 deleted file mode 100644 index ee2c120b91..0000000000 --- a/ompi/mca/io/romio/romio/man/man3/MPI_File_get_atomicity.3 +++ /dev/null @@ -1,31 +0,0 @@ -.TH MPI_File_get_atomicity 3 "2/5/1998" " " "MPI-2" -.SH NAME -MPI_File_get_atomicity \- Returns the atomicity mode -.SH SYNOPSIS -.nf -#include "mpi.h" -int MPI_File_get_atomicity(MPI_File fh, int *flag) -.fi -.SH INPUT PARAMETERS -.PD 0 -.TP -.B fh -- file handle (handle) -.PD 1 - -.SH OUTPUT PARAMETERS -.PD 0 -.TP -.B flag -- true if atomic mode, false if nonatomic mode (logical) -.PD 1 - -.SH NOTES FOR FORTRAN -All MPI routines in Fortran (except for 'MPI_WTIME' and 'MPI_WTICK') -have an additional argument 'ierr' at the end of the argument list. -'ierr' is an integer and has the same meaning as the return value of -the routine in C. In Fortran, MPI routines are subroutines and are -invoked with the 'call' statement. - -All MPI objects (e.g., 'MPI_Datatype', 'MPI_Comm', 'MPI_File') are of -type 'INTEGER' in Fortran. 
diff --git a/ompi/mca/io/romio/romio/man/man3/MPI_File_get_byte_offset.3 b/ompi/mca/io/romio/romio/man/man3/MPI_File_get_byte_offset.3 deleted file mode 100644 index f2ef28b4ca..0000000000 --- a/ompi/mca/io/romio/romio/man/man3/MPI_File_get_byte_offset.3 +++ /dev/null @@ -1,36 +0,0 @@ -.TH MPI_File_get_byte_offset 3 "3/4/1999" " " "MPI-2" -.SH NAME -MPI_File_get_byte_offset \- Returns the absolute byte position in the file corresponding to "offset" etypes relative to the current view -.SH SYNOPSIS -.nf -#include "mpi.h" -int MPI_File_get_byte_offset(MPI_File fh, MPI_Offset offset, MPI_Offset *disp) -.fi -.SH INPUT PARAMETERS -.PD 0 -.TP -.B fh -- file handle (handle) -.PD 1 -.PD 0 -.TP -.B offset -- offset (nonnegative integer) -.PD 1 - -.SH OUTPUT PARAMETERS -.PD 0 -.TP -.B disp -- absolute byte position of offset (nonnegative integer) -.PD 1 - -.SH NOTES FOR FORTRAN -All MPI routines in Fortran (except for 'MPI_WTIME' and 'MPI_WTICK') -have an additional argument 'ierr' at the end of the argument list. -'ierr' is an integer and has the same meaning as the return value of -the routine in C. In Fortran, MPI routines are subroutines and are -invoked with the 'call' statement. - -All MPI objects (e.g., 'MPI_Datatype', 'MPI_Comm', 'MPI_File') are of -type 'INTEGER' in Fortran. 
diff --git a/ompi/mca/io/romio/romio/man/man3/MPI_File_get_group.3 b/ompi/mca/io/romio/romio/man/man3/MPI_File_get_group.3 deleted file mode 100644 index 74ab1826e1..0000000000 --- a/ompi/mca/io/romio/romio/man/man3/MPI_File_get_group.3 +++ /dev/null @@ -1,31 +0,0 @@ -.TH MPI_File_get_group 3 "2/5/1998" " " "MPI-2" -.SH NAME -MPI_File_get_group \- Returns the group of processes that opened the file -.SH SYNOPSIS -.nf -#include "mpi.h" -int MPI_File_get_group(MPI_File fh, MPI_Group *group) -.fi -.SH INPUT PARAMETERS -.PD 0 -.TP -.B fh -- file handle (handle) -.PD 1 - -.SH OUTPUT PARAMETERS -.PD 0 -.TP -.B group -- group that opened the file (handle) -.PD 1 - -.SH NOTES FOR FORTRAN -All MPI routines in Fortran (except for 'MPI_WTIME' and 'MPI_WTICK') -have an additional argument 'ierr' at the end of the argument list. -'ierr' is an integer and has the same meaning as the return value of -the routine in C. In Fortran, MPI routines are subroutines and are -invoked with the 'call' statement. - -All MPI objects (e.g., 'MPI_Datatype', 'MPI_Comm', 'MPI_File') are of -type 'INTEGER' in Fortran. diff --git a/ompi/mca/io/romio/romio/man/man3/MPI_File_get_info.3 b/ompi/mca/io/romio/romio/man/man3/MPI_File_get_info.3 deleted file mode 100644 index 2bdb1931f6..0000000000 --- a/ompi/mca/io/romio/romio/man/man3/MPI_File_get_info.3 +++ /dev/null @@ -1,31 +0,0 @@ -.TH MPI_File_get_info 3 "2/5/1998" " " "MPI-2" -.SH NAME -MPI_File_get_info \- Returns the hints for a file that are actually being used by MPI -.SH SYNOPSIS -.nf -#include "mpi.h" -int MPI_File_get_info(MPI_File fh, MPI_Info *info_used) -.fi -.SH INPUT PARAMETERS -.PD 0 -.TP -.B fh -- file handle (handle) -.PD 1 - -.SH OUTPUT PARAMETERS -.PD 0 -.TP -.B info_used -- info object (handle) -.PD 1 - -.SH NOTES FOR FORTRAN -All MPI routines in Fortran (except for 'MPI_WTIME' and 'MPI_WTICK') -have an additional argument 'ierr' at the end of the argument list. 
-'ierr' is an integer and has the same meaning as the return value of -the routine in C. In Fortran, MPI routines are subroutines and are -invoked with the 'call' statement. - -All MPI objects (e.g., 'MPI_Datatype', 'MPI_Comm', 'MPI_File') are of -type 'INTEGER' in Fortran. diff --git a/ompi/mca/io/romio/romio/man/man3/MPI_File_get_position.3 b/ompi/mca/io/romio/romio/man/man3/MPI_File_get_position.3 deleted file mode 100644 index 41c5827b49..0000000000 --- a/ompi/mca/io/romio/romio/man/man3/MPI_File_get_position.3 +++ /dev/null @@ -1,31 +0,0 @@ -.TH MPI_File_get_position 3 "8/31/1998" " " "MPI-2" -.SH NAME -MPI_File_get_position \- Returns the current position of the individual file pointer in etype units relative to the current view -.SH SYNOPSIS -.nf -#include "mpi.h" -int MPI_File_get_position(MPI_File fh, MPI_Offset *offset) -.fi -.SH INPUT PARAMETERS -.PD 0 -.TP -.B fh -- file handle (handle) -.PD 1 - -.SH OUTPUT PARAMETERS -.PD 0 -.TP -.B offset -- offset of individual file pointer (nonnegative integer) -.PD 1 - -.SH NOTES FOR FORTRAN -All MPI routines in Fortran (except for 'MPI_WTIME' and 'MPI_WTICK') -have an additional argument 'ierr' at the end of the argument list. -'ierr' is an integer and has the same meaning as the return value of -the routine in C. In Fortran, MPI routines are subroutines and are -invoked with the 'call' statement. - -All MPI objects (e.g., 'MPI_Datatype', 'MPI_Comm', 'MPI_File') are of -type 'INTEGER' in Fortran. 
diff --git a/ompi/mca/io/romio/romio/man/man3/MPI_File_get_position_shared.3 b/ompi/mca/io/romio/romio/man/man3/MPI_File_get_position_shared.3 deleted file mode 100644 index 6b0ee36b22..0000000000 --- a/ompi/mca/io/romio/romio/man/man3/MPI_File_get_position_shared.3 +++ /dev/null @@ -1,31 +0,0 @@ -.TH MPI_File_get_position_shared 3 "8/31/1998" " " "MPI-2" -.SH NAME -MPI_File_get_position_shared \- Returns the current position of the shared file pointer in etype units relative to the current view -.SH SYNOPSIS -.nf -#include "mpi.h" -int MPI_File_get_position_shared(MPI_File fh, MPI_Offset *offset) -.fi -.SH INPUT PARAMETERS -.PD 0 -.TP -.B fh -- file handle (handle) -.PD 1 - -.SH OUTPUT PARAMETERS -.PD 0 -.TP -.B offset -- offset of shared file pointer (nonnegative integer) -.PD 1 - -.SH NOTES FOR FORTRAN -All MPI routines in Fortran (except for 'MPI_WTIME' and 'MPI_WTICK') -have an additional argument 'ierr' at the end of the argument list. -'ierr' is an integer and has the same meaning as the return value of -the routine in C. In Fortran, MPI routines are subroutines and are -invoked with the 'call' statement. - -All MPI objects (e.g., 'MPI_Datatype', 'MPI_Comm', 'MPI_File') are of -type 'INTEGER' in Fortran. 
diff --git a/ompi/mca/io/romio/romio/man/man3/MPI_File_get_size.3 b/ompi/mca/io/romio/romio/man/man3/MPI_File_get_size.3 deleted file mode 100644 index 55927e4198..0000000000 --- a/ompi/mca/io/romio/romio/man/man3/MPI_File_get_size.3 +++ /dev/null @@ -1,31 +0,0 @@ -.TH MPI_File_get_size 3 "2/5/1998" " " "MPI-2" -.SH NAME -MPI_File_get_size \- Returns the file size -.SH SYNOPSIS -.nf -#include "mpi.h" -int MPI_File_get_size(MPI_File fh, MPI_Offset *size) -.fi -.SH INPUT PARAMETERS -.PD 0 -.TP -.B fh -- file handle (handle) -.PD 1 - -.SH OUTPUT PARAMETERS -.PD 0 -.TP -.B size -- size of the file in bytes (nonnegative integer) -.PD 1 - -.SH NOTES FOR FORTRAN -All MPI routines in Fortran (except for 'MPI_WTIME' and 'MPI_WTICK') -have an additional argument 'ierr' at the end of the argument list. -'ierr' is an integer and has the same meaning as the return value of -the routine in C. In Fortran, MPI routines are subroutines and are -invoked with the 'call' statement. - -All MPI objects (e.g., 'MPI_Datatype', 'MPI_Comm', 'MPI_File') are of -type 'INTEGER' in Fortran. 
diff --git a/ompi/mca/io/romio/romio/man/man3/MPI_File_get_type_extent.3 b/ompi/mca/io/romio/romio/man/man3/MPI_File_get_type_extent.3 deleted file mode 100644 index 74c3ac7a19..0000000000 --- a/ompi/mca/io/romio/romio/man/man3/MPI_File_get_type_extent.3 +++ /dev/null @@ -1,37 +0,0 @@ -.TH MPI_File_get_type_extent 3 "2/5/1998" " " "MPI-2" -.SH NAME -MPI_File_get_type_extent \- Returns the extent of datatype in the file -.SH SYNOPSIS -.nf -#include "mpi.h" -int MPI_File_get_type_extent(MPI_File fh, MPI_Datatype datatype, - MPI_Aint *extent) -.fi -.SH INPUT PARAMETERS -.PD 0 -.TP -.B fh -- file handle (handle) -.PD 1 -.PD 0 -.TP -.B datatype -- datatype (handle) -.PD 1 - -.SH OUTPUT PARAMETERS -.PD 0 -.TP -.B extent -- extent of the datatype (nonnegative integer) -.PD 1 - -.SH NOTES FOR FORTRAN -All MPI routines in Fortran (except for 'MPI_WTIME' and 'MPI_WTICK') -have an additional argument 'ierr' at the end of the argument list. -'ierr' is an integer and has the same meaning as the return value of -the routine in C. In Fortran, MPI routines are subroutines and are -invoked with the 'call' statement. - -All MPI objects (e.g., 'MPI_Datatype', 'MPI_Comm', 'MPI_File') are of -type 'INTEGER' in Fortran. 
diff --git a/ompi/mca/io/romio/romio/man/man3/MPI_File_get_view.3 b/ompi/mca/io/romio/romio/man/man3/MPI_File_get_view.3 deleted file mode 100644 index 6470b0a847..0000000000 --- a/ompi/mca/io/romio/romio/man/man3/MPI_File_get_view.3 +++ /dev/null @@ -1,47 +0,0 @@ -.TH MPI_File_get_view 3 "4/6/1998" " " "MPI-2" -.SH NAME -MPI_File_get_view \- Returns the file view -.SH SYNOPSIS -.nf -#include "mpi.h" -int MPI_File_get_view(MPI_File fh, MPI_Offset *disp, MPI_Datatype *etype, - MPI_Datatype *filetype, char *datarep) -.fi -.SH INPUT PARAMETERS -.PD 0 -.TP -.B fh -- file handle (handle) -.PD 1 - -.SH OUTPUT PARAMETERS -.PD 0 -.TP -.B disp -- displacement (nonnegative integer) -.PD 1 -.PD 0 -.TP -.B etype -- elementary datatype (handle) -.PD 1 -.PD 0 -.TP -.B filetype -- filetype (handle) -.PD 1 -.PD 0 -.TP -.B datarep -- data representation (string) -.PD 1 - -.SH NOTES FOR FORTRAN -All MPI routines in Fortran (except for 'MPI_WTIME' and 'MPI_WTICK') -have an additional argument 'ierr' at the end of the argument list. -'ierr' is an integer and has the same meaning as the return value of -the routine in C. In Fortran, MPI routines are subroutines and are -invoked with the 'call' statement. - -All MPI objects (e.g., 'MPI_Datatype', 'MPI_Comm', 'MPI_File') are of -type 'INTEGER' in Fortran. 
diff --git a/ompi/mca/io/romio/romio/man/man3/MPI_File_iread.3 b/ompi/mca/io/romio/romio/man/man3/MPI_File_iread.3 deleted file mode 100644 index 5bbc1c35ca..0000000000 --- a/ompi/mca/io/romio/romio/man/man3/MPI_File_iread.3 +++ /dev/null @@ -1,47 +0,0 @@ -.TH MPI_File_iread 3 "8/31/1998" " " "MPI-2" -.SH NAME -MPI_File_iread \- Nonblocking read using individual file pointer -.SH SYNOPSIS -.nf -#include "mpi.h" -int MPI_File_iread(MPI_File fh, void *buf, int count, - MPI_Datatype datatype, MPIO_Request *request) -.fi -.SH INPUT PARAMETERS -.PD 0 -.TP -.B fh -- file handle (handle) -.PD 1 -.PD 0 -.TP -.B count -- number of elements in buffer (nonnegative integer) -.PD 1 -.PD 0 -.TP -.B datatype -- datatype of each buffer element (handle) -.PD 1 - -.SH OUTPUT PARAMETERS -.PD 0 -.TP -.B buf -- initial address of buffer (choice) -.PD 1 -.PD 0 -.TP -.B request -- request object (handle) -.PD 1 - -.SH NOTES FOR FORTRAN -All MPI routines in Fortran (except for 'MPI_WTIME' and 'MPI_WTICK') -have an additional argument 'ierr' at the end of the argument list. -'ierr' is an integer and has the same meaning as the return value of -the routine in C. In Fortran, MPI routines are subroutines and are -invoked with the 'call' statement. - -All MPI objects (e.g., 'MPI_Datatype', 'MPI_Comm', 'MPI_File') are of -type 'INTEGER' in Fortran. 
diff --git a/ompi/mca/io/romio/romio/man/man3/MPI_File_iread_at.3 b/ompi/mca/io/romio/romio/man/man3/MPI_File_iread_at.3 deleted file mode 100644 index d1ad5271d2..0000000000 --- a/ompi/mca/io/romio/romio/man/man3/MPI_File_iread_at.3 +++ /dev/null @@ -1,53 +0,0 @@ -.TH MPI_File_iread_at 3 "8/31/1998" " " "MPI-2" -.SH NAME -MPI_File_iread_at \- Nonblocking read using explict offset -.SH SYNOPSIS -.nf -#include "mpi.h" -int MPI_File_iread_at(MPI_File fh, MPI_Offset offset, void *buf, - int count, MPI_Datatype datatype, - MPIO_Request *request) -.fi -.SH INPUT PARAMETERS -.PD 0 -.TP -.B fh -- file handle (handle) -.PD 1 -.PD 0 -.TP -.B offset -- file offset (nonnegative integer) -.PD 1 -.PD 0 -.TP -.B count -- number of elements in buffer (nonnegative integer) -.PD 1 -.PD 0 -.TP -.B datatype -- datatype of each buffer element (handle) -.PD 1 - -.SH OUTPUT PARAMETERS -.PD 0 -.TP -.B buf -- initial address of buffer (choice) -.PD 1 -.PD 0 -.TP -.B request -- request object (handle) -.PD 1 - -.SH NOTES FOR FORTRAN -All MPI routines in Fortran (except for 'MPI_WTIME' and 'MPI_WTICK') -have an additional argument 'ierr' at the end of the argument list. -'ierr' is an integer and has the same meaning as the return value of -the routine in C. In Fortran, MPI routines are subroutines and are -invoked with the 'call' statement. - -All MPI objects (e.g., 'MPI_Datatype', 'MPI_Comm', 'MPI_File') are of -type 'INTEGER' in Fortran. 
diff --git a/ompi/mca/io/romio/romio/man/man3/MPI_File_iread_shared.3 b/ompi/mca/io/romio/romio/man/man3/MPI_File_iread_shared.3 deleted file mode 100644 index e71c55df64..0000000000 --- a/ompi/mca/io/romio/romio/man/man3/MPI_File_iread_shared.3 +++ /dev/null @@ -1,47 +0,0 @@ -.TH MPI_File_iread_shared 3 "9/3/1998" " " "MPI-2" -.SH NAME -MPI_File_iread_shared \- Nonblocking read using shared file pointer -.SH SYNOPSIS -.nf -#include "mpi.h" -int MPI_File_iread_shared(MPI_File fh, void *buf, int count, - MPI_Datatype datatype, MPIO_Request *request) -.fi -.SH INPUT PARAMETERS -.PD 0 -.TP -.B fh -- file handle (handle) -.PD 1 -.PD 0 -.TP -.B count -- number of elements in buffer (nonnegative integer) -.PD 1 -.PD 0 -.TP -.B datatype -- datatype of each buffer element (handle) -.PD 1 - -.SH OUTPUT PARAMETERS -.PD 0 -.TP -.B buf -- initial address of buffer (choice) -.PD 1 -.PD 0 -.TP -.B request -- request object (handle) -.PD 1 - -.SH NOTES FOR FORTRAN -All MPI routines in Fortran (except for 'MPI_WTIME' and 'MPI_WTICK') -have an additional argument 'ierr' at the end of the argument list. -'ierr' is an integer and has the same meaning as the return value of -the routine in C. In Fortran, MPI routines are subroutines and are -invoked with the 'call' statement. - -All MPI objects (e.g., 'MPI_Datatype', 'MPI_Comm', 'MPI_File') are of -type 'INTEGER' in Fortran. 
diff --git a/ompi/mca/io/romio/romio/man/man3/MPI_File_iwrite.3 b/ompi/mca/io/romio/romio/man/man3/MPI_File_iwrite.3 deleted file mode 100644 index 447dec5f9f..0000000000 --- a/ompi/mca/io/romio/romio/man/man3/MPI_File_iwrite.3 +++ /dev/null @@ -1,47 +0,0 @@ -.TH MPI_File_iwrite 3 "8/31/1998" " " "MPI-2" -.SH NAME -MPI_File_iwrite \- Nonblocking write using individual file pointer -.SH SYNOPSIS -.nf -#include "mpi.h" -int MPI_File_iwrite(MPI_File fh, void *buf, int count, - MPI_Datatype datatype, MPIO_Request *request) -.fi -.SH INPUT PARAMETERS -.PD 0 -.TP -.B fh -- file handle (handle) -.PD 1 -.PD 0 -.TP -.B buf -- initial address of buffer (choice) -.PD 1 -.PD 0 -.TP -.B count -- number of elements in buffer (nonnegative integer) -.PD 1 -.PD 0 -.TP -.B datatype -- datatype of each buffer element (handle) -.PD 1 - -.SH OUTPUT PARAMETERS -.PD 0 -.TP -.B request -- request object (handle) -.PD 1 - -.SH NOTES FOR FORTRAN -All MPI routines in Fortran (except for 'MPI_WTIME' and 'MPI_WTICK') -have an additional argument 'ierr' at the end of the argument list. -'ierr' is an integer and has the same meaning as the return value of -the routine in C. In Fortran, MPI routines are subroutines and are -invoked with the 'call' statement. - -All MPI objects (e.g., 'MPI_Datatype', 'MPI_Comm', 'MPI_File') are of -type 'INTEGER' in Fortran. 
diff --git a/ompi/mca/io/romio/romio/man/man3/MPI_File_iwrite_at.3 b/ompi/mca/io/romio/romio/man/man3/MPI_File_iwrite_at.3 deleted file mode 100644 index 4d54b60dae..0000000000 --- a/ompi/mca/io/romio/romio/man/man3/MPI_File_iwrite_at.3 +++ /dev/null @@ -1,53 +0,0 @@ -.TH MPI_File_iwrite_at 3 "8/31/1998" " " "MPI-2" -.SH NAME -MPI_File_iwrite_at \- Nonblocking write using explict offset -.SH SYNOPSIS -.nf -#include "mpi.h" -int MPI_File_iwrite_at(MPI_File fh, MPI_Offset offset, void *buf, - int count, MPI_Datatype datatype, - MPIO_Request *request) -.fi -.SH INPUT PARAMETERS -.PD 0 -.TP -.B fh -- file handle (handle) -.PD 1 -.PD 0 -.TP -.B offset -- file offset (nonnegative integer) -.PD 1 -.PD 0 -.TP -.B buf -- initial address of buffer (choice) -.PD 1 -.PD 0 -.TP -.B count -- number of elements in buffer (nonnegative integer) -.PD 1 -.PD 0 -.TP -.B datatype -- datatype of each buffer element (handle) -.PD 1 - -.SH OUTPUT PARAMETERS -.PD 0 -.TP -.B request -- request object (handle) -.PD 1 - -.SH NOTES FOR FORTRAN -All MPI routines in Fortran (except for 'MPI_WTIME' and 'MPI_WTICK') -have an additional argument 'ierr' at the end of the argument list. -'ierr' is an integer and has the same meaning as the return value of -the routine in C. In Fortran, MPI routines are subroutines and are -invoked with the 'call' statement. - -All MPI objects (e.g., 'MPI_Datatype', 'MPI_Comm', 'MPI_File') are of -type 'INTEGER' in Fortran. 
diff --git a/ompi/mca/io/romio/romio/man/man3/MPI_File_iwrite_shared.3 b/ompi/mca/io/romio/romio/man/man3/MPI_File_iwrite_shared.3 deleted file mode 100644 index 80ce107f3a..0000000000 --- a/ompi/mca/io/romio/romio/man/man3/MPI_File_iwrite_shared.3 +++ /dev/null @@ -1,47 +0,0 @@ -.TH MPI_File_iwrite_shared 3 "9/3/1998" " " "MPI-2" -.SH NAME -MPI_File_iwrite_shared \- Nonblocking write using shared file pointer -.SH SYNOPSIS -.nf -#include "mpi.h" -int MPI_File_iwrite_shared(MPI_File fh, void *buf, int count, - MPI_Datatype datatype, MPIO_Request *request) -.fi -.SH INPUT PARAMETERS -.PD 0 -.TP -.B fh -- file handle (handle) -.PD 1 -.PD 0 -.TP -.B buf -- initial address of buffer (choice) -.PD 1 -.PD 0 -.TP -.B count -- number of elements in buffer (nonnegative integer) -.PD 1 -.PD 0 -.TP -.B datatype -- datatype of each buffer element (handle) -.PD 1 - -.SH OUTPUT PARAMETERS -.PD 0 -.TP -.B request -- request object (handle) -.PD 1 - -.SH NOTES FOR FORTRAN -All MPI routines in Fortran (except for 'MPI_WTIME' and 'MPI_WTICK') -have an additional argument 'ierr' at the end of the argument list. -'ierr' is an integer and has the same meaning as the return value of -the routine in C. In Fortran, MPI routines are subroutines and are -invoked with the 'call' statement. - -All MPI objects (e.g., 'MPI_Datatype', 'MPI_Comm', 'MPI_File') are of -type 'INTEGER' in Fortran. 
diff --git a/ompi/mca/io/romio/romio/man/man3/MPI_File_open.3 b/ompi/mca/io/romio/romio/man/man3/MPI_File_open.3 deleted file mode 100644 index 6bf223d1bd..0000000000 --- a/ompi/mca/io/romio/romio/man/man3/MPI_File_open.3 +++ /dev/null @@ -1,47 +0,0 @@ -.TH MPI_File_open 3 "3/4/1999" " " "MPI-2" -.SH NAME -MPI_File_open \- Opens a file -.SH SYNOPSIS -.nf -#include "mpi.h" -int MPI_File_open(MPI_Comm comm, char *filename, int amode, - MPI_Info info, MPI_File *fh) -.fi -.SH INPUT PARAMETERS -.PD 0 -.TP -.B comm -- communicator (handle) -.PD 1 -.PD 0 -.TP -.B filename -- name of file to open (string) -.PD 1 -.PD 0 -.TP -.B amode -- file access mode (integer) -.PD 1 -.PD 0 -.TP -.B info -- info object (handle) -.PD 1 - -.SH OUTPUT PARAMETERS -.PD 0 -.TP -.B fh -- file handle (handle) -.PD 1 - -.SH NOTES FOR FORTRAN -All MPI routines in Fortran (except for 'MPI_WTIME' and 'MPI_WTICK') -have an additional argument 'ierr' at the end of the argument list. -'ierr' is an integer and has the same meaning as the return value of -the routine in C. In Fortran, MPI routines are subroutines and are -invoked with the 'call' statement. - -All MPI objects (e.g., 'MPI_Datatype', 'MPI_Comm', 'MPI_File') are of -type 'INTEGER' in Fortran. 
diff --git a/ompi/mca/io/romio/romio/man/man3/MPI_File_preallocate.3 b/ompi/mca/io/romio/romio/man/man3/MPI_File_preallocate.3 deleted file mode 100644 index 02154daae4..0000000000 --- a/ompi/mca/io/romio/romio/man/man3/MPI_File_preallocate.3 +++ /dev/null @@ -1,29 +0,0 @@ -.TH MPI_File_preallocate 3 "3/4/1998" " " "MPI-2" -.SH NAME -MPI_File_preallocate \- Preallocates storage space for a file -.SH SYNOPSIS -.nf -#include "mpi.h" -int MPI_File_preallocate(MPI_File fh, MPI_Offset size) -.fi -.SH INPUT PARAMETERS -.PD 0 -.TP -.B fh -- file handle (handle) -.PD 1 -.PD 0 -.TP -.B size -- size to preallocate (nonnegative integer) -.PD 1 - -.SH NOTES FOR FORTRAN -All MPI routines in Fortran (except for 'MPI_WTIME' and 'MPI_WTICK') -have an additional argument 'ierr' at the end of the argument list. -'ierr' is an integer and has the same meaning as the return value of -the routine in C. In Fortran, MPI routines are subroutines and are -invoked with the 'call' statement. - -All MPI objects (e.g., 'MPI_Datatype', 'MPI_Comm', 'MPI_File') are of -type 'INTEGER' in Fortran. 
diff --git a/ompi/mca/io/romio/romio/man/man3/MPI_File_read.3 b/ompi/mca/io/romio/romio/man/man3/MPI_File_read.3 deleted file mode 100644 index f71c361b09..0000000000 --- a/ompi/mca/io/romio/romio/man/man3/MPI_File_read.3 +++ /dev/null @@ -1,47 +0,0 @@ -.TH MPI_File_read 3 "8/31/1998" " " "MPI-2" -.SH NAME -MPI_File_read \- Read using individual file pointer -.SH SYNOPSIS -.nf -#include "mpi.h" -int MPI_File_read(MPI_File fh, void *buf, int count, - MPI_Datatype datatype, MPI_Status *status) -.fi -.SH INPUT PARAMETERS -.PD 0 -.TP -.B fh -- file handle (handle) -.PD 1 -.PD 0 -.TP -.B count -- number of elements in buffer (nonnegative integer) -.PD 1 -.PD 0 -.TP -.B datatype -- datatype of each buffer element (handle) -.PD 1 - -.SH OUTPUT PARAMETERS -.PD 0 -.TP -.B buf -- initial address of buffer (choice) -.PD 1 -.PD 0 -.TP -.B status -- status object (Status) -.PD 1 - -.SH NOTES FOR FORTRAN -All MPI routines in Fortran (except for 'MPI_WTIME' and 'MPI_WTICK') -have an additional argument 'ierr' at the end of the argument list. -'ierr' is an integer and has the same meaning as the return value of -the routine in C. In Fortran, MPI routines are subroutines and are -invoked with the 'call' statement. - -All MPI objects (e.g., 'MPI_Datatype', 'MPI_Comm', 'MPI_File') are of -type 'INTEGER' in Fortran. 
diff --git a/ompi/mca/io/romio/romio/man/man3/MPI_File_read_all.3 b/ompi/mca/io/romio/romio/man/man3/MPI_File_read_all.3 deleted file mode 100644 index 5d3eb8448a..0000000000 --- a/ompi/mca/io/romio/romio/man/man3/MPI_File_read_all.3 +++ /dev/null @@ -1,47 +0,0 @@ -.TH MPI_File_read_all 3 "8/31/1998" " " "MPI-2" -.SH NAME -MPI_File_read_all \- Collective read using individual file pointer -.SH SYNOPSIS -.nf -#include "mpi.h" -int MPI_File_read_all(MPI_File fh, void *buf, int count, - MPI_Datatype datatype, MPI_Status *status) -.fi -.SH INPUT PARAMETERS -.PD 0 -.TP -.B fh -- file handle (handle) -.PD 1 -.PD 0 -.TP -.B count -- number of elements in buffer (nonnegative integer) -.PD 1 -.PD 0 -.TP -.B datatype -- datatype of each buffer element (handle) -.PD 1 - -.SH OUTPUT PARAMETERS -.PD 0 -.TP -.B buf -- initial address of buffer (choice) -.PD 1 -.PD 0 -.TP -.B status -- status object (Status) -.PD 1 - -.SH NOTES FOR FORTRAN -All MPI routines in Fortran (except for 'MPI_WTIME' and 'MPI_WTICK') -have an additional argument 'ierr' at the end of the argument list. -'ierr' is an integer and has the same meaning as the return value of -the routine in C. In Fortran, MPI routines are subroutines and are -invoked with the 'call' statement. - -All MPI objects (e.g., 'MPI_Datatype', 'MPI_Comm', 'MPI_File') are of -type 'INTEGER' in Fortran. 
diff --git a/ompi/mca/io/romio/romio/man/man3/MPI_File_read_all_begin.3 b/ompi/mca/io/romio/romio/man/man3/MPI_File_read_all_begin.3 deleted file mode 100644 index 4a5f95ce89..0000000000 --- a/ompi/mca/io/romio/romio/man/man3/MPI_File_read_all_begin.3 +++ /dev/null @@ -1,42 +0,0 @@ -.TH MPI_File_read_all_begin 3 "8/31/1998" " " "MPI-2" -.SH NAME -MPI_File_read_all_begin \- Begin a split collective read using individual file pointer -.SH SYNOPSIS -.nf -#include "mpi.h" -int MPI_File_read_all_begin(MPI_File fh, void *buf, int count, - MPI_Datatype datatype) -.fi -.SH INPUT PARAMETERS -.PD 0 -.TP -.B fh -- file handle (handle) -.PD 1 -.PD 0 -.TP -.B count -- number of elements in buffer (nonnegative integer) -.PD 1 -.PD 0 -.TP -.B datatype -- datatype of each buffer element (handle) -.PD 1 - -.SH OUTPUT PARAMETERS -.PD 0 -.TP -.B buf -- initial address of buffer (choice) -.PD 1 - -.SH NOTES FOR FORTRAN -All MPI routines in Fortran (except for 'MPI_WTIME' and 'MPI_WTICK') -have an additional argument 'ierr' at the end of the argument list. -'ierr' is an integer and has the same meaning as the return value of -the routine in C. In Fortran, MPI routines are subroutines and are -invoked with the 'call' statement. - -All MPI objects (e.g., 'MPI_Datatype', 'MPI_Comm', 'MPI_File') are of -type 'INTEGER' in Fortran. 
diff --git a/ompi/mca/io/romio/romio/man/man3/MPI_File_read_all_end.3 b/ompi/mca/io/romio/romio/man/man3/MPI_File_read_all_end.3 deleted file mode 100644 index fe194e862d..0000000000 --- a/ompi/mca/io/romio/romio/man/man3/MPI_File_read_all_end.3 +++ /dev/null @@ -1,36 +0,0 @@ -.TH MPI_File_read_all_end 3 "8/31/1998" " " "MPI-2" -.SH NAME -MPI_File_read_all_end \- Complete a split collective read using individual file pointer -.SH SYNOPSIS -.nf -#include "mpi.h" -int MPI_File_read_all_end(MPI_File fh, void *buf, MPI_Status *status) -.fi -.SH INPUT PARAMETERS -.PD 0 -.TP -.B fh -- file handle (handle) -.PD 1 - -.SH OUTPUT PARAMETERS -.PD 0 -.TP -.B buf -- initial address of buffer (choice) -.PD 1 -.PD 0 -.TP -.B status -- status object (Status) -.PD 1 - -.SH NOTES FOR FORTRAN -All MPI routines in Fortran (except for 'MPI_WTIME' and 'MPI_WTICK') -have an additional argument 'ierr' at the end of the argument list. -'ierr' is an integer and has the same meaning as the return value of -the routine in C. In Fortran, MPI routines are subroutines and are -invoked with the 'call' statement. - -All MPI objects (e.g., 'MPI_Datatype', 'MPI_Comm', 'MPI_File') are of -type 'INTEGER' in Fortran. 
diff --git a/ompi/mca/io/romio/romio/man/man3/MPI_File_read_at.3 b/ompi/mca/io/romio/romio/man/man3/MPI_File_read_at.3 deleted file mode 100644 index b3f8d10f7a..0000000000 --- a/ompi/mca/io/romio/romio/man/man3/MPI_File_read_at.3 +++ /dev/null @@ -1,52 +0,0 @@ -.TH MPI_File_read_at 3 "8/31/1998" " " "MPI-2" -.SH NAME -MPI_File_read_at \- Read using explict offset -.SH SYNOPSIS -.nf -#include "mpi.h" -int MPI_File_read_at(MPI_File fh, MPI_Offset offset, void *buf, - int count, MPI_Datatype datatype, MPI_Status *status) -.fi -.SH INPUT PARAMETERS -.PD 0 -.TP -.B fh -- file handle (handle) -.PD 1 -.PD 0 -.TP -.B offset -- file offset (nonnegative integer) -.PD 1 -.PD 0 -.TP -.B count -- number of elements in buffer (nonnegative integer) -.PD 1 -.PD 0 -.TP -.B datatype -- datatype of each buffer element (handle) -.PD 1 - -.SH OUTPUT PARAMETERS -.PD 0 -.TP -.B buf -- initial address of buffer (choice) -.PD 1 -.PD 0 -.TP -.B status -- status object (Status) -.PD 1 - -.SH NOTES FOR FORTRAN -All MPI routines in Fortran (except for 'MPI_WTIME' and 'MPI_WTICK') -have an additional argument 'ierr' at the end of the argument list. -'ierr' is an integer and has the same meaning as the return value of -the routine in C. In Fortran, MPI routines are subroutines and are -invoked with the 'call' statement. - -All MPI objects (e.g., 'MPI_Datatype', 'MPI_Comm', 'MPI_File') are of -type 'INTEGER' in Fortran. 
diff --git a/ompi/mca/io/romio/romio/man/man3/MPI_File_read_at_all.3 b/ompi/mca/io/romio/romio/man/man3/MPI_File_read_at_all.3 deleted file mode 100644 index cacdd659b8..0000000000 --- a/ompi/mca/io/romio/romio/man/man3/MPI_File_read_at_all.3 +++ /dev/null @@ -1,53 +0,0 @@ -.TH MPI_File_read_at_all 3 "8/31/1998" " " "MPI-2" -.SH NAME -MPI_File_read_at_all \- Collective read using explict offset -.SH SYNOPSIS -.nf -#include "mpi.h" -int MPI_File_read_at_all(MPI_File fh, MPI_Offset offset, void *buf, - int count, MPI_Datatype datatype, - MPI_Status *status) -.fi -.SH INPUT PARAMETERS -.PD 0 -.TP -.B fh -- file handle (handle) -.PD 1 -.PD 0 -.TP -.B offset -- file offset (nonnegative integer) -.PD 1 -.PD 0 -.TP -.B count -- number of elements in buffer (nonnegative integer) -.PD 1 -.PD 0 -.TP -.B datatype -- datatype of each buffer element (handle) -.PD 1 - -.SH OUTPUT PARAMETERS -.PD 0 -.TP -.B buf -- initial address of buffer (choice) -.PD 1 -.PD 0 -.TP -.B status -- status object (Status) -.PD 1 - -.SH NOTES FOR FORTRAN -All MPI routines in Fortran (except for 'MPI_WTIME' and 'MPI_WTICK') -have an additional argument 'ierr' at the end of the argument list. -'ierr' is an integer and has the same meaning as the return value of -the routine in C. In Fortran, MPI routines are subroutines and are -invoked with the 'call' statement. - -All MPI objects (e.g., 'MPI_Datatype', 'MPI_Comm', 'MPI_File') are of -type 'INTEGER' in Fortran. 
diff --git a/ompi/mca/io/romio/romio/man/man3/MPI_File_read_at_all_begin.3 b/ompi/mca/io/romio/romio/man/man3/MPI_File_read_at_all_begin.3 deleted file mode 100644 index 1d983667fe..0000000000 --- a/ompi/mca/io/romio/romio/man/man3/MPI_File_read_at_all_begin.3 +++ /dev/null @@ -1,47 +0,0 @@ -.TH MPI_File_read_at_all_begin 3 "8/31/1998" " " "MPI-2" -.SH NAME -MPI_File_read_at_all_begin \- Begin a split collective read using explict offset -.SH SYNOPSIS -.nf -#include "mpi.h" -int MPI_File_read_at_all_begin(MPI_File fh, MPI_Offset offset, void *buf, - int count, MPI_Datatype datatype) -.fi -.SH INPUT PARAMETERS -.PD 0 -.TP -.B fh -- file handle (handle) -.PD 1 -.PD 0 -.TP -.B offset -- file offset (nonnegative integer) -.PD 1 -.PD 0 -.TP -.B count -- number of elements in buffer (nonnegative integer) -.PD 1 -.PD 0 -.TP -.B datatype -- datatype of each buffer element (handle) -.PD 1 - -.SH OUTPUT PARAMETERS -.PD 0 -.TP -.B buf -- initial address of buffer (choice) -.PD 1 - -.SH NOTES FOR FORTRAN -All MPI routines in Fortran (except for 'MPI_WTIME' and 'MPI_WTICK') -have an additional argument 'ierr' at the end of the argument list. -'ierr' is an integer and has the same meaning as the return value of -the routine in C. In Fortran, MPI routines are subroutines and are -invoked with the 'call' statement. - -All MPI objects (e.g., 'MPI_Datatype', 'MPI_Comm', 'MPI_File') are of -type 'INTEGER' in Fortran. 
diff --git a/ompi/mca/io/romio/romio/man/man3/MPI_File_read_at_all_end.3 b/ompi/mca/io/romio/romio/man/man3/MPI_File_read_at_all_end.3 deleted file mode 100644 index ec3e90c300..0000000000 --- a/ompi/mca/io/romio/romio/man/man3/MPI_File_read_at_all_end.3 +++ /dev/null @@ -1,36 +0,0 @@ -.TH MPI_File_read_at_all_end 3 "8/31/1998" " " "MPI-2" -.SH NAME -MPI_File_read_at_all_end \- Complete a split collective read using explict offset -.SH SYNOPSIS -.nf -#include "mpi.h" -int MPI_File_read_at_all_end(MPI_File fh, void *buf, MPI_Status *status) -.fi -.SH INPUT PARAMETERS -.PD 0 -.TP -.B fh -- file handle (handle) -.PD 1 - -.SH OUTPUT PARAMETERS -.PD 0 -.TP -.B buf -- initial address of buffer (choice) -.PD 1 -.PD 0 -.TP -.B status -- status object (Status) -.PD 1 - -.SH NOTES FOR FORTRAN -All MPI routines in Fortran (except for 'MPI_WTIME' and 'MPI_WTICK') -have an additional argument 'ierr' at the end of the argument list. -'ierr' is an integer and has the same meaning as the return value of -the routine in C. In Fortran, MPI routines are subroutines and are -invoked with the 'call' statement. - -All MPI objects (e.g., 'MPI_Datatype', 'MPI_Comm', 'MPI_File') are of -type 'INTEGER' in Fortran. 
diff --git a/ompi/mca/io/romio/romio/man/man3/MPI_File_read_ordered.3 b/ompi/mca/io/romio/romio/man/man3/MPI_File_read_ordered.3 deleted file mode 100644 index 68c2772f70..0000000000 --- a/ompi/mca/io/romio/romio/man/man3/MPI_File_read_ordered.3 +++ /dev/null @@ -1,47 +0,0 @@ -.TH MPI_File_read_ordered 3 "9/3/1998" " " "MPI-2" -.SH NAME -MPI_File_read_ordered \- Collective read using shared file pointer -.SH SYNOPSIS -.nf -#include "mpi.h" -int MPI_File_read_ordered(MPI_File fh, void *buf, int count, - MPI_Datatype datatype, MPI_Status *status) -.fi -.SH INPUT PARAMETERS -.PD 0 -.TP -.B fh -- file handle (handle) -.PD 1 -.PD 0 -.TP -.B count -- number of elements in buffer (nonnegative integer) -.PD 1 -.PD 0 -.TP -.B datatype -- datatype of each buffer element (handle) -.PD 1 - -.SH OUTPUT PARAMETERS -.PD 0 -.TP -.B buf -- initial address of buffer (choice) -.PD 1 -.PD 0 -.TP -.B status -- status object (Status) -.PD 1 - -.SH NOTES FOR FORTRAN -All MPI routines in Fortran (except for 'MPI_WTIME' and 'MPI_WTICK') -have an additional argument 'ierr' at the end of the argument list. -'ierr' is an integer and has the same meaning as the return value of -the routine in C. In Fortran, MPI routines are subroutines and are -invoked with the 'call' statement. - -All MPI objects (e.g., 'MPI_Datatype', 'MPI_Comm', 'MPI_File') are of -type 'INTEGER' in Fortran. 
diff --git a/ompi/mca/io/romio/romio/man/man3/MPI_File_read_ordered_begin.3 b/ompi/mca/io/romio/romio/man/man3/MPI_File_read_ordered_begin.3 deleted file mode 100644 index ae77d71aea..0000000000 --- a/ompi/mca/io/romio/romio/man/man3/MPI_File_read_ordered_begin.3 +++ /dev/null @@ -1,42 +0,0 @@ -.TH MPI_File_read_ordered_begin 3 "9/3/1998" " " "MPI-2" -.SH NAME -MPI_File_read_ordered_begin \- Begin a split collective read using shared file pointer -.SH SYNOPSIS -.nf -#include "mpi.h" -int MPI_File_read_ordered_begin(MPI_File fh, void *buf, int count, - MPI_Datatype datatype) -.fi -.SH INPUT PARAMETERS -.PD 0 -.TP -.B fh -- file handle (handle) -.PD 1 -.PD 0 -.TP -.B count -- number of elements in buffer (nonnegative integer) -.PD 1 -.PD 0 -.TP -.B datatype -- datatype of each buffer element (handle) -.PD 1 - -.SH OUTPUT PARAMETERS -.PD 0 -.TP -.B buf -- initial address of buffer (choice) -.PD 1 - -.SH NOTES FOR FORTRAN -All MPI routines in Fortran (except for 'MPI_WTIME' and 'MPI_WTICK') -have an additional argument 'ierr' at the end of the argument list. -'ierr' is an integer and has the same meaning as the return value of -the routine in C. In Fortran, MPI routines are subroutines and are -invoked with the 'call' statement. - -All MPI objects (e.g., 'MPI_Datatype', 'MPI_Comm', 'MPI_File') are of -type 'INTEGER' in Fortran. 
diff --git a/ompi/mca/io/romio/romio/man/man3/MPI_File_read_ordered_end.3 b/ompi/mca/io/romio/romio/man/man3/MPI_File_read_ordered_end.3 deleted file mode 100644 index 27b9fb50ea..0000000000 --- a/ompi/mca/io/romio/romio/man/man3/MPI_File_read_ordered_end.3 +++ /dev/null @@ -1,36 +0,0 @@ -.TH MPI_File_read_ordered_end 3 "8/31/1998" " " "MPI-2" -.SH NAME -MPI_File_read_ordered_end \- Complete a split collective read using shared file pointer -.SH SYNOPSIS -.nf -#include "mpi.h" -int MPI_File_read_ordered_end(MPI_File fh, void *buf, MPI_Status *status) -.fi -.SH INPUT PARAMETERS -.PD 0 -.TP -.B fh -- file handle (handle) -.PD 1 - -.SH OUTPUT PARAMETERS -.PD 0 -.TP -.B buf -- initial address of buffer (choice) -.PD 1 -.PD 0 -.TP -.B status -- status object (Status) -.PD 1 - -.SH NOTES FOR FORTRAN -All MPI routines in Fortran (except for 'MPI_WTIME' and 'MPI_WTICK') -have an additional argument 'ierr' at the end of the argument list. -'ierr' is an integer and has the same meaning as the return value of -the routine in C. In Fortran, MPI routines are subroutines and are -invoked with the 'call' statement. - -All MPI objects (e.g., 'MPI_Datatype', 'MPI_Comm', 'MPI_File') are of -type 'INTEGER' in Fortran. 
diff --git a/ompi/mca/io/romio/romio/man/man3/MPI_File_read_shared.3 b/ompi/mca/io/romio/romio/man/man3/MPI_File_read_shared.3 deleted file mode 100644 index 2736b8c3ca..0000000000 --- a/ompi/mca/io/romio/romio/man/man3/MPI_File_read_shared.3 +++ /dev/null @@ -1,47 +0,0 @@ -.TH MPI_File_read_shared 3 "9/3/1998" " " "MPI-2" -.SH NAME -MPI_File_read_shared \- Read using shared file pointer -.SH SYNOPSIS -.nf -#include "mpi.h" -int MPI_File_read_shared(MPI_File fh, void *buf, int count, - MPI_Datatype datatype, MPI_Status *status) -.fi -.SH INPUT PARAMETERS -.PD 0 -.TP -.B fh -- file handle (handle) -.PD 1 -.PD 0 -.TP -.B count -- number of elements in buffer (nonnegative integer) -.PD 1 -.PD 0 -.TP -.B datatype -- datatype of each buffer element (handle) -.PD 1 - -.SH OUTPUT PARAMETERS -.PD 0 -.TP -.B buf -- initial address of buffer (choice) -.PD 1 -.PD 0 -.TP -.B status -- status object (Status) -.PD 1 - -.SH NOTES FOR FORTRAN -All MPI routines in Fortran (except for 'MPI_WTIME' and 'MPI_WTICK') -have an additional argument 'ierr' at the end of the argument list. -'ierr' is an integer and has the same meaning as the return value of -the routine in C. In Fortran, MPI routines are subroutines and are -invoked with the 'call' statement. - -All MPI objects (e.g., 'MPI_Datatype', 'MPI_Comm', 'MPI_File') are of -type 'INTEGER' in Fortran. 
diff --git a/ompi/mca/io/romio/romio/man/man3/MPI_File_seek.3 b/ompi/mca/io/romio/romio/man/man3/MPI_File_seek.3 deleted file mode 100644 index e4ba73a08c..0000000000 --- a/ompi/mca/io/romio/romio/man/man3/MPI_File_seek.3 +++ /dev/null @@ -1,34 +0,0 @@ -.TH MPI_File_seek 3 "8/31/1998" " " "MPI-2" -.SH NAME -MPI_File_seek \- Updates the individual file pointer -.SH SYNOPSIS -.nf -#include "mpi.h" -int MPI_File_seek(MPI_File fh, MPI_Offset offset, int whence) -.fi -.SH INPUT PARAMETERS -.PD 0 -.TP -.B fh -- file handle (handle) -.PD 1 -.PD 0 -.TP -.B offset -- file offset (integer) -.PD 1 -.PD 0 -.TP -.B whence -- update mode (state) -.PD 1 - -.SH NOTES FOR FORTRAN -All MPI routines in Fortran (except for 'MPI_WTIME' and 'MPI_WTICK') -have an additional argument 'ierr' at the end of the argument list. -'ierr' is an integer and has the same meaning as the return value of -the routine in C. In Fortran, MPI routines are subroutines and are -invoked with the 'call' statement. - -All MPI objects (e.g., 'MPI_Datatype', 'MPI_Comm', 'MPI_File') are of -type 'INTEGER' in Fortran. diff --git a/ompi/mca/io/romio/romio/man/man3/MPI_File_seek_shared.3 b/ompi/mca/io/romio/romio/man/man3/MPI_File_seek_shared.3 deleted file mode 100644 index 3420094564..0000000000 --- a/ompi/mca/io/romio/romio/man/man3/MPI_File_seek_shared.3 +++ /dev/null @@ -1,34 +0,0 @@ -.TH MPI_File_seek_shared 3 "8/31/1998" " " "MPI-2" -.SH NAME -MPI_File_seek_shared \- Updates the shared file pointer -.SH SYNOPSIS -.nf -#include "mpi.h" -int MPI_File_seek_shared(MPI_File fh, MPI_Offset offset, int whence) -.fi -.SH INPUT PARAMETERS -.PD 0 -.TP -.B fh -- file handle (handle) -.PD 1 -.PD 0 -.TP -.B offset -- file offset (integer) -.PD 1 -.PD 0 -.TP -.B whence -- update mode (state) -.PD 1 - -.SH NOTES FOR FORTRAN -All MPI routines in Fortran (except for 'MPI_WTIME' and 'MPI_WTICK') -have an additional argument 'ierr' at the end of the argument list. 
-'ierr' is an integer and has the same meaning as the return value of -the routine in C. In Fortran, MPI routines are subroutines and are -invoked with the 'call' statement. - -All MPI objects (e.g., 'MPI_Datatype', 'MPI_Comm', 'MPI_File') are of -type 'INTEGER' in Fortran. diff --git a/ompi/mca/io/romio/romio/man/man3/MPI_File_set_atomicity.3 b/ompi/mca/io/romio/romio/man/man3/MPI_File_set_atomicity.3 deleted file mode 100644 index 010935d129..0000000000 --- a/ompi/mca/io/romio/romio/man/man3/MPI_File_set_atomicity.3 +++ /dev/null @@ -1,29 +0,0 @@ -.TH MPI_File_set_atomicity 3 "3/4/1998" " " "MPI-2" -.SH NAME -MPI_File_set_atomicity \- Sets the atomicity mode -.SH SYNOPSIS -.nf -#include "mpi.h" -int MPI_File_set_atomicity(MPI_File fh, int flag) -.fi -.SH INPUT PARAMETERS -.PD 0 -.TP -.B fh -- file handle (handle) -.PD 1 -.PD 0 -.TP -.B flag -- true to set atomic mode, false to set nonatomic mode (logical) -.PD 1 - -.SH NOTES FOR FORTRAN -All MPI routines in Fortran (except for 'MPI_WTIME' and 'MPI_WTICK') -have an additional argument 'ierr' at the end of the argument list. -'ierr' is an integer and has the same meaning as the return value of -the routine in C. In Fortran, MPI routines are subroutines and are -invoked with the 'call' statement. - -All MPI objects (e.g., 'MPI_Datatype', 'MPI_Comm', 'MPI_File') are of -type 'INTEGER' in Fortran. 
diff --git a/ompi/mca/io/romio/romio/man/man3/MPI_File_set_info.3 b/ompi/mca/io/romio/romio/man/man3/MPI_File_set_info.3 deleted file mode 100644 index 5beb0885e3..0000000000 --- a/ompi/mca/io/romio/romio/man/man3/MPI_File_set_info.3 +++ /dev/null @@ -1,29 +0,0 @@ -.TH MPI_File_set_info 3 "2/5/1998" " " "MPI-2" -.SH NAME -MPI_File_set_info \- Sets new values for the hints associated with a file -.SH SYNOPSIS -.nf -#include "mpi.h" -int MPI_File_set_info(MPI_File fh, MPI_Info info) -.fi -.SH INPUT PARAMETERS -.PD 0 -.TP -.B fh -- file handle (handle) -.PD 1 -.PD 0 -.TP -.B info -- info object (handle) -.PD 1 - -.SH NOTES FOR FORTRAN -All MPI routines in Fortran (except for 'MPI_WTIME' and 'MPI_WTICK') -have an additional argument 'ierr' at the end of the argument list. -'ierr' is an integer and has the same meaning as the return value of -the routine in C. In Fortran, MPI routines are subroutines and are -invoked with the 'call' statement. - -All MPI objects (e.g., 'MPI_Datatype', 'MPI_Comm', 'MPI_File') are of -type 'INTEGER' in Fortran. diff --git a/ompi/mca/io/romio/romio/man/man3/MPI_File_set_size.3 b/ompi/mca/io/romio/romio/man/man3/MPI_File_set_size.3 deleted file mode 100644 index 5761975eb7..0000000000 --- a/ompi/mca/io/romio/romio/man/man3/MPI_File_set_size.3 +++ /dev/null @@ -1,29 +0,0 @@ -.TH MPI_File_set_size 3 "3/10/1998" " " "MPI-2" -.SH NAME -MPI_File_set_size \- Sets the file size -.SH SYNOPSIS -.nf -#include "mpi.h" -int MPI_File_set_size(MPI_File fh, MPI_Offset size) -.fi -.SH INPUT PARAMETERS -.PD 0 -.TP -.B fh -- file handle (handle) -.PD 1 -.PD 0 -.TP -.B size -- size to truncate or expand file (nonnegative integer) -.PD 1 - -.SH NOTES FOR FORTRAN -All MPI routines in Fortran (except for 'MPI_WTIME' and 'MPI_WTICK') -have an additional argument 'ierr' at the end of the argument list. -'ierr' is an integer and has the same meaning as the return value of -the routine in C. 
In Fortran, MPI routines are subroutines and are -invoked with the 'call' statement. - -All MPI objects (e.g., 'MPI_Datatype', 'MPI_Comm', 'MPI_File') are of -type 'INTEGER' in Fortran. diff --git a/ompi/mca/io/romio/romio/man/man3/MPI_File_set_view.3 b/ompi/mca/io/romio/romio/man/man3/MPI_File_set_view.3 deleted file mode 100644 index 5230aa9668..0000000000 --- a/ompi/mca/io/romio/romio/man/man3/MPI_File_set_view.3 +++ /dev/null @@ -1,50 +0,0 @@ -.TH MPI_File_set_view 3 "3/4/1999" " " "MPI-2" -.SH NAME -MPI_File_set_view \- Sets the file view -.SH SYNOPSIS -.nf -#include "mpi.h" -int MPI_File_set_view(MPI_File fh, MPI_Offset disp, MPI_Datatype etype, - MPI_Datatype filetype, char *datarep, MPI_Info info) -.fi -.SH INPUT PARAMETERS -.PD 0 -.TP -.B fh -- file handle (handle) -.PD 1 -.PD 0 -.TP -.B disp -- displacement (nonnegative integer) -.PD 1 -.PD 0 -.TP -.B etype -- elementary datatype (handle) -.PD 1 -.PD 0 -.TP -.B filetype -- filetype (handle) -.PD 1 -.PD 0 -.TP -.B datarep -- data representation (string) -.PD 1 -.PD 0 -.TP -.B info -- info object (handle) -.PD 1 - -.SH NOTES FOR FORTRAN -All MPI routines in Fortran (except for 'MPI_WTIME' and 'MPI_WTICK') -have an additional argument 'ierr' at the end of the argument list. -'ierr' is an integer and has the same meaning as the return value of -the routine in C. In Fortran, MPI routines are subroutines and are -invoked with the 'call' statement. - -All MPI objects (e.g., 'MPI_Datatype', 'MPI_Comm', 'MPI_File') are of -type 'INTEGER' in Fortran. 
diff --git a/ompi/mca/io/romio/romio/man/man3/MPI_File_sync.3 b/ompi/mca/io/romio/romio/man/man3/MPI_File_sync.3 deleted file mode 100644 index ab6169fec3..0000000000 --- a/ompi/mca/io/romio/romio/man/man3/MPI_File_sync.3 +++ /dev/null @@ -1,24 +0,0 @@ -.TH MPI_File_sync 3 "2/5/1998" " " "MPI-2" -.SH NAME -MPI_File_sync \- Causes all previous writes to be transferred to the storage device -.SH SYNOPSIS -.nf -#include "mpi.h" -int MPI_File_sync(MPI_File fh) -.fi -.SH INPUT PARAMETERS -.PD 0 -.TP -.B fh -- file handle (handle) -.PD 1 - -.SH NOTES FOR FORTRAN -All MPI routines in Fortran (except for 'MPI_WTIME' and 'MPI_WTICK') -have an additional argument 'ierr' at the end of the argument list. -'ierr' is an integer and has the same meaning as the return value of -the routine in C. In Fortran, MPI routines are subroutines and are -invoked with the 'call' statement. - -All MPI objects (e.g., 'MPI_Datatype', 'MPI_Comm', 'MPI_File') are of -type 'INTEGER' in Fortran. diff --git a/ompi/mca/io/romio/romio/man/man3/MPI_File_write.3 b/ompi/mca/io/romio/romio/man/man3/MPI_File_write.3 deleted file mode 100644 index 8860a7243c..0000000000 --- a/ompi/mca/io/romio/romio/man/man3/MPI_File_write.3 +++ /dev/null @@ -1,47 +0,0 @@ -.TH MPI_File_write 3 "8/31/1998" " " "MPI-2" -.SH NAME -MPI_File_write \- Write using individual file pointer -.SH SYNOPSIS -.nf -#include "mpi.h" -int MPI_File_write(MPI_File fh, void *buf, int count, - MPI_Datatype datatype, MPI_Status *status) -.fi -.SH INPUT PARAMETERS -.PD 0 -.TP -.B fh -- file handle (handle) -.PD 1 -.PD 0 -.TP -.B buf -- initial address of buffer (choice) -.PD 1 -.PD 0 -.TP -.B count -- number of elements in buffer (nonnegative integer) -.PD 1 -.PD 0 -.TP -.B datatype -- datatype of each buffer element (handle) -.PD 1 - -.SH OUTPUT PARAMETERS -.PD 0 -.TP -.B status -- status object (Status) -.PD 1 - -.SH NOTES FOR FORTRAN -All MPI routines in Fortran (except for 'MPI_WTIME' and 'MPI_WTICK') -have an additional argument 'ierr' at 
the end of the argument list. -'ierr' is an integer and has the same meaning as the return value of -the routine in C. In Fortran, MPI routines are subroutines and are -invoked with the 'call' statement. - -All MPI objects (e.g., 'MPI_Datatype', 'MPI_Comm', 'MPI_File') are of -type 'INTEGER' in Fortran. diff --git a/ompi/mca/io/romio/romio/man/man3/MPI_File_write_all.3 b/ompi/mca/io/romio/romio/man/man3/MPI_File_write_all.3 deleted file mode 100644 index 2b2ec5b64a..0000000000 --- a/ompi/mca/io/romio/romio/man/man3/MPI_File_write_all.3 +++ /dev/null @@ -1,47 +0,0 @@ -.TH MPI_File_write_all 3 "8/31/1998" " " "MPI-2" -.SH NAME -MPI_File_write_all \- Collective write using individual file pointer -.SH SYNOPSIS -.nf -#include "mpi.h" -int MPI_File_write_all(MPI_File fh, void *buf, int count, - MPI_Datatype datatype, MPI_Status *status) -.fi -.SH INPUT PARAMETERS -.PD 0 -.TP -.B fh -- file handle (handle) -.PD 1 -.PD 0 -.TP -.B buf -- initial address of buffer (choice) -.PD 1 -.PD 0 -.TP -.B count -- number of elements in buffer (nonnegative integer) -.PD 1 -.PD 0 -.TP -.B datatype -- datatype of each buffer element (handle) -.PD 1 - -.SH OUTPUT PARAMETERS -.PD 0 -.TP -.B status -- status object (Status) -.PD 1 - -.SH NOTES FOR FORTRAN -All MPI routines in Fortran (except for 'MPI_WTIME' and 'MPI_WTICK') -have an additional argument 'ierr' at the end of the argument list. -'ierr' is an integer and has the same meaning as the return value of -the routine in C. In Fortran, MPI routines are subroutines and are -invoked with the 'call' statement. - -All MPI objects (e.g., 'MPI_Datatype', 'MPI_Comm', 'MPI_File') are of -type 'INTEGER' in Fortran. 
diff --git a/ompi/mca/io/romio/romio/man/man3/MPI_File_write_all_begin.3 b/ompi/mca/io/romio/romio/man/man3/MPI_File_write_all_begin.3 deleted file mode 100644 index f1dbad6f59..0000000000 --- a/ompi/mca/io/romio/romio/man/man3/MPI_File_write_all_begin.3 +++ /dev/null @@ -1,40 +0,0 @@ -.TH MPI_File_write_all_begin 3 "8/31/1998" " " "MPI-2" -.SH NAME -MPI_File_write_all_begin \- Begin a split collective write using individual file pointer -.SH SYNOPSIS -.nf -#include "mpi.h" -int MPI_File_write_all_begin(MPI_File fh, void *buf, int count, - MPI_Datatype datatype) -.fi -.SH INPUT PARAMETERS -.PD 0 -.TP -.B fh -- file handle (handle) -.PD 1 -.PD 0 -.TP -.B buf -- initial address of buffer (choice) -.PD 1 -.PD 0 -.TP -.B count -- number of elements in buffer (nonnegative integer) -.PD 1 -.PD 0 -.TP -.B datatype -- datatype of each buffer element (handle) -.PD 1 - -.SH NOTES FOR FORTRAN -All MPI routines in Fortran (except for 'MPI_WTIME' and 'MPI_WTICK') -have an additional argument 'ierr' at the end of the argument list. -'ierr' is an integer and has the same meaning as the return value of -the routine in C. In Fortran, MPI routines are subroutines and are -invoked with the 'call' statement. - -All MPI objects (e.g., 'MPI_Datatype', 'MPI_Comm', 'MPI_File') are of -type 'INTEGER' in Fortran. 
diff --git a/ompi/mca/io/romio/romio/man/man3/MPI_File_write_all_end.3 b/ompi/mca/io/romio/romio/man/man3/MPI_File_write_all_end.3 deleted file mode 100644 index 131ee76be3..0000000000 --- a/ompi/mca/io/romio/romio/man/man3/MPI_File_write_all_end.3 +++ /dev/null @@ -1,36 +0,0 @@ -.TH MPI_File_write_all_end 3 "8/31/1998" " " "MPI-2" -.SH NAME -MPI_File_write_all_end \- Complete a split collective write using individual file pointer -.SH SYNOPSIS -.nf -#include "mpi.h" -int MPI_File_write_all_end(MPI_File fh, void *buf, MPI_Status *status) -.fi -.SH INPUT PARAMETERS -.PD 0 -.TP -.B fh -- file handle (handle) -.PD 1 - -.SH OUTPUT PARAMETERS -.PD 0 -.TP -.B buf -- initial address of buffer (choice) -.PD 1 -.PD 0 -.TP -.B status -- status object (Status) -.PD 1 - -.SH NOTES FOR FORTRAN -All MPI routines in Fortran (except for 'MPI_WTIME' and 'MPI_WTICK') -have an additional argument 'ierr' at the end of the argument list. -'ierr' is an integer and has the same meaning as the return value of -the routine in C. In Fortran, MPI routines are subroutines and are -invoked with the 'call' statement. - -All MPI objects (e.g., 'MPI_Datatype', 'MPI_Comm', 'MPI_File') are of -type 'INTEGER' in Fortran. 
diff --git a/ompi/mca/io/romio/romio/man/man3/MPI_File_write_at.3 b/ompi/mca/io/romio/romio/man/man3/MPI_File_write_at.3 deleted file mode 100644 index 9668d93ab5..0000000000 --- a/ompi/mca/io/romio/romio/man/man3/MPI_File_write_at.3 +++ /dev/null @@ -1,53 +0,0 @@ -.TH MPI_File_write_at 3 "8/31/1998" " " "MPI-2" -.SH NAME -MPI_File_write_at \- Write using explict offset -.SH SYNOPSIS -.nf -#include "mpi.h" -int MPI_File_write_at(MPI_File fh, MPI_Offset offset, void *buf, - int count, MPI_Datatype datatype, - MPI_Status *status) -.fi -.SH INPUT PARAMETERS -.PD 0 -.TP -.B fh -- file handle (handle) -.PD 1 -.PD 0 -.TP -.B offset -- file offset (nonnegative integer) -.PD 1 -.PD 0 -.TP -.B buf -- initial address of buffer (choice) -.PD 1 -.PD 0 -.TP -.B count -- number of elements in buffer (nonnegative integer) -.PD 1 -.PD 0 -.TP -.B datatype -- datatype of each buffer element (handle) -.PD 1 - -.SH OUTPUT PARAMETERS -.PD 0 -.TP -.B status -- status object (Status) -.PD 1 - -.SH NOTES FOR FORTRAN -All MPI routines in Fortran (except for 'MPI_WTIME' and 'MPI_WTICK') -have an additional argument 'ierr' at the end of the argument list. -'ierr' is an integer and has the same meaning as the return value of -the routine in C. In Fortran, MPI routines are subroutines and are -invoked with the 'call' statement. - -All MPI objects (e.g., 'MPI_Datatype', 'MPI_Comm', 'MPI_File') are of -type 'INTEGER' in Fortran. 
diff --git a/ompi/mca/io/romio/romio/man/man3/MPI_File_write_at_all.3 b/ompi/mca/io/romio/romio/man/man3/MPI_File_write_at_all.3 deleted file mode 100644 index 2a7bc1db1f..0000000000 --- a/ompi/mca/io/romio/romio/man/man3/MPI_File_write_at_all.3 +++ /dev/null @@ -1,53 +0,0 @@ -.TH MPI_File_write_at_all 3 "8/31/1998" " " "MPI-2" -.SH NAME -MPI_File_write_at_all \- Collective write using explict offset -.SH SYNOPSIS -.nf -#include "mpi.h" -int MPI_File_write_at_all(MPI_File fh, MPI_Offset offset, void *buf, - int count, MPI_Datatype datatype, - MPI_Status *status) -.fi -.SH INPUT PARAMETERS -.PD 0 -.TP -.B fh -- file handle (handle) -.PD 1 -.PD 0 -.TP -.B offset -- file offset (nonnegative integer) -.PD 1 -.PD 0 -.TP -.B buf -- initial address of buffer (choice) -.PD 1 -.PD 0 -.TP -.B count -- number of elements in buffer (nonnegative integer) -.PD 1 -.PD 0 -.TP -.B datatype -- datatype of each buffer element (handle) -.PD 1 - -.SH OUTPUT PARAMETERS -.PD 0 -.TP -.B status -- status object (Status) -.PD 1 - -.SH NOTES FOR FORTRAN -All MPI routines in Fortran (except for 'MPI_WTIME' and 'MPI_WTICK') -have an additional argument 'ierr' at the end of the argument list. -'ierr' is an integer and has the same meaning as the return value of -the routine in C. In Fortran, MPI routines are subroutines and are -invoked with the 'call' statement. - -All MPI objects (e.g., 'MPI_Datatype', 'MPI_Comm', 'MPI_File') are of -type 'INTEGER' in Fortran. 
diff --git a/ompi/mca/io/romio/romio/man/man3/MPI_File_write_at_all_begin.3 b/ompi/mca/io/romio/romio/man/man3/MPI_File_write_at_all_begin.3 deleted file mode 100644 index fbe4e3a4bc..0000000000 --- a/ompi/mca/io/romio/romio/man/man3/MPI_File_write_at_all_begin.3 +++ /dev/null @@ -1,45 +0,0 @@ -.TH MPI_File_write_at_all_begin 3 "8/31/1998" " " "MPI-2" -.SH NAME -MPI_File_write_at_all_begin \- Begin a split collective write using explict offset -.SH SYNOPSIS -.nf -#include "mpi.h" -int MPI_File_write_at_all_begin(MPI_File fh, MPI_Offset offset, void *buf, - int count, MPI_Datatype datatype) -.fi -.SH INPUT PARAMETERS -.PD 0 -.TP -.B fh -- file handle (handle) -.PD 1 -.PD 0 -.TP -.B offset -- file offset (nonnegative integer) -.PD 1 -.PD 0 -.TP -.B buf -- initial address of buffer (choice) -.PD 1 -.PD 0 -.TP -.B count -- number of elements in buffer (nonnegative integer) -.PD 1 -.PD 0 -.TP -.B datatype -- datatype of each buffer element (handle) -.PD 1 - -.SH NOTES FOR FORTRAN -All MPI routines in Fortran (except for 'MPI_WTIME' and 'MPI_WTICK') -have an additional argument 'ierr' at the end of the argument list. -'ierr' is an integer and has the same meaning as the return value of -the routine in C. In Fortran, MPI routines are subroutines and are -invoked with the 'call' statement. - -All MPI objects (e.g., 'MPI_Datatype', 'MPI_Comm', 'MPI_File') are of -type 'INTEGER' in Fortran. 
diff --git a/ompi/mca/io/romio/romio/man/man3/MPI_File_write_at_all_end.3 b/ompi/mca/io/romio/romio/man/man3/MPI_File_write_at_all_end.3 deleted file mode 100644 index f72cd4fa07..0000000000 --- a/ompi/mca/io/romio/romio/man/man3/MPI_File_write_at_all_end.3 +++ /dev/null @@ -1,36 +0,0 @@ -.TH MPI_File_write_at_all_end 3 "8/31/1998" " " "MPI-2" -.SH NAME -MPI_File_write_at_all_end \- Complete a split collective write using explict offset -.SH SYNOPSIS -.nf -#include "mpi.h" -int MPI_File_write_at_all_end(MPI_File fh, void *buf, MPI_Status *status) -.fi -.SH INPUT PARAMETERS -.PD 0 -.TP -.B fh -- file handle (handle) -.PD 1 -.PD 0 -.TP -.B buf -- initial address of buffer (choice) -.PD 1 - -.SH OUTPUT PARAMETERS -.PD 0 -.TP -.B status -- status object (Status) -.PD 1 - -.SH NOTES FOR FORTRAN -All MPI routines in Fortran (except for 'MPI_WTIME' and 'MPI_WTICK') -have an additional argument 'ierr' at the end of the argument list. -'ierr' is an integer and has the same meaning as the return value of -the routine in C. In Fortran, MPI routines are subroutines and are -invoked with the 'call' statement. - -All MPI objects (e.g., 'MPI_Datatype', 'MPI_Comm', 'MPI_File') are of -type 'INTEGER' in Fortran. 
diff --git a/ompi/mca/io/romio/romio/man/man3/MPI_File_write_ordered.3 b/ompi/mca/io/romio/romio/man/man3/MPI_File_write_ordered.3 deleted file mode 100644 index 28a477b0e0..0000000000 --- a/ompi/mca/io/romio/romio/man/man3/MPI_File_write_ordered.3 +++ /dev/null @@ -1,47 +0,0 @@ -.TH MPI_File_write_ordered 3 "9/3/1998" " " "MPI-2" -.SH NAME -MPI_File_write_ordered \- Collective write using shared file pointer -.SH SYNOPSIS -.nf -#include "mpi.h" -int MPI_File_write_ordered(MPI_File fh, void *buf, int count, - MPI_Datatype datatype, MPI_Status *status) -.fi -.SH INPUT PARAMETERS -.PD 0 -.TP -.B fh -- file handle (handle) -.PD 1 -.PD 0 -.TP -.B buf -- initial address of buffer (choice) -.PD 1 -.PD 0 -.TP -.B count -- number of elements in buffer (nonnegative integer) -.PD 1 -.PD 0 -.TP -.B datatype -- datatype of each buffer element (handle) -.PD 1 - -.SH OUTPUT PARAMETERS -.PD 0 -.TP -.B status -- status object (Status) -.PD 1 - -.SH NOTES FOR FORTRAN -All MPI routines in Fortran (except for 'MPI_WTIME' and 'MPI_WTICK') -have an additional argument 'ierr' at the end of the argument list. -'ierr' is an integer and has the same meaning as the return value of -the routine in C. In Fortran, MPI routines are subroutines and are -invoked with the 'call' statement. - -All MPI objects (e.g., 'MPI_Datatype', 'MPI_Comm', 'MPI_File') are of -type 'INTEGER' in Fortran. 
diff --git a/ompi/mca/io/romio/romio/man/man3/MPI_File_write_ordered_begin.3 b/ompi/mca/io/romio/romio/man/man3/MPI_File_write_ordered_begin.3 deleted file mode 100644 index e59436a7d2..0000000000 --- a/ompi/mca/io/romio/romio/man/man3/MPI_File_write_ordered_begin.3 +++ /dev/null @@ -1,42 +0,0 @@ -.TH MPI_File_write_ordered_begin 3 "9/3/1998" " " "MPI-2" -.SH NAME -MPI_File_write_ordered_begin \- Begin a split collective write using shared file pointer -.SH SYNOPSIS -.nf -#include "mpi.h" -int MPI_File_write_ordered_begin(MPI_File fh, void *buf, int count, - MPI_Datatype datatype) -.fi -.SH INPUT PARAMETERS -.PD 0 -.TP -.B fh -- file handle (handle) -.PD 1 -.PD 0 -.TP -.B count -- number of elements in buffer (nonnegative integer) -.PD 1 -.PD 0 -.TP -.B datatype -- datatype of each buffer element (handle) -.PD 1 - -.SH OUTPUT PARAMETERS -.PD 0 -.TP -.B buf -- initial address of buffer (choice) -.PD 1 - -.SH NOTES FOR FORTRAN -All MPI routines in Fortran (except for 'MPI_WTIME' and 'MPI_WTICK') -have an additional argument 'ierr' at the end of the argument list. -'ierr' is an integer and has the same meaning as the return value of -the routine in C. In Fortran, MPI routines are subroutines and are -invoked with the 'call' statement. - -All MPI objects (e.g., 'MPI_Datatype', 'MPI_Comm', 'MPI_File') are of -type 'INTEGER' in Fortran. 
diff --git a/ompi/mca/io/romio/romio/man/man3/MPI_File_write_ordered_end.3 b/ompi/mca/io/romio/romio/man/man3/MPI_File_write_ordered_end.3 deleted file mode 100644 index a3e8e23684..0000000000 --- a/ompi/mca/io/romio/romio/man/man3/MPI_File_write_ordered_end.3 +++ /dev/null @@ -1,36 +0,0 @@ -.TH MPI_File_write_ordered_end 3 "8/31/1998" " " "MPI-2" -.SH NAME -MPI_File_write_ordered_end \- Complete a split collective write using shared file pointer -.SH SYNOPSIS -.nf -#include "mpi.h" -int MPI_File_write_ordered_end(MPI_File fh, void *buf, MPI_Status *status) -.fi -.SH INPUT PARAMETERS -.PD 0 -.TP -.B fh -- file handle (handle) -.PD 1 - -.SH OUTPUT PARAMETERS -.PD 0 -.TP -.B buf -- initial address of buffer (choice) -.PD 1 -.PD 0 -.TP -.B status -- status object (Status) -.PD 1 - -.SH NOTES FOR FORTRAN -All MPI routines in Fortran (except for 'MPI_WTIME' and 'MPI_WTICK') -have an additional argument 'ierr' at the end of the argument list. -'ierr' is an integer and has the same meaning as the return value of -the routine in C. In Fortran, MPI routines are subroutines and are -invoked with the 'call' statement. - -All MPI objects (e.g., 'MPI_Datatype', 'MPI_Comm', 'MPI_File') are of -type 'INTEGER' in Fortran. 
diff --git a/ompi/mca/io/romio/romio/man/man3/MPI_File_write_shared.3 b/ompi/mca/io/romio/romio/man/man3/MPI_File_write_shared.3 deleted file mode 100644 index ddfcea9251..0000000000 --- a/ompi/mca/io/romio/romio/man/man3/MPI_File_write_shared.3 +++ /dev/null @@ -1,47 +0,0 @@ -.TH MPI_File_write_shared 3 "9/3/1998" " " "MPI-2" -.SH NAME -MPI_File_write_shared \- Write using shared file pointer -.SH SYNOPSIS -.nf -#include "mpi.h" -int MPI_File_write_shared(MPI_File fh, void *buf, int count, - MPI_Datatype datatype, MPI_Status *status) -.fi -.SH INPUT PARAMETERS -.PD 0 -.TP -.B fh -- file handle (handle) -.PD 1 -.PD 0 -.TP -.B buf -- initial address of buffer (choice) -.PD 1 -.PD 0 -.TP -.B count -- number of elements in buffer (nonnegative integer) -.PD 1 -.PD 0 -.TP -.B datatype -- datatype of each buffer element (handle) -.PD 1 - -.SH OUTPUT PARAMETERS -.PD 0 -.TP -.B status -- status object (Status) -.PD 1 - -.SH NOTES FOR FORTRAN -All MPI routines in Fortran (except for 'MPI_WTIME' and 'MPI_WTICK') -have an additional argument 'ierr' at the end of the argument list. -'ierr' is an integer and has the same meaning as the return value of -the routine in C. In Fortran, MPI routines are subroutines and are -invoked with the 'call' statement. - -All MPI objects (e.g., 'MPI_Datatype', 'MPI_Comm', 'MPI_File') are of -type 'INTEGER' in Fortran. 
diff --git a/ompi/mca/io/romio/romio/man/man3/MPI_Info_c2f.3 b/ompi/mca/io/romio/romio/man/man3/MPI_Info_c2f.3 deleted file mode 100644 index 21b7fe9969..0000000000 --- a/ompi/mca/io/romio/romio/man/man3/MPI_Info_c2f.3 +++ /dev/null @@ -1,18 +0,0 @@ -.TH MPI_Info_c2f 3 "2/9/2000" " " "MPI-2" -.SH NAME -MPI_Info_c2f \- Translates a C info handle to a Fortran info handle -.SH SYNOPSIS -.nf -MPI_Fint MPI_Info_c2f(MPI_Info info) -.fi -.SH INPUT PARAMETERS -.PD 0 -.TP -.B info -- C info handle (integer) -.PD 1 - -.SH RETURN VALUE -Fortran info handle (handle) -.SH LOCATION -info_c2f.c diff --git a/ompi/mca/io/romio/romio/man/man3/MPI_Info_create.3 b/ompi/mca/io/romio/romio/man/man3/MPI_Info_create.3 deleted file mode 100644 index e637f4a391..0000000000 --- a/ompi/mca/io/romio/romio/man/man3/MPI_Info_create.3 +++ /dev/null @@ -1,38 +0,0 @@ -.TH MPI_Info_create 3 "2/9/2000" " " "MPI-2" -.SH NAME -MPI_Info_create \- Creates a new info object -.SH SYNOPSIS -.nf -int MPI_Info_create(MPI_Info *info) -.fi -.SH OUTPUT PARAMETERS -.PD 0 -.TP -.B info -- info object (handle) -.PD 1 - -.SH NOTES FOR FORTRAN -All MPI routines in Fortran (except for -.I MPI_WTIME -and -.I MPI_WTICK -) have -an additional argument -.I ierr -at the end of the argument list. -.I ierr -is an integer and has the same meaning as the return value of the routine -in C. In Fortran, MPI routines are subroutines, and are invoked with the -.I call -statement. - -All MPI objects (e.g., -.I MPI_Datatype -, -.I MPI_Comm -) are of type -.I INTEGER -in Fortran. 
-.SH LOCATION -info_create.c diff --git a/ompi/mca/io/romio/romio/man/man3/MPI_Info_delete.3 b/ompi/mca/io/romio/romio/man/man3/MPI_Info_delete.3 deleted file mode 100644 index b6bbf8cf49..0000000000 --- a/ompi/mca/io/romio/romio/man/man3/MPI_Info_delete.3 +++ /dev/null @@ -1,43 +0,0 @@ -.TH MPI_Info_delete 3 "2/9/2000" " " "MPI-2" -.SH NAME -MPI_Info_delete \- Deletes a (key,value) pair from info -.SH SYNOPSIS -.nf -int MPI_Info_delete(MPI_Info info, char *key) -.fi -.SH INPUT PARAMETERS -.PD 0 -.TP -.B info -- info object (handle) -.PD 1 -.PD 0 -.TP -.B key -- key (string) -.PD 1 - -.SH NOTES FOR FORTRAN -All MPI routines in Fortran (except for -.I MPI_WTIME -and -.I MPI_WTICK -) have -an additional argument -.I ierr -at the end of the argument list. -.I ierr -is an integer and has the same meaning as the return value of the routine -in C. In Fortran, MPI routines are subroutines, and are invoked with the -.I call -statement. - -All MPI objects (e.g., -.I MPI_Datatype -, -.I MPI_Comm -) are of type -.I INTEGER -in Fortran. -.SH LOCATION -info_delete.c diff --git a/ompi/mca/io/romio/romio/man/man3/MPI_Info_dup.3 b/ompi/mca/io/romio/romio/man/man3/MPI_Info_dup.3 deleted file mode 100644 index dcc5c3ab88..0000000000 --- a/ompi/mca/io/romio/romio/man/man3/MPI_Info_dup.3 +++ /dev/null @@ -1,45 +0,0 @@ -.TH MPI_Info_dup 3 "2/9/2000" " " "MPI-2" -.SH NAME -MPI_Info_dup \- Returns a duplicate of the info object -.SH SYNOPSIS -.nf -int MPI_Info_dup(MPI_Info info, MPI_Info *newinfo) -.fi -.SH INPUT PARAMETERS -.PD 0 -.TP -.B info -- info object (handle) -.PD 1 - -.SH OUTPUT PARAMETERS -.PD 0 -.TP -.B newinfo -- duplicate of info object (handle) -.PD 1 - -.SH NOTES FOR FORTRAN -All MPI routines in Fortran (except for -.I MPI_WTIME -and -.I MPI_WTICK -) have -an additional argument -.I ierr -at the end of the argument list. -.I ierr -is an integer and has the same meaning as the return value of the routine -in C. 
In Fortran, MPI routines are subroutines, and are invoked with the -.I call -statement. - -All MPI objects (e.g., -.I MPI_Datatype -, -.I MPI_Comm -) are of type -.I INTEGER -in Fortran. -.SH LOCATION -info_dup.c diff --git a/ompi/mca/io/romio/romio/man/man3/MPI_Info_f2c.3 b/ompi/mca/io/romio/romio/man/man3/MPI_Info_f2c.3 deleted file mode 100644 index 6649c82c71..0000000000 --- a/ompi/mca/io/romio/romio/man/man3/MPI_Info_f2c.3 +++ /dev/null @@ -1,18 +0,0 @@ -.TH MPI_Info_f2c 3 "2/9/2000" " " "MPI-2" -.SH NAME -MPI_Info_f2c \- Translates a Fortran info handle to a C info handle -.SH SYNOPSIS -.nf -MPI_Info MPI_Info_f2c(MPI_Fint info) -.fi -.SH INPUT PARAMETERS -.PD 0 -.TP -.B info -- Fortran info handle (integer) -.PD 1 - -.SH RETURN VALUE -C info handle (handle) -.SH LOCATION -info_f2c.c diff --git a/ompi/mca/io/romio/romio/man/man3/MPI_Info_free.3 b/ompi/mca/io/romio/romio/man/man3/MPI_Info_free.3 deleted file mode 100644 index f76d68f8c8..0000000000 --- a/ompi/mca/io/romio/romio/man/man3/MPI_Info_free.3 +++ /dev/null @@ -1,38 +0,0 @@ -.TH MPI_Info_free 3 "2/9/2000" " " "MPI-2" -.SH NAME -MPI_Info_free \- Frees an info object -.SH SYNOPSIS -.nf -int MPI_Info_free(MPI_Info *info) -.fi -.SH INPUT PARAMETERS -.PD 0 -.TP -.B info -- info object (handle) -.PD 1 - -.SH NOTES FOR FORTRAN -All MPI routines in Fortran (except for -.I MPI_WTIME -and -.I MPI_WTICK -) have -an additional argument -.I ierr -at the end of the argument list. -.I ierr -is an integer and has the same meaning as the return value of the routine -in C. In Fortran, MPI routines are subroutines, and are invoked with the -.I call -statement. - -All MPI objects (e.g., -.I MPI_Datatype -, -.I MPI_Comm -) are of type -.I INTEGER -in Fortran. 
-.SH LOCATION -info_free.c diff --git a/ompi/mca/io/romio/romio/man/man3/MPI_Info_get.3 b/ompi/mca/io/romio/romio/man/man3/MPI_Info_get.3 deleted file mode 100644 index a9433ab033..0000000000 --- a/ompi/mca/io/romio/romio/man/man3/MPI_Info_get.3 +++ /dev/null @@ -1,60 +0,0 @@ -.TH MPI_Info_get 3 "2/9/2000" " " "MPI-2" -.SH NAME -MPI_Info_get \- Retrieves the value associated with a key -.SH SYNOPSIS -.nf -int MPI_Info_get(MPI_Info info, char *key, int valuelen, char *value, int *flag) -.fi -.SH INPUT PARAMETERS -.PD 0 -.TP -.B info -- info object (handle) -.PD 1 -.PD 0 -.TP -.B key -- key (string) -.PD 1 -.PD 0 -.TP -.B valuelen -- length of value argument (integer) -.PD 1 - -.SH OUTPUT PARAMETERS -.PD 0 -.TP -.B value -- value (string) -.PD 1 -.PD 0 -.TP -.B flag -- true if key defined, false if not (boolean) -.PD 1 - -.SH NOTES FOR FORTRAN -All MPI routines in Fortran (except for -.I MPI_WTIME -and -.I MPI_WTICK -) have -an additional argument -.I ierr -at the end of the argument list. -.I ierr -is an integer and has the same meaning as the return value of the routine -in C. In Fortran, MPI routines are subroutines, and are invoked with the -.I call -statement. - -All MPI objects (e.g., -.I MPI_Datatype -, -.I MPI_Comm -) are of type -.I INTEGER -in Fortran. 
-.SH LOCATION -info_get.c diff --git a/ompi/mca/io/romio/romio/man/man3/MPI_Info_get_nkeys.3 b/ompi/mca/io/romio/romio/man/man3/MPI_Info_get_nkeys.3 deleted file mode 100644 index ec3196e459..0000000000 --- a/ompi/mca/io/romio/romio/man/man3/MPI_Info_get_nkeys.3 +++ /dev/null @@ -1,45 +0,0 @@ -.TH MPI_Info_get_nkeys 3 "2/9/2000" " " "MPI-2" -.SH NAME -MPI_Info_get_nkeys \- Returns the number of currently defined keys in info -.SH SYNOPSIS -.nf -int MPI_Info_get_nkeys(MPI_Info info, int *nkeys) -.fi -.SH INPUT PARAMETERS -.PD 0 -.TP -.B info -- info object (handle) -.PD 1 - -.SH OUTPUT PARAMETERS -.PD 0 -.TP -.B nkeys -- number of defined keys (integer) -.PD 1 - -.SH NOTES FOR FORTRAN -All MPI routines in Fortran (except for -.I MPI_WTIME -and -.I MPI_WTICK -) have -an additional argument -.I ierr -at the end of the argument list. -.I ierr -is an integer and has the same meaning as the return value of the routine -in C. In Fortran, MPI routines are subroutines, and are invoked with the -.I call -statement. - -All MPI objects (e.g., -.I MPI_Datatype -, -.I MPI_Comm -) are of type -.I INTEGER -in Fortran. -.SH LOCATION -info_getnks.c diff --git a/ompi/mca/io/romio/romio/man/man3/MPI_Info_get_nthkey.3 b/ompi/mca/io/romio/romio/man/man3/MPI_Info_get_nthkey.3 deleted file mode 100644 index c29d98847c..0000000000 --- a/ompi/mca/io/romio/romio/man/man3/MPI_Info_get_nthkey.3 +++ /dev/null @@ -1,50 +0,0 @@ -.TH MPI_Info_get_nthkey 3 "2/9/2000" " " "MPI-2" -.SH NAME -MPI_Info_get_nthkey \- Returns the nth defined key in info -.SH SYNOPSIS -.nf -int MPI_Info_get_nthkey(MPI_Info info, int n, char *key) -.fi -.SH INPUT PARAMETERS -.PD 0 -.TP -.B info -- info object (handle) -.PD 1 -.PD 0 -.TP -.B n -- key number (integer) -.PD 1 - -.SH OUTPUT PARAMETERS -.PD 0 -.TP -.B keys -- key (string) -.PD 1 - -.SH NOTES FOR FORTRAN -All MPI routines in Fortran (except for -.I MPI_WTIME -and -.I MPI_WTICK -) have -an additional argument -.I ierr -at the end of the argument list. 
-.I ierr -is an integer and has the same meaning as the return value of the routine -in C. In Fortran, MPI routines are subroutines, and are invoked with the -.I call -statement. - -All MPI objects (e.g., -.I MPI_Datatype -, -.I MPI_Comm -) are of type -.I INTEGER -in Fortran. -.SH LOCATION -info_getnth.c diff --git a/ompi/mca/io/romio/romio/man/man3/MPI_Info_get_valuelen.3 b/ompi/mca/io/romio/romio/man/man3/MPI_Info_get_valuelen.3 deleted file mode 100644 index a65604fd18..0000000000 --- a/ompi/mca/io/romio/romio/man/man3/MPI_Info_get_valuelen.3 +++ /dev/null @@ -1,55 +0,0 @@ -.TH MPI_Info_get_valuelen 3 "2/9/2000" " " "MPI-2" -.SH NAME -MPI_Info_get_valuelen \- Retrieves the length of the value associated with a key -.SH SYNOPSIS -.nf -int MPI_Info_get_valuelen(MPI_Info info, char *key, int *valuelen, int *flag) -.fi -.SH INPUT PARAMETERS -.PD 0 -.TP -.B info -- info object (handle) -.PD 1 -.PD 0 -.TP -.B key -- key (string) -.PD 1 - -.SH OUTPUT PARAMETERS -.PD 0 -.TP -.B valuelen -- length of value argument (integer) -.PD 1 -.PD 0 -.TP -.B flag -- true if key defined, false if not (boolean) -.PD 1 - -.SH NOTES FOR FORTRAN -All MPI routines in Fortran (except for -.I MPI_WTIME -and -.I MPI_WTICK -) have -an additional argument -.I ierr -at the end of the argument list. -.I ierr -is an integer and has the same meaning as the return value of the routine -in C. In Fortran, MPI routines are subroutines, and are invoked with the -.I call -statement. - -All MPI objects (e.g., -.I MPI_Datatype -, -.I MPI_Comm -) are of type -.I INTEGER -in Fortran. 
-.SH LOCATION -info_getvln.c diff --git a/ompi/mca/io/romio/romio/man/man3/MPI_Info_set.3 b/ompi/mca/io/romio/romio/man/man3/MPI_Info_set.3 deleted file mode 100644 index 589762a320..0000000000 --- a/ompi/mca/io/romio/romio/man/man3/MPI_Info_set.3 +++ /dev/null @@ -1,48 +0,0 @@ -.TH MPI_Info_set 3 "2/9/2000" " " "MPI-2" -.SH NAME -MPI_Info_set \- Adds a (key,value) pair to info -.SH SYNOPSIS -.nf -int MPI_Info_set(MPI_Info info, char *key, char *value) -.fi -.SH INPUT PARAMETERS -.PD 0 -.TP -.B info -- info object (handle) -.PD 1 -.PD 0 -.TP -.B key -- key (string) -.PD 1 -.PD 0 -.TP -.B value -- value (string) -.PD 1 - -.SH NOTES FOR FORTRAN -All MPI routines in Fortran (except for -.I MPI_WTIME -and -.I MPI_WTICK -) have -an additional argument -.I ierr -at the end of the argument list. -.I ierr -is an integer and has the same meaning as the return value of the routine -in C. In Fortran, MPI routines are subroutines, and are invoked with the -.I call -statement. - -All MPI objects (e.g., -.I MPI_Datatype -, -.I MPI_Comm -) are of type -.I INTEGER -in Fortran. 
-.SH LOCATION -info_set.c diff --git a/ompi/mca/io/romio/romio/man/man3/MPI_Type_create_darray.3 b/ompi/mca/io/romio/romio/man/man3/MPI_Type_create_darray.3 deleted file mode 100644 index e164c58829..0000000000 --- a/ompi/mca/io/romio/romio/man/man3/MPI_Type_create_darray.3 +++ /dev/null @@ -1,89 +0,0 @@ -.TH MPI_Type_create_darray 3 "2/9/2000" " " "MPI-2" -.SH NAME -MPI_Type_create_darray \- Creates a datatype corresponding to a distributed, multidimensional array -.SH SYNOPSIS -.nf -int MPI_Type_create_darray(int size, int rank, int ndims, - int *array_of_gsizes, int *array_of_distribs, - int *array_of_dargs, int *array_of_psizes, - int order, MPI_Datatype oldtype, - MPI_Datatype *newtype) -.fi -.SH INPUT PARAMETERS -.PD 0 -.TP -.B size -- size of process group (positive integer) -.PD 1 -.PD 0 -.TP -.B rank -- rank in process group (nonnegative integer) -.PD 1 -.PD 0 -.TP -.B ndims -- number of array dimensions as well as process grid dimensions (positive integer) -.PD 1 -.PD 0 -.TP -.B array_of_gsizes -- number of elements of type oldtype in each dimension of global array (array of positive integers) -.PD 1 -.PD 0 -.TP -.B array_of_distribs -- distribution of array in each dimension (array of state) -.PD 1 -.PD 0 -.TP -.B array_of_dargs -- distribution argument in each dimension (array of positive integers) -.PD 1 -.PD 0 -.TP -.B array_of_psizes -- size of process grid in each dimension (array of positive integers) -.PD 1 -.PD 0 -.TP -.B order -- array storage order flag (state) -.PD 1 -.PD 0 -.TP -.B oldtype -- old datatype (handle) -.PD 1 - -.SH OUTPUT PARAMETERS -.PD 0 -.TP -.B newtype -- new datatype (handle) -.PD 1 - -.SH NOTES FOR FORTRAN -All MPI routines in Fortran (except for -.I MPI_WTIME -and -.I MPI_WTICK -) have -an additional argument -.I ierr -at the end of the argument list. -.I ierr -is an integer and has the same meaning as the return value of the routine -in C. 
In Fortran, MPI routines are subroutines, and are invoked with the -.I call -statement. - -All MPI objects (e.g., -.I MPI_Datatype -, -.I MPI_Comm -) are of type -.I INTEGER -in Fortran. -.SH LOCATION -darray.c diff --git a/ompi/mca/io/romio/romio/man/man3/MPI_Type_create_subarray.3 b/ompi/mca/io/romio/romio/man/man3/MPI_Type_create_subarray.3 deleted file mode 100644 index f1b142dd3f..0000000000 --- a/ompi/mca/io/romio/romio/man/man3/MPI_Type_create_subarray.3 +++ /dev/null @@ -1,73 +0,0 @@ -.TH MPI_Type_create_subarray 3 "2/9/2000" " " "MPI-2" -.SH NAME -MPI_Type_create_subarray \- Creates a datatype describing a subarray of a multidimensional array -.SH SYNOPSIS -.nf -int MPI_Type_create_subarray(int ndims, int *array_of_sizes, - int *array_of_subsizes, int *array_of_starts, - int order, MPI_Datatype oldtype, - MPI_Datatype *newtype) -.fi -.SH INPUT PARAMETERS -.PD 0 -.TP -.B ndims -- number of array dimensions (positive integer) -.PD 1 -.PD 0 -.TP -.B array_of_sizes -- number of elements of type oldtype in each dimension of the full array (array of positive integers) -.PD 1 -.PD 0 -.TP -.B array_of_subsizes -- number of elements of type oldtype in each dimension of the subarray (array of positive integers) -.PD 1 -.PD 0 -.TP -.B array_of_starts -- starting coordinates of the subarray in each dimension (array of nonnegative integers) -.PD 1 -.PD 0 -.TP -.B order -- array storage order flag (state) -.PD 1 -.PD 0 -.TP -.B oldtype -- old datatype (handle) -.PD 1 - -.SH OUTPUT PARAMETERS -.PD 0 -.TP -.B newtype -- new datatype (handle) -.PD 1 - -.SH NOTES FOR FORTRAN -All MPI routines in Fortran (except for -.I MPI_WTIME -and -.I MPI_WTICK -) have -an additional argument -.I ierr -at the end of the argument list. -.I ierr -is an integer and has the same meaning as the return value of the routine -in C. In Fortran, MPI routines are subroutines, and are invoked with the -.I call -statement. 
- -All MPI objects (e.g., -.I MPI_Datatype -, -.I MPI_Comm -) are of type -.I INTEGER -in Fortran. -.SH LOCATION -subarray.c diff --git a/ompi/mca/io/romio/romio/mpi-io/Makefile.am b/ompi/mca/io/romio/romio/mpi-io/Makefile.am index 1ce0c0021f..39bfc109c4 100644 --- a/ompi/mca/io/romio/romio/mpi-io/Makefile.am +++ b/ompi/mca/io/romio/romio/mpi-io/Makefile.am @@ -89,6 +89,7 @@ libmpi_io_la_SOURCES = \ write_ord.c \ write_orde.c \ write_sh.c \ - register_datarep.c \ - mpiu_greq.c \ - mpich2_fileutil.c + register_datarep.c \ + mpiu_greq.c \ + mpich2_fileutil.c \ + mpir-mpioinit.c diff --git a/ompi/mca/io/romio/romio/mpi-io/close.c b/ompi/mca/io/romio/romio/mpi-io/close.c index 1b7e9aedce..0f31532ad2 100644 --- a/ompi/mca/io/romio/romio/mpi-io/close.c +++ b/ompi/mca/io/romio/romio/mpi-io/close.c @@ -42,8 +42,7 @@ int MPI_File_close(MPI_File *mpi_fh) HPMP_IO_WSTART(fl_xmpi, BLKMPIFILECLOSE, TRDTBLOCK, *fh); #endif /* MPI_hpux */ - MPIU_THREAD_SINGLE_CS_ENTER("io"); - MPIR_Nest_incr(); + MPIU_THREAD_CS_ENTER(ALLFUNC,); fh = MPIO_File_resolve(*mpi_fh); @@ -51,15 +50,15 @@ int MPI_File_close(MPI_File *mpi_fh) MPIO_CHECK_FILE_HANDLE(fh, myname, error_code); /* --END ERROR HANDLING-- */ - if (((fh)->file_system != ADIO_PIOFS) && - ((fh)->file_system != ADIO_PVFS) && - ((fh)->file_system != ADIO_PVFS2) && - ((fh)->file_system != ADIO_GRIDFTP)) + if (ADIO_Feature(fh, ADIO_SHARED_FP)) { ADIOI_Free((fh)->shared_fp_fname); /* need a barrier because the file containing the shared file pointer is opened with COMM_SELF. We don't want it to be deleted while others are still accessing it. */ + /* FIXME: It is wrong to use MPI_Barrier; the user could choose to + re-implement MPI_Barrier in an unexpected way. 
Either use + MPIR_Barrier_impl as in MPICH2 or PMPI_Barrier */ MPI_Barrier((fh)->comm); if ((fh)->shared_fp_fd != ADIO_FILE_NULL) { MPI_File *mpi_fh_shared = &(fh->shared_fp_fd); @@ -71,6 +70,19 @@ int MPI_File_close(MPI_File *mpi_fh) } } + /* Because ROMIO expects the MPI library to provide error handler management + * routines but it doesn't ever participate in MPI_File_close, we have to + * somehow inform the MPI library that we no longer hold a reference to any + * user defined error handler. We do this by setting the errhandler at this + * point to MPI_ERRORS_RETURN. */ +/* Open MPI: The call to PMPI_File_set_errhandler has to be done in romio/src/io_romio_file_open.c + in routine mca_io_romio_file_close() +*/ +#if 0 + error_code = PMPI_File_set_errhandler(*mpi_fh, MPI_ERRORS_RETURN); + if (error_code != MPI_SUCCESS) goto fn_fail; +#endif + ADIO_Close(fh, &error_code); MPIO_File_free(mpi_fh); /* --BEGIN ERROR HANDLING-- */ @@ -81,13 +93,11 @@ int MPI_File_close(MPI_File *mpi_fh) HPMP_IO_WEND(fl_xmpi); #endif /* MPI_hpux */ - MPIR_Nest_decr(); fn_exit: - MPIU_THREAD_SINGLE_CS_EXIT("io"); + MPIU_THREAD_CS_EXIT(ALLFUNC,); return error_code; fn_fail: /* --BEGIN ERROR HANDLING-- */ - MPIR_Nest_decr(); error_code = MPIO_Err_return_file(fh, error_code); goto fn_exit; /* --END ERROR HANDLING-- */ diff --git a/ompi/mca/io/romio/romio/mpi-io/delete.c b/ompi/mca/io/romio/romio/mpi-io/delete.c index e328850c71..225a21680d 100644 --- a/ompi/mca/io/romio/romio/mpi-io/delete.c +++ b/ompi/mca/io/romio/romio/mpi-io/delete.c @@ -23,8 +23,6 @@ #include "mpioprof.h" #endif -extern int ADIO_Init_keyval; - /*@ MPI_File_delete - Deletes a file @@ -36,10 +34,9 @@ Input Parameters: @*/ int MPI_File_delete(char *filename, MPI_Info info) { - int flag, error_code, file_system; + int error_code, file_system; char *tmp; ADIOI_Fns *fsops; - static char myname[] = "MPI_FILE_DELETE"; #ifdef MPI_hpux int fl_xmpi; @@ -49,37 +46,10 @@ int MPI_File_delete(char *filename, MPI_Info info) 
MPIU_UNREFERENCED_ARG(info); - MPIU_THREAD_SINGLE_CS_ENTER("io"); - MPIR_Nest_incr(); - - /* first check if ADIO has been initialized. If not, initialize it */ - if (ADIO_Init_keyval == MPI_KEYVAL_INVALID) { - MPI_Initialized(&flag); - - /* --BEGIN ERROR HANDLING-- */ - if (!flag) { - error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, - myname, __LINE__, MPI_ERR_OTHER, - "**initialized", 0); - error_code = MPIO_Err_return_file(MPI_FILE_NULL, error_code); - goto fn_exit; - } - /* --END ERROR HANDLING-- */ - - MPI_Keyval_create(MPI_NULL_COPY_FN, ADIOI_End_call, &ADIO_Init_keyval, - (void *) 0); - - /* put a dummy attribute on MPI_COMM_WORLD, because we want the delete - function to be called when MPI_COMM_WORLD is freed. Hopefully the - MPI library frees MPI_COMM_WORLD when MPI_Finalize is called, - though the standard does not mandate this. */ - - MPI_Attr_put(MPI_COMM_WORLD, ADIO_Init_keyval, (void *) 0); - - /* initialize ADIO */ - ADIO_Init( (int *)0, (char ***)0, &error_code); - } + MPIU_THREAD_CS_ENTER(ALLFUNC,); + MPIR_MPIOInit(&error_code); + if (error_code != MPI_SUCCESS) goto fn_exit; /* resolve file system type from file name; this is a collective call */ ADIO_ResolveFileType(MPI_COMM_SELF, filename, &file_system, &fsops, @@ -118,7 +88,6 @@ int MPI_File_delete(char *filename, MPI_Info info) #endif /* MPI_hpux */ fn_exit: - MPIR_Nest_decr(); - MPIU_THREAD_SINGLE_CS_EXIT("io"); + MPIU_THREAD_CS_EXIT(ALLFUNC,); return error_code; } diff --git a/ompi/mca/io/romio/romio/mpi-io/fortran/get_extentf.c b/ompi/mca/io/romio/romio/mpi-io/fortran/get_extentf.c index 9fea49a72a..a62a959acf 100644 --- a/ompi/mca/io/romio/romio/mpi-io/fortran/get_extentf.c +++ b/ompi/mca/io/romio/romio/mpi-io/fortran/get_extentf.c @@ -105,7 +105,7 @@ void mpi_file_get_type_extent_(MPI_Fint *fh,MPI_Fint *datatype, datatype_c = MPI_Type_f2c(*datatype); *ierr = MPI_File_get_type_extent(fh_c,datatype_c, &extent_c); - *extent = (MPI_Fint) extent_c; + *(MPI_Aint*)extent = 
extent_c; /* Have to assume it's really an MPI_Aint?*/ } #else @@ -121,6 +121,6 @@ FORTRAN_API void FORT_CALL mpi_file_get_type_extent_(MPI_Fint *fh,MPI_Datatype * fh_c = MPI_File_f2c(*fh); *ierr = MPI_File_get_type_extent(fh_c,*datatype, &extent_c); - *extent = (MPI_Fint) extent_c; + *(MPI_Aint*)extent = extent_c; /* Have to assume it's really an MPI_Aint?*/ } #endif diff --git a/ompi/mca/io/romio/romio/mpi-io/fsync.c b/ompi/mca/io/romio/romio/mpi-io/fsync.c index dfec3bf38c..a26c4ee70c 100644 --- a/ompi/mca/io/romio/romio/mpi-io/fsync.c +++ b/ompi/mca/io/romio/romio/mpi-io/fsync.c @@ -43,8 +43,7 @@ int MPI_File_sync(MPI_File mpi_fh) HPMP_IO_START(fl_xmpi, BLKMPIFILESYNC, TRDTBLOCK, fh, MPI_DATATYPE_NULL, -1); #endif /* MPI_hpux */ - MPIU_THREAD_SINGLE_CS_ENTER("io"); - MPIR_Nest_incr(); + MPIU_THREAD_CS_ENTER(ALLFUNC,); fh = MPIO_File_resolve(mpi_fh); /* --BEGIN ERROR HANDLING-- */ @@ -71,7 +70,6 @@ int MPI_File_sync(MPI_File mpi_fh) #endif /* MPI_hpux */ fn_exit: - MPIR_Nest_decr(); - MPIU_THREAD_SINGLE_CS_EXIT("io"); + MPIU_THREAD_CS_EXIT(ALLFUNC,); return error_code; } diff --git a/ompi/mca/io/romio/romio/mpi-io/get_bytoff.c b/ompi/mca/io/romio/romio/mpi-io/get_bytoff.c index e770b8a662..c1e982442d 100644 --- a/ompi/mca/io/romio/romio/mpi-io/get_bytoff.c +++ b/ompi/mca/io/romio/romio/mpi-io/get_bytoff.c @@ -46,8 +46,6 @@ int MPI_File_get_byte_offset(MPI_File mpi_fh, ADIO_File fh; static char myname[] = "MPI_FILE_GET_BYTE_OFFSET"; - MPIR_Nest_incr(); - fh = MPIO_File_resolve(mpi_fh); /* --BEGIN ERROR HANDLING-- */ @@ -68,7 +66,6 @@ int MPI_File_get_byte_offset(MPI_File mpi_fh, ADIOI_Get_byte_offset(fh, offset, disp); fn_exit: - MPIR_Nest_decr(); return MPI_SUCCESS; } diff --git a/ompi/mca/io/romio/romio/mpi-io/get_errh.c b/ompi/mca/io/romio/romio/mpi-io/get_errh.c index 1ea878c708..bf4b08e7f0 100644 --- a/ompi/mca/io/romio/romio/mpi-io/get_errh.c +++ b/ompi/mca/io/romio/romio/mpi-io/get_errh.c @@ -40,8 +40,9 @@ int MPI_File_get_errhandler(MPI_File mpi_fh, 
MPI_Errhandler *errhandler) int error_code = MPI_SUCCESS; ADIO_File fh; static char myname[] = "MPI_FILE_GET_ERRHANDLER"; + MPIU_THREADPRIV_DECL; - MPIU_THREAD_SINGLE_CS_ENTER("io"); + MPIU_THREAD_CS_ENTER(ALLFUNC,); if (mpi_fh == MPI_FILE_NULL) { *errhandler = ADIOI_DFLT_ERR_HANDLER; @@ -63,6 +64,6 @@ int MPI_File_get_errhandler(MPI_File mpi_fh, MPI_Errhandler *errhandler) } fn_exit: - MPIU_THREAD_SINGLE_CS_EXIT("io"); + MPIU_THREAD_CS_EXIT(ALLFUNC,); return MPI_SUCCESS; } diff --git a/ompi/mca/io/romio/romio/mpi-io/get_extent.c b/ompi/mca/io/romio/romio/mpi-io/get_extent.c index 0ead1490ff..bb886bdc76 100644 --- a/ompi/mca/io/romio/romio/mpi-io/get_extent.c +++ b/ompi/mca/io/romio/romio/mpi-io/get_extent.c @@ -42,8 +42,6 @@ int MPI_File_get_type_extent(MPI_File mpi_fh, MPI_Datatype datatype, ADIO_File fh; static char myname[] = "MPI_FILE_GET_TYPE_EXTENT"; - MPIR_Nest_incr(); - fh = MPIO_File_resolve(mpi_fh); /* --BEGIN ERROR HANDLING-- */ @@ -56,6 +54,5 @@ int MPI_File_get_type_extent(MPI_File mpi_fh, MPI_Datatype datatype, error_code = MPI_Type_extent(datatype, extent); fn_exit: - MPIR_Nest_decr(); return error_code; } diff --git a/ompi/mca/io/romio/romio/mpi-io/get_group.c b/ompi/mca/io/romio/romio/mpi-io/get_group.c index fe6ebaa4ca..747318f49b 100644 --- a/ompi/mca/io/romio/romio/mpi-io/get_group.c +++ b/ompi/mca/io/romio/romio/mpi-io/get_group.c @@ -41,8 +41,7 @@ int MPI_File_get_group(MPI_File mpi_fh, MPI_Group *group) ADIO_File fh; static char myname[] = "MPI_FILE_GET_GROUP"; - MPIU_THREAD_SINGLE_CS_ENTER("io"); - MPIR_Nest_incr(); + MPIU_THREAD_CS_ENTER(ALLFUNC,); fh = MPIO_File_resolve(mpi_fh); @@ -58,7 +57,6 @@ int MPI_File_get_group(MPI_File mpi_fh, MPI_Group *group) error_code = MPI_Comm_group(fh->comm, group); fn_exit: - MPIR_Nest_decr(); - MPIU_THREAD_SINGLE_CS_EXIT("io"); + MPIU_THREAD_CS_EXIT(ALLFUNC,); return error_code; } diff --git a/ompi/mca/io/romio/romio/mpi-io/get_info.c b/ompi/mca/io/romio/romio/mpi-io/get_info.c index 
4e88503faf..96b16a12e5 100644 --- a/ompi/mca/io/romio/romio/mpi-io/get_info.c +++ b/ompi/mca/io/romio/romio/mpi-io/get_info.c @@ -40,8 +40,7 @@ int MPI_File_get_info(MPI_File mpi_fh, MPI_Info *info_used) ADIO_File fh; static char myname[] = "MPI_FILE_GET_INFO"; - MPIU_THREAD_SINGLE_CS_ENTER("io"); - MPIR_Nest_incr(); + MPIU_THREAD_CS_ENTER(ALLFUNC,); fh = MPIO_File_resolve(mpi_fh); @@ -56,7 +55,6 @@ int MPI_File_get_info(MPI_File mpi_fh, MPI_Info *info_used) /* --END ERROR HANDLING-- */ fn_exit: - MPIR_Nest_decr(); - MPIU_THREAD_SINGLE_CS_EXIT("io"); + MPIU_THREAD_CS_EXIT(ALLFUNC,); return error_code; } diff --git a/ompi/mca/io/romio/romio/mpi-io/get_posn.c b/ompi/mca/io/romio/romio/mpi-io/get_posn.c index 1db579b0ae..37c7fd0ad6 100644 --- a/ompi/mca/io/romio/romio/mpi-io/get_posn.c +++ b/ompi/mca/io/romio/romio/mpi-io/get_posn.c @@ -43,8 +43,6 @@ int MPI_File_get_position(MPI_File mpi_fh, MPI_Offset *offset) ADIO_File fh; static char myname[] = "MPI_FILE_GET_POSITION"; - MPIR_Nest_incr(); - fh = MPIO_File_resolve(mpi_fh); /* --BEGIN ERROR HANDLING-- */ @@ -55,7 +53,5 @@ int MPI_File_get_position(MPI_File mpi_fh, MPI_Offset *offset) ADIOI_Get_position(fh, offset); fn_exit: - MPIR_Nest_decr(); - return MPI_SUCCESS; } diff --git a/ompi/mca/io/romio/romio/mpi-io/get_posn_sh.c b/ompi/mca/io/romio/romio/mpi-io/get_posn_sh.c index a51dbbdb8c..873f5c4e19 100644 --- a/ompi/mca/io/romio/romio/mpi-io/get_posn_sh.c +++ b/ompi/mca/io/romio/romio/mpi-io/get_posn_sh.c @@ -41,8 +41,6 @@ int MPI_File_get_position_shared(MPI_File mpi_fh, MPI_Offset *offset) ADIO_File fh; static char myname[] = "MPI_FILE_GET_POSITION_SHARED"; - MPIR_Nest_incr(); - fh = MPIO_File_resolve(mpi_fh); /* --BEGIN ERROR HANDLING-- */ @@ -60,7 +58,5 @@ int MPI_File_get_position_shared(MPI_File mpi_fh, MPI_Offset *offset) /* --END ERROR HANDLING-- */ fn_exit: - MPIR_Nest_decr(); - return error_code; } diff --git a/ompi/mca/io/romio/romio/mpi-io/get_size.c b/ompi/mca/io/romio/romio/mpi-io/get_size.c index 
4ff1c27f23..a23d2c9197 100644 --- a/ompi/mca/io/romio/romio/mpi-io/get_size.c +++ b/ompi/mca/io/romio/romio/mpi-io/get_size.c @@ -47,8 +47,6 @@ int MPI_File_get_size(MPI_File mpi_fh, MPI_Offset *size) MPI_DATATYPE_NULL, -1); #endif /* MPI_hpux */ - MPIR_Nest_incr(); - fh = MPIO_File_resolve(mpi_fh); /* --BEGIN ERROR HANDLING-- */ @@ -71,7 +69,5 @@ int MPI_File_get_size(MPI_File mpi_fh, MPI_Offset *size) #endif /* MPI_hpux */ fn_exit: - MPIR_Nest_decr(); - return error_code; } diff --git a/ompi/mca/io/romio/romio/mpi-io/get_view.c b/ompi/mca/io/romio/romio/mpi-io/get_view.c index bc8c8bfbea..ba2a249c1e 100644 --- a/ompi/mca/io/romio/romio/mpi-io/get_view.c +++ b/ompi/mca/io/romio/romio/mpi-io/get_view.c @@ -52,9 +52,7 @@ int MPI_File_get_view(MPI_File mpi_fh, int i, j, k, combiner; MPI_Datatype copy_etype, copy_filetype; - - MPIU_THREAD_SINGLE_CS_ENTER("io"); - MPIR_Nest_incr(); + MPIU_THREAD_CS_ENTER(ALLFUNC,); fh = MPIO_File_resolve(mpi_fh); @@ -77,15 +75,16 @@ int MPI_File_get_view(MPI_File mpi_fh, MPI_Type_get_envelope(fh->etype, &i, &j, &k, &combiner); if (combiner == MPI_COMBINER_NAMED) *etype = fh->etype; else { - MPIR_Nest_incr(); + /* FIXME: It is wrong to use MPI_Type_contiguous; the user could choose to + re-implement MPI_Type_contiguous in an unexpected way. 
Either use + MPIR_Type_contiguous_impl as in MPICH2 or PMPI_Type_contiguous */ MPI_Type_contiguous(1, fh->etype, &copy_etype); - MPIR_Nest_decr(); - MPIR_Nest_incr(); + /* FIXME: Ditto for MPI_Type_commit - use NMPI or PMPI */ MPI_Type_commit(&copy_etype); - MPIR_Nest_decr(); *etype = copy_etype; } + /* FIXME: Ditto for MPI_Type_xxx - use NMPI or PMPI */ MPI_Type_get_envelope(fh->filetype, &i, &j, &k, &combiner); if (combiner == MPI_COMBINER_NAMED) *filetype = fh->filetype; else { @@ -96,8 +95,7 @@ int MPI_File_get_view(MPI_File mpi_fh, } fn_exit: - MPIR_Nest_decr(); - MPIU_THREAD_SINGLE_CS_EXIT("io"); + MPIU_THREAD_CS_EXIT(ALLFUNC,); return MPI_SUCCESS; } diff --git a/ompi/mca/io/romio/romio/mpi-io/glue/mpich2/mpio_err.c b/ompi/mca/io/romio/romio/mpi-io/glue/mpich2/mpio_err.c index 1ef679bdaa..e59b688c74 100644 --- a/ompi/mca/io/romio/romio/mpi-io/glue/mpich2/mpio_err.c +++ b/ompi/mca/io/romio/romio/mpi-io/glue/mpich2/mpio_err.c @@ -74,7 +74,7 @@ int MPIO_Err_return_file(MPI_File mpi_fh, int error_code) kind = 1: errors return kind = 2: errors call function */ - if (e == MPI_ERRORS_RETURN || !e) { + if (e == MPI_ERRORS_RETURN || e == MPIR_ERRORS_THROW_EXCEPTIONS || !e) { /* FIXME: This is a hack in case no error handler was set */ kind = 1; c_errhandler = 0; diff --git a/ompi/mca/io/romio/romio/mpi-io/ioreq_c2f.c b/ompi/mca/io/romio/romio/mpi-io/ioreq_c2f.c index 1efb0bbee1..72708b8b79 100644 --- a/ompi/mca/io/romio/romio/mpi-io/ioreq_c2f.c +++ b/ompi/mca/io/romio/romio/mpi-io/ioreq_c2f.c @@ -47,14 +47,16 @@ MPI_Fint MPIO_Request_c2f(MPIO_Request request) return (MPI_Fint) request; #else int i; + MPIU_THREADPRIV_DECL; - MPIU_THREAD_SINGLE_CS_ENTER("io"); - + /* We can make this test outside of the ALLFUNC mutex because it does + not access any shared data */ if ((request <= (MPIO_Request) 0) || (request->cookie != ADIOI_REQ_COOKIE)) { - MPIU_THREAD_SINGLE_CS_EXIT("io"); return (MPI_Fint) 0; } + + MPIU_THREAD_CS_ENTER(ALLFUNC,); if (!ADIOI_Reqtable) { ADIOI_Reqtable_max = 1024;
ADIOI_Reqtable = (MPIO_Request *) @@ -73,7 +75,7 @@ MPI_Fint MPIO_Request_c2f(MPIO_Request request) ADIOI_Reqtable_ptr++; ADIOI_Reqtable[ADIOI_Reqtable_ptr] = request; - MPIU_THREAD_SINGLE_CS_EXIT("io"); + MPIU_THREAD_CS_EXIT(ALLFUNC,); return (MPI_Fint) ADIOI_Reqtable_ptr; #endif } diff --git a/ompi/mca/io/romio/romio/mpi-io/ioreq_f2c.c b/ompi/mca/io/romio/romio/mpi-io/ioreq_f2c.c index bcc9c73174..670319d364 100644 --- a/ompi/mca/io/romio/romio/mpi-io/ioreq_f2c.c +++ b/ompi/mca/io/romio/romio/mpi-io/ioreq_f2c.c @@ -41,13 +41,15 @@ MPIO_Request MPIO_Request_f2c(MPI_Fint request) { #else MPIO_Request MPIO_Request_f2c(MPI_Fint request) { + int error_code; + static char myname[] = "MPIO_REQUEST_F2C"; + MPIU_THREADPRIV_DECL; + #ifndef INT_LT_POINTER return (MPIO_Request) request; #else - int error_code; - static char myname[] = "MPIO_REQUEST_F2C"; - MPIU_THREAD_SINGLE_CS_ENTER("io"); + MPIU_THREAD_CS_ENTER(ALLFUNC,); if (!request) { return MPIO_REQUEST_NULL; @@ -63,7 +65,7 @@ MPIO_Request MPIO_Request_f2c(MPI_Fint request) /* --END ERROR HANDLING-- */ fn_exit: - MPIU_THREAD_SINGLE_CS_EXIT("io"); + MPIU_THREAD_CS_EXIT(ALLFUNC,); return ADIOI_Reqtable[request]; #endif } diff --git a/ompi/mca/io/romio/romio/mpi-io/iotest.c b/ompi/mca/io/romio/romio/mpi-io/iotest.c index 6a12fb68e7..df1671bb09 100644 --- a/ompi/mca/io/romio/romio/mpi-io/iotest.c +++ b/ompi/mca/io/romio/romio/mpi-io/iotest.c @@ -47,6 +47,7 @@ int MPIO_Test(MPIO_Request *request, int *flag, MPI_Status *status) { int error_code; static char myname[] = "MPIO_TEST"; + MPIU_THREADPRIV_DECL; #ifdef MPI_hpux int fl_xmpi; @@ -55,7 +56,7 @@ int MPIO_Test(MPIO_Request *request, int *flag, MPI_Status *status) } #endif /* MPI_hpux */ - MPIU_THREAD_SINGLE_CS_ENTER("io"); + MPIU_THREAD_CS_ENTER(ALLFUNC,); if (*request == MPIO_REQUEST_NULL) { error_code = MPI_SUCCESS; @@ -88,7 +89,7 @@ int MPIO_Test(MPIO_Request *request, int *flag, MPI_Status *status) #endif /* MPI_hpux */ fn_exit: - MPIU_THREAD_SINGLE_CS_EXIT("io"); + 
MPIU_THREAD_CS_EXIT(ALLFUNC,); return error_code; } #endif diff --git a/ompi/mca/io/romio/romio/mpi-io/iotestall.c b/ompi/mca/io/romio/romio/mpi-io/iotestall.c index 46851c5a52..23b812f96f 100644 --- a/ompi/mca/io/romio/romio/mpi-io/iotestall.c +++ b/ompi/mca/io/romio/romio/mpi-io/iotestall.c @@ -32,13 +32,11 @@ int MPIO_Testall(int count, MPIO_Request requests[], int *flag, MPI_Status statuses[]) { int done, i, err; + MPIU_THREADPRIV_DECL; - - MPIU_THREAD_SINGLE_CS_ENTER("io"); + MPIU_THREAD_CS_ENTER(ALLFUNC,); if (count == 1) { - MPIR_Nest_decr(); err = MPIO_Test( requests, flag, statuses ); - MPIR_Nest_decr(); goto fn_exit; } @@ -50,9 +48,7 @@ int MPIO_Testall(int count, MPIO_Request requests[], int *flag, done = 1; for (i=0; ifile_system != ADIO_PIOFS) && - (fh->file_system != ADIO_NFS) && (fh->file_system != ADIO_PVFS) - && (fh->file_system != ADIO_PVFS2)) + if (ADIO_Feature(fh, ADIO_LOCKS)) { ADIOI_WRITE_LOCK(fh, off, SEEK_SET, bufsize); } @@ -147,9 +143,7 @@ int MPIOI_File_iread(MPI_File mpi_fh, ADIO_ReadContig(fh, buf, count, datatype, file_ptr_type, off, &status, &error_code); - if ((fh->file_system != ADIO_PIOFS) && - (fh->file_system != ADIO_NFS) && (fh->file_system != ADIO_PVFS) - && (fh->file_system != ADIO_PVFS2)) + if (ADIO_Feature(fh, ADIO_LOCKS)) { ADIOI_UNLOCK(fh, off, SEEK_SET, bufsize); } @@ -163,8 +157,6 @@ int MPIOI_File_iread(MPI_File mpi_fh, offset, request, &error_code); fn_exit: - MPIR_Nest_decr(); - MPIU_THREAD_SINGLE_CS_EXIT("io"); return error_code; } diff --git a/ompi/mca/io/romio/romio/mpi-io/iread_sh.c b/ompi/mca/io/romio/romio/mpi-io/iread_sh.c index d467511a56..4d3d399f02 100644 --- a/ompi/mca/io/romio/romio/mpi-io/iread_sh.c +++ b/ompi/mca/io/romio/romio/mpi-io/iread_sh.c @@ -52,8 +52,7 @@ int MPI_File_iread_shared(MPI_File mpi_fh, void *buf, int count, ADIO_Offset off, shared_fp; MPI_Offset nbytes=0; - MPIU_THREAD_SINGLE_CS_ENTER("io"); - MPIR_Nest_incr(); + MPIU_THREAD_CS_ENTER(ALLFUNC,); fh = MPIO_File_resolve(mpi_fh); @@ -68,6 
+67,7 @@ int MPI_File_iread_shared(MPI_File mpi_fh, void *buf, int count, /* --BEGIN ERROR HANDLING-- */ MPIO_CHECK_INTEGRAL_ETYPE(fh, count, datatype_size, myname, error_code); MPIO_CHECK_FS_SUPPORTS_SHARED(fh, myname, error_code); + MPIO_CHECK_COUNT_SIZE(fh, count, datatype_size, myname, error_code); /* --END ERROR HANDLING-- */ ADIOI_Datatype_iscontig(datatype, &buftype_is_contig); @@ -131,8 +131,7 @@ int MPI_File_iread_shared(MPI_File mpi_fh, void *buf, int count, /* --END ERROR HANDLING-- */ fn_exit: - MPIR_Nest_decr(); - MPIU_THREAD_SINGLE_CS_EXIT("io"); + MPIU_THREAD_CS_EXIT(ALLFUNC,); return error_code; } #endif diff --git a/ompi/mca/io/romio/romio/mpi-io/iwrite.c b/ompi/mca/io/romio/romio/mpi-io/iwrite.c index dd859624e6..0ef20e4c1a 100644 --- a/ompi/mca/io/romio/romio/mpi-io/iwrite.c +++ b/ompi/mca/io/romio/romio/mpi-io/iwrite.c @@ -45,9 +45,7 @@ int MPI_File_iwrite(MPI_File mpi_fh, void *buf, int count, MPI_Datatype datatype, MPI_Request *request) { int error_code=MPI_SUCCESS; - static char myname[] = "MPI_FILE_IWRITE"; - #ifdef MPI_hpux int fl_xmpi; @@ -55,8 +53,7 @@ int MPI_File_iwrite(MPI_File mpi_fh, void *buf, int count, count); #endif /* MPI_hpux */ - MPIU_THREAD_SINGLE_CS_ENTER("io"); - MPIR_Nest_incr(); + MPIU_THREAD_CS_ENTER(ALLFUNC,); error_code = MPIOI_File_iwrite(mpi_fh, (MPI_Offset) 0, ADIO_INDIVIDUAL, buf, count, datatype, myname, request); @@ -69,6 +66,7 @@ int MPI_File_iwrite(MPI_File mpi_fh, void *buf, int count, #ifdef MPI_hpux HPMP_IO_END(fl_xmpi, mpi_fh, datatype, count); #endif /* MPI_hpux */ + MPIU_THREAD_CS_EXIT(ALLFUNC,); return error_code; } @@ -91,9 +89,6 @@ int MPIOI_File_iwrite(MPI_File mpi_fh, ADIO_File fh; MPI_Offset nbytes=0; - MPIU_THREAD_SINGLE_CS_ENTER("io"); - MPIR_Nest_incr(); - fh = MPIO_File_resolve(mpi_fh); /* --BEGIN ERROR HANDLING-- */ @@ -116,6 +111,7 @@ int MPIOI_File_iwrite(MPI_File mpi_fh, MPIO_CHECK_INTEGRAL_ETYPE(fh, count, datatype_size, myname, error_code); MPIO_CHECK_WRITABLE(fh, myname, error_code); 
MPIO_CHECK_NOT_SEQUENTIAL_MODE(fh, myname, error_code); + MPIO_CHECK_COUNT_SIZE(fh, count, datatype_size, myname, error_code); /* --END ERROR HANDLING-- */ ADIOI_Datatype_iscontig(datatype, &buftype_is_contig); @@ -140,9 +136,7 @@ int MPIOI_File_iwrite(MPI_File mpi_fh, else { /* to maintain strict atomicity semantics with other concurrent operations, lock (exclusive) and call blocking routine */ - if ((fh->file_system != ADIO_PIOFS) && - (fh->file_system != ADIO_NFS) && (fh->file_system != ADIO_PVFS) - && (fh->file_system != ADIO_PVFS2)) + if (ADIO_Feature(fh, ADIO_LOCKS) ) { ADIOI_WRITE_LOCK(fh, off, SEEK_SET, bufsize); } @@ -150,9 +144,7 @@ int MPIOI_File_iwrite(MPI_File mpi_fh, ADIO_WriteContig(fh, buf, count, datatype, file_ptr_type, off, &status, &error_code); - if ((fh->file_system != ADIO_PIOFS) && - (fh->file_system != ADIO_NFS) && (fh->file_system != ADIO_PVFS) - && (fh->file_system != ADIO_PVFS2)) + if (ADIO_Feature(fh, ADIO_LOCKS) ) { ADIOI_UNLOCK(fh, off, SEEK_SET, bufsize); } @@ -168,9 +160,6 @@ int MPIOI_File_iwrite(MPI_File mpi_fh, offset, request, &error_code); } fn_exit: - MPIR_Nest_decr(); - MPIU_THREAD_SINGLE_CS_EXIT("io"); - return error_code; } #endif diff --git a/ompi/mca/io/romio/romio/mpi-io/iwrite_sh.c b/ompi/mca/io/romio/romio/mpi-io/iwrite_sh.c index 5c60df7053..9812facc40 100644 --- a/ompi/mca/io/romio/romio/mpi-io/iwrite_sh.c +++ b/ompi/mca/io/romio/romio/mpi-io/iwrite_sh.c @@ -51,8 +51,7 @@ int MPI_File_iwrite_shared(MPI_File mpi_fh, void *buf, int count, ADIO_Offset off, shared_fp; static char myname[] = "MPI_FILE_IWRITE_SHARED"; - MPIU_THREAD_SINGLE_CS_ENTER("io"); - MPIR_Nest_incr(); + MPIU_THREAD_CS_ENTER(ALLFUNC,); fh = MPIO_File_resolve(mpi_fh); @@ -67,6 +66,7 @@ int MPI_File_iwrite_shared(MPI_File mpi_fh, void *buf, int count, /* --BEGIN ERROR HANDLING-- */ MPIO_CHECK_INTEGRAL_ETYPE(fh, count, datatype_size, myname, error_code); MPIO_CHECK_FS_SUPPORTS_SHARED(fh, myname, error_code); + MPIO_CHECK_COUNT_SIZE(fh, count, 
datatype_size, myname, error_code); /* --END ERROR HANDLING-- */ ADIOI_Datatype_iscontig(datatype, &buftype_is_contig); @@ -110,8 +110,7 @@ int MPI_File_iwrite_shared(MPI_File mpi_fh, void *buf, int count, shared_fp, request, &error_code); fn_exit: - MPIR_Nest_decr(); - MPIU_THREAD_SINGLE_CS_EXIT("io"); + MPIU_THREAD_CS_EXIT(ALLFUNC,); return error_code; } diff --git a/ompi/mca/io/romio/romio/mpi-io/mpioimpl.h b/ompi/mca/io/romio/romio/mpi-io/mpioimpl.h index 1864c3abd1..e809d4ab3b 100644 --- a/ompi/mca/io/romio/romio/mpi-io/mpioimpl.h +++ b/ompi/mca/io/romio/romio/mpi-io/mpioimpl.h @@ -21,27 +21,13 @@ #include "mpiimpl.h" #include "mpiimplthread.h" -/* Use the routine versions of the nest macros, to avoid requiring - access to the MPIR_Process and MPIR_Thread structures */ -#ifdef MPIR_Nest_incr -#undef MPIR_Nest_incr -#undef MPIR_Nest_decr -#endif - -void MPIR_Nest_incr_export(void); -void MPIR_Nest_decr_export(void); -#define MPIR_Nest_incr MPIR_Nest_incr_export -#define MPIR_Nest_decr MPIR_Nest_decr_export - #else /* not ROMIO_INSIDE_MPICH2 */ /* Any MPI implementation that wishes to follow the thread-safety and error reporting features provided by MPICH2 must implement these four functions. 
Defining these as empty should not change the behavior of correct programs */ -#define MPIU_THREAD_SINGLE_CS_ENTER(_msg) -#define MPIU_THREAD_SINGLE_CS_EXIT(_msg) -#define MPIR_Nest_incr() -#define MPIR_Nest_decr() +#define MPIU_THREAD_CS_ENTER(x,y) +#define MPIU_THREAD_CS_EXIT(x,y) #ifdef HAVE_WINDOWS_H #define MPIU_UNREFERENCED_ARG(a) a #else @@ -60,6 +46,10 @@ struct MPIR_Info { MPI_Delete_function ADIOI_End_call; +/* common initialization routine */ +void MPIR_MPIOInit(int * error_code); + + #include "mpioprof.h" #ifdef MPI_hpux diff --git a/ompi/mca/io/romio/romio/mpi-io/mpioprof.h b/ompi/mca/io/romio/romio/mpi-io/mpioprof.h index 45c78d44a2..3cd568037c 100644 --- a/ompi/mca/io/romio/romio/mpi-io/mpioprof.h +++ b/ompi/mca/io/romio/romio/mpi-io/mpioprof.h @@ -19,7 +19,7 @@ * modification to all the files in the mpi-io directory. */ #if 0 -#ifdef MPIO_BUILD_PROFILING +#ifdef MPIO_BUILD_PROFILING #undef MPI_File_open #define MPI_File_open PMPI_File_open diff --git a/ompi/mca/io/romio/romio/mpi-io/mpir-mpioinit.c b/ompi/mca/io/romio/romio/mpi-io/mpir-mpioinit.c new file mode 100644 index 0000000000..d4d2cf877e --- /dev/null +++ b/ompi/mca/io/romio/romio/mpi-io/mpir-mpioinit.c @@ -0,0 +1,56 @@ +/* -*- Mode: C; c-basic-offset:4 ; -*- */ +/* + * (C) 2009 UChicago/Argonne LLC + * See COPYRIGHT in top-level directory. + */ +#include +#include "mpioimpl.h" + +#ifdef HAVE_WEAK_SYMBOLS +/* Include mapping from MPI->PMPI */ +#define MPIO_BUILD_PROFILING +#include "mpioprof.h" +#endif + +extern int ADIO_Init_keyval; + +/* common code to stuff an attribute on a communicator for the purpose of + * cleaning up in MPI_Finalize() */ + +void MPIR_MPIOInit(int * error_code) { + + int flag; + char myname[] = "MPIR_MPIOInit"; + + /* first check if ADIO has been initialized. 
If not, initialize it */ + if (ADIO_Init_keyval == MPI_KEYVAL_INVALID) { + MPI_Initialized(&flag); + + /* --BEGIN ERROR HANDLING-- */ + if (!flag) { + *error_code = MPIO_Err_create_code(MPI_SUCCESS, + MPIR_ERR_RECOVERABLE, myname, __LINE__, + MPI_ERR_OTHER, "**initialized", 0); + *error_code = MPIO_Err_return_file(MPI_FILE_NULL, *error_code); + return; + } + /* --END ERROR HANDLING-- */ + + MPI_Keyval_create(MPI_NULL_COPY_FN, ADIOI_End_call, &ADIO_Init_keyval, + (void *) 0); + + /* put a dummy attribute on MPI_COMM_SELF, because we want the delete + function to be called when MPI_COMM_SELF is freed. Clarified + in MPI-2 section 4.8, the standard mandates that attributes on + MPI_COMM_SELF get cleaned up early in MPI_Finalize */ + + MPI_Attr_put(MPI_COMM_SELF, ADIO_Init_keyval, (void *) 0); + + /* initialize ADIO */ + ADIO_Init( (int *)0, (char ***)0, error_code); + } + *error_code = MPI_SUCCESS; +} +/* + * vim: ts=8 sts=4 sw=4 noexpandtab + */ diff --git a/ompi/mca/io/romio/romio/mpi-io/mpiu_greq.c b/ompi/mca/io/romio/romio/mpi-io/mpiu_greq.c index bdd98fabf5..e460fee8d1 100644 --- a/ompi/mca/io/romio/romio/mpi-io/mpiu_greq.c +++ b/ompi/mca/io/romio/romio/mpi-io/mpiu_greq.c @@ -27,9 +27,7 @@ int MPIU_Greq_query_fn(void *extra_state, MPI_Status *status) status->MPI_ERROR = foo; /* and let Test|Wait know we weren't canceled */ - MPIR_Nest_incr(); MPI_Status_set_cancelled(status, 0); - MPIR_Nest_decr(); return MPI_SUCCESS; } diff --git a/ompi/mca/io/romio/romio/mpi-io/open.c b/ompi/mca/io/romio/romio/mpi-io/open.c index 01ab4912de..2d1bf996c2 100644 --- a/ompi/mca/io/romio/romio/mpi-io/open.c +++ b/ompi/mca/io/romio/romio/mpi-io/open.c @@ -23,6 +23,10 @@ #include "mpioprof.h" #endif +/* for user-definde reduce operator */ +#include "adio_extern.h" + + extern int ADIO_Init_keyval; /*@ @@ -42,20 +46,18 @@ Output Parameters: int MPI_File_open(MPI_Comm comm, char *filename, int amode, MPI_Info info, MPI_File *fh) { - int error_code, file_system, flag, /* tmp_amode, 
*/rank; + int error_code, file_system, flag, tmp_amode=0, rank; char *tmp; MPI_Comm dupcomm; ADIOI_Fns *fsops; static char myname[] = "MPI_FILE_OPEN"; - #ifdef MPI_hpux int fl_xmpi; HPMP_IO_OPEN_START(fl_xmpi, comm); #endif /* MPI_hpux */ - MPIU_THREAD_SINGLE_CS_ENTER("io"); - MPIR_Nest_incr(); + MPIU_THREAD_CS_ENTER(ALLFUNC,); /* --BEGIN ERROR HANDLING-- */ if (comm == MPI_COMM_NULL) @@ -102,50 +104,23 @@ int MPI_File_open(MPI_Comm comm, char *filename, int amode, "**fileamodeseq", 0); goto fn_fail; } - /* --END ERROR HANDLING-- */ -/* check if amode is the same on all processes */ MPI_Comm_dup(comm, &dupcomm); -/* - Removed this check because broadcast is too expensive. - tmp_amode = amode; - MPI_Bcast(&tmp_amode, 1, MPI_INT, 0, dupcomm); - if (amode != tmp_amode) { - FPRINTF(stderr, "MPI_File_open: amode must be the same on all processes\n"); - MPI_Abort(MPI_COMM_WORLD, 1); - } -*/ - /* check if ADIO has been initialized. If not, initialize it */ - if (ADIO_Init_keyval == MPI_KEYVAL_INVALID) { - MPI_Initialized(&flag); + MPIR_MPIOInit(&error_code); + if (error_code != MPI_SUCCESS) goto fn_fail; - /* --BEGIN ERROR HANDLING-- */ - if (!flag) { - error_code = MPIO_Err_create_code(MPI_SUCCESS, - MPIR_ERR_RECOVERABLE, - myname, __LINE__, MPI_ERR_OTHER, - "**initialized", 0); - goto fn_fail; - } - /* --END ERROR HANDLING-- */ +/* check if amode is the same on all processes */ + MPI_Allreduce(&amode, &tmp_amode, 1, MPI_INT, ADIO_same_amode, dupcomm); - MPI_Keyval_create(MPI_NULL_COPY_FN, ADIOI_End_call, &ADIO_Init_keyval, - (void *) 0); - -/* put a dummy attribute on MPI_COMM_WORLD, because we want the delete - function to be called when MPI_COMM_WORLD is freed. Hopefully the - MPI library frees MPI_COMM_WORLD when MPI_Finalize is called, - though the standard does not mandate this. 
*/ - - MPI_Attr_put(MPI_COMM_WORLD, ADIO_Init_keyval, (void *) 0); - -/* initialize ADIO */ - - ADIO_Init( (int *)0, (char ***)0, &error_code); + if (tmp_amode == ADIO_AMODE_NOMATCH) { + error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, + myname, __LINE__, MPI_ERR_AMODE, + "**fileamodediff", 0); + goto fn_fail; } - + /* --END ERROR HANDLING-- */ file_system = -1; @@ -161,24 +136,6 @@ int MPI_File_open(MPI_Comm comm, char *filename, int amode, goto fn_fail; } - /* Test for invalid flags in amode. - * - * eventually we should allow the ADIO implementations to test for - * invalid flags through some functional interface rather than having - * these tests here. -- Rob, 06/06/2001 - */ - if (((file_system == ADIO_PIOFS) || - (file_system == ADIO_PVFS) || - (file_system == ADIO_PVFS2) || - (file_system == ADIO_GRIDFTP)) && - (amode & MPI_MODE_SEQUENTIAL)) - { - error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, - myname, __LINE__, - MPI_ERR_UNSUPPORTED_OPERATION, - "**iosequnsupported", 0); - goto fn_fail; - } /* --END ERROR HANDLING-- */ /* strip off prefix if there is one, but only skip prefixes @@ -202,13 +159,24 @@ int MPI_File_open(MPI_Comm comm, char *filename, int amode, } /* --END ERROR HANDLING-- */ + /* if MPI_MODE_SEQUENTIAL requested, file systems cannot do explicit offset + * or independent file pointer accesses, leaving not much else aside from + * shared file pointer accesses. */ + if ( !ADIO_Feature((*fh), ADIO_SHARED_FP) && (amode & MPI_MODE_SEQUENTIAL)) + { + error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, + myname, __LINE__, + MPI_ERR_UNSUPPORTED_OPERATION, + "**iosequnsupported", 0); + ADIO_Close(*fh, &error_code); + goto fn_fail; + } + /* determine name of file that will hold the shared file pointer */ /* can't support shared file pointers on a file system that doesn't support file locking. 
*/ - if ((error_code == MPI_SUCCESS) && ((*fh)->file_system != ADIO_PIOFS) - && ((*fh)->file_system != ADIO_PVFS) - && ((*fh)->file_system != ADIO_PVFS2) - && ((*fh)->file_system != ADIO_GRIDFTP) ){ + if ((error_code == MPI_SUCCESS) && + ADIO_Feature((*fh), ADIO_SHARED_FP)) { MPI_Comm_rank(dupcomm, &rank); ADIOI_Shfp_fname(*fh, rank); @@ -226,14 +194,11 @@ int MPI_File_open(MPI_Comm comm, char *filename, int amode, HPMP_IO_OPEN_END(fl_xmpi, *fh, comm); #endif /* MPI_hpux */ - MPIR_Nest_decr(); - fn_exit: - MPIU_THREAD_SINGLE_CS_EXIT("io"); + MPIU_THREAD_CS_EXIT(ALLFUNC,); return error_code; fn_fail: /* --BEGIN ERROR HANDLING-- */ - MPIR_Nest_decr(); error_code = MPIO_Err_return_file(MPI_FILE_NULL, error_code); goto fn_exit; /* --END ERROR HANDLING-- */ diff --git a/ompi/mca/io/romio/romio/mpi-io/prealloc.c b/ompi/mca/io/romio/romio/mpi-io/prealloc.c index d34fcbb4b9..620a6c78e4 100644 --- a/ompi/mca/io/romio/romio/mpi-io/prealloc.c +++ b/ompi/mca/io/romio/romio/mpi-io/prealloc.c @@ -46,8 +46,7 @@ int MPI_File_preallocate(MPI_File mpi_fh, MPI_Offset size) fh, MPI_DATATYPE_NULL, -1); #endif /* MPI_hpux */ - MPIU_THREAD_SINGLE_CS_ENTER("io"); - MPIR_Nest_incr(); + MPIU_THREAD_CS_ENTER(ALLFUNC,); fh = MPIO_File_resolve(mpi_fh); @@ -74,7 +73,7 @@ int MPI_File_preallocate(MPI_File mpi_fh, MPI_Offset size) } /* --END ERROR HANDLING-- */ - if (size == 0) return MPI_SUCCESS; + if (size == 0) goto fn_exit; ADIOI_TEST_DEFERRED(fh, myname, &error_code); @@ -97,8 +96,7 @@ int MPI_File_preallocate(MPI_File mpi_fh, MPI_Offset size) fn_exit: - MPIR_Nest_decr(); - MPIU_THREAD_SINGLE_CS_EXIT("io"); + MPIU_THREAD_CS_EXIT(ALLFUNC,); /* TODO: bcast result? 
*/ if (!mynod) return error_code; diff --git a/ompi/mca/io/romio/romio/mpi-io/read.c b/ompi/mca/io/romio/romio/mpi-io/read.c index 0581ff3be3..74a6db7a22 100644 --- a/ompi/mca/io/romio/romio/mpi-io/read.c +++ b/ompi/mca/io/romio/romio/mpi-io/read.c @@ -75,8 +75,7 @@ int MPIOI_File_read(MPI_File mpi_fh, ADIO_File fh; ADIO_Offset off; - MPIU_THREAD_SINGLE_CS_ENTER("io"); - MPIR_Nest_incr(); + MPIU_THREAD_CS_ENTER(ALLFUNC,); fh = MPIO_File_resolve(mpi_fh); @@ -96,6 +95,11 @@ int MPIOI_File_read(MPI_File mpi_fh, /* --END ERROR HANDLING-- */ MPI_Type_size(datatype, &datatype_size); + + /* --BEGIN ERROR HANDLING-- */ + MPIO_CHECK_COUNT_SIZE(fh, count, datatype_size, myname, error_code); + /* --END ERROR HANDLING-- */ + if (count*datatype_size == 0) { #ifdef HAVE_STATUS_SET_BYTES @@ -128,22 +132,15 @@ int MPIOI_File_read(MPI_File mpi_fh, } /* if atomic mode requested, lock (exclusive) the region, because - there could be a concurrent noncontiguous request. Locking doesn't - work on PIOFS and PVFS, and on NFS it is done in the - ADIO_ReadContig. + there could be a concurrent noncontiguous request. 
*/ - - if ((fh->atomicity) && (fh->file_system != ADIO_PIOFS) && - (fh->file_system != ADIO_NFS) && (fh->file_system != ADIO_PVFS) && - (fh->file_system != ADIO_PVFS2)) + if ((fh->atomicity) && ADIO_Feature(fh, ADIO_LOCKS)) ADIOI_WRITE_LOCK(fh, off, SEEK_SET, bufsize); ADIO_ReadContig(fh, buf, count, datatype, file_ptr_type, off, status, &error_code); - if ((fh->atomicity) && (fh->file_system != ADIO_PIOFS) && - (fh->file_system != ADIO_NFS) && (fh->file_system != ADIO_PVFS) && - (fh->file_system != ADIO_PVFS2)) + if ((fh->atomicity) && ADIO_Feature(fh, ADIO_LOCKS)) ADIOI_UNLOCK(fh, off, SEEK_SET, bufsize); } else @@ -159,8 +156,7 @@ int MPIOI_File_read(MPI_File mpi_fh, /* --END ERROR HANDLING-- */ fn_exit: - MPIR_Nest_decr(); - MPIU_THREAD_SINGLE_CS_EXIT("io"); + MPIU_THREAD_CS_EXIT(ALLFUNC,); return error_code; } diff --git a/ompi/mca/io/romio/romio/mpi-io/read_all.c b/ompi/mca/io/romio/romio/mpi-io/read_all.c index 1aa271b48a..2ce9f22d40 100644 --- a/ompi/mca/io/romio/romio/mpi-io/read_all.c +++ b/ompi/mca/io/romio/romio/mpi-io/read_all.c @@ -75,8 +75,7 @@ int MPIOI_File_read_all(MPI_File mpi_fh, int error_code, datatype_size; ADIO_File fh; - MPIU_THREAD_SINGLE_CS_ENTER("io"); - MPIR_Nest_incr(); + MPIU_THREAD_CS_ENTER(ALLFUNC,); fh = MPIO_File_resolve(mpi_fh); @@ -101,6 +100,7 @@ int MPIOI_File_read_all(MPI_File mpi_fh, MPIO_CHECK_INTEGRAL_ETYPE(fh, count, datatype_size, myname, error_code); MPIO_CHECK_READABLE(fh, myname, error_code); MPIO_CHECK_NOT_SEQUENTIAL_MODE(fh, myname, error_code); + MPIO_CHECK_COUNT_SIZE(fh, count, datatype_size, myname, error_code); /* --END ERROR HANDLING-- */ ADIO_ReadStridedColl(fh, buf, count, datatype, file_ptr_type, @@ -112,8 +112,7 @@ int MPIOI_File_read_all(MPI_File mpi_fh, /* --END ERROR HANDLING-- */ fn_exit: - MPIR_Nest_decr(); - MPIU_THREAD_SINGLE_CS_EXIT("io"); + MPIU_THREAD_CS_EXIT(ALLFUNC,); return error_code; } diff --git a/ompi/mca/io/romio/romio/mpi-io/read_allb.c b/ompi/mca/io/romio/romio/mpi-io/read_allb.c index 
6622618d87..d1a5bb59ae 100644 --- a/ompi/mca/io/romio/romio/mpi-io/read_allb.c +++ b/ompi/mca/io/romio/romio/mpi-io/read_allb.c @@ -62,8 +62,7 @@ int MPIOI_File_read_all_begin(MPI_File mpi_fh, int error_code, datatype_size; ADIO_File fh; - MPIU_THREAD_SINGLE_CS_ENTER("io"); - MPIR_Nest_incr(); + MPIU_THREAD_CS_ENTER(ALLFUNC,); fh = MPIO_File_resolve(mpi_fh); @@ -96,6 +95,7 @@ int MPIOI_File_read_all_begin(MPI_File mpi_fh, error_code = MPIO_Err_return_file(fh, error_code); goto fn_exit; } + MPIO_CHECK_COUNT_SIZE(fh, count, datatype_size, myname, error_code); /* --END ERROR HANDLING-- */ fh->split_coll_count = 1; @@ -109,8 +109,7 @@ int MPIOI_File_read_all_begin(MPI_File mpi_fh, /* --END ERROR HANDLING-- */ fn_exit: - MPIR_Nest_decr(); - MPIU_THREAD_SINGLE_CS_EXIT("io"); + MPIU_THREAD_CS_EXIT(ALLFUNC,); return error_code; } diff --git a/ompi/mca/io/romio/romio/mpi-io/read_alle.c b/ompi/mca/io/romio/romio/mpi-io/read_alle.c index 2fb6dcb90d..d7ff4627fe 100644 --- a/ompi/mca/io/romio/romio/mpi-io/read_alle.c +++ b/ompi/mca/io/romio/romio/mpi-io/read_alle.c @@ -58,8 +58,7 @@ int MPIOI_File_read_all_end(MPI_File mpi_fh, MPIU_UNREFERENCED_ARG(buf); - MPIU_THREAD_SINGLE_CS_ENTER("io"); - MPIR_Nest_incr(); + MPIU_THREAD_CS_ENTER(ALLFUNC,); fh = MPIO_File_resolve(mpi_fh); @@ -82,8 +81,7 @@ int MPIOI_File_read_all_end(MPI_File mpi_fh, fh->split_coll_count = 0; fn_exit: - MPIR_Nest_decr(); - MPIU_THREAD_SINGLE_CS_EXIT("io"); + MPIU_THREAD_CS_EXIT(ALLFUNC,); return MPI_SUCCESS; } diff --git a/ompi/mca/io/romio/romio/mpi-io/read_ord.c b/ompi/mca/io/romio/romio/mpi-io/read_ord.c index 826d11ab6e..41993cb55f 100644 --- a/ompi/mca/io/romio/romio/mpi-io/read_ord.c +++ b/ompi/mca/io/romio/romio/mpi-io/read_ord.c @@ -48,8 +48,7 @@ int MPI_File_read_ordered(MPI_File mpi_fh, void *buf, int count, ADIO_Offset shared_fp=0; ADIO_File fh; - MPIU_THREAD_SINGLE_CS_ENTER("io"); - MPIR_Nest_incr(); + MPIU_THREAD_CS_ENTER(ALLFUNC,); fh = MPIO_File_resolve(mpi_fh); @@ -64,6 +63,7 @@ int 
MPI_File_read_ordered(MPI_File mpi_fh, void *buf, int count, /* --BEGIN ERROR HANDLING-- */ MPIO_CHECK_INTEGRAL_ETYPE(fh, count, datatype_size, myname, error_code); MPIO_CHECK_FS_SUPPORTS_SHARED(fh, myname, error_code); + MPIO_CHECK_COUNT_SIZE(fh, count, datatype_size, myname, error_code); /* --END ERROR HANDLING-- */ ADIOI_TEST_DEFERRED(fh, "MPI_File_read_ordered", &error_code); @@ -99,8 +99,7 @@ int MPI_File_read_ordered(MPI_File mpi_fh, void *buf, int count, /* --END ERROR HANDLING-- */ fn_exit: - MPIR_Nest_decr(); - MPIU_THREAD_SINGLE_CS_EXIT("io"); + MPIU_THREAD_CS_EXIT(ALLFUNC,); /* FIXME: Check for error code from ReadStridedColl? */ return error_code; diff --git a/ompi/mca/io/romio/romio/mpi-io/read_ordb.c b/ompi/mca/io/romio/romio/mpi-io/read_ordb.c index 0d3386ccac..2686ca3727 100644 --- a/ompi/mca/io/romio/romio/mpi-io/read_ordb.c +++ b/ompi/mca/io/romio/romio/mpi-io/read_ordb.c @@ -45,8 +45,7 @@ int MPI_File_read_ordered_begin(MPI_File mpi_fh, void *buf, int count, ADIO_File fh; static char myname[] = "MPI_FILE_READ_ORDERED_BEGIN"; - MPIU_THREAD_SINGLE_CS_ENTER("io"); - MPIR_Nest_incr(); + MPIU_THREAD_CS_ENTER(ALLFUNC,); fh = MPIO_File_resolve(mpi_fh); @@ -72,6 +71,7 @@ int MPI_File_read_ordered_begin(MPI_File mpi_fh, void *buf, int count, /* --BEGIN ERROR HANDLING-- */ MPIO_CHECK_INTEGRAL_ETYPE(fh, count, datatype_size, myname, error_code); MPIO_CHECK_FS_SUPPORTS_SHARED(fh, myname, error_code); + MPIO_CHECK_COUNT_SIZE(fh, count, datatype_size, myname, error_code); /* --END ERROR HANDLING-- */ ADIOI_TEST_DEFERRED(fh, myname, &error_code); @@ -107,8 +107,7 @@ int MPI_File_read_ordered_begin(MPI_File mpi_fh, void *buf, int count, /* --END ERROR HANDLING-- */ fn_exit: - MPIR_Nest_decr(); - MPIU_THREAD_SINGLE_CS_EXIT("io"); + MPIU_THREAD_CS_EXIT(ALLFUNC,); return error_code; } diff --git a/ompi/mca/io/romio/romio/mpi-io/read_orde.c b/ompi/mca/io/romio/romio/mpi-io/read_orde.c index 0d27799932..d07a2ddf43 100644 --- 
a/ompi/mca/io/romio/romio/mpi-io/read_orde.c +++ b/ompi/mca/io/romio/romio/mpi-io/read_orde.c @@ -43,7 +43,7 @@ int MPI_File_read_ordered_end(MPI_File mpi_fh, void *buf, MPI_Status *status) MPIU_UNREFERENCED_ARG(buf); - MPIU_THREAD_SINGLE_CS_ENTER("io"); + MPIU_THREAD_CS_ENTER(ALLFUNC,); fh = MPIO_File_resolve(mpi_fh); @@ -67,7 +67,7 @@ int MPI_File_read_ordered_end(MPI_File mpi_fh, void *buf, MPI_Status *status) fh->split_coll_count = 0; fn_exit: - MPIU_THREAD_SINGLE_CS_EXIT("io"); + MPIU_THREAD_CS_EXIT(ALLFUNC,); return error_code; } diff --git a/ompi/mca/io/romio/romio/mpi-io/read_sh.c b/ompi/mca/io/romio/romio/mpi-io/read_sh.c index b2507bfcd2..f1459767f3 100644 --- a/ompi/mca/io/romio/romio/mpi-io/read_sh.c +++ b/ompi/mca/io/romio/romio/mpi-io/read_sh.c @@ -48,8 +48,7 @@ int MPI_File_read_shared(MPI_File mpi_fh, void *buf, int count, ADIO_Offset off, shared_fp; ADIO_File fh; - MPIU_THREAD_SINGLE_CS_ENTER("io"); - MPIR_Nest_incr(); + MPIU_THREAD_CS_ENTER(ALLFUNC,); fh = MPIO_File_resolve(mpi_fh); @@ -60,6 +59,11 @@ int MPI_File_read_shared(MPI_File mpi_fh, void *buf, int count, /* --END ERROR HANDLING-- */ MPI_Type_size(datatype, &datatype_size); + + /* --BEGIN ERROR HANDLING-- */ + MPIO_CHECK_COUNT_SIZE(fh, count, datatype_size, myname, error_code); + /* --END ERROR HANDLING-- */ + if (count*datatype_size == 0) { #ifdef HAVE_STATUS_SET_BYTES @@ -124,8 +128,7 @@ int MPI_File_read_shared(MPI_File mpi_fh, void *buf, int count, /* --END ERROR HANDLING-- */ fn_exit: - MPIR_Nest_decr(); - MPIU_THREAD_SINGLE_CS_EXIT("io"); + MPIU_THREAD_CS_EXIT(ALLFUNC,); return error_code; } diff --git a/ompi/mca/io/romio/romio/mpi-io/register_datarep.c b/ompi/mca/io/romio/romio/mpi-io/register_datarep.c index 1150c3984d..c528abfc72 100644 --- a/ompi/mca/io/romio/romio/mpi-io/register_datarep.c +++ b/ompi/mca/io/romio/romio/mpi-io/register_datarep.c @@ -23,8 +23,6 @@ #include "mpioprof.h" #endif -extern int ADIO_Init_keyval; - /*@ MPI_Register_datarep - Register functions for 
user-defined data representations @@ -56,11 +54,11 @@ int MPI_Register_datarep(char *name, MPI_Datarep_extent_function *extent_fn, void *state) { - int error_code, flag; + int error_code; ADIOI_Datarep *datarep; static char myname[] = "MPI_REGISTER_DATAREP"; - MPIU_THREAD_SINGLE_CS_ENTER("io"); + MPIU_THREAD_CS_ENTER(ALLFUNC,); /* --BEGIN ERROR HANDLING-- */ /* check datarep name (use strlen instead of strnlen because @@ -79,33 +77,8 @@ int MPI_Register_datarep(char *name, } /* --END ERROR HANDLING-- */ - /* first check if ADIO has been initialized. If not, initialize it */ - if (ADIO_Init_keyval == MPI_KEYVAL_INVALID) { - MPI_Initialized(&flag); - - /* --BEGIN ERROR HANDLING-- */ - if (!flag) { - error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, - myname, __LINE__, MPI_ERR_OTHER, - "**initialized", 0); - error_code = MPIO_Err_return_file(MPI_FILE_NULL, error_code); - goto fn_exit; - } - /* --END ERROR HANDLING-- */ - - MPI_Keyval_create(MPI_NULL_COPY_FN, ADIOI_End_call, &ADIO_Init_keyval, - (void *) 0); - - /* put a dummy attribute on MPI_COMM_WORLD, because we want the delete - function to be called when MPI_COMM_WORLD is freed. Hopefully the - MPI library frees MPI_COMM_WORLD when MPI_Finalize is called, - though the standard does not mandate this. 
*/ - - MPI_Attr_put(MPI_COMM_WORLD, ADIO_Init_keyval, (void *) 0); - - /* initialize ADIO */ - ADIO_Init( (int *)0, (char ***)0, &error_code); - } + MPIR_MPIOInit(&error_code); + if (error_code != MPI_SUCCESS) goto fn_exit; /* --BEGIN ERROR HANDLING-- */ /* check datarep isn't already registered */ @@ -156,7 +129,7 @@ int MPI_Register_datarep(char *name, error_code = MPI_SUCCESS; fn_exit: - MPIU_THREAD_SINGLE_CS_EXIT("io"); + MPIU_THREAD_CS_EXIT(ALLFUNC,); return error_code; } diff --git a/ompi/mca/io/romio/romio/mpi-io/seek.c b/ompi/mca/io/romio/romio/mpi-io/seek.c index 46b27bcce6..e038d4fa7d 100644 --- a/ompi/mca/io/romio/romio/mpi-io/seek.c +++ b/ompi/mca/io/romio/romio/mpi-io/seek.c @@ -47,8 +47,7 @@ int MPI_File_seek(MPI_File mpi_fh, MPI_Offset offset, int whence) HPMP_IO_START(fl_xmpi, BLKMPIFILESEEK, TRDTBLOCK, fh, MPI_DATATYPE_NULL, -1); #endif /* MPI_hpux */ - MPIU_THREAD_SINGLE_CS_ENTER("io"); - MPIR_Nest_incr(); + MPIU_THREAD_CS_ENTER(ALLFUNC,); fh = MPIO_File_resolve(mpi_fh); @@ -133,7 +132,6 @@ int MPI_File_seek(MPI_File mpi_fh, MPI_Offset offset, int whence) error_code = MPI_SUCCESS; fn_exit: - MPIR_Nest_decr(); - MPIU_THREAD_SINGLE_CS_EXIT("io"); + MPIU_THREAD_CS_EXIT(ALLFUNC,); return error_code; } diff --git a/ompi/mca/io/romio/romio/mpi-io/seek_sh.c b/ompi/mca/io/romio/romio/mpi-io/seek_sh.c index afeeb6bdb4..feb11ae756 100644 --- a/ompi/mca/io/romio/romio/mpi-io/seek_sh.c +++ b/ompi/mca/io/romio/romio/mpi-io/seek_sh.c @@ -39,8 +39,7 @@ int MPI_File_seek_shared(MPI_File mpi_fh, MPI_Offset offset, int whence) MPI_Offset curr_offset, eof_offset, tmp_offset; ADIO_File fh; - MPIU_THREAD_SINGLE_CS_ENTER("io"); - MPIR_Nest_incr(); + MPIU_THREAD_CS_ENTER(ALLFUNC,); fh = MPIO_File_resolve(mpi_fh); @@ -177,8 +176,7 @@ int MPI_File_seek_shared(MPI_File mpi_fh, MPI_Offset offset, int whence) error_code = MPI_SUCCESS; fn_exit: - MPIR_Nest_decr(); - MPIU_THREAD_SINGLE_CS_EXIT("io"); + MPIU_THREAD_CS_EXIT(ALLFUNC,); return error_code; } diff --git 
a/ompi/mca/io/romio/romio/mpi-io/set_atom.c b/ompi/mca/io/romio/romio/mpi-io/set_atom.c index d4faf19e5f..4a1c4655f6 100644 --- a/ompi/mca/io/romio/romio/mpi-io/set_atom.c +++ b/ompi/mca/io/romio/romio/mpi-io/set_atom.c @@ -39,8 +39,7 @@ int MPI_File_set_atomicity(MPI_File mpi_fh, int flag) ADIO_Fcntl_t *fcntl_struct; ADIO_File fh; - MPIU_THREAD_SINGLE_CS_ENTER("io"); - MPIR_Nest_incr(); + MPIU_THREAD_CS_ENTER(ALLFUNC,); fh = MPIO_File_resolve(mpi_fh); @@ -85,7 +84,6 @@ int MPI_File_set_atomicity(MPI_File mpi_fh, int flag) ADIOI_Free(fcntl_struct); fn_exit: - MPIR_Nest_decr(); - MPIU_THREAD_SINGLE_CS_EXIT("io"); + MPIU_THREAD_CS_EXIT(ALLFUNC,); return error_code; } diff --git a/ompi/mca/io/romio/romio/mpi-io/set_errh.c b/ompi/mca/io/romio/romio/mpi-io/set_errh.c index 51727d92ea..991d3e40e6 100644 --- a/ompi/mca/io/romio/romio/mpi-io/set_errh.c +++ b/ompi/mca/io/romio/romio/mpi-io/set_errh.c @@ -38,8 +38,9 @@ int MPI_File_set_errhandler(MPI_File mpi_fh, MPI_Errhandler errhandler) int error_code = MPI_SUCCESS; static char myname[] = "MPI_FILE_SET_ERRHANDLER"; ADIO_File fh; + MPIU_THREADPRIV_DECL; - MPIU_THREAD_SINGLE_CS_ENTER("io"); + MPIU_THREAD_CS_ENTER(ALLFUNC,); if (mpi_fh == MPI_FILE_NULL) { ADIOI_DFLT_ERR_HANDLER = errhandler; @@ -68,6 +69,6 @@ int MPI_File_set_errhandler(MPI_File mpi_fh, MPI_Errhandler errhandler) } fn_exit: - MPIU_THREAD_SINGLE_CS_EXIT("io"); + MPIU_THREAD_CS_EXIT(ALLFUNC,); return error_code; } diff --git a/ompi/mca/io/romio/romio/mpi-io/set_info.c b/ompi/mca/io/romio/romio/mpi-io/set_info.c index 751f46ebd0..e99fdc8fe7 100644 --- a/ompi/mca/io/romio/romio/mpi-io/set_info.c +++ b/ompi/mca/io/romio/romio/mpi-io/set_info.c @@ -38,8 +38,7 @@ int MPI_File_set_info(MPI_File mpi_fh, MPI_Info info) static char myname[] = "MPI_FILE_SET_INFO"; ADIO_File fh; - MPIU_THREAD_SINGLE_CS_ENTER("io"); - MPIR_Nest_incr(); + MPIU_THREAD_CS_ENTER(ALLFUNC,); fh = MPIO_File_resolve(mpi_fh); @@ -57,8 +56,7 @@ int MPI_File_set_info(MPI_File mpi_fh, MPI_Info info) 
/* --END ERROR HANDLING-- */ fn_exit: - MPIR_Nest_decr(); - MPIU_THREAD_SINGLE_CS_EXIT("io"); + MPIU_THREAD_CS_EXIT(ALLFUNC,); return error_code; } diff --git a/ompi/mca/io/romio/romio/mpi-io/set_size.c b/ompi/mca/io/romio/romio/mpi-io/set_size.c index 0fa9654266..8823b12079 100644 --- a/ompi/mca/io/romio/romio/mpi-io/set_size.c +++ b/ompi/mca/io/romio/romio/mpi-io/set_size.c @@ -46,8 +46,7 @@ int MPI_File_set_size(MPI_File mpi_fh, MPI_Offset size) MPI_DATATYPE_NULL, -1); #endif /* MPI_hpux */ - MPIU_THREAD_SINGLE_CS_ENTER("io"); - MPIR_Nest_incr(); + MPIU_THREAD_CS_ENTER(ALLFUNC,); fh = MPIO_File_resolve(mpi_fh); @@ -92,8 +91,7 @@ int MPI_File_set_size(MPI_File mpi_fh, MPI_Offset size) #endif /* MPI_hpux */ fn_exit: - MPIR_Nest_decr(); - MPIU_THREAD_SINGLE_CS_EXIT("io"); + MPIU_THREAD_CS_EXIT(ALLFUNC,); return error_code; } diff --git a/ompi/mca/io/romio/romio/mpi-io/set_view.c b/ompi/mca/io/romio/romio/mpi-io/set_view.c index eda02b2288..ea0cbf112f 100644 --- a/ompi/mca/io/romio/romio/mpi-io/set_view.c +++ b/ompi/mca/io/romio/romio/mpi-io/set_view.c @@ -44,8 +44,7 @@ int MPI_File_set_view(MPI_File mpi_fh, MPI_Offset disp, MPI_Datatype etype, ADIO_Offset shared_fp, byte_off; ADIO_File fh; - MPIU_THREAD_SINGLE_CS_ENTER("io"); - MPIR_Nest_incr(); + MPIU_THREAD_CS_ENTER(ALLFUNC,); fh = MPIO_File_resolve(mpi_fh); @@ -145,9 +144,7 @@ int MPI_File_set_view(MPI_File mpi_fh, MPI_Offset disp, MPI_Datatype etype, /* --END ERROR HANDLING-- */ /* reset shared file pointer to zero */ - if ((fh->file_system != ADIO_PIOFS) && - (fh->file_system != ADIO_PVFS) && - (fh->file_system != ADIO_PVFS2) && + if (ADIO_Feature(fh, ADIO_SHARED_FP) && (fh->shared_fp_fd != ADIO_FILE_NULL)) { /* only one process needs to set it to zero, but I don't want to @@ -166,16 +163,13 @@ int MPI_File_set_view(MPI_File mpi_fh, MPI_Offset disp, MPI_Datatype etype, /* --END ERROR HANDLING-- */ } - if ((fh->file_system != ADIO_PIOFS) && - (fh->file_system != ADIO_PVFS) && - (fh->file_system != ADIO_PVFS2 )) 
+ if (ADIO_Feature(fh, ADIO_SHARED_FP)) { MPI_Barrier(fh->comm); /* for above to work correctly */ } fn_exit: - MPIR_Nest_decr(); - MPIU_THREAD_SINGLE_CS_EXIT("io"); + MPIU_THREAD_CS_EXIT(ALLFUNC,); return error_code; } diff --git a/ompi/mca/io/romio/romio/mpi-io/write.c b/ompi/mca/io/romio/romio/mpi-io/write.c index 9ba2befa17..dbd089ae77 100644 --- a/ompi/mca/io/romio/romio/mpi-io/write.c +++ b/ompi/mca/io/romio/romio/mpi-io/write.c @@ -75,8 +75,7 @@ int MPIOI_File_write(MPI_File mpi_fh, ADIO_Offset off; ADIO_File fh; - MPIU_THREAD_SINGLE_CS_ENTER("io"); - MPIR_Nest_incr(); + MPIU_THREAD_CS_ENTER(ALLFUNC,); fh = MPIO_File_resolve(mpi_fh); @@ -96,6 +95,11 @@ int MPIOI_File_write(MPI_File mpi_fh, /* --END ERROR HANDLING-- */ MPI_Type_size(datatype, &datatype_size); + + /* --BEGIN ERROR HANDLING-- */ + MPIO_CHECK_COUNT_SIZE(fh, count, datatype_size, myname, error_code); + /* --END ERROR HANDLING-- */ + if (count*datatype_size == 0) { #ifdef HAVE_STATUS_SET_BYTES @@ -133,9 +137,7 @@ int MPIOI_File_write(MPI_File mpi_fh, ADIO_WriteContig. 
*/ - if ((fh->atomicity) && (fh->file_system != ADIO_PIOFS) && - (fh->file_system != ADIO_NFS) && (fh->file_system != ADIO_PVFS) && - (fh->file_system != ADIO_PVFS2)) + if ((fh->atomicity) && ADIO_Feature(fh, ADIO_LOCKS)) { ADIOI_WRITE_LOCK(fh, off, SEEK_SET, bufsize); } @@ -143,9 +145,7 @@ int MPIOI_File_write(MPI_File mpi_fh, ADIO_WriteContig(fh, buf, count, datatype, file_ptr_type, off, status, &error_code); - if ((fh->atomicity) && (fh->file_system != ADIO_PIOFS) && - (fh->file_system != ADIO_NFS) && (fh->file_system != ADIO_PVFS)&& - (fh->file_system != ADIO_PVFS2)) + if ((fh->atomicity) && ADIO_Feature(fh, ADIO_LOCKS)) { ADIOI_UNLOCK(fh, off, SEEK_SET, bufsize); } @@ -163,8 +163,7 @@ int MPIOI_File_write(MPI_File mpi_fh, /* --END ERROR HANDLING-- */ fn_exit: - MPIR_Nest_decr(); - MPIU_THREAD_SINGLE_CS_EXIT("io"); + MPIU_THREAD_CS_EXIT(ALLFUNC,); return error_code; } diff --git a/ompi/mca/io/romio/romio/mpi-io/write_all.c b/ompi/mca/io/romio/romio/mpi-io/write_all.c index 4ced9d4be1..f5f06ca520 100644 --- a/ompi/mca/io/romio/romio/mpi-io/write_all.c +++ b/ompi/mca/io/romio/romio/mpi-io/write_all.c @@ -75,8 +75,7 @@ int MPIOI_File_write_all(MPI_File mpi_fh, int error_code, datatype_size; ADIO_File fh; - MPIU_THREAD_SINGLE_CS_ENTER("io"); - MPIR_Nest_incr(); + MPIU_THREAD_CS_ENTER(ALLFUNC,); fh = MPIO_File_resolve(mpi_fh); @@ -101,6 +100,7 @@ int MPIOI_File_write_all(MPI_File mpi_fh, MPIO_CHECK_INTEGRAL_ETYPE(fh, count, datatype_size, myname, error_code); MPIO_CHECK_WRITABLE(fh, myname, error_code); MPIO_CHECK_NOT_SEQUENTIAL_MODE(fh, myname, error_code); + MPIO_CHECK_COUNT_SIZE(fh, count, datatype_size, myname, error_code); /* --END ERROR HANDLING-- */ ADIO_WriteStridedColl(fh, buf, count, datatype, file_ptr_type, @@ -112,8 +112,7 @@ int MPIOI_File_write_all(MPI_File mpi_fh, /* --END ERROR HANDLING-- */ fn_exit: - MPIR_Nest_decr(); - MPIU_THREAD_SINGLE_CS_EXIT("io"); + MPIU_THREAD_CS_EXIT(ALLFUNC,); return error_code; } diff --git 
a/ompi/mca/io/romio/romio/mpi-io/write_allb.c b/ompi/mca/io/romio/romio/mpi-io/write_allb.c index 4559f9eed6..ae27a6f5b9 100644 --- a/ompi/mca/io/romio/romio/mpi-io/write_allb.c +++ b/ompi/mca/io/romio/romio/mpi-io/write_allb.c @@ -61,8 +61,7 @@ int MPIOI_File_write_all_begin(MPI_File mpi_fh, int error_code, datatype_size; ADIO_File fh; - MPIU_THREAD_SINGLE_CS_ENTER("io"); - MPIR_Nest_incr(); + MPIU_THREAD_CS_ENTER(ALLFUNC,); fh = MPIO_File_resolve(mpi_fh); @@ -96,6 +95,7 @@ int MPIOI_File_write_all_begin(MPI_File mpi_fh, MPI_Type_size(datatype, &datatype_size); /* --BEGIN ERROR HANDLING-- */ MPIO_CHECK_INTEGRAL_ETYPE(fh, count, datatype_size, myname, error_code); + MPIO_CHECK_COUNT_SIZE(fh, count, datatype_size, myname, error_code); /* --END ERROR HANDLING-- */ fh->split_datatype = datatype; @@ -108,8 +108,7 @@ int MPIOI_File_write_all_begin(MPI_File mpi_fh, /* --END ERROR HANDLING-- */ fn_exit: - MPIR_Nest_decr(); - MPIU_THREAD_SINGLE_CS_EXIT("io"); + MPIU_THREAD_CS_EXIT(ALLFUNC,); return error_code; } diff --git a/ompi/mca/io/romio/romio/mpi-io/write_alle.c b/ompi/mca/io/romio/romio/mpi-io/write_alle.c index c1352c99b0..e11f8a48ad 100644 --- a/ompi/mca/io/romio/romio/mpi-io/write_alle.c +++ b/ompi/mca/io/romio/romio/mpi-io/write_alle.c @@ -57,8 +57,7 @@ int MPIOI_File_write_all_end(MPI_File mpi_fh, MPIU_UNREFERENCED_ARG(buf); - MPIU_THREAD_SINGLE_CS_ENTER("io"); - MPIR_Nest_incr(); + MPIU_THREAD_CS_ENTER(ALLFUNC,); fh = MPIO_File_resolve(mpi_fh); @@ -87,8 +86,7 @@ int MPIOI_File_write_all_end(MPI_File mpi_fh, error_code = MPI_SUCCESS; fn_exit: - MPIR_Nest_decr(); - MPIU_THREAD_SINGLE_CS_EXIT("io"); + MPIU_THREAD_CS_EXIT(ALLFUNC,); return error_code; } diff --git a/ompi/mca/io/romio/romio/mpi-io/write_ord.c b/ompi/mca/io/romio/romio/mpi-io/write_ord.c index ad3d6cf7c2..2ad4fd40bf 100644 --- a/ompi/mca/io/romio/romio/mpi-io/write_ord.c +++ b/ompi/mca/io/romio/romio/mpi-io/write_ord.c @@ -48,8 +48,7 @@ int MPI_File_write_ordered(MPI_File mpi_fh, void *buf, int 
count, ADIO_Offset shared_fp; ADIO_File fh; - MPIU_THREAD_SINGLE_CS_ENTER("io"); - MPIR_Nest_incr(); + MPIU_THREAD_CS_ENTER(ALLFUNC,); fh = MPIO_File_resolve(mpi_fh); @@ -64,6 +63,7 @@ int MPI_File_write_ordered(MPI_File mpi_fh, void *buf, int count, /* --BEGIN ERROR HANDLING-- */ MPIO_CHECK_INTEGRAL_ETYPE(fh, count, datatype_size, myname, error_code); MPIO_CHECK_FS_SUPPORTS_SHARED(fh, myname, error_code); + MPIO_CHECK_COUNT_SIZE(fh, count, datatype_size, myname, error_code); /* --END ERROR HANDLING-- */ ADIOI_TEST_DEFERRED(fh, myname, &error_code); @@ -102,8 +102,7 @@ int MPI_File_write_ordered(MPI_File mpi_fh, void *buf, int count, /* --END ERROR HANDLING-- */ fn_exit: - MPIR_Nest_decr(); - MPIU_THREAD_SINGLE_CS_EXIT("io"); + MPIU_THREAD_CS_EXIT(ALLFUNC,); /* FIXME: Check for error code from WriteStridedColl? */ return error_code; diff --git a/ompi/mca/io/romio/romio/mpi-io/write_ordb.c b/ompi/mca/io/romio/romio/mpi-io/write_ordb.c index 7253d32d83..4693eb5733 100644 --- a/ompi/mca/io/romio/romio/mpi-io/write_ordb.c +++ b/ompi/mca/io/romio/romio/mpi-io/write_ordb.c @@ -45,8 +45,7 @@ int MPI_File_write_ordered_begin(MPI_File mpi_fh, void *buf, int count, ADIO_Offset shared_fp; ADIO_File fh; - MPIU_THREAD_SINGLE_CS_ENTER("io"); - MPIR_Nest_incr(); + MPIU_THREAD_CS_ENTER(ALLFUNC,); fh = MPIO_File_resolve(mpi_fh); @@ -71,6 +70,7 @@ int MPI_File_write_ordered_begin(MPI_File mpi_fh, void *buf, int count, /* --BEGIN ERROR HANDLING-- */ MPIO_CHECK_INTEGRAL_ETYPE(fh, count, datatype_size, myname, error_code); MPIO_CHECK_FS_SUPPORTS_SHARED(fh, myname, error_code); + MPIO_CHECK_COUNT_SIZE(fh, count, datatype_size, myname, error_code); /* --END ERROR HANDLING-- */ ADIOI_TEST_DEFERRED(fh, myname, &error_code); @@ -109,8 +109,7 @@ int MPI_File_write_ordered_begin(MPI_File mpi_fh, void *buf, int count, /* --END ERROR HANDLING-- */ fn_exit: - MPIR_Nest_decr(); - MPIU_THREAD_SINGLE_CS_EXIT("io"); + MPIU_THREAD_CS_EXIT(ALLFUNC,); /* FIXME: Check for error code from 
WriteStridedColl? */ return error_code; diff --git a/ompi/mca/io/romio/romio/mpi-io/write_orde.c b/ompi/mca/io/romio/romio/mpi-io/write_orde.c index af894880ef..2044a8c868 100644 --- a/ompi/mca/io/romio/romio/mpi-io/write_orde.c +++ b/ompi/mca/io/romio/romio/mpi-io/write_orde.c @@ -43,7 +43,7 @@ int MPI_File_write_ordered_end(MPI_File mpi_fh, void *buf, MPI_Status *status) MPIU_UNREFERENCED_ARG(buf); - MPIU_THREAD_SINGLE_CS_ENTER("io"); + MPIU_THREAD_CS_ENTER(ALLFUNC,); fh = MPIO_File_resolve(mpi_fh); @@ -68,6 +68,6 @@ int MPI_File_write_ordered_end(MPI_File mpi_fh, void *buf, MPI_Status *status) fn_exit: - MPIU_THREAD_SINGLE_CS_EXIT("io"); + MPIU_THREAD_CS_EXIT(ALLFUNC,); return MPI_SUCCESS; } diff --git a/ompi/mca/io/romio/romio/mpi-io/write_sh.c b/ompi/mca/io/romio/romio/mpi-io/write_sh.c index 47fd9cdd6d..a1950505fa 100644 --- a/ompi/mca/io/romio/romio/mpi-io/write_sh.c +++ b/ompi/mca/io/romio/romio/mpi-io/write_sh.c @@ -48,8 +48,7 @@ int MPI_File_write_shared(MPI_File mpi_fh, void *buf, int count, ADIO_Offset off, shared_fp; ADIO_File fh; - MPIU_THREAD_SINGLE_CS_ENTER("io"); - MPIR_Nest_incr(); + MPIU_THREAD_CS_ENTER(ALLFUNC,); fh = MPIO_File_resolve(mpi_fh); @@ -60,6 +59,11 @@ int MPI_File_write_shared(MPI_File mpi_fh, void *buf, int count, /* --END ERROR HANDLING-- */ MPI_Type_size(datatype, &datatype_size); + + /* --BEGIN ERROR HANDLING-- */ + MPIO_CHECK_COUNT_SIZE(fh, count, datatype_size, myname, error_code); + /* --END ERROR HANDLING-- */ + if (count*datatype_size == 0) { #ifdef HAVE_STATUS_SET_BYTES MPIR_Status_set_bytes(status, datatype, 0); @@ -124,7 +128,6 @@ int MPI_File_write_shared(MPI_File mpi_fh, void *buf, int count, /* --END ERROR HANDLING-- */ fn_exit: - MPIR_Nest_decr(); - MPIU_THREAD_SINGLE_CS_EXIT("io"); + MPIU_THREAD_CS_EXIT(ALLFUNC,); return error_code; } diff --git a/ompi/mca/io/romio/romio/mpi2-other/info/info_create.c b/ompi/mca/io/romio/romio/mpi2-other/info/info_create.c index 987a4929b6..5f8606c949 100644 --- 
a/ompi/mca/io/romio/romio/mpi2-other/info/info_create.c +++ b/ompi/mca/io/romio/romio/mpi2-other/info/info_create.c @@ -23,8 +23,6 @@ #include "mpioprof.h" #endif -extern int ADIO_Init_keyval; - /*@ MPI_Info_create - Creates a new info object @@ -35,33 +33,10 @@ Output Parameters: @*/ int MPI_Info_create(MPI_Info *info) { - int flag, error_code; + int error_code; - /* first check if ADIO has been initialized. If not, initialize it */ - if (ADIO_Init_keyval == MPI_KEYVAL_INVALID) { - - /* check if MPI itself has been initialized. If not, flag an error. - Can't initialize it here, because don't know argc, argv */ - MPI_Initialized(&flag); - if (!flag) { - FPRINTF(stderr, "Error: MPI_Init() must be called before using MPI_Info_create\n"); - MPI_Abort(MPI_COMM_WORLD, 1); - } - - MPI_Keyval_create(MPI_NULL_COPY_FN, ADIOI_End_call, &ADIO_Init_keyval, - (void *) 0); - - /* put a dummy attribute on MPI_COMM_WORLD, because we want the delete - function to be called when MPI_COMM_WORLD is freed. Hopefully the - MPI library frees MPI_COMM_WORLD when MPI_Finalize is called, - though the standard does not mandate this. */ - - MPI_Attr_put(MPI_COMM_WORLD, ADIO_Init_keyval, (void *) 0); - -/* initialize ADIO */ - - ADIO_Init( (int *)0, (char ***)0, &error_code); - } + MPIR_MPIOInit(&error_code); + if (error_code != MPI_SUCCESS) goto fn_exit; *info = (MPI_Info) ADIOI_Malloc(sizeof(struct MPIR_Info)); (*info)->cookie = MPIR_INFO_COOKIE; @@ -71,5 +46,6 @@ int MPI_Info_create(MPI_Info *info) /* this is the first structure in this linked list. it is always kept empty. new (key,value) pairs are added after it. 
*/ +fn_exit: return MPI_SUCCESS; } diff --git a/ompi/mca/io/romio/romio/test-internal/file_realms_test.c b/ompi/mca/io/romio/romio/test-internal/file_realms_test.c new file mode 100644 index 0000000000..029fa1b61b --- /dev/null +++ b/ompi/mca/io/romio/romio/test-internal/file_realms_test.c @@ -0,0 +1,76 @@ +/* -*- Mode: C; c-basic-offset:4 ; -*- */ +/* + * Copyright (C) 2008 University of Chicago. + * See COPYRIGHT notice in top-level directory. + */ + +#include "../adio/include/adio.h" +#include "../adio/include/adio_extern.h" +#include "mpi.h" + +int main (int argc, char **argv) +{ + int i; + ADIO_File fd; + ADIO_Offset min_st_offset, max_end_offset; + int rank; + int nprocs_for_coll; + int lb; + int size, extent; + + MPI_Init (&argc, &argv); + MPI_Comm_rank (MPI_COMM_WORLD, &rank); + + if (argc != 4) { + if (!rank) + printf ("Usage: file_realms_test \n" + " simulates file_realm calculation\n"); + MPI_Finalize(); + return 1; + } + + nprocs_for_coll = atoi (argv[1]); + + min_st_offset = atoi (argv[2]); + max_end_offset = atoi (argv[3]); + + if (max_end_offset < min_st_offset){ + if (!rank) + printf ("end offset %lld is less then start offset %lld\n", + max_end_offset, min_st_offset); + MPI_Finalize(); + return 1; + } + + printf ("min_st_offset = %lld\nmax_end_offset = %lld\n", + min_st_offset, max_end_offset); + + fd = (ADIO_File) ADIOI_Malloc (sizeof (struct ADIOI_FileD)); + fd->hints = (ADIOI_Hints *) + ADIOI_Malloc (sizeof(struct ADIOI_Hints_struct)); + fd->hints->cb_nodes = nprocs_for_coll; + ADIOI_Calc_file_realms (fd, min_st_offset, max_end_offset); + + for (i=0; i < nprocs_for_coll; i++) { + printf ("file_realm_st_offs[%d] = %lld\n", i, fd->file_realm_st_offs[i]); + } + for (i=0; i < nprocs_for_coll; i++) { + MPI_Type_size (fd->file_realm_types[i], &size); + printf ("file_realm [%d] size = %d\n", i, size); + } + for (i=0; i < nprocs_for_coll; i++) { + MPI_Type_get_extent (fd->file_realm_types[i], &lb, &extent); + printf ("file_realm [%d] extent = %d\n", i, 
extent); + } + + for (i=0; i < nprocs_for_coll; i++) + MPI_Type_free (&fd->file_realm_types[i]); + ADIOI_Free (fd->file_realm_st_offs); + ADIOI_Free (fd->file_realm_types); + ADIOI_Free (fd->hints); + ADIOI_Free (fd); + + MPI_Finalize(); + + return 0; +} diff --git a/ompi/mca/io/romio/romio/test-internal/heap_test.c b/ompi/mca/io/romio/romio/test-internal/heap_test.c new file mode 100644 index 0000000000..c47b5385f0 --- /dev/null +++ b/ompi/mca/io/romio/romio/test-internal/heap_test.c @@ -0,0 +1,453 @@ +#include "../adio/include/heap-sort.h" +#include +#include +#include +#include + +#define PREDEF_TESTS 2 +/* test types */ +#define ALL 0 +#define RANDOM -1 +#define CUSTOM -2 + +/* ACTIONS */ +#define BUILD 0 +#define INSERT 1 +#define EXTRACT 2 +#define EXTRACT_INSERT 3 + +typedef struct { + char name[64]; + int heap_size; + int print; + int verify; + int action_arr_sz; + int *action_arr; + int *action_count_arr; + ADIO_Offset *offsets; + ADIO_Offset *correct_order; +} test_params_t; + +void print_usage(); +void print_keys(ADIO_Offset* offsets, int size); +void print_params(test_params_t *params); +int run_test(test_params_t *test); +void fill_random_test(test_params_t *params); +void init_predefined_test(test_params_t *params, int index); +void dumb_sort(test_params_t *params); + +int main(int argc, char **argv) { + int i, print = 1, verify = 1; + int adding_elements; + int curr_add_idx; + int test_type = RANDOM; + test_params_t predefined_tests[PREDEF_TESTS]; + test_params_t test; + + /* parse args */ + adding_elements = 0; + curr_add_idx = 0; + if (argc == 1) { + print_usage(); + return 1; + } + i = 1; + while (i < argc) { + if (!strcmp("-A", argv[i])) { + adding_elements = 0; + test_type = ALL; + i++; + } + else if (!strcmp("-T", argv[i])) { + adding_elements = 0; + test_type = atoi(argv[i+1]); + i += 2; + } + else if (!strcmp("-r", argv[i])) { + adding_elements = 0; + test.heap_size = atoi(argv[i+1]); + if (test.heap_size <= 0) { + printf("heap size should be 
a positive integer\n"); + return 1; + } + test.offsets = (ADIO_Offset *) malloc(test.heap_size*sizeof(ADIO_Offset)); + test_type = RANDOM; + i += 2; + } + else if (!strcmp("-e", argv[i])) { + test.heap_size = argc - 2; + if (test.heap_size <= 0) { + printf("need at least one key\n"); + return 1; + } + test.offsets = (ADIO_Offset *) malloc(test.heap_size*sizeof(ADIO_Offset)); + adding_elements = 1; + test_type = CUSTOM; + i++; + } + else if (!strcmp("-v", argv[i])) { + verify = 1; + i++; + } + else if (!strcmp("-p", argv[i])) { + print = 1; + i++; + } + else if (!strcmp("-V", argv[i])) { + verify = 0; + i++; + } + else if (!strcmp("-P", argv[i])) { + print = 0; + i++; + } + else if (adding_elements) { + test.offsets[curr_add_idx] = atoi(argv[i]); + curr_add_idx++; + i++; + } + else { + printf("Illegal argument: %s", argv[i]); + print_usage(); + return 1; + } + } + + if (test_type == RANDOM) { + fill_random_test(&test); + strcpy(test.name, "RANDOMIZED TEST"); + } + else if (test_type == CUSTOM) + strcpy(test.name, "CUSTOM TEST"); + if ((test_type == CUSTOM) || (test_type == RANDOM)) { + test.print = print; + test.verify = verify; + test.action_arr_sz = 2; + test.action_arr = (int *) malloc(test.action_arr_sz*sizeof(int)); + test.action_count_arr = (int *) malloc(test.action_arr_sz*sizeof(int)); + /* build the entire heap */ + /* test.action_arr[0] = BUILD; + test.action_count_arr[0] = 1; */ + /* insert keys one at a time */ + test.action_arr[0] = INSERT; + test.action_count_arr[0] = test.heap_size; + /* extract all the keys */ + test.action_arr[1] = EXTRACT; + test.action_count_arr[1] = test.heap_size; + + if (verify) { + test.correct_order = (ADIO_Offset *)malloc(test.heap_size*sizeof(ADIO_Offset)); + dumb_sort(&test); + } + if (print) + print_params(&test); + run_test(&test); + } + else { + if (test_type == ALL) { + for (i=0; i\n" + " -r Create a random test and verify of size \n" + " -e test with the space delimited list of keys\n" + " -p print parameters and keys 
(default)\n" + " -P do not print parameters and keys\n" + " -v verify keys (default)\n" + " -V do not verify keys\n" + ); +} + +void print_keys(ADIO_Offset *offsets, int size) { + int i; + for (i=0; i < size; i++) + printf("%lld ", offsets[i]); +} + +void print_params(test_params_t *params) { + int i; + static char action_map[3][8] = {"BUILD", "INSERT", "EXTRACT"}; + + printf("----------------Test Parameters---------------\n"); + printf("Actions:\n"); + for (i=0; iaction_arr_sz; i++) { + printf("%sx%d\n", action_map[params->action_arr[i]], + params->action_count_arr[i]); + } + + printf("Initial order :\n"); + print_keys(params->offsets, params->heap_size); + printf("\n"); + + if (params->verify) { + printf("Expected order:\n"); + print_keys(params->correct_order, params->heap_size); + printf("\n"); + } + printf("----------------------------------------------\n"); +} + +void fill_random_test(test_params_t *params) { + int i; + int max_key; + time_t seed; + int order = 0; + + time(&seed); + srand(seed); + + order = 0; + max_key = 1; + while (order < 25) { + max_key *= 10; + if (!((int) (params->heap_size / max_key))) + break; + order++; + } + for (i=0; i < params->heap_size; i++) + params->offsets[i] = (rand() % max_key); +} + +void dumb_sort(test_params_t *params) { + ADIO_Offset *offsets, tmp_offset; + int i, j; + + offsets = params->correct_order; + memcpy(offsets, params->offsets, params->heap_size*sizeof(ADIO_Offset)); + for (i=0; i < params->heap_size; i++) { + for (j=i; j < params->heap_size; j++) { + if (offsets[j] < offsets[i]) { + tmp_offset = offsets[i]; + offsets[i] = offsets[j]; + offsets[j] = tmp_offset; + } + } + } +} + +int run_test(test_params_t *test) { + heap_t myheap; + ADIO_Offset *extracted; + int stored_proc; + ADIO_Offset stored_reg_max_len; + int i, j, k, err_flag = 0; + int curr_insert_idx = 0; + int curr_extract_idx = 0; + + create_heap(&myheap, test->heap_size); + myheap.size = 0; + + extracted = (ADIO_Offset *) malloc(test->heap_size * 
sizeof(ADIO_Offset)); + for (i=0; i < test->action_arr_sz; i++) { + for (j=0; jaction_count_arr[i]; j++) { + switch (test->action_arr[i]) + { + case BUILD: + myheap.size = test->heap_size; + for (k=0; k < test->heap_size; k++) { + myheap.nodes[k].offset = test->offsets[k]; + myheap.nodes[k].proc = k; + } + build_heap(&myheap); + break; + case INSERT: + ADIOI_Heap_insert(&myheap, test->offsets[curr_insert_idx], + curr_insert_idx, curr_insert_idx); + curr_insert_idx++; + break; + case EXTRACT: + heap_extract_min(&myheap, &extracted[curr_extract_idx], + &stored_proc, &stored_reg_max_len); + if (test->verify && (extracted[curr_extract_idx] != + test->correct_order[curr_extract_idx])) + err_flag++; + curr_extract_idx++; + break; + case EXTRACT_INSERT: + heap_extract_min(&myheap, &extracted[curr_extract_idx], + &stored_proc, &stored_reg_max_len); + if (test->verify &&(extracted[curr_extract_idx] != + test->correct_order[curr_extract_idx])) + err_flag++; + curr_extract_idx++; + + ADIOI_Heap_insert(&myheap, test->offsets[curr_insert_idx], + curr_insert_idx, curr_insert_idx); + curr_insert_idx++; + break; + default: + break; + } + } + } + + if (test->verify) { + if (err_flag) { + printf("***%s FAILED***\n", test->name); + if (test->print) { + printf("Min extraction:\n"); + print_keys(extracted, test->heap_size); + printf("\n"); + } + } + else + printf("***%s PASSED***\n", test->name); + } + + free_heap(&myheap); + free(extracted); + /* clean up test params */ + free(test->offsets); + if (test->verify) + free(test->correct_order); + free(test->action_arr); + free(test->action_count_arr); + + return err_flag; +} + +void init_predefined_test(test_params_t *params, int index) { + + switch (index) + { + case 0: + strcpy(params->name, "TEST 1"); + params->heap_size = 15; + params->action_arr_sz = 3; + + /* allocate space */ + params->action_arr = + (int *) malloc (params->action_arr_sz*sizeof(int)); + params->action_count_arr = + (int *) malloc 
(params->action_arr_sz*sizeof(int)); + params->offsets = (ADIO_Offset *) malloc(params->heap_size*sizeof(ADIO_Offset)); + if (params->verify) + params->correct_order = + (ADIO_Offset *) malloc(params->heap_size*sizeof(ADIO_Offset)); + + /* Set procs */ + params->offsets[0] = 65; + params->offsets[1] = 53; + params->offsets[2] = 51; + params->offsets[3] = 74; + params->offsets[4] = 1; + params->offsets[5] = 3; + params->offsets[6] = 86; + params->offsets[7] = 82; + params->offsets[8] = 42; + params->offsets[9] = 62; + params->offsets[10] = 33; + params->offsets[11] = 12; + params->offsets[12] = 79; + params->offsets[13] = 13; + params->offsets[14] = 28; + + if (params->verify) { + params->correct_order[0] = 1; + params->correct_order[1] = 3; + params->correct_order[2] = 12; + params->correct_order[3] = 33; + params->correct_order[4] = 13; + params->correct_order[5] = 28; + params->correct_order[6] = 42; + params->correct_order[7] = 51; + params->correct_order[8] = 53; + params->correct_order[9] = 62; + params->correct_order[10] = 65; + params->correct_order[11] = 74; + params->correct_order[12] = 79; + params->correct_order[13] = 82; + params->correct_order[14] = 86; + } + + params->action_arr[0] = INSERT; + params->action_arr[1] = EXTRACT_INSERT; + params->action_arr[11] = EXTRACT; + + params->action_count_arr[0] = 10; + params->action_count_arr[1] = 5; + params->action_count_arr[11] = 10; + break; + case 1: + strcpy(params->name, "TEST 1"); + params->heap_size = 15; + params->action_arr_sz = 3; + + /* allocate space */ + params->action_arr = + (int *) malloc (params->action_arr_sz*sizeof(int)); + params->action_count_arr = + (int *) malloc (params->action_arr_sz*sizeof(int)); + params->offsets = (ADIO_Offset *) malloc(params->heap_size*sizeof(ADIO_Offset)); + if (params->verify) + params->correct_order = + (ADIO_Offset *) malloc(params->heap_size*sizeof(ADIO_Offset)); + + /* Set values */ + params->offsets[0] = 65; + params->offsets[1] = 53; + params->offsets[2] = 
51; + params->offsets[3] = 74; + params->offsets[4] = 1; + params->offsets[5] = 3; + params->offsets[6] = 86; + params->offsets[7] = 82; + params->offsets[8] = 42; + params->offsets[9] = 62; + params->offsets[10] = 33; + params->offsets[11] = 12; + params->offsets[12] = 79; + params->offsets[13] = 13; + params->offsets[14] = 28; + + if (params->verify) { + params->correct_order[0] = 1; + params->correct_order[1] = 3; + params->correct_order[2] = 12; + params->correct_order[3] = 33; + params->correct_order[4] = 13; + params->correct_order[5] = 28; + params->correct_order[6] = 42; + params->correct_order[7] = 51; + params->correct_order[8] = 53; + params->correct_order[9] = 62; + params->correct_order[10] = 65; + params->correct_order[11] = 74; + params->correct_order[12] = 79; + params->correct_order[13] = 82; + params->correct_order[14] = 86; + } + + params->action_arr[0] = INSERT; + params->action_arr[1] = EXTRACT_INSERT; + params->action_arr[11] = EXTRACT; + + params->action_count_arr[0] = 10; + params->action_count_arr[1] = 5; + params->action_count_arr[11] = 10; + break; + default: + break; + } +} diff --git a/ompi/mca/io/romio/romio/test-internal/io_bounds_test.c b/ompi/mca/io/romio/romio/test-internal/io_bounds_test.c new file mode 100644 index 0000000000..b2a48c1be0 --- /dev/null +++ b/ompi/mca/io/romio/romio/test-internal/io_bounds_test.c @@ -0,0 +1,302 @@ +/* -*- Mode: C; c-basic-offset:4 ; -*- */ +/* + * Copyright (C) 2008 University of Chicago. + * See COPYRIGHT notice in top-level directory. 
+ */ + +#include "../adio/include/adio.h" +#include "../adio/include/adioi.h" +#include "../adio/include/adio_extern.h" +#include "mpi.h" + +#define PREDEF_TESTS 5 +#define MAX_OFF_LENS 4 + +typedef struct { + ADIO_Offset offset; + int count; + int type_blocklens[MAX_OFF_LENS]; + int type_indices[MAX_OFF_LENS]; + MPI_Datatype type_oldtypes[MAX_OFF_LENS]; + int type_count; + + ADIO_Offset correct_st_offset; + ADIO_Offset correct_end_offset; +} test_param_t; + +int run_test (test_param_t *test); +int setup_predefined (test_param_t *tests_arr, int count); +int print_usage (void); +int print_test_params (test_param_t *test); + +int main (int argc, char **argv) { + int rank; + int run_test_number = 0; + int failed; + int while_condition; + int i; + + test_param_t predefined_tests[PREDEF_TESTS]; + + MPI_Init (&argc, &argv); + MPI_Comm_rank (MPI_COMM_WORLD, &rank); + + if (argc != 1) { + if (!rank) { + printf ("Use only one process\n"); + print_usage (); + } + MPI_Finalize(); + return 1; + } + i = 1; + while (i < argc) { + if (!strcmp (argv[i], "-A")) { + run_test_number = 0; + i++; + } + else if (!strcmp (argv[i], "-T")) { + run_test_number = atoi (argv[i+1]); + if ((run_test_number > PREDEF_TESTS) || (run_test_number < 1)) { + if (!rank) + printf ("Invalid test number, only %d tests\n", + PREDEF_TESTS); + MPI_Finalize (); + return 1; + } + i += 2; + } + else { + if (!rank) { + printf ("Invalid Argument: %s\n", argv[i]); + print_usage (); + } + i++; + } + } + + setup_predefined (predefined_tests, PREDEF_TESTS); + + if (!run_test_number) { + i = 0; + while_condition = PREDEF_TESTS; + } + else { + i = run_test_number - 1; + while_condition = run_test_number; + } + while (i < while_condition) { + printf ("***** Test %d *****\n", i+1); + failed = run_test (&predefined_tests[i]); + printf ("******************\n"); + i++; + } + + MPI_Finalize (); + + return 0; +} + +int run_test (test_param_t *test) { + ADIO_Offset st_offset, end_offset; + MPI_File fh; + int is_contig; + int 
ind_err = 0, exp_err = 0; + + MPI_Datatype filetype; + + MPI_Type_struct (test->type_count, test->type_blocklens, + test->type_indices, test->type_oldtypes, &filetype); + MPI_Type_commit (&filetype); + + MPI_File_open (MPI_COMM_WORLD, "test_file.txt" , MPI_MODE_RDWR, + MPI_INFO_NULL, &fh); + + MPI_File_set_view (fh, 0, MPI_BYTE, filetype, "native", MPI_INFO_NULL); + + MPI_File_seek (fh, test->offset, MPI_SEEK_SET); + ADIOI_Calc_bounds ((ADIO_File) fh, test->count, MPI_BYTE, ADIO_INDIVIDUAL, + test->offset, &st_offset, &end_offset); + + ind_err = 0; + if (st_offset != test->correct_st_offset) { + printf ("Individual st_offset = %lld end_offset = %lld\n", + st_offset, end_offset); + ind_err = 1; + } + if (end_offset != test->correct_end_offset) { + printf ("Individual st_offset = %lld end_offset = %lld\n", + st_offset, end_offset); + ind_err = 1; + } + MPI_File_close (&fh); + if (ind_err) + printf ("Individual Calc FAILED\n"); + + MPI_File_open (MPI_COMM_WORLD, "test_file.txt" , MPI_MODE_RDWR, + MPI_INFO_NULL, &fh); + + if (!is_contig) + MPI_File_set_view (fh, 0, MPI_BYTE, filetype, "native", MPI_INFO_NULL); + + MPI_File_seek (fh, 0, MPI_SEEK_SET); + ADIOI_Calc_bounds ((ADIO_File) fh, test->count, MPI_BYTE, + ADIO_EXPLICIT_OFFSET, test->offset, &st_offset, + &end_offset); + + exp_err = 0; + if (st_offset != test->correct_st_offset) { + printf ("Explicit st_offset = %lld end_offset = %lld\n", + st_offset, end_offset); + exp_err = 1; + } + if (end_offset != test->correct_end_offset) { + printf ("Explicit st_offset = %lld end_offset = %lld\n", + st_offset, end_offset); + exp_err = 1; + } + if (exp_err) + printf ("Explicit Calc FAILED\n"); + + MPI_File_close (&fh); + + if (!is_contig) + MPI_Type_free (&filetype); + + return (exp_err || ind_err); +} + +int print_usage () +{ + printf ( + "Usage:\n" + " io_bounds_test -A -T \n"); +} + +int print_test_params (test_param_t *test) +{ + int i; + printf ( + "I/O offset: %lld\n" + "bytes: %d\n" + "Filetype [n](disp, lens, 
type):\n", + test->offset, test->count); + + for (i=0; itype_count; i++) { + printf ( + " [%d](%lld, %d, ", + i, + test->type_blocklens[i], + test->type_indices[i]); + if (test->type_oldtypes[i] == MPI_BYTE) { + printf ( "%s)\n", "MPI_BYTE"); + } + else if (test->type_oldtypes[i] == MPI_UB) { + printf ( "%s)\n", "MPI_UB"); + } + else if (test->type_oldtypes[i] == MPI_LB) { + printf ( "%s)\n", "MPI_LB"); + } + } + printf ( + "Expected Start offset: %lld\n" + "Expected End offset: %lld\n", + test->correct_st_offset, + test->correct_end_offset); +} + +int setup_predefined (test_param_t *tests_arr, int count) +{ + int i; + for (i=0; i < PREDEF_TESTS; i++) { + switch (i) + { + case 0: + tests_arr[i].offset = 0; + tests_arr[i].count = 0; + tests_arr[i].type_count = 0; + tests_arr[i].type_indices[0] = 0; + tests_arr[i].type_blocklens[0] = 0; + tests_arr[i].type_oldtypes[0] = MPI_BYTE; + tests_arr[i].type_indices[1] = 0; + tests_arr[i].type_blocklens[1] = 0; + tests_arr[i].type_oldtypes[1] = MPI_BYTE; + tests_arr[i].type_indices[2] = 0; + tests_arr[i].type_blocklens[2] = 0; + tests_arr[i].type_oldtypes[2] = MPI_BYTE; + tests_arr[i].type_indices[3] = 0; + tests_arr[i].type_blocklens[3] = 0; + tests_arr[i].type_oldtypes[3] = MPI_BYTE; + break; + case 1: + tests_arr[i].offset = 0; + tests_arr[i].count = 0; + tests_arr[i].type_count = 0; + tests_arr[i].type_indices[0] = 0; + tests_arr[i].type_blocklens[0] = 0; + tests_arr[i].type_oldtypes[0] = MPI_BYTE; + tests_arr[i].type_indices[1] = 0; + tests_arr[i].type_blocklens[1] = 0; + tests_arr[i].type_oldtypes[1] = MPI_BYTE; + tests_arr[i].type_indices[2] = 0; + tests_arr[i].type_blocklens[2] = 0; + tests_arr[i].type_oldtypes[2] = MPI_BYTE; + tests_arr[i].type_indices[3] = 0; + tests_arr[i].type_blocklens[3] = 0; + tests_arr[i].type_oldtypes[3] = MPI_BYTE; + break; + case 2: + tests_arr[i].offset = 0; + tests_arr[i].count = 0; + tests_arr[i].type_count = 0; + tests_arr[i].type_indices[0] = 0; + tests_arr[i].type_blocklens[0] = 0; + 
tests_arr[i].type_oldtypes[0] = MPI_BYTE; + tests_arr[i].type_indices[1] = 0; + tests_arr[i].type_blocklens[1] = 0; + tests_arr[i].type_oldtypes[1] = MPI_BYTE; + tests_arr[i].type_indices[2] = 0; + tests_arr[i].type_blocklens[2] = 0; + tests_arr[i].type_oldtypes[2] = MPI_BYTE; + tests_arr[i].type_indices[3] = 0; + tests_arr[i].type_blocklens[3] = 0; + tests_arr[i].type_oldtypes[3] = MPI_BYTE; + break; + case 3: + tests_arr[i].offset = 0; + tests_arr[i].count = 0; + tests_arr[i].type_count = 0; + tests_arr[i].type_indices[0] = 0; + tests_arr[i].type_blocklens[0] = 0; + tests_arr[i].type_oldtypes[0] = MPI_BYTE; + tests_arr[i].type_indices[1] = 0; + tests_arr[i].type_blocklens[1] = 0; + tests_arr[i].type_oldtypes[1] = MPI_BYTE; + tests_arr[i].type_indices[2] = 0; + tests_arr[i].type_blocklens[2] = 0; + tests_arr[i].type_oldtypes[2] = MPI_BYTE; + tests_arr[i].type_indices[3] = 0; + tests_arr[i].type_blocklens[3] = 0; + tests_arr[i].type_oldtypes[3] = MPI_BYTE; + break; + case 4: + tests_arr[i].offset = 0; + tests_arr[i].count = 0; + tests_arr[i].type_count = 0; + tests_arr[i].type_indices[0] = 0; + tests_arr[i].type_blocklens[0] = 0; + tests_arr[i].type_oldtypes[0] = MPI_BYTE; + tests_arr[i].type_indices[1] = 0; + tests_arr[i].type_blocklens[1] = 0; + tests_arr[i].type_oldtypes[1] = MPI_BYTE; + tests_arr[i].type_indices[2] = 0; + tests_arr[i].type_blocklens[2] = 0; + tests_arr[i].type_oldtypes[2] = MPI_BYTE; + tests_arr[i].type_indices[3] = 0; + tests_arr[i].type_blocklens[3] = 0; + tests_arr[i].type_oldtypes[3] = MPI_BYTE; + break; + } + } + return 0; +} diff --git a/ompi/mca/io/romio/romio/test/Makefile.in b/ompi/mca/io/romio/romio/test/Makefile.in index 079f983b27..bf6ff4f097 100644 --- a/ompi/mca/io/romio/romio/test/Makefile.in +++ b/ompi/mca/io/romio/romio/test/Makefile.in @@ -3,13 +3,13 @@ F77 = @TEST_F77@ INCLUDE_DIR = @ROMIO_INCLUDE@ # because := is not universally avalible, we have to play games to use the # user-specified LDFLAGS and OUR_LIBS env. 
variables (if set) -OUR_LIBS = @TEST_LIBNAME@ @MPI_LIB@ @ROMIO_LIBLIST@ ${LDFLAGS} ${LIBS} +OUR_LIBS = @TEST_LIBNAME@ @MPI_LIB@ ${LDFLAGS} ${LIBS} USER_CFLAGS = @CPPFLAGS@ @USER_CFLAGS@ $(INCLUDE_DIR) USER_FFLAGS = @CPPFLAGS@ @USER_FFLAGS@ $(INCLUDE_DIR) CTESTS = simple perf async coll_test coll_perf misc file_info excl large_array \ atomicity noncontig i_noncontig noncontig_coll split_coll shared_fp \ large_file psimple error status noncontig_coll2 aggregation1 aggregation2 \ - async-multiple ordered_fp + async-multiple ordered_fp hindexed FTESTS = fcoll_test fperf fmisc pfcoll_test srcdir=@srcdir@ diff --git a/ompi/mca/io/romio/romio/test/Mfile.in b/ompi/mca/io/romio/romio/test/Mfile.in deleted file mode 100644 index c2e1c29825..0000000000 --- a/ompi/mca/io/romio/romio/test/Mfile.in +++ /dev/null @@ -1,84 +0,0 @@ -ALL: default - -# This is a special Makefile.in source for use by the test suite (see the -# configure in examples/test) -##### User configurable options ##### - -MPIR_HOME = @MPIR_HOME@ -CC = @MPICC@ -CLINKER = @MPICC@ -CCC = @MPICPLUSPLUS@ -CCLINKER = $(CCC) -F77 = @MPIF77@ -F90BASE = @MPIF90BASE@ -F90 = @MPIF90@ -FLINKER = @MPIF77@ -OPTFLAGS = @OPTFLAGS@ -MPIFDEP = @MPIFDEP@ -### End User configurable options ### - -SHELL = /bin/sh -prefix = @prefix@ -top_srcdir = @top_srcdir@ -srcdir = @srcdir@ -@VPATH@ - -PROFLIB = -CFLAGS = @CFLAGS@ @DEFS@ $(OPTFLAGS) -CCFLAGS = $(CFLAGS) -FFLAGS = $(OPTFLAGS) @FFLAGS@ -# Use LIBS to add any special libraries for C programs -LIBS = @LIB_PATH@ @LIB_LIST@ -# Use FLIBS to add any special libraries for Fortran programs -FLIBS = @FLIB_PATH@ @LIB_LIST@ @F77EXTRALIBS@ -EXECS = $(CTESTS) $(FTESTS) -OTHEREXECS = -CTESTS = simple perf async coll_test coll_perf misc file_info excl \ - large_array \ - atomicity noncontig i_noncontig noncontig_coll split_coll shared_fp \ - large_file psimple error status noncontig_coll2 -FTESTS = fcoll_test fperf fmisc pfcoll_test - -default: $(EXECS) - -# -# Note that runtests builds the 
executables as required -testing: - -./runtests $(TESTARGS) - -all: testing - -fortran_tests: $(FTESTS) -# -# The Fortran tests must be built with the Fortran linker -fperf: fperf.f - $(F77) $(USER_FFLAGS) -o $@ $< $(FLIBS) - -# Some of the Fortran tests must be derived, so their code will be in the -# LOCAL directory -fcoll_test: fcoll_test.f - $(F77) $(USER_FFLAGS) -o fcoll_test fcoll_test.f $(FLIBS) - -fmisc: fmisc.f - $(F77) $(USER_FFLAGS) -o fmisc fmisc.f $(FLIBS) - -pfcoll_test: pfcoll_test.f - $(F77) $(USER_FFLAGS) -o pfcoll_test pfcoll_test.f $(FLIBS) - -# -# Make sure that we remove executables for specific architectures -clean: - @-rm -f *.o *~ PI* $(EXECS) *.out core pt2pt.diff $(OTHEREXECS) \ - *.trace rdb.* startup.* mpif.h ${srcdir}/*.o *.stdo - @-rm -f work.pc work.pcl - @-for file in $(EXECS) Makefile ; do \ - rm -f $$file.sun4 $$file.alpha $$file.IRIX $$file.freebsd ;\ - done -.c: - $(CC) $(CFLAGS) -o $* $< $(LIBS) -.c.o: - $(CC) $(CFLAGS) -c $< -.o: - ${CLINKER} $(OPTFLAGS) -o $* $*.o $(LIBS) -.f.o: - $(F77) $(FFLAGS) -c $< diff --git a/ompi/mca/io/romio/romio/test/aggregation1.c b/ompi/mca/io/romio/romio/test/aggregation1.c index 8c6204ad00..3b3e8bef23 100644 --- a/ompi/mca/io/romio/romio/test/aggregation1.c +++ b/ompi/mca/io/romio/romio/test/aggregation1.c @@ -1,3 +1,9 @@ +/* -*- Mode: C; c-basic-offset:4 ; -*- */ +/* + * (C) 2007 by Argonne National Laboratory. + * See COPYRIGHT in top-level directory. 
+ */ + /* Test case from John Bent (ROMIO req #835) * Aggregation code was not handling certain access patterns when collective * buffering forced */ @@ -57,6 +63,7 @@ print_hints( int rank, MPI_File *mfh ) { MPI_Info_get( info, key, 1024, value, &dummy_int ); printf( "%s\n", value ); } + MPI_Info_free(&info); } MPI_Barrier( MPI_COMM_WORLD ); } @@ -163,6 +170,7 @@ read_file( char *target, int rank, MPI_Info *info, int *corrupt_blocks ) { if( (mpi_ret = MPI_File_close( &rfh ) ) != MPI_SUCCESS ) { fatal_error( mpi_ret, NULL, "close for read" ); } + free(verify_buf); } @@ -244,8 +252,9 @@ main( int argc, char *argv[] ) { corrupt_blocks, nproc * NUM_OBJS ); } } + MPI_Info_free(&info); MPI_Finalize(); - + free(prog); exit( 0 ); } diff --git a/ompi/mca/io/romio/romio/test/aggregation2.c b/ompi/mca/io/romio/romio/test/aggregation2.c index b5a8c8ce67..3c57c1d206 100644 --- a/ompi/mca/io/romio/romio/test/aggregation2.c +++ b/ompi/mca/io/romio/romio/test/aggregation2.c @@ -1,3 +1,9 @@ +/* -*- Mode: C; c-basic-offset:4 ; -*- */ +/* + * (C) 2007 by Argonne National Laboratory. + * See COPYRIGHT in top-level directory. + */ + /* Look for regressions in aggregator code. A more simple access pattern than * aggregation1 */ @@ -76,6 +82,7 @@ int main(int argc, char ** argv) fprintf( stdout, " No Errors\n" ); } } + MPI_Info_free(&info); MPI_Finalize(); return 0; diff --git a/ompi/mca/io/romio/romio/test/big_extents.c b/ompi/mca/io/romio/romio/test/big_extents.c new file mode 100644 index 0000000000..92f0df09f4 --- /dev/null +++ b/ompi/mca/io/romio/romio/test/big_extents.c @@ -0,0 +1,211 @@ +/* -*- Mode: C; c-basic-offset:4 ; -*- */ +/* + * (C) 2007 by Argonne National Laboratory. + * See COPYRIGHT in top-level directory. + */ + +/* a test to exercise very large extents: on most platforms with 32 bit + * integers, we'd expect these tests to give unexpected values. On platforms + * with 64 bit integers, these tests will be fine. 
On BlueGene we're not sure + * yet :> + */ + + +#include +#include +#include +#include + +#define CHECK(fn) {int errcode; errcode = (fn); if (errcode != MPI_SUCCESS) handle_error(errcode, NULL); } + + +static void handle_error(int errcode, char *str) +{ + char msg[MPI_MAX_ERROR_STRING]; + int resultlen; + MPI_Error_string(errcode, msg, &resultlen); + fprintf(stderr, "%s: %s\n", str, msg); + MPI_Abort(MPI_COMM_WORLD, 1); +} + +static void typestats(MPI_Datatype type) +{ + MPI_Aint lb, extent; + int size; + + MPI_Type_get_extent(type, &lb, &extent); + MPI_Type_size(type, &size); + + printf("dtype %d: lb = %ld extent = %ld size = %d...", + type, (long)lb, (long)extent, size); + +} + +static int verify_type(char *filename, MPI_Datatype type, + int64_t expected_extent, int do_coll) +{ + int rank, canary, tsize; + int compare=-1; + int errs=0, toterrs=0; + MPI_Status status; + MPI_File fh; + + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + + CHECK( MPI_File_open(MPI_COMM_WORLD, filename, + MPI_MODE_CREATE|MPI_MODE_RDWR, MPI_INFO_NULL, &fh)); + CHECK( MPI_File_set_view(fh, rank*sizeof(int), + MPI_BYTE, type, "native", MPI_INFO_NULL)); + + MPI_Type_size(type, &tsize); + + canary=rank+1000000; + + /* skip over first instance of type */ + if (do_coll) { + CHECK( MPI_File_write_at_all(fh, tsize, &canary, 1, MPI_INT, &status)); + } else { + CHECK( MPI_File_write_at(fh, tsize, &canary, 1, MPI_INT, &status)); + } + + CHECK( MPI_File_set_view(fh, 0, MPI_INT, MPI_INT, "native", + MPI_INFO_NULL)); + + if (do_coll) { + CHECK( MPI_File_read_at_all(fh, expected_extent/sizeof(int)+rank, + &compare, 1, MPI_INT, &status)); + } else { + CHECK( MPI_File_read_at(fh, expected_extent/sizeof(int)+rank, + &compare, 1, MPI_INT, &status)); + } + + if (compare != canary) + errs=1; + MPI_Allreduce(&errs, &toterrs, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + + MPI_File_close(&fh); + + if (toterrs) { + printf("%d: got %d expected %d\n", rank, compare, canary); + /* keep file if there's an error */ + } else { + 
if (rank == 0) MPI_File_delete(filename, MPI_INFO_NULL); + } + + return (toterrs); + +} + +static int testtype(char *filename, MPI_Datatype type, int64_t expected_extent) +{ + int rank, ret, errs=0; + int collective=1, nocollective=0; + + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + if (!rank) typestats(type); + + ret = verify_type(filename, type, expected_extent, nocollective); + if (ret) { + errs++; + fprintf(stderr, "type %d failed indep\n", type); + } else + if (!rank) printf("indep: OK "); + + ret = verify_type(filename, type, expected_extent, collective); + if (ret) { + errs++; + fprintf(stderr, "type %d failed collective\n", type); + } else + if (!rank) printf("coll: OK\n"); + + return errs; +} + +int main(int argc, char **argv) +{ + int count=2; + int blocks[2]; + int disps[2]; + + int ndims=2; + int sizes[2]; + int subs[2]; + int starts[2]; + + MPI_Datatype baseindex, indexed1G, indexed3G, indexed6G; + MPI_Datatype subarray1G, subarray3G, subarray6G; + int ret, rank; + + MPI_Init(&argc, &argv); + + if (argc != 2) { + fprintf(stderr, "usage: %s \n", argv[0]); + MPI_Abort(MPI_COMM_WORLD, 1); + } + + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + + /* base type: 1MB indexed type of ints*/ + count = 2; + blocks[0] = 1; + disps[0] = 0; + blocks[1] = 1; + disps[1] = 1024*256-1; + + MPI_Type_indexed(count, blocks, disps, MPI_INT, &baseindex); + /* simple case: 1GB extent */ + MPI_Type_contiguous(1024, baseindex, &indexed1G); + MPI_Type_commit(&indexed1G); + + /* a little trickier: 3Gb extent */ + MPI_Type_contiguous(3072, baseindex, &indexed3G); + MPI_Type_commit(&indexed3G); + + /* and finally 6GB extent */ + MPI_Type_contiguous(6144, baseindex, &indexed6G); + MPI_Type_commit(&indexed6G); + + /* TODO: + * - add a darray test + * - add a test with crazy extents */ + sizes[0] = 1024*16; + sizes[1] = 1024*16; + subs[0] = subs[1] = 256; + starts[0] = starts[1] = 0; + + MPI_Type_create_subarray(ndims, sizes, subs, starts, + MPI_ORDER_C, MPI_INT, &subarray1G); + 
MPI_Type_commit(&subarray1G); + + sizes[1] = 1024*16*3; + MPI_Type_create_subarray(ndims, sizes, subs, starts, + MPI_ORDER_C, MPI_INT, &subarray3G); + MPI_Type_commit(&subarray3G); + + sizes[1] = 1024*16*6; + MPI_Type_create_subarray(ndims, sizes, subs, starts, + MPI_ORDER_C, MPI_INT, &subarray6G); + MPI_Type_commit(&subarray6G); + + /* assume command line arguments make it out to all processes */ + ret = testtype(argv[1], indexed1G, (int64_t)1024*1024*1024); + + ret = testtype(argv[1], indexed3G, (int64_t)1024*1024*1024*3); + + ret = testtype(argv[1], indexed6G, (int64_t)1024*1024*1024*6); + + ret = testtype(argv[1], subarray1G, (int64_t)1024*1024*1024); + + ret = testtype(argv[1], subarray3G, (int64_t)1024*1024*1024*3); + + ret = testtype(argv[1], subarray6G, (int64_t)1024*1024*1024*6); + + if(!ret && !rank) fprintf(stderr, " No Errors\n"); + + MPI_Finalize(); + return (-ret); + +} +/* + * vim: ts=8 sts=4 sw=4 noexpandtab + */ diff --git a/ompi/mca/io/romio/romio/test/coll_test.c b/ompi/mca/io/romio/romio/test/coll_test.c index 661c11d584..d9a96d7b97 100644 --- a/ompi/mca/io/romio/romio/test/coll_test.c +++ b/ompi/mca/io/romio/romio/test/coll_test.c @@ -125,20 +125,6 @@ int main(int argc, char **argv) } /* end of initialization */ -#if 0 - /* XXX: make the romio testcases handle more than one command line - * argument.. 
like --aggregation */ - - /* for deferred open: hint stuff */ - MPI_Info_create(&info); - MPI_Info_set(info, "romio_no_indep_rw", "true"); - MPI_Info_set(info, "romio_cb_read", "enable"); - MPI_Info_set(info, "romio_cb_write", "enable"); - MPI_Info_set(info, "cb_nodes", "1"); - MPI_Info_set(info, "cb_config_list", "schwinn.mcs.anl.gov:1"); -#endif - - /* write the array to the file */ errcode = MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_CREATE | MPI_MODE_RDWR, info, &fh); @@ -152,6 +138,31 @@ int main(int argc, char **argv) errcode = MPI_File_close(&fh); if (errcode != MPI_SUCCESS) handle_error(errcode, "MPI_File_close"); + if (!mynod) { + /* wkl suggests potential for false " No Errors" if both read + * and write use the same file view */ + /* solution: rank 0 reads entire file and checks write values */ + errcode = MPI_File_open(MPI_COMM_SELF, filename, + MPI_MODE_RDONLY, info, &fh); + if (errcode != MPI_SUCCESS) handle_error(errcode, "MPI_File_open"); + + readbuf = (int *) malloc(array_size * sizeof(int)); + errcode = MPI_File_read(fh, readbuf, array_size, MPI_INT, &status); + if (errcode != MPI_SUCCESS) handle_error(errcode, "MPI_File_read"); + + errcode = MPI_File_close(&fh); + if (errcode != MPI_SUCCESS) handle_error(errcode, "MPI_File_close"); + + for (i=0; i #include #include +/* this test wants to compare the hints it gets from a file with a set of + * default hints. These hints are specific to the MPI-IO implementation, so + * pick one of the following profiles to use */ + +# define DFLT_CB_BUFFER_SIZE 16777216 +# define DFLT_IND_RD_BUFFER_SIZE 4194304 +# define DFLT_IND_WR_BUFFER_SIZE 524288 +# define DFLT_ROMIO_CB_READ "automatic" +# define DFLT_ROMIO_CB_WRITE "automatic" /* #undef INFO_DEBUG */ -/* Set verbose to 0 only if you want no information about any failure */ -static int verbose = 1; +/* Test will print out information about unexpected hint keys or values that + * differ from the default. 
Since this is often interesting but rarely an + * error, default will be to increment error count for true error conditions + * but not print out these "interesting" non-error cases. */ + +static int verbose = 0; int main(int argc, char **argv) { @@ -74,35 +91,28 @@ int main(int argc, char **argv) /* no check */ } else if (!strcmp("cb_buffer_size", key)) { - if (atoi(value) != 16777216) { + if (atoi(value) != DFLT_CB_BUFFER_SIZE) { errs++; if (verbose) fprintf(stderr, "cb_buffer_size is %d; should be %d\n", - atoi(value), 16777216); + atoi(value), DFLT_CB_BUFFER_SIZE); } } else if (!strcmp("romio_cb_read", key)) { - if (strcmp("automatic", value)) { + if (strcmp(DFLT_ROMIO_CB_READ, value)) { errs++; if (verbose) fprintf(stderr, "romio_cb_read is set to %s; should be %s\n", - value, "automatic"); + value, DFLT_ROMIO_CB_READ); } } else if (!strcmp("romio_cb_write", key)) { - if (strcmp("automatic", value)) { + if (strcmp(DFLT_ROMIO_CB_WRITE, value)) { errs++; if (verbose) fprintf(stderr, "romio_cb_write is set to %s; should be %s\n", - value, "automatic"); + value, DFLT_ROMIO_CB_WRITE); } } else if (!strcmp("cb_nodes", key)) { /* unreliable test -- just ignore value */ -#if 0 - if (atoi(value) != 1) { - errs++; - if (verbose) fprintf(stderr, "cb_nodes is %d; should be %d\n", atoi(value), - 1); - } -#endif } else if (!strcmp("romio_no_indep_rw", key)) { if (strcmp("false", value)) { @@ -112,17 +122,17 @@ int main(int argc, char **argv) } } else if (!strcmp("ind_rd_buffer_size", key)) { - if (atoi(value) != 4194304) { + if (atoi(value) != DFLT_IND_RD_BUFFER_SIZE) { errs++; if (verbose) fprintf(stderr, "ind_rd_buffer_size is %d; should be %d\n", - atoi(value), 4194304); + atoi(value), DFLT_IND_RD_BUFFER_SIZE); } } else if (!strcmp("ind_wr_buffer_size", key)) { - if (atoi(value) != 524288) { + if (atoi(value) != DFLT_IND_WR_BUFFER_SIZE) { errs++; if (verbose) fprintf(stderr, "ind_wr_buffer_size is %d; should be %d\n", - atoi(value), 524288); + atoi(value),
DFLT_IND_WR_BUFFER_SIZE); } } else if (!strcmp("romio_ds_read", key)) { @@ -134,20 +144,26 @@ int main(int argc, char **argv) } else if (!strcmp("romio_ds_write", key)) { /* Unreliable test -- value is file system dependent. Ignore. */ -#if 0 - if (strcmp("automatic", value)) { - errs++; - if (verbose) fprintf(stderr, "romio_ds_write is set to %s; should be %s\n", - value, "automatic"); - } -#endif } else if (!strcmp("cb_config_list", key)) { +#ifndef SKIP_CB_CONFIG_LIST_TEST if (strcmp("*:1", value)) { errs++; if (verbose) fprintf(stderr, "cb_config_list is set to %s; should be %s\n", value, "*:1"); } +#endif + } + /* don't care about the defaults for these keys */ + else if (!strcmp("romio_cb_pfr", key)) { + } + else if (!strcmp("romio_cb_fr_types", key)) { + } + else if (!strcmp("romio_cb_fr_alignment", key)) { + } + else if (!strcmp("romio_cb_ds_threshold", key)) { + } + else if (!strcmp("romio_cb_alltoall", key)) { } else { if (verbose) fprintf(stderr, "unexpected key %s (not counted as an error)\n", key); @@ -202,8 +218,10 @@ int main(int argc, char **argv) /* the striping unit in bytes */ MPI_Info_set(info, "striping_unit", "131072"); +#ifndef SKIP_CB_CONFIG_LIST_TEST /* set the cb_config_list so we'll get deterministic cb_nodes output */ MPI_Info_set(info, "cb_config_list", "*:*"); +#endif /* the I/O device number from which to start striping the file. 
accepted only if 0 <= value < default_striping_factor; @@ -251,17 +269,17 @@ int main(int argc, char **argv) } } else if (!strcmp("romio_cb_read", key)) { - if (strcmp("automatic", value)) { + if (strcmp(DFLT_ROMIO_CB_READ, value)) { errs++; if (verbose) fprintf(stderr, "romio_cb_read is set to %s; should be %s\n", - value, "automatic"); + value, DFLT_ROMIO_CB_READ); } } else if (!strcmp("romio_cb_write", key)) { - if (strcmp("automatic", value)) { + if (strcmp(DFLT_ROMIO_CB_WRITE, value)) { errs++; if (verbose) fprintf(stderr, "romio_cb_write is set to %s; should be %s\n", - value, "automatic"); + value, DFLT_ROMIO_CB_WRITE); } } else if (!strcmp("cb_nodes", key)) { @@ -301,21 +319,52 @@ int main(int argc, char **argv) } else if (!strcmp("romio_ds_write", key)) { /* Unreliable test -- value is file system dependent. Ignore. */ -#if 0 - if (strcmp("automatic", value)) { - errs++; - if (verbose) fprintf(stderr, "romio_ds_write is set to %s; should be %s\n", - value, "automatic"); - } -#endif } else if (!strcmp("cb_config_list", key)) { +#ifndef SKIP_CB_CONFIG_LIST_TEST if (strcmp("*:*", value)) { errs++; if (verbose) fprintf(stderr, "cb_config_list is set to %s; should be %s\n", value, "*:*"); } +#endif } + else if (!strcmp("romio_cb_pfr", key)) { + if(strcmp("disable", value)) { + errs++; + if (verbose) fprintf(stderr, "romio_cb_pfr is set to %s; should be %s\n", + value, "automatic"); + } + } + else if (!strcmp("romio_cb_fr_types", key)) { + if(strcmp("aar", value)) { + errs++; + if (verbose) fprintf(stderr, "romio_cb_fr_types is set to %s; should be %s\n", + value, "aar"); + } + } + else if (!strcmp("romio_cb_fr_alignment", key)) { + if(strcmp("1", value)) { + errs++; + if (verbose) fprintf(stderr, "romio_cb_fr_alignment is set to %s; should be %s\n", + value, "1"); + } + } + else if (!strcmp("romio_cb_ds_threshold", key)) { + if(strcmp("0", value)) { + errs++; + if (verbose) fprintf(stderr, "romio_cb_ds_threshold is set to %s; should be %s\n", + value, "0"); + } 
+ } + else if (!strcmp("romio_cb_alltoall", key)) { + if(strcmp("automatic", value)) { + errs++; + if (verbose) fprintf(stderr, "romio_cb_alltoall is set to %s; should be %s\n", + value, "automatic"); + } + } + else { if (verbose) fprintf(stderr, "unexpected key %s (not counted as an error)\n", key); } @@ -334,12 +383,3 @@ int main(int argc, char **argv) MPI_Finalize(); return 0; } - - - - - - - - - diff --git a/ompi/mca/io/romio/romio/test/hindexed.c b/ompi/mca/io/romio/romio/test/hindexed.c new file mode 100644 index 0000000000..6e0b332555 --- /dev/null +++ b/ompi/mca/io/romio/romio/test/hindexed.c @@ -0,0 +1,250 @@ +/* Wei-keng Liao (wkliao@ece.northwestern.edu) September 8, 2008 */ +#include +#include +#include +#include + +#define YLEN 5 +#define XLEN 10 +#define SUB_XLEN 3 + +/* rjl: I was just too lazy to compute this at run-time */ +char compare_buf[XLEN*4][YLEN*4] = { + {'0','1','2',0,0,'3','4','5',0,0,'D','E','F',0,0,'G','H','I'}, + {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, + {'6','7','8',0,0,'9',':',';',0,0,'J','K','L',0,0,'M','N','O'}, + {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, + {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, + {'X','Y','Z',0,0,'[','\\',']',0,0,'l','m','n',0,0,'o','p','q'}, + {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, + {'^','_','`',0,0,'a','b','c',0,0,'r','s','t',0,0,'u','v','w'}, + {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, + {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, + {'0','1','2',0,0,'3','4','5',0,0,'D','E','F',0,0,'G','H','I'}, + {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, + {'6','7','8',0,0,'9',':',';',0,0,'J','K','L',0,0,'M','N','O'}, + {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, + {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, + {'X','Y','Z',0,0,'[','\\',']',0,0,'l','m','n',0,0,'o','p','q'}, + {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, + {'^','_','`',0,0,'a','b','c',0,0,'r','s','t',0,0,'u','v','w'}, + {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, + {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0} +}; + + +/* set this if you want a dump of the global array +#define VERBOSE 1 +*/ + +/*----< 
main() >------------------------------------------------------------*/ +int main(int argc, char **argv) { + int i, j, err, rank, np, num_io; + char *buf, *filename; + int rank_dim[2], array_of_sizes[2]; + int array_of_subsizes[2]; + int count, *blocklengths, global_array_size, ftype_size; + MPI_Aint *displacements; + MPI_File fh; + MPI_Datatype ftype; + MPI_Status status; + MPI_Offset offset=0; + int nr_errors=0; +#ifdef VERBOSE + int k; +#endif + + MPI_Init(&argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &np); + + if (np != 4) { + if (!rank) printf("Please run with 4 processes. Exiting ...\n\n"); + MPI_Finalize(); + return 1; + } + + filename = argv[1]; + + num_io = 2; + + /*-----------------------------------------------------------------------*/ + /* process rank in each dimension */ + rank_dim[0] = rank / 2; + rank_dim[1] = rank % 2; + + /* global 2D array size */ + array_of_sizes[0] = YLEN * 2; + array_of_sizes[1] = XLEN * 2; + + global_array_size = array_of_sizes[0] * array_of_sizes[1]; + + array_of_subsizes[0] = YLEN / 2; + array_of_subsizes[1] = XLEN * SUB_XLEN / 5; + + offset = rank_dim[0] * YLEN * array_of_sizes[1] + + rank_dim[1] * XLEN; + + /* define data type for file view */ + count = array_of_subsizes[0] * 2; /* 2 is the no. 
blocks along X */ + blocklengths = (int*) malloc(count*sizeof(int)); + displacements = (MPI_Aint*)malloc(count*sizeof(MPI_Aint)); + for (i=0; i0 && j%XLEN==0) printf(" "); + if (*ptr != 0) + printf(" %c",*ptr); + else + printf(" "); + ptr++; + } + printf("\n"); + } + printf("\n"); + } +#endif + ptr = rd_buf; + for(i=0; i<2*YLEN*num_io; i++) { + for(j=0; j<2*XLEN; j++) { + if( *ptr != compare_buf[i][j]) { + fprintf(stderr, "expected %d got %d at [%d][%d]\n", + *ptr, compare_buf[i][j], i, j); + nr_errors++; + } + ptr++; + } + } + free(rd_buf); + + if (nr_errors == 0) + fprintf(stdout, " No Errors\n"); + else + fprintf(stderr, "Found %d errors\n", nr_errors); + } + + MPI_Type_free(&ftype); + MPI_Finalize(); + return 0; +} + +/* command-line outputs are: (the global array is written twice) + +% mpiexec -n 4 wkl_subarray +------------------------------------------------------- + [ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 ] + +[ 0] 0 1 2 3 4 5 D E F G H I +[ 1] +[ 2] 6 7 8 9 : ; J K L M N O +[ 3] +[ 4] +[ 5] X Y Z [ \ ] l m n o p q +[ 6] +[ 7] ^ _ ` a b c r s t u v w +[ 8] +[ 9] + +[10] 0 1 2 3 4 5 D E F G H I +[11] +[12] 6 7 8 9 : ; J K L M N O +[13] +[14] +[15] X Y Z [ \ ] l m n o p q +[16] +[17] ^ _ ` a b c r s t u v w +[18] +[19] + +*/ + diff --git a/ompi/mca/io/romio/romio/test/noncontig_coll2.c b/ompi/mca/io/romio/romio/test/noncontig_coll2.c index 5a28d48dde..e684048c12 100644 --- a/ompi/mca/io/romio/romio/test/noncontig_coll2.c +++ b/ompi/mca/io/romio/romio/test/noncontig_coll2.c @@ -25,8 +25,6 @@ int test_file(char *filename, int mynod, int nprocs, char * cb_hosts, char *msg, int verbose); -static int cb_config_list_keyval = MPI_KEYVAL_INVALID; - #define ADIOI_Free free #define ADIOI_Malloc malloc #define FPRINTF fprintf @@ -40,16 +38,6 @@ struct ADIO_cb_name_arrayD { typedef struct ADIO_cb_name_arrayD *ADIO_cb_name_array; void handle_error(int errcode, char *str); -int cb_copy_name_array(MPI_Comm comm, - int *keyval, - void *extra, - void *attr_in, - void 
**attr_out, - int *flag); -int cb_delete_name_array(MPI_Comm comm, - int *keyval, - void *attr_val, - void *extra); int cb_gather_name_array(MPI_Comm comm, ADIO_cb_name_array *arrayp); void default_str(int mynod, int len, ADIO_cb_name_array array, char *dest); void reverse_str(int mynod, int len, ADIO_cb_name_array array, char *dest); @@ -65,51 +53,7 @@ void handle_error(int errcode, char *str) fprintf(stderr, "%s: %s\n", str, msg); MPI_Abort(MPI_COMM_WORLD, 1); } - /* cb_copy_name_array() - attribute copy routine - */ -int cb_copy_name_array(MPI_Comm comm, - int *keyval, - void *extra, - void *attr_in, - void **attr_out, - int *flag) -{ - ADIO_cb_name_array array; - - array = (ADIO_cb_name_array) attr_in; - array->refct++; - - *attr_out = attr_in; - *flag = 1; /* make a copy in the new communicator */ - - return MPI_SUCCESS; -} - -/* cb_delete_name_array() - attribute destructor - */ -int cb_delete_name_array(MPI_Comm comm, - int *keyval, - void *attr_val, - void *extra) -{ - int i; - ADIO_cb_name_array array; - - array = (ADIO_cb_name_array) attr_val; - array->refct--; - - if (array->refct <= 0) { - /* time to free the structures (names, array of ptrs to names, struct) - */ - for (i=0; i < array->namect; i++) { - ADIOI_Free(array->names[i]); - } - if (array->names != NULL) ADIOI_Free(array->names); - ADIOI_Free(array); - } - - return MPI_SUCCESS; -} + /* cb_gather_name_array() - gather a list of processor names from all processes * in a communicator and store them on rank 0. @@ -125,24 +69,16 @@ int cb_delete_name_array(MPI_Comm comm, */ int cb_gather_name_array(MPI_Comm comm, ADIO_cb_name_array *arrayp) { + /* this is copied from ROMIO, but since this test is for correctness, + * not performance, note that we have removed the parts where ROMIO + * uses a keyval to cache the name array. 
We'll just rebuild it if we + * need to */ + char my_procname[MPI_MAX_PROCESSOR_NAME], **procname = 0; int *procname_len = NULL, my_procname_len, *disp = NULL, i; - int commsize, commrank, found; + int commsize, commrank; ADIO_cb_name_array array = NULL; - if (cb_config_list_keyval == MPI_KEYVAL_INVALID) { - MPI_Keyval_create((MPI_Copy_function *) cb_copy_name_array, - (MPI_Delete_function *) cb_delete_name_array, - &cb_config_list_keyval, NULL); - } - else { - MPI_Attr_get(comm, cb_config_list_keyval, (void *) &array, &found); - if (found) { - *arrayp = array; - return 0; - } - } - MPI_Comm_size(comm, &commsize); MPI_Comm_rank(comm, &commrank); @@ -240,11 +176,6 @@ int cb_gather_name_array(MPI_Comm comm, ADIO_cb_name_array *arrayp) #endif } - /* store the attribute; we want to store SOMETHING on all processes - * so that they can all tell if we have gone through this procedure - * or not for the given communicator. - */ - MPI_Attr_put(comm, cb_config_list_keyval, array); *arrayp = array; return 0; } diff --git a/ompi/mca/io/romio/romio/test/rtest.in b/ompi/mca/io/romio/romio/test/rtest.in deleted file mode 100644 index c6fd305843..0000000000 --- a/ompi/mca/io/romio/romio/test/rtest.in +++ /dev/null @@ -1,131 +0,0 @@ -#! /bin/sh -# -# Run some of the tests. If any arguments are provided, pass them to the -# test programs. -# -# -mvhome is needed for the ANL SP, and is ignored by others -args=@MPIRUNARGS@ -device=@DEVICE@ -top_srcdir=@top_srcdir@ -srcdir=@srcdir@ -MPICH_VERSION=@MPICH_VERSION@ -STOPFILE=$HOME/.stopmpichtests -mpirun=${MPIRUN:-"@MPIRUN@"} -MAKE="@MAKE@" -FILENAME=test -# -# Load basic procedures -. 
${top_srcdir}/runbase -# -# Set mpirun to the name/path of the mpirun program -FindMPIRUN -# -# -test_mpi2=@HAS_MPI2@ -runtests=1 -quiet=0 -makeeach=0 -writesummaryfile=no -MAKE="@MAKE@" -for arg in "$@" ; do - case $arg in - -checkonly ) - runtests=0 - ;; - -margs=*) - margs=`echo $arg | sed 's/-margs=//'` - args="$args $margs" - ;; - -summaryfile=*) - writesummaryfile=yes - summaryfile=`echo A$arg | sed 's/A-summaryfile=//'` - ;; - -small) - makeeach=1 - ;; - -fname=*) - FILENAME=`echo $arg|sed 's/-*fname=//'` - ;; - -quiet) - shift - quiet=1 - ;; - -help|-u) - echo "runtests [-checkonly] [-margs='...']" - echo "run tests in this directory. If -checkonly set, just run" - echo "the differences check (do NO rerun the test programs)." - echo "If -margs is used, these options are passed to mpirun." - echo "If -small is used, the examples are built, run, and deleted." - exit 1 - ;; - *) - if test -n "$arg" ; then - echo "runtests: Unknown argument ($arg)" - exit 1 - fi - ;; - esac -done - -# If the programs are not available, run make. -if [ ! 
-x simple -a $makeeach = 0 -a $runtests = 1 ] ; then - $MAKE default -fi - -testfiles="" -if [ $runtests = 1 ] ; then - - -RunTest simple 4 "**** Testing simple.c ****" "-fname $FILENAME" - -RunTest async 4 "**** Testing async.c ****" "-fname $FILENAME" - -RunTest atomicity 4 "**** Testing atomicity.out ****" "-fname $FILENAME" - -RunTest coll_test 4 "**** Testing coll_test.out ****" "-fname $FILENAME" - -RunTest excl 4 "**** Testing excl.c ****" "-fname $FILENAME" - -RunTest file_info 4 "**** Testing file_info.c ****" "-fname $FILENAME" - -RunTest i_noncontig 2 "**** Testing i_noncontig.c ****" "-fname $FILENAME" - -RunTest noncontig 2 "**** Testing noncontig.c ****" "-fname $FILENAME" - -RunTest noncontig_coll 2 "**** Testing noncontig_coll.c ****" "-fname $FILENAME" - -RunTest noncontig_coll2 4 "**** Testing noncontig_coll2.c ****" "-fname $FILENAME" - -RunTest misc 4 "**** Testing misc.c ****" "-fname $FILENAME" - -RunTest shared_fp 4 "**** Testing shared_fp.c ****" "-fname $FILENAME" - -RunTest split_coll 4 "**** Testing split_coll.c ****" "-fname $FILENAME" - -RunTest psimple 4 "**** Testing psimple.c ****" "-fname $FILENAME" - -RunTest error 1 "**** Testing error.c ****" "-fname $FILENAME" - -RunTest status 1 "**** Testing status.c ****" "-fname $FILENAME" - -# -# Run Fortran tests ONLY if Fortran available -if [ @HAS_FORTRAN@ = 1 ] ; then - RunTest fmisc 4 "**** Testing fmisc.f ****" "-fname $FILENAME" - - RunTest fcoll_test 4 "**** Testing fcoll_test.f ****" "-fname $FILENAME" - - RunTest pfcoll_test 4 "**** Testing pfcoll_test.f ****" "-fname $FILENAME" -fi - -else - # Just run checks - testfiles=`echo *.out` -fi - -echo '*** Checking for differences from expected output ***' -CheckAllOutput context.diff -exit 0 - - - diff --git a/ompi/mca/io/romio/romio/test/runtests.in b/ompi/mca/io/romio/romio/test/runtests.in index 2c893defef..3f17318b52 100644 --- a/ompi/mca/io/romio/romio/test/runtests.in +++ b/ompi/mca/io/romio/romio/test/runtests.in @@ -273,14 
+273,21 @@ echo '**** Testing noncontig_coll2.c ****' $mpirun -np 4 ./noncontig_coll2 -fname $FILENAME # CheckOutput noncontig_coll2 CleanExe noncontig_coll2 +MakeExe aggregation1 echo '**** Testing aggregation1 ****' -$mpirun -np 4 ./aggregation1 -h -fname $FILENAME +$mpirun -np 4 ./aggregation1 -h -f $FILENAME # CheckOutput aggregation1 CleanExe aggregation1 +MakeExe aggregation2 echo '**** Testing aggregation2 ****' $mpirun -np 4 ./aggregation2 $FILENAME # CheckOutput aggregation2 CleanExe aggregation2 +MakeExe hindexed +echo '**** Testing hindexed ****' +$mpirun -np 4 ./hindexed $FILENAME +# CheckOutput hindexed +CleanExe hindexed # #echo '**** Testing write_all_test (run 1)****' #$mpirun -np 4 ./write_all_test -nzp 2 -zplace 2 -nzw 2 -naw 2 -size 100 \ diff --git a/ompi/mca/io/romio/romio/util/romioinstall.in b/ompi/mca/io/romio/romio/util/romioinstall.in index b4b2d0d619..95a6309231 100644 --- a/ompi/mca/io/romio/romio/util/romioinstall.in +++ b/ompi/mca/io/romio/romio/util/romioinstall.in @@ -28,9 +28,10 @@ includedir=@includedir@ #sysconfdir=@sysconfdir@ libdir=@libdir@ #sharedlib_dir=@sharedlib_dir@ -datarootdir=@datarootdir@ mandir=@mandir@ htmldir=@htmldir@ +# datarootdir required as of autoconf 2.60 +datarootdir=@datatrootdir@ #datadir=@datadir@ docdir=@docdir@ ## Location of sources diff --git a/ompi/mca/io/romio/romio/util/tarch b/ompi/mca/io/romio/romio/util/tarch index 9a472b3b0a..ccccf4d514 100755 --- a/ompi/mca/io/romio/romio/util/tarch +++ b/ompi/mca/io/romio/romio/util/tarch @@ -53,20 +53,20 @@ for LARCH in $ARCHLIST ; do # LARCH4=`expr "$LARCH" : "\(....\)"` # LARCH6=`expr "$LARCH" : "\(......\)"` case $LARCH in - SUPER-UX) FARCH=SX4; break ;; - AIX|RIOS) FARCH=rs6000; break ;; - HP-UX) + SUPER-UX) FARCH=SX4; break ;; + AIX|RIOS) FARCH=rs6000; break ;; + HP-UX) if [ -a /dev/kmem ] ; then FARCH=hpux ; else FARCH=sppux ; fi break ;; - IRIX64|IRIX) FARCH=$LARCH ; break ;; - Linux) FARCH=LINUX ; break ;; - i586|i486|i86pc) + IRIX64|IRIX) 
FARCH=$LARCH ; break ;; + Linux) FARCH=LINUX ; break ;; + i586|i486|i86pc) GARCH=$LARCH ;; - sun4*) + sun4*) Version=`$UNAME -r` # In "improving" SunOS, the useful feature of "substr" was withdrawn # from expr. Can't let the users have life too easy, can we? This @@ -81,21 +81,27 @@ for LARCH in $ARCHLIST ; do FARCH=sun4 fi break ;; - hp9000*|hp7000*) + hp9000*|hp7000*) if [ -a /dev/kmem ] ; then FARCH=hpux ; else FARCH=sppux ; fi break ;; - mips|dec-5000) FARCH=dec5000 ; break ;; - IP12|iris-4d) GARCH=IRIX ;; - cray|CRAY*) GARCH=CRAY ;; + Darwin*) + FARCH=Darwin + break ;; + + mips|dec-5000) FARCH=dec5000 ; break ;; next) FARCH=NeXT ; break ;; KSR1|KSR2) FARCH=ksr ; break ;; FreeBSD) FARCH=freebsd ; break ;; OpenBSD) FARCH=openbsd ; break ;; NetBSD) FARCH=netbsd ; break ;; + + # The following are guesses + IP12|iris-4d) GARCH=IRIX ;; + cray|CRAY*) GARCH=CRAY ;; i386) GARCH=ipsc2 ;; ULTRIX|RISC) GARCH=dec5000 ;; esac diff --git a/ompi/mca/io/romio/src/io_romio_component.c b/ompi/mca/io/romio/src/io_romio_component.c index 13abc87976..9024d3a8cb 100644 --- a/ompi/mca/io/romio/src/io_romio_component.c +++ b/ompi/mca/io/romio/src/io_romio_component.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2008-2011 Cisco Systems, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -26,7 +26,7 @@ #include "ompi/mca/io/io.h" #include "io_romio.h" -#define ROMIO_VERSION_STRING "from MPICH2 v1.0.7 with additional compilation/bug patches from romio-maint@mcs.anl.gov" +#define ROMIO_VERSION_STRING "from MPICH2 v1.3.1 with an additional patch from romio-maint@mcs.anl.gov about an attribute issue" /* * Private functions diff --git a/ompi/mca/io/romio/src/io_romio_file_open.c b/ompi/mca/io/romio/src/io_romio_file_open.c index 1ace3166cd..46021af7e1 100644 --- a/ompi/mca/io/romio/src/io_romio_file_open.c +++ b/ompi/mca/io/romio/src/io_romio_file_open.c @@ -51,6 +51,13 @@ mca_io_romio_file_close (ompi_file_t *fh) int ret; mca_io_romio_data_t *data; + /* Because ROMIO expects the MPI library to provide error handler management + * routines but it doesn't ever participate in MPI_File_close, we have to + * somehow inform the MPI library that we no longer hold a reference to any + * user defined error handler. We do this by setting the errhandler at this + * point to MPI_ERRORS_RETURN. */ + PMPI_File_set_errhandler(fh, MPI_ERRORS_RETURN); + data = (mca_io_romio_data_t *) fh->f_io_selected_data; OPAL_THREAD_LOCK (&mca_io_romio_mutex);