ompio: resync v4.1 branch to master
This commit syncs the ompio-related directories in v4.1.x to master. Bringing over the Lustre performance fixes and the support for the external32 data representation by handling every single PR individually would have been too cumbersome. A few minor modifications were required for the sync:

- v4.1.x does not have opal/mutex.h
- v4.1.x does not have the opal_atomic_int32_t datatype
- the io module structure has two fewer function pointers (related to info_set/get) than the version on master

Tested so far with the ompio test suite as well as the hdf5-1.10.5 test suite (testphdf5, t_shapesame, t_bigio) on an XFS file system. More tests on Lustre and BeeGFS to follow.

Signed-off-by: Edgar Gabriel <egabriel@central.uh.edu>
This commit is contained in:
parent 7fd4f3261d
commit ff130e7a9c
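For context, a minimal usage sketch (not part of the commit; the file name and helper function are illustrative) of how an application would exercise the external32 support this sync brings over:

    /* Sketch: request the portable external32 representation through the
       standard MPI-IO interface; OMPIO then routes reads/writes through
       the convertor-based staging shown in the hunks below. */
    #include <mpi.h>

    int write_portable(MPI_Comm comm, const char *fname, const int *data, int n)
    {
        MPI_File fh;
        MPI_File_open(comm, fname, MPI_MODE_CREATE | MPI_MODE_WRONLY,
                      MPI_INFO_NULL, &fh);
        /* "external32" makes OMPIO pack the data into the portable
           format before it reaches the file system. */
        MPI_File_set_view(fh, 0, MPI_INT, MPI_INT, "external32", MPI_INFO_NULL);
        MPI_File_write_all(fh, data, n, MPI_INT, MPI_STATUS_IGNORE);
        return MPI_File_close(&fh);
    }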
@@ -25,6 +25,7 @@ headers = \
common_ompio_aggregators.h \
common_ompio_print_queue.h \
common_ompio_request.h \
common_ompio_buffer.h \
common_ompio.h

sources = \
@@ -34,6 +35,7 @@ sources = \
common_ompio_file_open.c \
common_ompio_file_view.c \
common_ompio_file_read.c \
common_ompio_buffer.c \
common_ompio_file_write.c


@@ -74,10 +76,6 @@ else
ompidir = $(includedir)
endif

if OPAL_cuda_support
headers += common_ompio_cuda.h
sources += common_ompio_cuda.c
endif

# These two rules will sym link the "noinst" libtool library filename
# to the installable libtool library filename in the case where we are
@@ -29,7 +29,6 @@
#include "mpi.h"
#include "opal/class/opal_list.h"
#include "ompi/errhandler/errhandler.h"
#include "opal/threads/mutex.h"
#include "ompi/file/file.h"
#include "ompi/mca/io/io.h"
#include "ompi/mca/fs/fs.h"
@@ -66,7 +65,8 @@
#define OMPIO_LOCK_ENTIRE_FILE 0x00000080
#define OMPIO_LOCK_NEVER 0x00000100
#define OMPIO_LOCK_NOT_THIS_OP 0x00000200

#define OMPIO_DATAREP_NATIVE 0x00000400
#define OMPIO_COLLECTIVE_OP 0x00000800

#define OMPIO_ROOT 0

@@ -158,7 +158,8 @@ struct ompio_file_t {
ompi_communicator_t *f_comm;
const char *f_filename;
char *f_datarep;
opal_convertor_t *f_convertor;
opal_convertor_t *f_mem_convertor;
opal_convertor_t *f_file_convertor;
opal_info_t *f_info;
int32_t f_flags;
void *f_fs_ptr;
@@ -255,10 +256,16 @@ OMPI_DECLSPEC int mca_common_ompio_file_iwrite_at (ompio_file_t *fh, OMPI_MPI_O
const void *buf, int count, struct ompi_datatype_t *datatype,
ompi_request_t **request);

OMPI_DECLSPEC int mca_common_ompio_file_write_all (ompio_file_t *fh, const void *buf,
int count, struct ompi_datatype_t *datatype,
ompi_status_public_t *status);

OMPI_DECLSPEC int mca_common_ompio_file_write_at_all (ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE offset, const void *buf,
int count, struct ompi_datatype_t *datatype,
ompi_status_public_t *status);

OMPI_DECLSPEC int mca_common_ompio_file_iwrite_all (ompio_file_t *fp, const void *buf,
int count, struct ompi_datatype_t *datatype, ompi_request_t **request);

OMPI_DECLSPEC int mca_common_ompio_file_iwrite_at_all (ompio_file_t *fp, OMPI_MPI_OFFSET_TYPE offset, const void *buf,
int count, struct ompi_datatype_t *datatype, ompi_request_t **request);
@@ -266,7 +273,8 @@ OMPI_DECLSPEC int mca_common_ompio_file_iwrite_at_all (ompio_file_t *fp, OMPI_MP
OMPI_DECLSPEC int mca_common_ompio_build_io_array ( ompio_file_t *fh, int index, int cycles,
size_t bytes_per_cycle, size_t max_data, uint32_t iov_count,
struct iovec *decoded_iov, int *ii, int *jj, size_t *tbw,
size_t *spc );
size_t *spc, mca_common_ompio_io_array_t **io_array,
int *num_io_entries );


OMPI_DECLSPEC int mca_common_ompio_file_read (ompio_file_t *fh, void *buf, int count,
@@ -283,10 +291,16 @@ OMPI_DECLSPEC int mca_common_ompio_file_iread_at (ompio_file_t *fh, OMPI_MPI_OFF
void *buf, int count, struct ompi_datatype_t *datatype,
ompi_request_t **request);

OMPI_DECLSPEC int mca_common_ompio_file_read_all (ompio_file_t *fh, void *buf, int count, struct ompi_datatype_t *datatype,
ompi_status_public_t * status);

OMPI_DECLSPEC int mca_common_ompio_file_read_at_all (ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE offset,
void *buf, int count, struct ompi_datatype_t *datatype,
ompi_status_public_t * status);

OMPI_DECLSPEC int mca_common_ompio_file_iread_all (ompio_file_t *fp, void *buf, int count, struct ompi_datatype_t *datatype,
ompi_request_t **request);

OMPI_DECLSPEC int mca_common_ompio_file_iread_at_all (ompio_file_t *fp, OMPI_MPI_OFFSET_TYPE offset,
void *buf, int count, struct ompi_datatype_t *datatype,
ompi_request_t **request);
@@ -319,6 +333,7 @@ OMPI_DECLSPEC int mca_common_ompio_decode_datatype (struct ompio_file_t *fh,
int count,
const void *buf,
size_t *max_data,
opal_convertor_t *convertor,
struct iovec **iov,
uint32_t *iov_count);

@@ -1303,12 +1303,14 @@ int mca_common_ompio_prepare_to_group(ompio_file_t *fh,
fh->f_comm);
if ( OMPI_SUCCESS != ret ) {
opal_output (1, "mca_common_ompio_prepare_to_group: error in ompi_fcoll_base_coll_allgather_array\n");
free(start_offsets_lens_tmp);
goto exit;
}
end_offsets_tmp = (OMPI_MPI_OFFSET_TYPE* )malloc (fh->f_init_procs_per_group * sizeof(OMPI_MPI_OFFSET_TYPE));
if (NULL == end_offsets_tmp) {
opal_output (1, "OUT OF MEMORY\n");
goto exit;
free(start_offsets_lens_tmp);
return OMPI_ERR_OUT_OF_RESOURCE;
}
for( k = 0 ; k < fh->f_init_procs_per_group; k++){
end_offsets_tmp[k] = start_offsets_lens_tmp[3*k] + start_offsets_lens_tmp[3*k+1];
@@ -1333,14 +1335,12 @@ int mca_common_ompio_prepare_to_group(ompio_file_t *fh,
if (NULL == aggr_bytes_per_group_tmp) {
opal_output (1, "OUT OF MEMORY\n");
ret = OMPI_ERR_OUT_OF_RESOURCE;
free(end_offsets_tmp);
goto exit;
}
decision_list_tmp = (int* )malloc (fh->f_init_num_aggrs * sizeof(int));
if (NULL == decision_list_tmp) {
opal_output (1, "OUT OF MEMORY\n");
ret = OMPI_ERR_OUT_OF_RESOURCE;
free(end_offsets_tmp);
if (NULL != aggr_bytes_per_group_tmp) {
free(aggr_bytes_per_group_tmp);
}
@@ -9,7 +9,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008-2018 University of Houston. All rights reserved.
* Copyright (c) 2008-2019 University of Houston. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@@ -27,18 +27,20 @@
#include "opal/mca/allocator/allocator.h"
#include "opal/mca/allocator/base/base.h"
#include "common_ompio.h"
#include "common_ompio_cuda.h"
#include "common_ompio_buffer.h"


static opal_mutex_t mca_common_ompio_cuda_mutex; /* lock for thread safety */
static opal_mutex_t mca_common_ompio_buffer_mutex; /* lock for thread safety */
static mca_allocator_base_component_t* mca_common_ompio_allocator_component=NULL;
static mca_allocator_base_module_t* mca_common_ompio_allocator=NULL;

static int32_t mca_common_ompio_cuda_init = 0;
//static opal_atomic_int32_t mca_common_ompio_buffer_init = 0;
static int32_t mca_common_ompio_buffer_init = 0;
static int32_t mca_common_ompio_pagesize=4096;
static void* mca_common_ompio_cuda_alloc_seg ( void *ctx, size_t *size );
static void mca_common_ompio_cuda_free_seg ( void *ctx, void *buf );
static void* mca_common_ompio_buffer_alloc_seg ( void *ctx, size_t *size );
static void mca_common_ompio_buffer_free_seg ( void *ctx, void *buf );

#if OPAL_CUDA_SUPPORT
void mca_common_ompio_check_gpu_buf ( ompio_file_t *fh, const void *buf, int *is_gpu,
int *is_managed)
{
@@ -57,8 +59,9 @@ void mca_common_ompio_check_gpu_buf ( ompio_file_t *fh, const void *buf, int *is

return;
}
#endif

static void* mca_common_ompio_cuda_alloc_seg ( void*ctx, size_t *size )
static void* mca_common_ompio_buffer_alloc_seg ( void*ctx, size_t *size )
{
char *buf=NULL;
size_t realsize, numpages;
@@ -67,64 +70,67 @@ static void* mca_common_ompio_cuda_alloc_seg ( void*ctx, size_t *size )
realsize = numpages * mca_common_ompio_pagesize;

buf = malloc ( realsize);
#if OPAL_CUDA_SUPPORT
if ( NULL != buf ) {
mca_common_cuda_register ( ( char *)buf, realsize, NULL );
}
#endif
*size = realsize;
return buf;
}

static void mca_common_ompio_cuda_free_seg ( void *ctx, void *buf )
static void mca_common_ompio_buffer_free_seg ( void *ctx, void *buf )
{
if ( NULL != buf ) {
#if OPAL_CUDA_SUPPORT
mca_common_cuda_unregister ( (char *) buf, NULL );
#endif
free ( buf );
}
return;
}

int mca_common_ompio_cuda_alloc_init ( void )
int mca_common_ompio_buffer_alloc_init ( void )
{
bool thread_safe=true;

if(OPAL_THREAD_ADD_FETCH32(&mca_common_ompio_cuda_init, 1) > 1)
if(OPAL_THREAD_ADD_FETCH32(&mca_common_ompio_buffer_init, 1) > 1)
return OMPI_SUCCESS;

/* initialize static objects */
OBJ_CONSTRUCT(&mca_common_ompio_cuda_mutex, opal_mutex_t);
OBJ_CONSTRUCT(&mca_common_ompio_buffer_mutex, opal_mutex_t);

OPAL_THREAD_LOCK (&mca_common_ompio_cuda_mutex );
OPAL_THREAD_LOCK (&mca_common_ompio_buffer_mutex );
/* lookup name of the allocator to use */
if(NULL == (mca_common_ompio_allocator_component = mca_allocator_component_lookup("basic"))) {
OPAL_THREAD_UNLOCK(&mca_common_ompio_cuda_mutex);
OPAL_THREAD_UNLOCK(&mca_common_ompio_buffer_mutex);
return OMPI_ERR_BUFFER;
}

/* create an instance of the allocator */
mca_common_ompio_allocator = mca_common_ompio_allocator_component->allocator_init(thread_safe,
mca_common_ompio_cuda_alloc_seg,
mca_common_ompio_cuda_free_seg,
mca_common_ompio_buffer_alloc_seg,
mca_common_ompio_buffer_free_seg,
NULL);
if(NULL == mca_common_ompio_allocator) {
OPAL_THREAD_UNLOCK(&mca_common_ompio_cuda_mutex);
OPAL_THREAD_UNLOCK(&mca_common_ompio_buffer_mutex);
return OMPI_ERR_BUFFER;
}

// mca_common_ompio_pagesize = sysconf(_SC_PAGESIZE);
mca_common_ompio_pagesize = opal_getpagesize();

OPAL_THREAD_UNLOCK(&mca_common_ompio_cuda_mutex);
OPAL_THREAD_UNLOCK(&mca_common_ompio_buffer_mutex);
return OMPI_SUCCESS;
}

int mca_common_ompio_cuda_alloc_fini ( void )
int mca_common_ompio_buffer_alloc_fini ( void )
{
if ( NULL != mca_common_ompio_allocator ) {
OPAL_THREAD_LOCK (&mca_common_ompio_cuda_mutex);
OPAL_THREAD_LOCK (&mca_common_ompio_buffer_mutex);
mca_common_ompio_allocator->alc_finalize(mca_common_ompio_allocator);
mca_common_ompio_allocator=NULL;
OPAL_THREAD_UNLOCK (&mca_common_ompio_cuda_mutex);
OBJ_DESTRUCT (&mca_common_ompio_cuda_mutex);
OPAL_THREAD_UNLOCK (&mca_common_ompio_buffer_mutex);
OBJ_DESTRUCT (&mca_common_ompio_buffer_mutex);
}

return OMPI_SUCCESS;
@@ -134,31 +140,31 @@ void *mca_common_ompio_alloc_buf ( ompio_file_t *fh, size_t bufsize )
{
char *tmp=NULL;

if ( !mca_common_ompio_cuda_init ){
mca_common_ompio_cuda_alloc_init ();
if ( !mca_common_ompio_buffer_init ){
mca_common_ompio_buffer_alloc_init ();
}

OPAL_THREAD_LOCK (&mca_common_ompio_cuda_mutex);
OPAL_THREAD_LOCK (&mca_common_ompio_buffer_mutex);
tmp = mca_common_ompio_allocator->alc_alloc (mca_common_ompio_allocator,
bufsize, 0 );
OPAL_THREAD_UNLOCK (&mca_common_ompio_cuda_mutex);
OPAL_THREAD_UNLOCK (&mca_common_ompio_buffer_mutex);
return tmp;
}

void mca_common_ompio_release_buf ( ompio_file_t *fh, void *buf )
{

if ( !mca_common_ompio_cuda_init ){
if ( !mca_common_ompio_buffer_init ){
/* Should not happen. You can not release a buf without
** having it allocated first.
*/
opal_output (1, "error in mca_common_ompio_release_buf: allocator not initialized\n");
}

OPAL_THREAD_LOCK (&mca_common_ompio_cuda_mutex);
OPAL_THREAD_LOCK (&mca_common_ompio_buffer_mutex);
mca_common_ompio_allocator->alc_free (mca_common_ompio_allocator,
buf);
OPAL_THREAD_UNLOCK (&mca_common_ompio_cuda_mutex);
OPAL_THREAD_UNLOCK (&mca_common_ompio_buffer_mutex);

return;
}
@@ -10,7 +10,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008-2018 University of Houston. All rights reserved.
* Copyright (c) 2008-2019 University of Houston. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@@ -22,9 +22,9 @@
#define MCA_COMMON_OMPIO_CUDA_H


#define OMPIO_CUDA_PREPARE_BUF(_fh,_buf,_count,_datatype,_tbuf,_convertor,_max_data,_decoded_iov,_iov_count){ \
opal_convertor_clone ( _fh->f_convertor, _convertor, 0); \
opal_convertor_prepare_for_send ( _convertor, &(_datatype->super), _count, _buf );\
#define OMPIO_PREPARE_BUF(_fh,_buf,_count,_datatype,_tbuf,_convertor,_max_data,_decoded_iov,_iov_count){ \
OBJ_CONSTRUCT( _convertor, opal_convertor_t); \
opal_convertor_copy_and_prepare_for_send ( _fh->f_file_convertor, &(_datatype->super), _count, _buf, CONVERTOR_SEND_CONVERSION, _convertor ); \
opal_convertor_get_packed_size( _convertor, &_max_data ); \
_tbuf = mca_common_ompio_alloc_buf (_fh, _max_data); \
if ( NULL == _tbuf ) { \
@@ -40,11 +40,30 @@
_decoded_iov->iov_len = _max_data; \
_iov_count=1;}

#define OMPIO_PREPARE_READ_BUF(_fh,_buf,_count,_datatype,_tbuf,_convertor,_max_data,_decoded_iov,_iov_count){ \
OBJ_CONSTRUCT( _convertor, opal_convertor_t); \
opal_convertor_copy_and_prepare_for_recv ( _fh->f_file_convertor, &(_datatype->super), _count, _buf, 0, _convertor ); \
opal_convertor_get_packed_size( _convertor, &_max_data ); \
_tbuf = mca_common_ompio_alloc_buf (_fh, _max_data); \
if ( NULL == _tbuf ) { \
opal_output(1, "common_ompio: error allocating memory\n"); \
return OMPI_ERR_OUT_OF_RESOURCE; \
} \
_decoded_iov = (struct iovec *) malloc ( sizeof ( struct iovec )); \
if ( NULL == _decoded_iov ) { \
opal_output(1, "common_ompio: could not allocate memory.\n"); \
return OMPI_ERR_OUT_OF_RESOURCE; \
} \
_decoded_iov->iov_base = _tbuf; \
_decoded_iov->iov_len = _max_data; \
_iov_count=1;}

#if OPAL_CUDA_SUPPORT
void mca_common_ompio_check_gpu_buf ( ompio_file_t *fh, const void *buf,
int *is_gpu, int *is_managed);
int mca_common_ompio_cuda_alloc_init ( void );
int mca_common_ompio_cuda_alloc_fini ( void );
#endif
int mca_common_ompio_buffer_alloc_init ( void );
int mca_common_ompio_buffer_alloc_fini ( void );


void* mca_common_ompio_alloc_buf ( ompio_file_t *fh, size_t bufsize);
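The two macros above implement the staging pattern for non-native data representations. Hand-expanded, the read side looks roughly like this (a condensed sketch assembled from calls that appear in this commit, not literal code from any one function):

    /* 1. Build a receive convertor against the file representation and
          allocate a temporary buffer sized for the packed data: */
    opal_convertor_t conv;
    struct iovec *iov = NULL;
    uint32_t iov_count = 0;
    size_t max_data = 0, pos = 0;
    char *tbuf = NULL;
    OMPIO_PREPARE_READ_BUF(fh, buf, count, datatype, tbuf, &conv,
                           max_data, iov, iov_count);

    /* 2. The fbtl reads the raw (file-representation) bytes into
          iov->iov_base ... */

    /* 3. ... and the convertor turns them into the user's datatype: */
    opal_convertor_unpack(&conv, iov, &iov_count, &pos);
    opal_convertor_cleanup(&conv);
    mca_common_ompio_release_buf(fh, iov->iov_base);

The write side (OMPIO_PREPARE_BUF) mirrors this, calling opal_convertor_pack before the data is handed to the fbtl.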
@@ -75,7 +75,8 @@ int mca_common_ompio_file_open (ompi_communicator_t *comm,
ompio_fh->f_rank = ompi_comm_rank (comm);
ompio_fh->f_size = ompi_comm_size (comm);
remote_arch = opal_local_arch;
ompio_fh->f_convertor = opal_convertor_create (remote_arch, 0);
ompio_fh->f_mem_convertor = opal_convertor_create (remote_arch, 0);
ompio_fh->f_file_convertor = opal_convertor_create (remote_arch, 0);

if ( true == use_sharedfp ) {
ret = ompi_comm_dup (comm, &ompio_fh->f_comm);
@@ -323,17 +324,23 @@ int mca_common_ompio_file_close (ompio_file_t *ompio_fh)
ompio_fh->f_decoded_iov = NULL;
}

if (NULL != ompio_fh->f_convertor) {
free (ompio_fh->f_convertor);
ompio_fh->f_convertor = NULL;
if (NULL != ompio_fh->f_mem_convertor) {
opal_convertor_cleanup (ompio_fh->f_mem_convertor);
free (ompio_fh->f_mem_convertor);
ompio_fh->f_mem_convertor = NULL;
}

if (NULL != ompio_fh->f_file_convertor) {
opal_convertor_cleanup (ompio_fh->f_file_convertor);
free (ompio_fh->f_file_convertor);
ompio_fh->f_file_convertor = NULL;
}

if (NULL != ompio_fh->f_datarep) {
free (ompio_fh->f_datarep);
ompio_fh->f_datarep = NULL;
}


if ( NULL != ompio_fh->f_coll_write_time ) {
free ( ompio_fh->f_coll_write_time );
ompio_fh->f_coll_write_time = NULL;
@@ -387,10 +394,10 @@ int mca_common_ompio_file_get_position (ompio_file_t *fh,
if ( 0 == fh->f_view_extent ||
0 == fh->f_view_size ||
0 == fh->f_etype_size ) {
/* not sure whether we should raise an error here */
*offset = 0;
return OMPI_SUCCESS;
}

/* No. of copies of the entire file view */
off = (fh->f_offset - fh->f_disp)/fh->f_view_extent;

@@ -564,6 +571,7 @@ int mca_common_ompio_decode_datatype (struct ompio_file_t *fh,
int count,
const void *buf,
size_t *max_data,
opal_convertor_t *conv,
struct iovec **iov,
uint32_t *iovec_count)
{
@@ -578,7 +586,7 @@ int mca_common_ompio_decode_datatype (struct ompio_file_t *fh,
size_t temp_data;


opal_convertor_clone (fh->f_convertor, &convertor, 0);
opal_convertor_clone (conv, &convertor, 0);

if (OMPI_SUCCESS != opal_convertor_prepare_for_send (&convertor,
&(datatype->super),
@@ -674,7 +682,8 @@ int mca_common_ompio_decode_datatype (struct ompio_file_t *fh,
}

free (temp_iov);

opal_convertor_cleanup (&convertor);

return OMPI_SUCCESS;
}

@@ -33,12 +33,10 @@

#include "common_ompio.h"
#include "common_ompio_request.h"
#include "common_ompio_buffer.h"
#include <unistd.h>
#include <math.h>

#if OPAL_CUDA_SUPPORT
#include "common_ompio_cuda.h"
#endif

/* Read and write routines are split into two interfaces.
** The
@@ -90,39 +88,52 @@ int mca_common_ompio_file_read (ompio_file_t *fh,
return ret;
}


bool need_to_copy = false;
opal_convertor_t convertor;
#if OPAL_CUDA_SUPPORT
int is_gpu, is_managed;
opal_convertor_t convertor;
mca_common_ompio_check_gpu_buf ( fh, buf, &is_gpu, &is_managed);
if ( is_gpu && !is_managed ) {
need_to_copy = true;
}
#endif

if ( !( fh->f_flags & OMPIO_DATAREP_NATIVE ) &&
!(datatype == &ompi_mpi_byte.dt ||
datatype == &ompi_mpi_char.dt )) {
/* only need to copy if any of these conditions are given:
1. buffer is an unmanaged CUDA buffer (checked above),
2. data representation is anything other than 'native', and
3. datatype is not byte or char (i.e. it does require some actual
work to be done, e.g. for external32).
*/
need_to_copy = true;
}

if ( need_to_copy ) {
char *tbuf=NULL;

OMPIO_CUDA_PREPARE_BUF(fh,buf,count,datatype,tbuf,&convertor,max_data,decoded_iov,iov_count);

}
OMPIO_PREPARE_READ_BUF(fh,buf,count,datatype,tbuf,&convertor,max_data,decoded_iov,iov_count);
}
else {
mca_common_ompio_decode_datatype (fh,
datatype,
count,
buf,
&max_data,
fh->f_mem_convertor,
&decoded_iov,
&iov_count);
}
#else
mca_common_ompio_decode_datatype (fh,
datatype,
count,
buf,
&max_data,
&decoded_iov,
&iov_count);
#endif

if ( 0 < max_data && 0 == fh->f_iov_count ) {
if ( MPI_STATUS_IGNORE != status ) {
status->_ucount = 0;
}
if (NULL != decoded_iov) {
free (decoded_iov);
decoded_iov = NULL;
}
return OMPI_SUCCESS;
}

@@ -152,7 +163,9 @@ int mca_common_ompio_file_read (ompio_file_t *fh,
&i,
&j,
&total_bytes_read,
&spc);
&spc,
&fh->f_io_array,
&fh->f_num_of_io_entries);

if (fh->f_num_of_io_entries) {
ret_code = fh->f_fbtl->fbtl_preadv (fh);
@@ -168,15 +181,14 @@ int mca_common_ompio_file_read (ompio_file_t *fh,
}
}

#if OPAL_CUDA_SUPPORT
if ( is_gpu && !is_managed ) {
if ( need_to_copy ) {
size_t pos=0;

opal_convertor_unpack (&convertor, decoded_iov, &iov_count, &pos );
opal_convertor_cleanup (&convertor);
mca_common_ompio_release_buf (fh, decoded_iov->iov_base);
}
#endif

if (NULL != decoded_iov) {
free (decoded_iov);
decoded_iov = NULL;
@@ -255,13 +267,32 @@ int mca_common_ompio_file_iread (ompio_file_t *fh,
int i = 0; /* index into the decoded iovec of the buffer */
int j = 0; /* index into the file view iovec */

bool need_to_copy = false;

#if OPAL_CUDA_SUPPORT
int is_gpu, is_managed;
mca_common_ompio_check_gpu_buf ( fh, buf, &is_gpu, &is_managed);
if ( is_gpu && !is_managed ) {
need_to_copy = true;
}
#endif

if ( !( fh->f_flags & OMPIO_DATAREP_NATIVE ) &&
!(datatype == &ompi_mpi_byte.dt ||
datatype == &ompi_mpi_char.dt )) {
/* only need to copy if any of these conditions are given:
1. buffer is an unmanaged CUDA buffer (checked above),
2. data representation is anything other than 'native', and
3. datatype is not byte or char (i.e. it does require some actual
work to be done, e.g. for external32).
*/
need_to_copy = true;
}

if ( need_to_copy ) {
char *tbuf=NULL;

OMPIO_CUDA_PREPARE_BUF(fh,buf,count,datatype,tbuf,&ompio_req->req_convertor,max_data,decoded_iov,iov_count);
OMPIO_PREPARE_READ_BUF(fh,buf,count,datatype,tbuf,&ompio_req->req_convertor,max_data,decoded_iov,iov_count);

ompio_req->req_tbuf = tbuf;
ompio_req->req_size = max_data;
@@ -272,23 +303,21 @@ int mca_common_ompio_file_iread (ompio_file_t *fh,
count,
buf,
&max_data,
fh->f_mem_convertor,
&decoded_iov,
&iov_count);
}
#else
mca_common_ompio_decode_datatype (fh,
datatype,
count,
buf,
&max_data,
&decoded_iov,
&iov_count);
#endif

if ( 0 < max_data && 0 == fh->f_iov_count ) {
ompio_req->req_ompi.req_status.MPI_ERROR = OMPI_SUCCESS;
ompio_req->req_ompi.req_status._ucount = 0;
ompi_request_complete (&ompio_req->req_ompi, false);
*request = (ompi_request_t *) ompio_req;
if (NULL != decoded_iov) {
free (decoded_iov);
decoded_iov = NULL;
}

return OMPI_SUCCESS;
}

@@ -305,7 +334,9 @@ int mca_common_ompio_file_iread (ompio_file_t *fh,
&i,
&j,
&total_bytes_read,
&spc);
&spc,
&fh->f_io_array,
&fh->f_num_of_io_entries);

if (fh->f_num_of_io_entries) {
fh->f_fbtl->fbtl_ipreadv (fh, (ompi_request_t *) ompio_req);
@@ -372,6 +403,62 @@ int mca_common_ompio_file_iread_at (ompio_file_t *fh,


/* Infrastructure for collective operations */
int mca_common_ompio_file_read_all (ompio_file_t *fh,
void *buf,
int count,
struct ompi_datatype_t *datatype,
ompi_status_public_t * status)
{
int ret = OMPI_SUCCESS;


if ( !( fh->f_flags & OMPIO_DATAREP_NATIVE ) &&
!(datatype == &ompi_mpi_byte.dt ||
datatype == &ompi_mpi_char.dt )) {
/* No need to check for GPU buffer for collective I/O.
Most algorithms copy data from aggregators, and send/recv
to/from GPU buffers works if ompi was compiled with GPU support.

If the individual fcoll component is used: there are no aggregators
in that concept. However, since they call common_ompio_file_read,
CUDA buffers are handled by that routine.

Thus, we only check for
1. data representation is anything other than 'native', and
2. datatype is not byte or char (i.e. it does require some actual
work to be done, e.g. for external32).
*/
size_t pos=0, max_data=0;
char *tbuf=NULL;
opal_convertor_t convertor;
struct iovec *decoded_iov = NULL;
uint32_t iov_count = 0;

OMPIO_PREPARE_READ_BUF(fh,buf,count,datatype,tbuf,&convertor,max_data,decoded_iov,iov_count);
ret = fh->f_fcoll->fcoll_file_read_all (fh,
decoded_iov->iov_base,
decoded_iov->iov_len,
MPI_BYTE,
status);
opal_convertor_unpack (&convertor, decoded_iov, &iov_count, &pos );

opal_convertor_cleanup (&convertor);
mca_common_ompio_release_buf (fh, decoded_iov->iov_base);
if (NULL != decoded_iov) {
free (decoded_iov);
decoded_iov = NULL;
}
}
else {
ret = fh->f_fcoll->fcoll_file_read_all (fh,
buf,
count,
datatype,
status);
}
return ret;
}

int mca_common_ompio_file_read_at_all (ompio_file_t *fh,
OMPI_MPI_OFFSET_TYPE offset,
void *buf,
@@ -384,16 +471,41 @@ int mca_common_ompio_file_read_at_all (ompio_file_t *fh,
mca_common_ompio_file_get_position (fh, &prev_offset );

mca_common_ompio_set_explicit_offset (fh, offset);
ret = fh->f_fcoll->fcoll_file_read_all (fh,
buf,
count,
datatype,
status);

ret = mca_common_ompio_file_read_all (fh,
buf,
count,
datatype,
status);

mca_common_ompio_set_explicit_offset (fh, prev_offset);
return ret;
}

int mca_common_ompio_file_iread_all (ompio_file_t *fp,
void *buf,
int count,
struct ompi_datatype_t *datatype,
ompi_request_t **request)
{
int ret = OMPI_SUCCESS;

if ( NULL != fp->f_fcoll->fcoll_file_iread_all ) {
ret = fp->f_fcoll->fcoll_file_iread_all (fp,
buf,
count,
datatype,
request);
}
else {
/* this fcoll component does not support non-blocking
collective I/O operations. We fake it with
individual non-blocking I/O operations. */
ret = mca_common_ompio_file_iread ( fp, buf, count, datatype, request );
}

return ret;
}

int mca_common_ompio_file_iread_at_all (ompio_file_t *fp,
OMPI_MPI_OFFSET_TYPE offset,
void *buf,
@@ -407,25 +519,17 @@ int mca_common_ompio_file_iread_at_all (ompio_file_t *fp,
mca_common_ompio_file_get_position (fp, &prev_offset );
mca_common_ompio_set_explicit_offset (fp, offset);

if ( NULL != fp->f_fcoll->fcoll_file_iread_all ) {
ret = fp->f_fcoll->fcoll_file_iread_all (fp,
buf,
count,
datatype,
request);
}
else {
/* this fcoll component does not support non-blocking
collective I/O operations. We fake it with
individual non-blocking I/O operations. */
ret = mca_common_ompio_file_iread ( fp, buf, count, datatype, request );
}


ret = mca_common_ompio_file_iread_all (fp,
buf,
count,
datatype,
request);

mca_common_ompio_set_explicit_offset (fp, prev_offset);
return ret;
}


int mca_common_ompio_set_explicit_offset (ompio_file_t *fh,
OMPI_MPI_OFFSET_TYPE offset)
{
@@ -9,7 +9,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008-2018 University of Houston. All rights reserved.
* Copyright (c) 2008-2019 University of Houston. All rights reserved.
* Copyright (c) 2017-2018 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2017 IBM Corporation. All rights reserved.
@@ -91,6 +91,12 @@ int mca_common_ompio_set_view (ompio_file_t *fh,
fh->f_datarep = NULL;
}

if (NULL != fh->f_file_convertor) {
opal_convertor_cleanup (fh->f_file_convertor);
free (fh->f_file_convertor);
fh->f_file_convertor = NULL;
}

/* Reset the flags first */
if ( fh->f_flags & OMPIO_CONTIGUOUS_FVIEW ) {
fh->f_flags &= ~OMPIO_CONTIGUOUS_FVIEW;
@@ -98,9 +104,24 @@ int mca_common_ompio_set_view (ompio_file_t *fh,
if ( fh->f_flags & OMPIO_UNIFORM_FVIEW ) {
fh->f_flags &= ~OMPIO_UNIFORM_FVIEW;
}
if ( fh->f_flags & OMPIO_DATAREP_NATIVE ) {
fh->f_flags &= ~OMPIO_DATAREP_NATIVE;
}
fh->f_datarep = strdup (datarep);
datatype_duplicate (filetype, &fh->f_orig_filetype );

if ( !(strcmp(datarep, "external32") && strcmp(datarep, "EXTERNAL32"))) {
fh->f_file_convertor = malloc ( sizeof(struct opal_convertor_t) );
if ( NULL == fh->f_file_convertor ) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
opal_convertor_clone (ompi_mpi_external32_convertor, fh->f_file_convertor, 0);
}
else {
fh->f_file_convertor = opal_convertor_create (opal_local_arch, 0);
fh->f_flags |= OMPIO_DATAREP_NATIVE;
}

datatype_duplicate (filetype, &fh->f_orig_filetype );
opal_datatype_get_extent(&filetype->super, &lb, &ftype_extent);
opal_datatype_type_size (&filetype->super, &ftype_size);

@@ -129,6 +150,7 @@ int mca_common_ompio_set_view (ompio_file_t *fh,
1,
NULL,
&max_data,
fh->f_file_convertor,
&fh->f_decoded_iov,
&fh->f_iov_count);

@@ -31,13 +31,10 @@

#include "common_ompio.h"
#include "common_ompio_request.h"
#include "common_ompio_buffer.h"
#include <unistd.h>
#include <math.h>

#if OPAL_CUDA_SUPPORT
#include "common_ompio_cuda.h"
#endif

int mca_common_ompio_file_write (ompio_file_t *fh,
const void *buf,
int count,
@@ -72,16 +69,34 @@ int mca_common_ompio_file_write (ompio_file_t *fh,
return ret;
}

bool need_to_copy = false;

#if OPAL_CUDA_SUPPORT
int is_gpu, is_managed;
mca_common_ompio_check_gpu_buf ( fh, buf, &is_gpu, &is_managed);
if ( is_gpu && !is_managed ) {
need_to_copy = true;
}
#endif

if ( !( fh->f_flags & OMPIO_DATAREP_NATIVE ) &&
!(datatype == &ompi_mpi_byte.dt ||
datatype == &ompi_mpi_char.dt )) {
/* only need to copy if any of these conditions are given:
1. buffer is an unmanaged CUDA buffer (checked above),
2. data representation is anything other than 'native', and
3. datatype is not byte or char (i.e. it does require some actual
work to be done, e.g. for external32).
*/
need_to_copy = true;
}

if ( need_to_copy ) {
size_t pos=0;
char *tbuf=NULL;
opal_convertor_t convertor;

OMPIO_CUDA_PREPARE_BUF(fh,buf,count,datatype,tbuf,&convertor,max_data,decoded_iov,iov_count);

OMPIO_PREPARE_BUF(fh,buf,count,datatype,tbuf,&convertor,max_data,decoded_iov,iov_count);
opal_convertor_pack (&convertor, decoded_iov, &iov_count, &pos );
opal_convertor_cleanup ( &convertor);
}
@@ -91,22 +106,19 @@ int mca_common_ompio_file_write (ompio_file_t *fh,
count,
buf,
&max_data,
fh->f_mem_convertor,
&decoded_iov,
&iov_count);
}
#else
mca_common_ompio_decode_datatype (fh,
datatype,
count,
buf,
&max_data,
&decoded_iov,
&iov_count);
#endif

if ( 0 < max_data && 0 == fh->f_iov_count ) {
if ( MPI_STATUS_IGNORE != status ) {
status->_ucount = 0;
}
if (NULL != decoded_iov) {
free (decoded_iov);
decoded_iov = NULL;
}
return OMPI_SUCCESS;
}

@@ -134,7 +146,9 @@ int mca_common_ompio_file_write (ompio_file_t *fh,
&i,
&j,
&total_bytes_written,
&spc);
&spc,
&fh->f_io_array,
&fh->f_num_of_io_entries);

if (fh->f_num_of_io_entries) {
ret_code =fh->f_fbtl->fbtl_pwritev (fh);
@@ -149,11 +163,11 @@ int mca_common_ompio_file_write (ompio_file_t *fh,
fh->f_io_array = NULL;
}
}
#if OPAL_CUDA_SUPPORT
if ( is_gpu && !is_managed ) {

if ( need_to_copy ) {
mca_common_ompio_release_buf (fh, decoded_iov->iov_base);
}
#endif


if (NULL != decoded_iov) {
free (decoded_iov);
@@ -228,16 +242,34 @@ int mca_common_ompio_file_iwrite (ompio_file_t *fh,
int i = 0; /* index into the decoded iovec of the buffer */
int j = 0; /* index into the file view iovec */

bool need_to_copy = false;

#if OPAL_CUDA_SUPPORT
int is_gpu, is_managed;
mca_common_ompio_check_gpu_buf ( fh, buf, &is_gpu, &is_managed);
if ( is_gpu && !is_managed ) {
need_to_copy = true;
}
#endif

if ( !( fh->f_flags & OMPIO_DATAREP_NATIVE ) &&
!(datatype == &ompi_mpi_byte.dt ||
datatype == &ompi_mpi_char.dt )) {
/* only need to copy if any of these conditions are given:
1. buffer is an unmanaged CUDA buffer (checked above),
2. data representation is anything other than 'native', and
3. datatype is not byte or char (i.e. it does require some actual
work to be done, e.g. for external32).
*/
need_to_copy = true;
}

if ( need_to_copy ) {
size_t pos=0;
char *tbuf=NULL;
opal_convertor_t convertor;

OMPIO_CUDA_PREPARE_BUF(fh,buf,count,datatype,tbuf,&convertor,max_data,decoded_iov,iov_count);

OMPIO_PREPARE_BUF(fh,buf,count,datatype,tbuf,&convertor,max_data,decoded_iov,iov_count);
opal_convertor_pack (&convertor, decoded_iov, &iov_count, &pos );
opal_convertor_cleanup (&convertor);

@@ -250,23 +282,21 @@ int mca_common_ompio_file_iwrite (ompio_file_t *fh,
count,
buf,
&max_data,
fh->f_mem_convertor,
&decoded_iov,
&iov_count);
}
#else
mca_common_ompio_decode_datatype (fh,
datatype,
count,
buf,
&max_data,
&decoded_iov,
&iov_count);
#endif

if ( 0 < max_data && 0 == fh->f_iov_count ) {
ompio_req->req_ompi.req_status.MPI_ERROR = OMPI_SUCCESS;
ompio_req->req_ompi.req_status._ucount = 0;
ompi_request_complete (&ompio_req->req_ompi, false);
*request = (ompi_request_t *) ompio_req;
if (NULL != decoded_iov) {
free (decoded_iov);
decoded_iov = NULL;
}

return OMPI_SUCCESS;
}

@@ -283,7 +313,9 @@ int mca_common_ompio_file_iwrite (ompio_file_t *fh,
&i,
&j,
&total_bytes_written,
&spc);
&spc,
&fh->f_io_array,
&fh->f_num_of_io_entries);

if (fh->f_num_of_io_entries) {
fh->f_fbtl->fbtl_ipwritev (fh, (ompi_request_t *) ompio_req);
@@ -348,6 +380,62 @@ int mca_common_ompio_file_iwrite_at (ompio_file_t *fh,

/* Collective operations */
/******************************************************************/
int mca_common_ompio_file_write_all (ompio_file_t *fh,
const void *buf,
int count,
struct ompi_datatype_t *datatype,
ompi_status_public_t *status)
{
int ret = OMPI_SUCCESS;

if ( !( fh->f_flags & OMPIO_DATAREP_NATIVE ) &&
!(datatype == &ompi_mpi_byte.dt ||
datatype == &ompi_mpi_char.dt )) {
/* No need to check for GPU buffer for collective I/O.
Most algorithms first copy data to aggregators, and send/recv
to/from GPU buffers works if ompi was compiled with GPU support.

If the individual fcoll component is used: there are no aggregators
in that concept. However, since they call common_ompio_file_write,
CUDA buffers are handled by that routine.

Thus, we only check for
1. data representation is anything other than 'native', and
2. datatype is not byte or char (i.e. it does require some actual
work to be done, e.g. for external32).
*/
size_t pos=0, max_data=0;
char *tbuf=NULL;
opal_convertor_t convertor;
struct iovec *decoded_iov = NULL;
uint32_t iov_count = 0;

OMPIO_PREPARE_BUF(fh,buf,count,datatype,tbuf,&convertor,max_data,decoded_iov,iov_count);
opal_convertor_pack (&convertor, decoded_iov, &iov_count, &pos );
opal_convertor_cleanup ( &convertor);

ret = fh->f_fcoll->fcoll_file_write_all (fh,
decoded_iov->iov_base,
decoded_iov->iov_len,
MPI_BYTE,
status);


mca_common_ompio_release_buf (fh, decoded_iov->iov_base);
if (NULL != decoded_iov) {
free (decoded_iov);
decoded_iov = NULL;
}
}
else {
ret = fh->f_fcoll->fcoll_file_write_all (fh,
buf,
count,
datatype,
status);
}
return ret;
}

int mca_common_ompio_file_write_at_all (ompio_file_t *fh,
OMPI_MPI_OFFSET_TYPE offset,
@@ -361,30 +449,23 @@ int mca_common_ompio_file_write_at_all (ompio_file_t *fh,
mca_common_ompio_file_get_position (fh, &prev_offset );

mca_common_ompio_set_explicit_offset (fh, offset);
ret = fh->f_fcoll->fcoll_file_write_all (fh,
buf,
count,
datatype,
status);

ret = mca_common_ompio_file_write_all (fh,
buf,
count,
datatype,
status);

mca_common_ompio_set_explicit_offset (fh, prev_offset);
return ret;
}

int mca_common_ompio_file_iwrite_at_all (ompio_file_t *fp,
OMPI_MPI_OFFSET_TYPE offset,
const void *buf,
int count,
struct ompi_datatype_t *datatype,
ompi_request_t **request)
int mca_common_ompio_file_iwrite_all (ompio_file_t *fp,
const void *buf,
int count,
struct ompi_datatype_t *datatype,
ompi_request_t **request)
{

int ret = OMPI_SUCCESS;
OMPI_MPI_OFFSET_TYPE prev_offset;

mca_common_ompio_file_get_position (fp, &prev_offset );

mca_common_ompio_set_explicit_offset (fp, offset);

if ( NULL != fp->f_fcoll->fcoll_file_iwrite_all ) {
ret = fp->f_fcoll->fcoll_file_iwrite_all (fp,
@@ -400,18 +481,40 @@ int mca_common_ompio_file_iwrite_at_all (ompio_file_t *fp,
ret = mca_common_ompio_file_iwrite ( fp, buf, count, datatype, request );
}

return ret;
}


int mca_common_ompio_file_iwrite_at_all (ompio_file_t *fp,
OMPI_MPI_OFFSET_TYPE offset,
const void *buf,
int count,
struct ompi_datatype_t *datatype,
ompi_request_t **request)
{

int ret = OMPI_SUCCESS;
OMPI_MPI_OFFSET_TYPE prev_offset;

mca_common_ompio_file_get_position (fp, &prev_offset );
mca_common_ompio_set_explicit_offset (fp, offset);

ret = mca_common_ompio_file_iwrite_all ( fp, buf, count, datatype, request );

mca_common_ompio_set_explicit_offset (fp, prev_offset);
return ret;
}



/* Helper function used by both read and write operations */
/**************************************************************/

int mca_common_ompio_build_io_array ( ompio_file_t *fh, int index, int cycles,
size_t bytes_per_cycle, size_t max_data, uint32_t iov_count,
struct iovec *decoded_iov, int *ii, int *jj, size_t *tbw,
size_t *spc)
size_t *spc, mca_common_ompio_io_array_t **io_array,
int *num_io_entries)
{
ptrdiff_t disp;
int block = 1;
@@ -424,7 +527,9 @@ int mca_common_ompio_build_io_array ( ompio_file_t *fh, int index, int cycles,
int k = 0; /* index into the io_array */
int i = *ii;
int j = *jj;

mca_common_ompio_io_array_t *f_io_array=NULL;
int f_num_io_entries=0;

sum_previous_length = fh->f_position_in_file_view;

if ((index == cycles-1) && (max_data % bytes_per_cycle)) {
@@ -434,9 +539,9 @@ int mca_common_ompio_build_io_array ( ompio_file_t *fh, int index, int cycles,
bytes_to_write_in_cycle = bytes_per_cycle;
}

fh->f_io_array = (mca_common_ompio_io_array_t *)malloc
f_io_array = (mca_common_ompio_io_array_t *)malloc
(OMPIO_IOVEC_INITIAL_SIZE * sizeof (mca_common_ompio_io_array_t));
if (NULL == fh->f_io_array) {
if (NULL == f_io_array) {
opal_output(1, "OUT OF MEMORY\n");
return OMPI_ERR_OUT_OF_RESOURCE;
}
@@ -445,10 +550,10 @@ int mca_common_ompio_build_io_array ( ompio_file_t *fh, int index, int cycles,
/* reallocate if needed */
if (OMPIO_IOVEC_INITIAL_SIZE*block <= k) {
block ++;
fh->f_io_array = (mca_common_ompio_io_array_t *)realloc
(fh->f_io_array, OMPIO_IOVEC_INITIAL_SIZE *
f_io_array = (mca_common_ompio_io_array_t *)realloc
(f_io_array, OMPIO_IOVEC_INITIAL_SIZE *
block * sizeof (mca_common_ompio_io_array_t));
if (NULL == fh->f_io_array) {
if (NULL == f_io_array) {
opal_output(1, "OUT OF MEMORY\n");
return OMPI_ERR_OUT_OF_RESOURCE;
}
@@ -462,15 +567,15 @@ int mca_common_ompio_build_io_array ( ompio_file_t *fh, int index, int cycles,

disp = (ptrdiff_t)decoded_iov[i].iov_base +
(total_bytes_written - sum_previous_counts);
fh->f_io_array[k].memory_address = (IOVBASE_TYPE *)disp;
f_io_array[k].memory_address = (IOVBASE_TYPE *)disp;

if (decoded_iov[i].iov_len -
(total_bytes_written - sum_previous_counts) >=
bytes_to_write_in_cycle) {
fh->f_io_array[k].length = bytes_to_write_in_cycle;
f_io_array[k].length = bytes_to_write_in_cycle;
}
else {
fh->f_io_array[k].length = decoded_iov[i].iov_len -
f_io_array[k].length = decoded_iov[i].iov_len -
(total_bytes_written - sum_previous_counts);
}

@@ -492,36 +597,36 @@ int mca_common_ompio_build_io_array ( ompio_file_t *fh, int index, int cycles,

disp = (ptrdiff_t)fh->f_decoded_iov[j].iov_base +
(fh->f_total_bytes - sum_previous_length);
fh->f_io_array[k].offset = (IOVBASE_TYPE *)(intptr_t)(disp + fh->f_offset);
f_io_array[k].offset = (IOVBASE_TYPE *)(intptr_t)(disp + fh->f_offset);

if (! (fh->f_flags & OMPIO_CONTIGUOUS_FVIEW)) {
if (fh->f_decoded_iov[j].iov_len -
(fh->f_total_bytes - sum_previous_length)
< fh->f_io_array[k].length) {
fh->f_io_array[k].length = fh->f_decoded_iov[j].iov_len -
< f_io_array[k].length) {
f_io_array[k].length = fh->f_decoded_iov[j].iov_len -
(fh->f_total_bytes - sum_previous_length);
}
}

total_bytes_written += fh->f_io_array[k].length;
fh->f_total_bytes += fh->f_io_array[k].length;
bytes_to_write_in_cycle -= fh->f_io_array[k].length;
total_bytes_written += f_io_array[k].length;
fh->f_total_bytes += f_io_array[k].length;
bytes_to_write_in_cycle -= f_io_array[k].length;
k = k + 1;
}
fh->f_position_in_file_view = sum_previous_length;
fh->f_index_in_file_view = j;
fh->f_num_of_io_entries = k;
f_num_io_entries = k;

#if 0
if (fh->f_rank == 0) {
int d;
printf("*************************** %d\n", fh->f_num_of_io_entries);
printf("*************************** %d\n", f_num_io_entries);

for (d=0 ; d<fh->f_num_of_io_entries ; d++) {
for (d=0 ; d<f_num_io_entries ; d++) {
printf(" ADDRESS: %p OFFSET: %p LENGTH: %d prev_count=%ld prev_length=%ld\n",
fh->f_io_array[d].memory_address,
fh->f_io_array[d].offset,
fh->f_io_array[d].length,
f_io_array[d].memory_address,
f_io_array[d].offset,
f_io_array[d].length,
sum_previous_counts, sum_previous_length);
}
}
@@ -530,7 +635,9 @@ int mca_common_ompio_build_io_array ( ompio_file_t *fh, int index, int cycles,
*jj = j;
*tbw = total_bytes_written;
*spc = sum_previous_counts;

*io_array = f_io_array;
*num_io_entries = f_num_io_entries;

return OMPI_SUCCESS;
}

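Note on the mca_common_ompio_build_io_array change above: instead of writing the constructed array straight into fh->f_io_array, the function now returns it through the two new output parameters. Existing callers simply pass the file-handle fields back in, as seen elsewhere in this commit:

    ret = mca_common_ompio_build_io_array (fh, index, cycles,
                                           bytes_per_cycle, max_data, iov_count,
                                           decoded_iov, &i, &j,
                                           &total_bytes_written, &spc,
                                           &fh->f_io_array,
                                           &fh->f_num_of_io_entries);

Decoupling the array from the file handle presumably allows callers to build private I/O arrays without clobbering the handle's state.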
@@ -10,7 +10,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008-2018 University of Houston. All rights reserved.
* Copyright (c) 2008-2019 University of Houston. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@@ -19,9 +19,7 @@
*/

#include "common_ompio_request.h"
#if OPAL_CUDA_SUPPORT
#include "common_ompio_cuda.h"
#endif
#include "common_ompio_buffer.h"

static void mca_common_ompio_request_construct(mca_ompio_request_t* req);
static void mca_common_ompio_request_destruct(mca_ompio_request_t *req);
@@ -37,7 +35,6 @@ opal_list_t mca_common_ompio_pending_requests = {{0}};
static int mca_common_ompio_request_free ( struct ompi_request_t **req)
{
mca_ompio_request_t *ompio_req = ( mca_ompio_request_t *)*req;
#if OPAL_CUDA_SUPPORT
if ( NULL != ompio_req->req_tbuf ) {
if ( MCA_OMPIO_REQUEST_READ == ompio_req->req_type ){
struct iovec decoded_iov;
@@ -50,7 +47,6 @@ static int mca_common_ompio_request_free ( struct ompi_request_t **req)
}
mca_common_ompio_release_buf ( NULL, ompio_req->req_tbuf );
}
#endif
if ( NULL != ompio_req->req_free_fn ) {
ompio_req->req_free_fn (ompio_req );
}
@@ -77,10 +73,8 @@ void mca_common_ompio_request_construct(mca_ompio_request_t* req)
req->req_ompi.req_cancel = mca_common_ompio_request_cancel;
req->req_ompi.req_type = OMPI_REQUEST_IO;
req->req_data = NULL;
#if OPAL_CUDA_SUPPORT
req->req_tbuf = NULL;
req->req_size = 0;
#endif
req->req_progress_fn = NULL;
req->req_free_fn = NULL;

@@ -10,7 +10,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008-2018 University of Houston. All rights reserved.
* Copyright (c) 2008-2019 University of Houston. All rights reserved.
* Copyright (c) 2018 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
@@ -52,11 +52,9 @@ struct mca_ompio_request_t {
mca_ompio_request_type_t req_type;
void *req_data;
opal_list_item_t req_item;
#if OPAL_CUDA_SUPPORT
void *req_tbuf;
size_t req_size;
opal_convertor_t req_convertor;
#endif
mca_fbtl_base_module_progress_fn_t req_progress_fn;
mca_fbtl_base_module_request_free_fn_t req_free_fn;
};
@@ -9,7 +9,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008-2018 University of Houston. All rights reserved.
* Copyright (c) 2008-2020 University of Houston. All rights reserved.
* Copyright (c) 2018 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
@@ -29,6 +29,11 @@
#include "ompi/mca/common/ompio/common_ompio_request.h"

extern int mca_fbtl_posix_priority;
extern bool mca_fbtl_posix_read_datasieving;
extern bool mca_fbtl_posix_write_datasieving;
extern size_t mca_fbtl_posix_max_block_size;
extern size_t mca_fbtl_posix_max_gap_size;
extern size_t mca_fbtl_posix_max_tmpbuf_size;

BEGIN_C_DECLS

@@ -10,7 +10,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008-2018 University of Houston. All rights reserved.
* Copyright (c) 2008-2020 University of Houston. All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
@@ -37,6 +37,15 @@ const char *mca_fbtl_posix_component_version_string =
"OMPI/MPI posix FBTL MCA component version " OMPI_VERSION;

int mca_fbtl_posix_priority = 10;
bool mca_fbtl_posix_read_datasieving = true;
bool mca_fbtl_posix_write_datasieving = true;
size_t mca_fbtl_posix_max_block_size = 1048576; // 1 MB
size_t mca_fbtl_posix_max_gap_size = 4096; // size of a block in many Linux file systems
size_t mca_fbtl_posix_max_tmpbuf_size = 67108864; // 64 MB
/*
* Private functions
*/
static int register_component(void);

/*
* Instantiate the public struct with all of our public information
@@ -54,6 +63,7 @@ mca_fbtl_base_component_2_0_0_t mca_fbtl_posix_component = {
.mca_component_name = "posix",
MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION,
OMPI_RELEASE_VERSION),
.mca_register_component_params = register_component,
},
.fbtlm_data = {
/* This component is checkpointable */
@@ -63,3 +73,62 @@ mca_fbtl_base_component_2_0_0_t mca_fbtl_posix_component = {
.fbtlm_file_query = mca_fbtl_posix_component_file_query, /* get priority and actions */
.fbtlm_file_unquery = mca_fbtl_posix_component_file_unquery, /* undo what was done by previous function */
};

static int register_component(void)
{
mca_fbtl_posix_priority = 10;
(void) mca_base_component_var_register(&mca_fbtl_posix_component.fbtlm_version,
"priority", "Priority of the fbtl posix component",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_fbtl_posix_priority);

mca_fbtl_posix_max_block_size = 1048576;
(void) mca_base_component_var_register(&mca_fbtl_posix_component.fbtlm_version,
"max_block_size", "Maximum average size in bytes of a data block in an iovec for data sieving. "
"An average block size larger than this parameter will disable data sieving. Default: 1048576 bytes.",
MCA_BASE_VAR_TYPE_SIZE_T, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_fbtl_posix_max_block_size );

mca_fbtl_posix_max_gap_size = 4096;
(void) mca_base_component_var_register(&mca_fbtl_posix_component.fbtlm_version,
"max_gap_size", "Maximum average gap size between two blocks in an iovec for data sieving. "
"An average gap size larger than this parameter will disable data sieving. Default: 4096 bytes.",
MCA_BASE_VAR_TYPE_SIZE_T, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_fbtl_posix_max_gap_size );

mca_fbtl_posix_max_tmpbuf_size = 67108864;
(void) mca_base_component_var_register(&mca_fbtl_posix_component.fbtlm_version,
"max_tmpbuf_size", "Maximum size of the temporary buffer used for data sieving in bytes. "
"Default: 67108864 (64 MB).",
MCA_BASE_VAR_TYPE_SIZE_T, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_fbtl_posix_max_tmpbuf_size );

mca_fbtl_posix_read_datasieving = true;
(void) mca_base_component_var_register(&mca_fbtl_posix_component.fbtlm_version,
"read_datasieving", "Parameter indicating whether to perform data sieving for read operations. "
"Default: true.",
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_fbtl_posix_read_datasieving );

mca_fbtl_posix_write_datasieving = true;
(void) mca_base_component_var_register(&mca_fbtl_posix_component.fbtlm_version,
"write_datasieving", "Parameter indicating whether to perform data sieving for write operations. "
"Default: true.",
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_fbtl_posix_write_datasieving );


return OMPI_SUCCESS;
}
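The parameters registered above can be tuned at run time without rebuilding, using the standard Open MPI MCA mechanism; the full variable names combine the framework and component, e.g. fbtl_posix_read_datasieving. A command-line sketch (the application name is illustrative):

    mpirun --mca fbtl_posix_read_datasieving false \
           --mca fbtl_posix_max_block_size 4194304 \
           -np 8 ./my_mpiio_app

    ompi_info --param fbtl posix --level 9   # list the registered defaults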
@ -9,7 +9,7 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2008-2017 University of Houston. All rights reserved.
|
||||
* Copyright (c) 2008-2020 University of Houston. All rights reserved.
|
||||
* Copyright (c) 2015-2018 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -28,100 +28,260 @@
|
||||
#include "ompi/constants.h"
|
||||
#include "ompi/mca/fbtl/fbtl.h"
|
||||
|
||||
|
||||
static ssize_t mca_fbtl_posix_preadv_datasieving (ompio_file_t *fh);
|
||||
static ssize_t mca_fbtl_posix_preadv_generic (ompio_file_t *fh);
|
||||
|
||||
ssize_t mca_fbtl_posix_preadv (ompio_file_t *fh )
|
||||
{
|
||||
/*int *fp = NULL;*/
|
||||
int i, block=1, ret;
|
||||
struct iovec *iov = NULL;
|
||||
int iov_count = 0;
|
||||
OMPI_MPI_OFFSET_TYPE iov_offset = 0;
|
||||
ssize_t bytes_read=0, ret_code=0;
|
||||
struct flock lock;
|
||||
off_t total_length, end_offset=0;
|
||||
int ret;
|
||||
|
||||
if (NULL == fh->f_io_array) {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
if ( fh->f_num_of_io_entries > 1 ) {
|
||||
bool do_data_sieving = true;
|
||||
|
||||
iov = (struct iovec *) malloc
|
||||
(OMPIO_IOVEC_INITIAL_SIZE * sizeof (struct iovec));
|
||||
size_t avg_gap_size=0;
|
||||
size_t avg_block_size = 0;
|
||||
off_t prev_offset = (off_t)fh->f_io_array[0].offset;
|
||||
int i;
|
||||
for ( i=0; i< fh->f_num_of_io_entries; i++ ) {
|
||||
avg_block_size += fh->f_io_array[i].length;
|
||||
avg_gap_size += (size_t)((off_t)fh->f_io_array[i].offset - prev_offset);
|
||||
prev_offset = (off_t)fh->f_io_array[i].offset;
|
||||
}
|
||||
avg_block_size = avg_block_size / fh->f_num_of_io_entries;
|
||||
avg_gap_size = avg_gap_size / fh->f_num_of_io_entries;
|
||||
|
||||
if ( false == mca_fbtl_posix_read_datasieving ||
|
||||
0 == avg_gap_size ||
|
||||
avg_block_size > mca_fbtl_posix_max_block_size ||
|
||||
avg_gap_size > mca_fbtl_posix_max_gap_size ) {
|
||||
do_data_sieving = false;
|
||||
}
|
||||
|
||||
if ( do_data_sieving) {
|
||||
return mca_fbtl_posix_preadv_datasieving (fh);
|
||||
}
|
||||
else {
|
||||
return mca_fbtl_posix_preadv_generic (fh);
|
||||
}
|
||||
}
|
||||
else {
|
||||
// i.e. fh->f_num_of_io_entries == 1
|
||||
ret = mca_fbtl_posix_lock ( &lock, fh, F_RDLCK, (off_t)fh->f_io_array[0].offset,
|
||||
(off_t)fh->f_io_array[0].length, OMPIO_LOCK_ENTIRE_REGION );
|
||||
if ( 0 < ret ) {
|
||||
opal_output(1, "mca_fbtl_posix_preadv: error in mca_fbtl_posix_lock() ret=%d: %s",
|
||||
ret, strerror(errno));
|
||||
/* Just in case some part of the lock worked */
|
||||
mca_fbtl_posix_unlock ( &lock, fh);
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
ret_code = pread(fh->fd, fh->f_io_array[0].memory_address, fh->f_io_array[0].length,
|
||||
(off_t)fh->f_io_array[0].offset );
|
||||
mca_fbtl_posix_unlock ( &lock, fh );
|
||||
if ( ret_code == -1 ) {
|
||||
opal_output(1, "mca_fbtl_posix_preadv: error in (p)read(v):%s", strerror(errno));
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
bytes_read += ret_code;
|
||||
}
|
||||
|
||||
return bytes_read;
|
||||
}
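For orientation, the sieving decision above can be read as a standalone predicate. The sketch below is a hypothetical restatement of that averaging logic, not part of the commit; the threshold parameters stand in for the MCA variables registered earlier, and the numbers in the comment are illustrative only.

    #include <stdbool.h>
    #include <stddef.h>

    /* Sieving pays off when the requested blocks are small and the holes
     * between them are small but non-zero. With e.g. 1000 entries of 4 KB
     * separated by 4 KB gaps, avg_block_size = avg_gap_size = 4096, so one
     * sieved read of ~8 MB replaces 1000 small pread calls. */
    static bool sieving_pays_off(size_t avg_block_size, size_t avg_gap_size,
                                 size_t max_block_size, size_t max_gap_size,
                                 bool datasieving_enabled)
    {
        if (!datasieving_enabled)            return false;
        if (0 == avg_gap_size)               return false; /* already contiguous       */
        if (avg_block_size > max_block_size) return false; /* blocks large on their own */
        if (avg_gap_size > max_gap_size)     return false; /* too many wasted bytes     */
        return true;
    }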
ssize_t mca_fbtl_posix_preadv_datasieving (ompio_file_t *fh)
{
    size_t start, end, len;
    size_t bufsize = 0;
    int ret, i, j;
    ssize_t bytes_read=0, ret_code=0;
    struct flock lock;
    char *temp_buf = NULL;

    int startindex = 0;
    int endindex = 0;
    bool done = false;

    while (!done) {
        // Break the io_array into chunks such that the size of the temporary
        // buffer does not exceed mca_fbtl_posix_max_tmpbuf_size bytes.
        // Each iteration will thus work on the range [startindex, endindex)
        startindex = endindex;
        if ( startindex >= fh->f_num_of_io_entries ) {
            done = true;
            break;
        }

        size_t sstart = (size_t)fh->f_io_array[startindex].offset;
        size_t slen=0;

        for ( j = startindex; j < fh->f_num_of_io_entries; j++ ) {
            endindex = j;
            slen = ((size_t)fh->f_io_array[j].offset + fh->f_io_array[j].length) - sstart;
            if (slen > mca_fbtl_posix_max_tmpbuf_size ) {
                endindex = j-1;
                break;
            }
        }
        // Need to increment the value of endindex
        // by one for the loop syntax to work correctly.
        endindex++;

        start = (size_t)fh->f_io_array[startindex].offset;
        end = (size_t)fh->f_io_array[endindex-1].offset + fh->f_io_array[endindex-1].length;
        len = end - start;

        if ( len > bufsize ) {
            if ( NULL != temp_buf ) {
                free ( temp_buf);
            }
            temp_buf = (char *) malloc ( len );
            if ( NULL == temp_buf ) {
                opal_output(1, "OUT OF MEMORY\n");
                return OMPI_ERR_OUT_OF_RESOURCE;
            }
            bufsize = len;
        }

        // Read the entire block.
        ret = mca_fbtl_posix_lock ( &lock, fh, F_RDLCK, start, len, OMPIO_LOCK_ENTIRE_REGION );
        if ( 0 < ret ) {
            opal_output(1, "mca_fbtl_posix_preadv_datasieving: error in mca_fbtl_posix_lock() ret=%d: %s",
                        ret, strerror(errno));
            /* Just in case some part of the lock worked */
            mca_fbtl_posix_unlock ( &lock, fh);
            free ( temp_buf);
            return OMPI_ERROR;
        }

        ret_code = pread (fh->fd, temp_buf, len, start);
        mca_fbtl_posix_unlock ( &lock, fh);
        if ( ret_code == -1 ) {
            opal_output(1, "mca_fbtl_posix_preadv_datasieving: error in (p)read(v):%s", strerror(errno));
            free ( temp_buf);
            return OMPI_ERROR;
        }

        // Copy out the elements that were requested.
        size_t pos = 0;
        size_t num_bytes;
        size_t start_offset = (size_t) fh->f_io_array[startindex].offset;
        for ( i = startindex ; i < endindex ; i++) {
            pos = (size_t) fh->f_io_array[i].offset - start_offset;
            if ( (ssize_t) pos > ret_code ) {
                break;
            }
            num_bytes = fh->f_io_array[i].length;
            if ( ((ssize_t) pos + (ssize_t)num_bytes) > ret_code ) {
                num_bytes = ret_code - (ssize_t)pos;
            }

            memcpy (fh->f_io_array[i].memory_address, temp_buf + pos, num_bytes);
            bytes_read += num_bytes;
        }
    }

    free ( temp_buf);
    return bytes_read;
}
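The chunking loop above is easy to misread because endindex is adjusted twice. The following standalone helper is a hypothetical mirror of that logic, written only to make the interval arithmetic explicit; the struct and function names are invented for illustration.

    #include <stddef.h>

    struct io_entry { size_t offset; size_t length; };

    /* Given the first index of a chunk, return the exclusive end index so
     * that the file range covered by [start_idx, end_idx) fits into
     * max_tmpbuf bytes. At least one entry is always taken, which
     * guarantees forward progress in the caller's while loop. */
    static int chunk_end_index(const struct io_entry *arr, int num,
                               int start_idx, size_t max_tmpbuf)
    {
        size_t sstart = arr[start_idx].offset;
        int end_idx = start_idx + 1;             /* always take at least one entry */
        for (int j = start_idx + 1; j < num; j++) {
            size_t span = (arr[j].offset + arr[j].length) - sstart;
            if (span > max_tmpbuf) {
                break;                           /* entry j would overflow the buffer */
            }
            end_idx = j + 1;                     /* entries [start_idx, j] still fit */
        }
        return end_idx;
    }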
ssize_t mca_fbtl_posix_preadv_generic (ompio_file_t *fh )
{
    ssize_t bytes_read=0, ret_code=0;
    struct iovec *iov = NULL;
    struct flock lock;
    int ret, i;

    int block=1;
    int iov_count = 0;
    OMPI_MPI_OFFSET_TYPE iov_offset = 0;
    off_t total_length, end_offset=0;

    iov = (struct iovec *) malloc (OMPIO_IOVEC_INITIAL_SIZE * sizeof (struct iovec));
    if (NULL == iov) {
        opal_output(1, "OUT OF MEMORY\n");
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    for (i=0 ; i<fh->f_num_of_io_entries ; i++) {
        if (0 == iov_count) {
            iov[iov_count].iov_base = fh->f_io_array[i].memory_address;
            iov[iov_count].iov_len = fh->f_io_array[i].length;
            iov_offset = (OMPI_MPI_OFFSET_TYPE)(intptr_t)fh->f_io_array[i].offset;
        if (0 == iov_count) {
            iov[iov_count].iov_base = fh->f_io_array[i].memory_address;
            iov[iov_count].iov_len = fh->f_io_array[i].length;
            iov_offset = (OMPI_MPI_OFFSET_TYPE)(intptr_t)fh->f_io_array[i].offset;
            end_offset = (off_t)fh->f_io_array[i].offset + (off_t)fh->f_io_array[i].length;
            iov_count ++;
        }

        if (OMPIO_IOVEC_INITIAL_SIZE*block <= iov_count) {
            block ++;
            iov = (struct iovec *)realloc
                (iov, OMPIO_IOVEC_INITIAL_SIZE * block *
                 sizeof(struct iovec));
            if (NULL == iov) {
                opal_output(1, "OUT OF MEMORY\n");
                return OMPI_ERR_OUT_OF_RESOURCE;
            }
        }

        if (fh->f_num_of_io_entries != i+1) {
            if (((((OMPI_MPI_OFFSET_TYPE)(intptr_t)fh->f_io_array[i].offset +
                   (ptrdiff_t)fh->f_io_array[i].length) ==
                  (OMPI_MPI_OFFSET_TYPE)(intptr_t)fh->f_io_array[i+1].offset)) &&
                (iov_count < IOV_MAX ) ){
                iov[iov_count].iov_base =
                    fh->f_io_array[i+1].memory_address;
                iov[iov_count].iov_len = fh->f_io_array[i+1].length;
                end_offset = (off_t)fh->f_io_array[i].offset + (off_t)fh->f_io_array[i].length;
                iov_count ++;
                continue;
            }
        }

        iov_count ++;
        }

        if (OMPIO_IOVEC_INITIAL_SIZE*block <= iov_count) {
            block ++;
            iov = (struct iovec *)realloc
                (iov, OMPIO_IOVEC_INITIAL_SIZE * block *
                 sizeof(struct iovec));
            if (NULL == iov) {
                opal_output(1, "OUT OF MEMORY\n");
                return OMPI_ERR_OUT_OF_RESOURCE;
            }
        }

        if (fh->f_num_of_io_entries != i+1) {
            if (((((OMPI_MPI_OFFSET_TYPE)(intptr_t)fh->f_io_array[i].offset +
                   (ptrdiff_t)fh->f_io_array[i].length) ==
                  (OMPI_MPI_OFFSET_TYPE)(intptr_t)fh->f_io_array[i+1].offset)) &&
                (iov_count < IOV_MAX ) ){
                iov[iov_count].iov_base =
                    fh->f_io_array[i+1].memory_address;
                iov[iov_count].iov_len = fh->f_io_array[i+1].length;
                end_offset = (off_t)fh->f_io_array[i].offset + (off_t)fh->f_io_array[i].length;
                iov_count ++;
                continue;
            }
        }

        total_length = (end_offset - (off_t)iov_offset );

        ret = mca_fbtl_posix_lock ( &lock, fh, F_RDLCK, iov_offset, total_length, OMPIO_LOCK_SELECTIVE );
        if ( 0 < ret ) {
            opal_output(1, "mca_fbtl_posix_preadv: error in mca_fbtl_posix_lock() ret=%d: %s", ret, strerror(errno));
            opal_output(1, "mca_fbtl_posix_preadv_generic: error in mca_fbtl_posix_lock() ret=%d: %s", ret, strerror(errno));
            free (iov);
            /* Just in case some part of the lock worked */
            mca_fbtl_posix_unlock ( &lock, fh);
            return OMPI_ERROR;
        }
#if defined(HAVE_PREADV)
        ret_code = preadv (fh->fd, iov, iov_count, iov_offset);
        ret_code = preadv (fh->fd, iov, iov_count, iov_offset);
#else
        if (-1 == lseek (fh->fd, iov_offset, SEEK_SET)) {
            opal_output(1, "mca_fbtl_posix_preadv: error in lseek:%s", strerror(errno));
        if (-1 == lseek (fh->fd, iov_offset, SEEK_SET)) {
            opal_output(1, "mca_fbtl_posix_preadv_generic: error in lseek:%s", strerror(errno));
            free(iov);
            mca_fbtl_posix_unlock ( &lock, fh );
            return OMPI_ERROR;
        }
        ret_code = readv (fh->fd, iov, iov_count);
            return OMPI_ERROR;
        }
        ret_code = readv (fh->fd, iov, iov_count);
#endif
        mca_fbtl_posix_unlock ( &lock, fh );
        if ( 0 < ret_code ) {
            bytes_read+=ret_code;
        }
        else if ( ret_code == -1 ) {
            opal_output(1, "mca_fbtl_posix_preadv: error in (p)readv:%s", strerror(errno));
        if ( 0 < ret_code ) {
            bytes_read+=ret_code;
        }
        else if ( ret_code == -1 ) {
            opal_output(1, "mca_fbtl_posix_preadv_generic: error in (p)readv:%s", strerror(errno));
            free(iov);
            return OMPI_ERROR;
        }
        else if ( 0 == ret_code ){
            /* end of file reached, no point in continuing to read */
            break;
        }
        iov_count = 0;
    }
            return OMPI_ERROR;
        }
        else if ( 0 == ret_code ){
            /* end of file reached, no point in continuing to read */
            break;
        }
        iov_count = 0;
    }

    free (iov);

    return bytes_read;
}

@ -9,7 +9,7 @@
 * University of Stuttgart. All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 2008-2017 University of Houston. All rights reserved.
 * Copyright (c) 2008-2020 University of Houston. All rights reserved.
 * Copyright (c) 2015-2018 Research Organization for Information Science
 * and Technology (RIST). All rights reserved.
 * $COPYRIGHT$

@ -30,7 +30,187 @@
#include "ompi/constants.h"
#include "ompi/mca/fbtl/fbtl.h"

static ssize_t mca_fbtl_posix_pwritev_datasieving (ompio_file_t *fh );
static ssize_t mca_fbtl_posix_pwritev_generic (ompio_file_t *fh );

ssize_t mca_fbtl_posix_pwritev(ompio_file_t *fh )
{
    ssize_t bytes_written=0, ret_code=0;
    struct flock lock;
    int ret;

    if (NULL == fh->f_io_array) {
        return OMPI_ERROR;
    }

    if ( fh->f_num_of_io_entries > 1 ) {
        bool do_data_sieving = true;

        size_t avg_gap_size=0;
        size_t avg_block_size = 0;
        off_t prev_offset = (off_t)fh->f_io_array[0].offset;
        int i;
        for ( i=0; i< fh->f_num_of_io_entries; i++ ) {
            avg_block_size += fh->f_io_array[i].length;
            avg_gap_size += (size_t)((off_t)fh->f_io_array[i].offset - prev_offset);
            prev_offset = (off_t)fh->f_io_array[i].offset;
        }
        avg_block_size = avg_block_size / fh->f_num_of_io_entries;
        avg_gap_size = avg_gap_size / fh->f_num_of_io_entries;

        if ( false == mca_fbtl_posix_write_datasieving ||
             0 == avg_gap_size ||
             avg_block_size > mca_fbtl_posix_max_block_size ||
             avg_gap_size > mca_fbtl_posix_max_gap_size ||
             ompi_mpi_thread_multiple ||
             !(fh->f_flags & OMPIO_COLLECTIVE_OP) ) {
            do_data_sieving = false;
        }

        if ( do_data_sieving ) {
            return mca_fbtl_posix_pwritev_datasieving (fh);
        }
        else {
            return mca_fbtl_posix_pwritev_generic (fh);
        }
    }
    else {
        // i.e. fh->f_num_of_io_entries == 1
        ret = mca_fbtl_posix_lock ( &lock, fh, F_WRLCK, (off_t)fh->f_io_array[0].offset,
                                    (off_t)fh->f_io_array[0].length, OMPIO_LOCK_ENTIRE_REGION );
        if ( 0 < ret ) {
            opal_output(1, "mca_fbtl_posix_pwritev: error in mca_fbtl_posix_lock() ret=%d: %s",
                        ret, strerror(errno));
            /* Just in case some part of the lock worked */
            mca_fbtl_posix_unlock ( &lock, fh);
            return OMPI_ERROR;
        }

        ret_code = pwrite(fh->fd, fh->f_io_array[0].memory_address, fh->f_io_array[0].length,
                          (off_t)fh->f_io_array[0].offset );
        mca_fbtl_posix_unlock ( &lock, fh );
        if ( ret_code == -1 ) {
            opal_output(1, "mca_fbtl_posix_pwritev: error in (p)write(v):%s", strerror(errno));
            return OMPI_ERROR;
        }

        bytes_written += ret_code;
    }

    return bytes_written;
}
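Note the two guards the write path adds on top of the read-side heuristic: a sieved write rewrites the gap bytes between the requested blocks with whatever was just read back, so it is only attempted inside a collective operation and never under MPI_THREAD_MULTIPLE. A minimal, hypothetical restatement of that safety check follows; the flag macro is a stand-in defined only for this sketch.

    #include <stdbool.h>
    #include <stdint.h>

    #define COLLECTIVE_OP_FLAG 0x1   /* stand-in for OMPIO_COLLECTIVE_OP from the OMPIO headers */

    /* Write sieving is a read-modify-write on bytes the application did
     * not ask to change. It is safe only when nobody else can touch those
     * bytes between the read-back and the write-out: inside a collective
     * operation (flag set by the fcoll component) and without
     * MPI_THREAD_MULTIPLE, where another thread could race. */
    static bool write_sieving_safe(int32_t f_flags, bool thread_multiple)
    {
        return (f_flags & COLLECTIVE_OP_FLAG) && !thread_multiple;
    }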
ssize_t mca_fbtl_posix_pwritev_datasieving (ompio_file_t *fh)
{
    size_t start, end, len;
    size_t bufsize = 0;
    int ret, i, j;
    ssize_t bytes_written=0, ret_code=0;
    struct flock lock;
    char *temp_buf = NULL;

    int startindex = 0;
    int endindex = 0;
    bool done = false;

    while (!done) {
        // Break the io_array into chunks such that the size of the temporary
        // buffer does not exceed mca_fbtl_posix_max_tmpbuf_size bytes.
        // Each iteration will thus work on the range [startindex, endindex)
        startindex = endindex;
        if ( startindex >= fh->f_num_of_io_entries ) {
            done = true;
            break;
        }

        size_t sstart = (size_t)fh->f_io_array[startindex].offset;
        size_t slen=0;

        for ( j = startindex; j < fh->f_num_of_io_entries; j++ ) {
            endindex = j;
            slen = ((size_t)fh->f_io_array[j].offset + fh->f_io_array[j].length) - sstart;
            if (slen > mca_fbtl_posix_max_tmpbuf_size ) {
                endindex = j-1;
                break;
            }
        }
        // Need to increment the value of endindex
        // by one for the loop syntax to work correctly.
        endindex++;

        start = (size_t)fh->f_io_array[startindex].offset;
        end = (size_t)fh->f_io_array[endindex-1].offset + fh->f_io_array[endindex-1].length;
        len = end - start;

        if ( len > bufsize ) {
            if ( NULL != temp_buf ) {
                free ( temp_buf);
            }
            temp_buf = (char *) malloc ( len );
            if ( NULL == temp_buf ) {
                opal_output(1, "OUT OF MEMORY\n");
                return OMPI_ERR_OUT_OF_RESOURCE;
            }
            bufsize = len;
        }

        // Read the entire block.
        ret = mca_fbtl_posix_lock ( &lock, fh, F_WRLCK, start, len, OMPIO_LOCK_ENTIRE_REGION );
        if ( 0 < ret ) {
            opal_output(1, "mca_fbtl_posix_pwritev_datasieving: error in mca_fbtl_posix_lock() ret=%d: %s",
                        ret, strerror(errno));
            /* Just in case some part of the lock worked */
            mca_fbtl_posix_unlock ( &lock, fh);
            free ( temp_buf);
            return OMPI_ERROR;
        }

        ret_code = pread (fh->fd, temp_buf, len, start);
        if ( ret_code == -1 ) {
            //opal_output(1, "mca_fbtl_posix_pwritev_datasieving: error in pwrite:%s", strerror(errno));
            opal_output(1, "mca_fbtl_posix_pwritev_datasieving: error in pread:%s", strerror(errno));
            /* Just in case some part of the lock worked */
            mca_fbtl_posix_unlock ( &lock, fh);
            free ( temp_buf);
            return OMPI_ERROR;
        }

        // Copy the elements to be written into the temporary buffer.
        size_t pos = 0;
        size_t num_bytes;
        size_t start_offset = (size_t) fh->f_io_array[startindex].offset;
        for ( i = startindex ; i < endindex ; i++) {
            pos = (size_t) fh->f_io_array[i].offset - start_offset;
            num_bytes = fh->f_io_array[i].length;
            memcpy (temp_buf + pos, fh->f_io_array[i].memory_address, num_bytes);
            bytes_written += num_bytes;
        }
        ret_code = pwrite (fh->fd, temp_buf, len, start);
        if ( ret_code == -1 ) {
            opal_output(1, "mca_fbtl_posix_pwritev_datasieving: error in pwrite:%s", strerror(errno));
            /* Just in case some part of the lock worked */
            mca_fbtl_posix_unlock ( &lock, fh);
            free ( temp_buf);
            return OMPI_ERROR;
        }

        mca_fbtl_posix_unlock ( &lock, fh);
        if ( ret_code == -1 ) {
            opal_output(1, "mca_fbtl_posix_pwritev_datasieving: error in pwrite:%s", strerror(errno));
            /* Just in case some part of the lock worked */
            mca_fbtl_posix_unlock ( &lock, fh);
            free ( temp_buf);
            return OMPI_ERROR;
        }

    }

    free ( temp_buf);
    return bytes_written;
}
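Stripped of the OMPIO locking and chunking machinery, the function above is a classic read-modify-write cycle. The sketch below is a minimal, self-contained illustration of that cycle under invented names; it is not the commit's code, only the pattern it implements.

    #include <string.h>
    #include <sys/types.h>
    #include <unistd.h>

    struct write_block { off_t offset; size_t length; const char *data; };

    /* 'blocks' are the user's scattered writes inside [start, start+len).
     * Every other byte in the window keeps its on-disk contents, because
     * the whole window is read back before being rewritten. The caller is
     * assumed to hold an exclusive lock on the window for the duration. */
    static ssize_t sieved_write(int fd, off_t start, size_t len, char *tmp,
                                const struct write_block *blocks, int nblocks)
    {
        ssize_t total = 0;
        if (-1 == pread(fd, tmp, len, start))   /* 1. read the whole window   */
            return -1;
        for (int i = 0; i < nblocks; i++) {     /* 2. patch in the new data   */
            memcpy(tmp + (blocks[i].offset - start), blocks[i].data,
                   blocks[i].length);
            total += (ssize_t)blocks[i].length;
        }
        if (-1 == pwrite(fd, tmp, len, start))  /* 3. write the window back   */
            return -1;
        return total;
    }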
ssize_t mca_fbtl_posix_pwritev_generic (ompio_file_t *fh )
{
    /*int *fp = NULL;*/
    int i, block = 1, ret;

@ -130,6 +130,7 @@ mca_fcoll_dynamic_file_read_all (ompio_file_t *fh,
                                 count,
                                 buf,
                                 &max_data,
                                 fh->f_mem_convertor,
                                 &decoded_iov,
                                 &iov_count);
    if (OMPI_SUCCESS != ret){

@ -132,6 +132,7 @@ mca_fcoll_dynamic_file_write_all (ompio_file_t *fh,
                                 count,
                                 buf,
                                 &max_data,
                                 fh->f_mem_convertor,
                                 &decoded_iov,
                                 &iov_count);
    if (OMPI_SUCCESS != ret ){

@ -9,7 +9,7 @@
 * University of Stuttgart. All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 2008-2016 University of Houston. All rights reserved.
 * Copyright (c) 2008-2020 University of Houston. All rights reserved.
 * Copyright (c) 2015-2018 Research Organization for Information Science
 * and Technology (RIST). All rights reserved.
 * $COPYRIGHT$

@ -36,7 +36,6 @@ BEGIN_C_DECLS

extern int mca_fcoll_dynamic_gen2_priority;
extern int mca_fcoll_dynamic_gen2_num_groups;
extern int mca_fcoll_dynamic_gen2_write_chunksize;

OMPI_MODULE_DECLSPEC extern mca_fcoll_base_component_2_0_0_t mca_fcoll_dynamic_gen2_component;

@ -11,7 +11,7 @@
 * Copyright (c) 2004-2005 The Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
 * Copyright (c) 2008-2016 University of Houston. All rights reserved.
 * Copyright (c) 2008-2020 University of Houston. All rights reserved.
 * Copyright (c) 2015 Los Alamos National Security, LLC. All rights
 * reserved.
 * $COPYRIGHT$

@ -42,7 +42,6 @@ const char *mca_fcoll_dynamic_gen2_component_version_string =
 */
int mca_fcoll_dynamic_gen2_priority = 10;
int mca_fcoll_dynamic_gen2_num_groups = 1;
int mca_fcoll_dynamic_gen2_write_chunksize = -1;

/*
 * Local function

@ -95,12 +94,5 @@ dynamic_gen2_register(void)
                                           OPAL_INFO_LVL_9,
                                           MCA_BASE_VAR_SCOPE_READONLY, &mca_fcoll_dynamic_gen2_num_groups);

    mca_fcoll_dynamic_gen2_write_chunksize = -1;
    (void) mca_base_component_var_register(&mca_fcoll_dynamic_gen2_component.fcollm_version,
                                           "write_chunksize", "Chunk size written at once. Default: stripe_size of the file system",
                                           MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                           OPAL_INFO_LVL_9,
                                           MCA_BASE_VAR_SCOPE_READONLY, &mca_fcoll_dynamic_gen2_write_chunksize);

    return OMPI_SUCCESS;
}

@ -130,6 +130,7 @@ mca_fcoll_dynamic_gen2_file_read_all (ompio_file_t *fh,
                                 count,
                                 buf,
                                 &max_data,
                                 fh->f_mem_convertor,
                                 &decoded_iov,
                                 &iov_count);
    if (OMPI_SUCCESS != ret){

@ -9,7 +9,7 @@
 * University of Stuttgart. All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 2008-2016 University of Houston. All rights reserved.
 * Copyright (c) 2008-2020 University of Houston. All rights reserved.
 * Copyright (c) 2015-2018 Research Organization for Information Science
 * and Technology (RIST). All rights reserved.
 * Copyright (c) 2017 IBM Corporation. All rights reserved.

@ -92,7 +92,7 @@ typedef struct mca_io_ompio_aggregator_data {

static int shuffle_init ( int index, int cycles, int aggregator, int rank,
                          mca_io_ompio_aggregator_data *data,
                          ompi_request_t **reqs );
static int write_init (ompio_file_t *fh, int aggregator, mca_io_ompio_aggregator_data *aggr_data, int write_chunksize );
static int write_init (ompio_file_t *fh, int aggregator, mca_io_ompio_aggregator_data *aggr_data );

int mca_fcoll_dynamic_gen2_break_file_view ( struct iovec *decoded_iov, int iov_count,
                                             struct iovec *local_iov_array, int local_count,

@ -111,8 +111,7 @@ static int local_heap_sort (mca_io_ompio_local_io_array *io_array,
                            int *sorted);

int mca_fcoll_dynamic_gen2_split_iov_array ( ompio_file_t *fh, mca_common_ompio_io_array_t *work_array,
                                             int num_entries, int *last_array_pos, int *last_pos_in_field,
                                             int chunk_size );
                                             int num_entries, int *last_array_pos, int *last_pos_in_field );

int mca_fcoll_dynamic_gen2_file_write_all (ompio_file_t *fh,

@ -145,7 +144,7 @@ int mca_fcoll_dynamic_gen2_file_write_all (ompio_file_t *fh,
    MPI_Aint *broken_total_lengths=NULL;

    int *aggregators=NULL;
    int write_chunksize, *result_counts=NULL;
    int *result_counts=NULL;

#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN

@ -170,6 +169,7 @@ int mca_fcoll_dynamic_gen2_file_write_all (ompio_file_t *fh,
                                 count,
                                 buf,
                                 &max_data,
                                 fh->f_mem_convertor,
                                 &decoded_iov,
                                 &iov_count);
    if (OMPI_SUCCESS != ret ){

@ -198,15 +198,9 @@ int mca_fcoll_dynamic_gen2_file_write_all (ompio_file_t *fh,

    if ( fh->f_stripe_size == 0 ) {
        // EDGAR: just a quick hack for testing
        //fh->f_stripe_size = 1048576;
        fh->f_stripe_size = 65536;
    }
    if ( -1 == mca_fcoll_dynamic_gen2_write_chunksize ) {
        write_chunksize = fh->f_stripe_size;
    }
    else {
        write_chunksize = mca_fcoll_dynamic_gen2_write_chunksize;
    }

    ret = mca_fcoll_dynamic_gen2_get_configuration (fh, &dynamic_gen2_num_io_procs, &aggregators);
    if (OMPI_SUCCESS != ret){

@ -607,7 +601,7 @@ int mca_fcoll_dynamic_gen2_file_write_all (ompio_file_t *fh,
#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN
            start_write_time = MPI_Wtime();
#endif
            ret = write_init (fh, aggregators[i], aggr_data[i], write_chunksize );
            ret = write_init (fh, aggregators[i], aggr_data[i] );
            if (OMPI_SUCCESS != ret){
                goto exit;
            }

@ -636,7 +630,7 @@ int mca_fcoll_dynamic_gen2_file_write_all (ompio_file_t *fh,
#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN
            start_write_time = MPI_Wtime();
#endif
            ret = write_init (fh, aggregators[i], aggr_data[i], write_chunksize );
            ret = write_init (fh, aggregators[i], aggr_data[i] );
            if (OMPI_SUCCESS != ret){
                goto exit;
            }

@ -734,7 +728,7 @@ exit :
}

static int write_init (ompio_file_t *fh, int aggregator, mca_io_ompio_aggregator_data *aggr_data, int write_chunksize )
static int write_init (ompio_file_t *fh, int aggregator, mca_io_ompio_aggregator_data *aggr_data )
{
    int ret=OMPI_SUCCESS;
    int last_array_pos=0;

@ -742,18 +736,36 @@ static int write_init (ompio_file_t *fh, int aggregator, mca_io_ompio_aggregator

    if ( aggregator == fh->f_rank && aggr_data->prev_num_io_entries) {
        while ( aggr_data->prev_bytes_to_write > 0 ) {
        fh->f_flags |= OMPIO_COLLECTIVE_OP;
        while ( aggr_data->prev_bytes_to_write > 0 ) {
            ssize_t tret;
            aggr_data->prev_bytes_to_write -= mca_fcoll_dynamic_gen2_split_iov_array (fh, aggr_data->prev_io_array,
                                                                                      aggr_data->prev_num_io_entries,
                                                                                      &last_array_pos, &last_pos,
                                                                                      write_chunksize );
            if ( 0 > fh->f_fbtl->fbtl_pwritev (fh)) {
                                                                                      &last_array_pos, &last_pos );
            tret = fh->f_fbtl->fbtl_pwritev (fh);
            if ( 0 > tret ) {
                free ( aggr_data->prev_io_array);
                opal_output (1, "dynamic_gen2_write_all: fbtl_pwritev failed\n");
                ret = OMPI_ERROR;
                goto exit;
            }

#if DEBUG_ON
            printf("fh->f_num_of_io_entries=%d\n", fh->f_num_of_io_entries);
            printf("[%d]: fh->f_io_array[0].offset = %ld .size = %ld\n", fh->f_rank, (long)fh->f_io_array[0].offset,
                   fh->f_io_array[0].length);
            if ( fh->f_num_of_io_entries > 1 )
                printf("[%d]: fh->f_io_array[1].offset = %ld .size = %ld\n", fh->f_rank, (long)fh->f_io_array[1].offset,
                       fh->f_io_array[1].length);

            int n = fh->f_num_of_io_entries-1;
            if ( fh->f_num_of_io_entries > 2 )
                printf("[%d]: fh->f_io_array[n].offset = %ld .size = %ld\n", fh->f_rank, (long)fh->f_io_array[n].offset,
                       fh->f_io_array[n].length);
#endif
        }
        fh->f_flags &= ~OMPIO_COLLECTIVE_OP;
        free ( fh->f_io_array );
        free ( aggr_data->prev_io_array);
    }
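The new lines in write_init bracket the fbtl calls with the OMPIO_COLLECTIVE_OP flag, which is what the fbtl-side sieving check consumes. A condensed illustration of that protocol is sketched below; it assumes the OMPIO internal headers (ompio_file_t, OMPIO_COLLECTIVE_OP, OMPI_ERROR) and is not a standalone program.

    /* The aggregator marks the handle as being inside a collective write
     * before calling down into the fbtl, and always clears the mark again;
     * this is how mca_fbtl_posix_pwritev can tell collective writes, where
     * data sieving is permitted, from independent ones. */
    static int collective_pwritev (ompio_file_t *fh)
    {
        ssize_t tret;
        fh->f_flags |= OMPIO_COLLECTIVE_OP;     /* sieving now allowed     */
        tret = fh->f_fbtl->fbtl_pwritev (fh);
        fh->f_flags &= ~OMPIO_COLLECTIVE_OP;    /* always restore the flag */
        return (0 > tret) ? OMPI_ERROR : OMPI_SUCCESS;
    }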
@ -800,7 +812,7 @@ static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_i
    }

    for(l=0;l<data->procs_per_group;l++){
        data->disp_index[l] = 1;
        data->disp_index[l] = 0;

        if(data->max_disp_index[l] == 0) {
            data->blocklen_per_process[l] = (int *) calloc (INIT_LEN, sizeof(int));

@ -879,8 +891,8 @@ static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_i
            if (data->bytes_remaining <= data->bytes_to_write_in_cycle) {
                /* The data fits completely into the block */
                if (aggregator == rank) {
                    data->blocklen_per_process[data->n][data->disp_index[data->n] - 1] = data->bytes_remaining;
                    data->displs_per_process[data->n][data->disp_index[data->n] - 1] =
                    data->blocklen_per_process[data->n][data->disp_index[data->n]] = data->bytes_remaining;
                    data->displs_per_process[data->n][data->disp_index[data->n]] =
                        (ptrdiff_t)data->global_iov_array[data->sorted[data->current_index]].iov_base +
                        (data->global_iov_array[data->sorted[data->current_index]].iov_len
                         - data->bytes_remaining);

@ -913,11 +925,12 @@ static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_i
                /* the remaining data from the previous cycle is larger than the
                   data->bytes_to_write_in_cycle, so we have to segment again */
                if (aggregator == rank) {
                    data->blocklen_per_process[data->n][data->disp_index[data->n] - 1] = data->bytes_to_write_in_cycle;
                    data->displs_per_process[data->n][data->disp_index[data->n] - 1] =
                    data->blocklen_per_process[data->n][data->disp_index[data->n]] = data->bytes_to_write_in_cycle;
                    data->displs_per_process[data->n][data->disp_index[data->n]] =
                        (ptrdiff_t)data->global_iov_array[data->sorted[data->current_index]].iov_base +
                        (data->global_iov_array[data->sorted[data->current_index]].iov_len
                         - data->bytes_remaining);
                    data->disp_index[data->n] += 1;
                }

                if (data->procs_in_group[data->n] == rank) {

@ -934,9 +947,10 @@ static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_i
                (MPI_Aint) data->global_iov_array[data->sorted[data->current_index]].iov_len) {
                /* This entry has more data than we can send in one cycle */
                if (aggregator == rank) {
                    data->blocklen_per_process[data->n][data->disp_index[data->n] - 1] = data->bytes_to_write_in_cycle;
                    data->displs_per_process[data->n][data->disp_index[data->n] - 1] =
                    data->blocklen_per_process[data->n][data->disp_index[data->n]] = data->bytes_to_write_in_cycle;
                    data->displs_per_process[data->n][data->disp_index[data->n]] =
                        (ptrdiff_t)data->global_iov_array[data->sorted[data->current_index]].iov_base ;
                    data->disp_index[data->n] += 1;
                }
                if (data->procs_in_group[data->n] == rank) {
                    bytes_sent += data->bytes_to_write_in_cycle;

@ -950,9 +964,9 @@ static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_i
            else {
                /* Next data entry is less than data->bytes_to_write_in_cycle */
                if (aggregator == rank) {
                    data->blocklen_per_process[data->n][data->disp_index[data->n] - 1] =
                    data->blocklen_per_process[data->n][data->disp_index[data->n]] =
                        data->global_iov_array[data->sorted[data->current_index]].iov_len;
                    data->displs_per_process[data->n][data->disp_index[data->n] - 1] = (ptrdiff_t)
                    data->displs_per_process[data->n][data->disp_index[data->n]] = (ptrdiff_t)
                        data->global_iov_array[data->sorted[data->current_index]].iov_base;

                    data->disp_index[data->n] += 1;

@ -1592,14 +1606,15 @@ int mca_fcoll_dynamic_gen2_get_configuration (ompio_file_t *fh, int *dynamic_gen

int mca_fcoll_dynamic_gen2_split_iov_array ( ompio_file_t *fh, mca_common_ompio_io_array_t *io_array, int num_entries,
                                             int *ret_array_pos, int *ret_pos, int chunk_size )
                                             int *ret_array_pos, int *ret_pos )
{
    int array_pos = *ret_array_pos;
    int pos = *ret_pos;
    size_t bytes_written = 0;
    size_t bytes_to_write = chunk_size;

    off_t baseaddr = ((off_t)io_array[array_pos].offset + pos) - (((off_t)io_array[array_pos].offset + pos) % (off_t)fh->f_stripe_size);
    off_t endaddr = baseaddr + fh->f_stripe_size;

    if ( 0 == array_pos && 0 == pos ) {
        fh->f_io_array = (mca_common_ompio_io_array_t *) malloc ( num_entries * sizeof(mca_common_ompio_io_array_t));
        if ( NULL == fh->f_io_array ){

@ -1609,32 +1624,28 @@ int mca_fcoll_dynamic_gen2_split_iov_array ( ompio_file_t *fh, mca_common_ompio_
    }

    int i=0;
    while (bytes_to_write > 0 ) {
        fh->f_io_array[i].memory_address = &(((char *)io_array[array_pos].memory_address)[pos]);
        fh->f_io_array[i].offset = &(((char *)io_array[array_pos].offset)[pos]);
    do {
        fh->f_io_array[i].memory_address = (char *)io_array[array_pos].memory_address + pos;
        fh->f_io_array[i].offset = (char *)io_array[array_pos].offset + pos;

        if ( (io_array[array_pos].length - pos ) >= bytes_to_write ) {
            fh->f_io_array[i].length = bytes_to_write;
        off_t length = io_array[array_pos].length - pos;

        if ( ( (off_t)fh->f_io_array[i].offset + length) < endaddr ) {
            fh->f_io_array[i].length = length;
        }
        else {
            fh->f_io_array[i].length = io_array[array_pos].length - pos;
            fh->f_io_array[i].length = endaddr - (size_t)fh->f_io_array[i].offset;
        }

        pos += fh->f_io_array[i].length;
        bytes_written += fh->f_io_array[i].length;
        bytes_to_write-= fh->f_io_array[i].length;
        i++;

        if ( pos == (int)io_array[array_pos].length ) {
            pos = 0;
            if ((array_pos + 1) < num_entries) {
                array_pos++;
            }
            else {
                break;
            }
            array_pos++;
        }
    }
    } while ( (array_pos < num_entries) && (((off_t)io_array[array_pos].offset+pos ) < endaddr) );

    fh->f_num_of_io_entries = i;
    *ret_array_pos = array_pos;
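The rewritten split_iov_array no longer cuts the work array by a fixed chunk_size but at stripe boundaries of the file system. A worked restatement of the new baseaddr/endaddr computation follows; the helper is hypothetical and exists only to make the arithmetic concrete.

    #include <sys/types.h>

    /* With stripe_size = 65536 and a current file position of 70000,
     * 70000 % 65536 = 4464, so baseaddr = 65536 and endaddr = 131072:
     * this call emits at most 61072 bytes and the next call starts
     * exactly on a stripe boundary. */
    static void stripe_window(off_t cur, off_t stripe_size,
                              off_t *baseaddr, off_t *endaddr)
    {
        *baseaddr = cur - (cur % stripe_size);  /* round down to stripe start  */
        *endaddr  = *baseaddr + stripe_size;    /* exclusive end of the stripe */
    }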
@ -155,6 +155,7 @@ mca_fcoll_two_phase_file_read_all (ompio_file_t *fh,
                                 count,
                                 buf,
                                 &max_data,
                                 fh->f_mem_convertor,
                                 &temp_iov,
                                 &iov_count);
    if (OMPI_SUCCESS != ret ){

@ -185,6 +185,7 @@ mca_fcoll_two_phase_file_write_all (ompio_file_t *fh,
                                 count,
                                 buf,
                                 &max_data,
                                 fh->f_mem_convertor,
                                 &temp_iov,
                                 &iov_count);
    if (OMPI_SUCCESS != ret ){

@ -129,6 +129,7 @@ mca_fcoll_vulcan_file_read_all (ompio_file_t *fh,
                                 count,
                                 buf,
                                 &max_data,
                                 fh->f_mem_convertor,
                                 &decoded_iov,
                                 &iov_count);
    if (OMPI_SUCCESS != ret){

@ -185,6 +185,7 @@ int mca_fcoll_vulcan_file_write_all (ompio_file_t *fh,
                                 count,
                                 buf,
                                 &max_data,
                                 fh->f_mem_convertor,
                                 &decoded_iov,
                                 &iov_count);
    if (OMPI_SUCCESS != ret ){

@ -770,7 +771,9 @@ static int write_init (ompio_file_t *fh,
        }
    }
    else {
        fh->f_flags |= OMPIO_COLLECTIVE_OP;
        ret_temp = fh->f_fbtl->fbtl_pwritev(fh);
        fh->f_flags &= ~OMPIO_COLLECTIVE_OP;
        if(0 > ret_temp) {
            opal_output (1, "vulcan_write_all: fbtl_pwritev failed\n");
            ret = ret_temp;

@ -835,7 +838,7 @@ static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_i

    for(l=0;l<data->procs_per_group;l++){
        data->disp_index[l] = 1;
        data->disp_index[l] = 0;

        if ( data->max_disp_index[l] == 0 ) {
            data->blocklen_per_process[l] = (int *) calloc (INIT_LEN, sizeof(int));

@ -914,8 +917,8 @@ static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_i
            if (data->bytes_remaining <= data->bytes_to_write_in_cycle) {
                /* The data fits completely into the block */
                if (aggregator == rank) {
                    data->blocklen_per_process[data->n][data->disp_index[data->n] - 1] = data->bytes_remaining;
                    data->displs_per_process[data->n][data->disp_index[data->n] - 1] =
                    data->blocklen_per_process[data->n][data->disp_index[data->n]] = data->bytes_remaining;
                    data->displs_per_process[data->n][data->disp_index[data->n]] =
                        (ptrdiff_t)data->global_iov_array[data->sorted[data->current_index]].iov_base +
                        (data->global_iov_array[data->sorted[data->current_index]].iov_len
                         - data->bytes_remaining);

@ -949,11 +952,12 @@ static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_i
                /* the remaining data from the previous cycle is larger than the
                   data->bytes_to_write_in_cycle, so we have to segment again */
                if (aggregator == rank) {
                    data->blocklen_per_process[data->n][data->disp_index[data->n] - 1] = data->bytes_to_write_in_cycle;
                    data->displs_per_process[data->n][data->disp_index[data->n] - 1] =
                    data->blocklen_per_process[data->n][data->disp_index[data->n]] = data->bytes_to_write_in_cycle;
                    data->displs_per_process[data->n][data->disp_index[data->n]] =
                        (ptrdiff_t)data->global_iov_array[data->sorted[data->current_index]].iov_base +
                        (data->global_iov_array[data->sorted[data->current_index]].iov_len
                         - data->bytes_remaining);
                    data->disp_index[data->n] += 1;
                }

                if (data->procs_in_group[data->n] == rank) {

@ -970,9 +974,10 @@ static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_i
                (MPI_Aint) data->global_iov_array[data->sorted[data->current_index]].iov_len) {
                /* This entry has more data than we can send in one cycle */
                if (aggregator == rank) {
                    data->blocklen_per_process[data->n][data->disp_index[data->n] - 1] = data->bytes_to_write_in_cycle;
                    data->displs_per_process[data->n][data->disp_index[data->n] - 1] =
                    data->blocklen_per_process[data->n][data->disp_index[data->n]] = data->bytes_to_write_in_cycle;
                    data->displs_per_process[data->n][data->disp_index[data->n]] =
                        (ptrdiff_t)data->global_iov_array[data->sorted[data->current_index]].iov_base ;
                    data->disp_index[data->n] += 1;
                }
                if (data->procs_in_group[data->n] == rank) {
                    bytes_sent += data->bytes_to_write_in_cycle;

@ -986,9 +991,9 @@ static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_i
            else {
                /* Next data entry is less than data->bytes_to_write_in_cycle */
                if (aggregator == rank) {
                    data->blocklen_per_process[data->n][data->disp_index[data->n] - 1] =
                    data->blocklen_per_process[data->n][data->disp_index[data->n]] =
                        data->global_iov_array[data->sorted[data->current_index]].iov_len;
                    data->displs_per_process[data->n][data->disp_index[data->n] - 1] = (ptrdiff_t)
                    data->displs_per_process[data->n][data->disp_index[data->n]] = (ptrdiff_t)
                        data->global_iov_array[data->sorted[data->current_index]].iov_base;

                    data->disp_index[data->n] += 1;

@ -64,22 +64,6 @@ int mca_fs_lustre_file_open (struct ompi_communicator_t *comm,
                             struct opal_info_t *info,
                             ompio_file_t *fh);

int mca_fs_lustre_file_close (ompio_file_t *fh);

int mca_fs_lustre_file_delete (char *filename,
                               struct opal_info_t *info);

int mca_fs_lustre_file_set_size (ompio_file_t *fh,
                                 OMPI_MPI_OFFSET_TYPE size);

int mca_fs_lustre_file_get_size (ompio_file_t *fh,
                                 OMPI_MPI_OFFSET_TYPE *size);

int mca_fs_lustre_file_sync (ompio_file_t *fh);

int mca_fs_lustre_file_seek (ompio_file_t *fh,
                             OMPI_MPI_OFFSET_TYPE offset,
                             int whence);
/*
 * ******************************************************************
 * ************ functions implemented in this module end ************

@ -10,7 +10,7 @@
 * Copyright (c) 2004-2005 The Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 2008-2018 University of Houston. All rights reserved.
 * Copyright (c) 2015-2018 Research Organization for Information Science
 * Copyright (c) 2015-2020 Research Organization for Information Science
 * and Technology (RIST). All rights reserved.
 * Copyright (c) 2016-2017 IBM Corporation. All rights reserved.
 * $COPYRIGHT$

@ -29,6 +29,7 @@
#include "mpi.h"
#include "ompi/constants.h"
#include "ompi/mca/fs/fs.h"
#include "ompi/mca/fs/base/base.h"
#include "ompi/communicator/communicator.h"
#include "ompi/info/info.h"

@ -63,8 +64,7 @@ mca_fs_lustre_file_open (struct ompi_communicator_t *comm,
                         struct opal_info_t *info,
                         ompio_file_t *fh)
{
    int amode, rank;
    int old_mask, perm;
    int amode, perm;
    int rc, ret=OMPI_SUCCESS;
    int flag;
    int fs_lustre_stripe_size = -1;

@ -73,24 +73,8 @@ mca_fs_lustre_file_open (struct ompi_communicator_t *comm,

    struct lov_user_md *lump=NULL;

    if (fh->f_perm == OMPIO_PERM_NULL) {
        old_mask = umask(022);
        umask(old_mask);
        perm = old_mask ^ 0666;
    }
    else {
        perm = fh->f_perm;
    }

    rank = fh->f_rank;

    amode = 0;
    if (access_mode & MPI_MODE_RDONLY)
        amode = amode | O_RDONLY;
    if (access_mode & MPI_MODE_WRONLY)
        amode = amode | O_WRONLY;
    if (access_mode & MPI_MODE_RDWR)
        amode = amode | O_RDWR;
    perm = mca_fs_base_get_file_perm(fh);
    amode = mca_fs_base_get_file_amode(fh->f_rank, access_mode);

    opal_info_get (info, "stripe_size", MPI_MAX_INFO_VAL, char_stripe, &flag);
    if ( flag ) {

@ -113,13 +97,7 @@ mca_fs_lustre_file_open (struct ompi_communicator_t *comm,

    /* Reset errno */
    errno = 0;
    if (0 == fh->f_rank) {
        /* MODE_CREATE and MODE_EXCL can only be set by one process */
        if ( access_mode & MPI_MODE_CREATE )
            amode = amode | O_CREAT;
        if (access_mode & MPI_MODE_EXCL)
            amode = amode | O_EXCL;

    if (OMPIO_ROOT == fh->f_rank) {
        if ( (fs_lustre_stripe_size>0 || fs_lustre_stripe_width>0) &&
             ( amode&O_CREAT) &&
             ( (amode&O_RDWR)|| amode&O_WRONLY) ) {

@ -134,28 +112,9 @@ mca_fs_lustre_file_open (struct ompi_communicator_t *comm,
        else {
            fh->fd = open (filename, amode, perm);
        }

        if ( 0 > fh->fd ) {
            if ( EACCES == errno ) {
                ret = MPI_ERR_ACCESS;
            }
            else if ( ENAMETOOLONG == errno ) {
                ret = MPI_ERR_BAD_FILE;
            }
            else if ( ENOENT == errno ) {
                ret = MPI_ERR_NO_SUCH_FILE;
            }
            else if ( EISDIR == errno ) {
                ret = MPI_ERR_BAD_FILE;
            }
            else if ( EROFS == errno ) {
                ret = MPI_ERR_READ_ONLY;
            }
            else if ( EEXIST == errno ) {
                ret = MPI_ERR_FILE_EXISTS;
            }
            else {
                ret = MPI_ERR_OTHER;
            }
            ret = mca_fs_base_get_mpi_err(errno);
        }
    }

@ -165,39 +124,17 @@ mca_fs_lustre_file_open (struct ompi_communicator_t *comm,
        return ret;
    }

    if ( 0 != rank ) {
    if (OMPIO_ROOT != fh->f_rank) {
        fh->fd = open (filename, amode, perm);
        if ( 0 > fh->fd) {
            if ( EACCES == errno ) {
                ret = MPI_ERR_ACCESS;
            }
            else if ( ENAMETOOLONG == errno ) {
                ret = MPI_ERR_BAD_FILE;
            }
            else if ( ENOENT == errno ) {
                ret = MPI_ERR_NO_SUCH_FILE;
            }
            else if ( EISDIR == errno ) {
                ret = MPI_ERR_BAD_FILE;
            }
            else if ( EROFS == errno ) {
                ret = MPI_ERR_READ_ONLY;
            }
            else if ( EEXIST == errno ) {
                ret = MPI_ERR_FILE_EXISTS;
            }
            else {
                ret = MPI_ERR_OTHER;
            }
            return mca_fs_base_get_mpi_err(errno);
        }
    }

    lump = alloc_lum();
    if (NULL == lump ){
        fprintf(stderr,"Cannot allocate memory for extracting stripe size\n");
        return OMPI_ERROR;
        fprintf(stderr,"Cannot allocate memory for extracting stripe size\n");
        return OMPI_ERROR;
    }
    rc = llapi_file_get_stripe(filename, lump);
    if (rc != 0) {

@ -207,6 +144,7 @@ mca_fs_lustre_file_open (struct ompi_communicator_t *comm,
    fh->f_stripe_size = lump->lmm_stripe_size;
    fh->f_stripe_count = lump->lmm_stripe_count;
    fh->f_fs_block_size = lump->lmm_stripe_size;
    fh->f_flags |= OMPIO_LOCK_NEVER;

    return OMPI_SUCCESS;
}

@ -89,9 +89,6 @@ int mca_fs_pvfs2_file_get_size (ompio_file_t *fh,

int mca_fs_pvfs2_file_sync (ompio_file_t *fh);

int mca_fs_pvfs2_file_seek (ompio_file_t *fh,
                            OMPI_MPI_OFFSET_TYPE offset,
                            int whence);
/*
 * ******************************************************************
 * ************ functions implemented in this module end ************

@ -60,22 +60,6 @@ int mca_fs_ufs_file_open (struct ompi_communicator_t *comm,
                          struct opal_info_t *info,
                          ompio_file_t *fh);

int mca_fs_ufs_file_close (ompio_file_t *fh);

int mca_fs_ufs_file_delete (char *filename,
                            struct opal_info_t *info);

int mca_fs_ufs_file_set_size (ompio_file_t *fh,
                              OMPI_MPI_OFFSET_TYPE size);

int mca_fs_ufs_file_get_size (ompio_file_t *fh,
                              OMPI_MPI_OFFSET_TYPE *size);

int mca_fs_ufs_file_sync (ompio_file_t *fh);

int mca_fs_ufs_file_seek (ompio_file_t *fh,
                          OMPI_MPI_OFFSET_TYPE offset,
                          int whence);
/*
 * ******************************************************************
 * ************ functions implemented in this module end ************

@ -48,114 +48,32 @@ mca_fs_ufs_file_open (struct ompi_communicator_t *comm,
                      struct opal_info_t *info,
                      ompio_file_t *fh)
{
    int amode;
    int old_mask, perm;
    int rank, ret=OMPI_SUCCESS;
    int amode, perm;
    int ret=OMPI_SUCCESS;

    rank = ompi_comm_rank ( comm );

    if (fh->f_perm == OMPIO_PERM_NULL) {
        old_mask = umask(022);
        umask(old_mask);
        perm = old_mask ^ 0666;
    }
    else {
        perm = fh->f_perm;
    }

    amode = 0;

    if (access_mode & MPI_MODE_RDONLY)
        amode = amode | O_RDONLY;
    if (access_mode & MPI_MODE_WRONLY)
        amode = amode | O_WRONLY;
    if (access_mode & MPI_MODE_RDWR)
        amode = amode | O_RDWR;
    perm = mca_fs_base_get_file_perm(fh);
    amode = mca_fs_base_get_file_amode(fh->f_rank, access_mode);

    /* Reset errno */
    errno = 0;
    if ( 0 == rank ) {
        /* MODE_CREATE and MODE_EXCL can only be set by one process */
        if ( access_mode & MPI_MODE_CREATE )
            amode = amode | O_CREAT;
        if (access_mode & MPI_MODE_EXCL)
            amode = amode | O_EXCL;

        fh->fd = open (filename, amode, perm);
        if ( 0 > fh->fd ) {
            if ( EACCES == errno ) {
                ret = MPI_ERR_ACCESS;
            }
            else if ( ENAMETOOLONG == errno ) {
                ret = MPI_ERR_BAD_FILE;
            }
            else if ( ENOENT == errno ) {
                ret = MPI_ERR_NO_SUCH_FILE;
            }
            else if ( EISDIR == errno ) {
                ret = MPI_ERR_BAD_FILE;
            }
            else if ( EROFS == errno ) {
                ret = MPI_ERR_READ_ONLY;
            }
            else if ( EEXIST == errno ) {
                ret = MPI_ERR_FILE_EXISTS;
            }
            else if ( ENOSPC == errno ) {
                ret = MPI_ERR_NO_SPACE;
            }
            else if ( EDQUOT == errno ) {
                ret = MPI_ERR_QUOTA;
            }
            else if ( ETXTBSY == errno ) {
                ret = MPI_ERR_FILE_IN_USE;
            }
            else {
                ret = MPI_ERR_OTHER;
            }
    if (OMPIO_ROOT == fh->f_rank) {
        fh->fd = open (filename, amode, perm);
        if ( 0 > fh->fd ) {
            ret = mca_fs_base_get_mpi_err(errno);
        }
    }

    comm->c_coll->coll_bcast ( &ret, 1, MPI_INT, 0, comm, comm->c_coll->coll_bcast_module);
    if ( OMPI_SUCCESS != ret ) {
        fh->fd = -1;
        return ret;
        fh->fd = -1;
        return ret;
    }

    if ( 0 != rank ) {
        fh->fd = open (filename, amode, perm);
        if ( 0 > fh->fd) {
            if ( EACCES == errno ) {
                ret = MPI_ERR_ACCESS;
            }
            else if ( ENAMETOOLONG == errno ) {
                ret = MPI_ERR_BAD_FILE;
            }
            else if ( ENOENT == errno ) {
                ret = MPI_ERR_NO_SUCH_FILE;
            }
            else if ( EISDIR == errno ) {
                ret = MPI_ERR_BAD_FILE;
            }
            else if ( EROFS == errno ) {
                ret = MPI_ERR_READ_ONLY;
            }
            else if ( EEXIST == errno ) {
                ret = MPI_ERR_FILE_EXISTS;
            }
            else if ( ENOSPC == errno ) {
                ret = MPI_ERR_NO_SPACE;
            }
            else if ( EDQUOT == errno ) {
                ret = MPI_ERR_QUOTA;
            }
            else if ( ETXTBSY == errno ) {
                ret = MPI_ERR_FILE_IN_USE;
            }
            else {
                ret = MPI_ERR_OTHER;
            }
        }
    }
    if (OMPIO_ROOT != fh->f_rank) {
        fh->fd = open (filename, amode, perm);
        if ( 0 > fh->fd) {
            return mca_fs_base_get_mpi_err(errno);
        }
    }

    fh->f_stripe_size=0;
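The commit collapses the long inlined errno chains in the fs components into a shared helper, mca_fs_base_get_mpi_err. Its body is not part of this diff; the sketch below is a hypothetical reconstruction inferred purely from the chains it replaces above, under an invented name.

    #include <errno.h>
    #include "mpi.h"

    /* Presumed shape of the shared errno-to-MPI mapping; the real helper
     * lives in ompi/mca/fs/base and may differ in coverage and order. */
    static int fs_errno_to_mpi_err(int err)
    {
        switch (err) {
        case EACCES:       return MPI_ERR_ACCESS;
        case ENAMETOOLONG: return MPI_ERR_BAD_FILE;
        case ENOENT:       return MPI_ERR_NO_SUCH_FILE;
        case EISDIR:       return MPI_ERR_BAD_FILE;
        case EROFS:        return MPI_ERR_READ_ONLY;
        case EEXIST:       return MPI_ERR_FILE_EXISTS;
        case ENOSPC:       return MPI_ERR_NO_SPACE;
        case EDQUOT:       return MPI_ERR_QUOTA;
        case ETXTBSY:      return MPI_ERR_FILE_IN_USE;
        default:           return MPI_ERR_OTHER;
        }
    }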
@ -29,7 +29,6 @@
#include "mpi.h"
#include "opal/class/opal_list.h"
#include "ompi/errhandler/errhandler.h"
#include "opal/threads/mutex.h"
#include "ompi/file/file.h"
#include "ompi/mca/io/io.h"
#include "ompi/mca/fs/fs.h"

@ -10,12 +10,13 @@
 * University of Stuttgart. All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 2008-2018 University of Houston. All rights reserved.
 * Copyright (c) 2008-2020 University of Houston. All rights reserved.
 * Copyright (c) 2015 Los Alamos National Security, LLC. All rights
 * reserved.
 * Copyright (c) 2015-2018 Research Organization for Information Science
 * and Technology (RIST). All rights reserved.
 * Copyright (c) 2016-2017 IBM Corporation. All rights reserved.
 * Copyright (c) 2018 DataDirect Networks. All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow

@ -27,17 +28,18 @@

#include "mpi.h"
#include "opal/class/opal_list.h"
#include "opal/threads/mutex.h"
#include "opal/mca/base/base.h"
#include "ompi/mca/io/io.h"
#include "ompi/mca/fs/base/base.h"
#include "io_ompio.h"
#include "ompi/mca/common/ompio/common_ompio_request.h"
#include "ompi/mca/common/ompio/common_ompio_buffer.h"

#if OPAL_CUDA_SUPPORT
#include "ompi/mca/common/ompio/common_ompio_cuda.h"
#ifdef HAVE_IME_NATIVE_H
#include "ompi/mca/fs/ime/fs_ime.h"
#endif

int mca_io_ompio_cycle_buffer_size = OMPIO_DEFAULT_CYCLE_BUF_SIZE;
int mca_io_ompio_bytes_per_agg = OMPIO_PREALLOC_MAX_BUF_SIZE;
int mca_io_ompio_num_aggregators = -1;

@ -275,13 +277,13 @@ static int open_component(void)
static int close_component(void)
{
    mca_common_ompio_request_fini ();

#if OPAL_CUDA_SUPPORT
    mca_common_ompio_cuda_alloc_fini();
#endif

    mca_common_ompio_buffer_alloc_fini();
    OBJ_DESTRUCT(&mca_io_ompio_mutex);

#ifdef HAVE_IME_NATIVE_H
    mca_fs_ime_native_fini();
#endif

    return OMPI_SUCCESS;
}

@ -299,42 +301,11 @@ file_query(struct ompi_file_t *file,
           int *priority)
{
    mca_common_ompio_data_t *data;
    char *tmp;
    int rank;
    int is_lustre=0; //false

    tmp = strchr (file->f_filename, ':');
    rank = ompi_comm_rank ( file->f_comm);
    if (!tmp) {
        if ( 0 == rank) {
            if (LUSTRE == mca_fs_base_get_fstype(file->f_filename)) {
                is_lustre = 1; //true
            }
        }

        file->f_comm->c_coll->coll_bcast (&is_lustre,
                                          1,
                                          MPI_INT,
                                          0,
                                          file->f_comm,
                                          file->f_comm->c_coll->coll_bcast_module);
    }
    else {
        if (!strncasecmp(file->f_filename, "lustre:", 7) ) {
            is_lustre = 1;
        }
    }

    if (is_lustre) {
        *priority = 1;
    }
    else {
        *priority = priority_param;
    }
    *priority = priority_param;

    /* Allocate a space for this module to hang private data (e.g.,
       the OMPIO file handle) */

    data = calloc(1, sizeof(mca_common_ompio_data_t));
    if (NULL == data) {
        return NULL;

@ -343,7 +314,6 @@ file_query(struct ompi_file_t *file,
    *private_data = (struct mca_io_base_file_t*) data;

    /* All done */

    return &mca_io_ompio_module;
}

@ -9,7 +9,7 @@
 * University of Stuttgart. All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 2008-2018 University of Houston. All rights reserved.
 * Copyright (c) 2008-2019 University of Houston. All rights reserved.
 * Copyright (c) 2017-2018 Research Organization for Information Science
 * and Technology (RIST). All rights reserved.
 * $COPYRIGHT$

@ -137,12 +137,11 @@ int mca_io_ompio_file_read_all (ompi_file_t *fh,
    data = (mca_common_ompio_data_t *) fh->f_io_selected_data;

    OPAL_THREAD_LOCK(&fh->f_lock);
    ret = data->ompio_fh.
        f_fcoll->fcoll_file_read_all (&data->ompio_fh,
                                      buf,
                                      count,
                                      datatype,
                                      status);
    ret = mca_common_ompio_file_read_all (&data->ompio_fh,
                                          buf,
                                          count,
                                          datatype,
                                          status);
    OPAL_THREAD_UNLOCK(&fh->f_lock);
    if ( MPI_STATUS_IGNORE != status ) {
        size_t size;

@ -162,25 +161,15 @@ int mca_io_ompio_file_iread_all (ompi_file_t *fh,
{
    int ret = OMPI_SUCCESS;
    mca_common_ompio_data_t *data=NULL;
    ompio_file_t *fp=NULL;

    data = (mca_common_ompio_data_t *) fh->f_io_selected_data;
    fp = &data->ompio_fh;

    OPAL_THREAD_LOCK(&fh->f_lock);
    if ( NULL != fp->f_fcoll->fcoll_file_iread_all ) {
        ret = fp->f_fcoll->fcoll_file_iread_all (&data->ompio_fh,
                                                 buf,
                                                 count,
                                                 datatype,
                                                 request);
    }
    else {
        /* this fcoll component does not support non-blocking
           collective I/O operations. We fake it with
           individual non-blocking I/O operations. */
        ret = mca_common_ompio_file_iread ( fp, buf, count, datatype, request );
    }
    ret = mca_common_ompio_file_iread_all (&data->ompio_fh,
                                           buf,
                                           count,
                                           datatype,
                                           request);
    OPAL_THREAD_UNLOCK(&fh->f_lock);

    return ret;

@ -66,7 +66,8 @@ int mca_io_ompio_file_set_view (ompi_file_t *fp,
    mca_common_ompio_data_t *data;
    ompio_file_t *fh;

    if ( (strcmp(datarep, "native") && strcmp(datarep, "NATIVE"))) {
    if ( (strcmp(datarep, "native") && strcmp(datarep, "NATIVE") &&
          strcmp(datarep, "external32") && strcmp(datarep, "EXTERNAL32"))) {
        return MPI_ERR_UNSUPPORTED_DATAREP;
    }

@ -9,7 +9,7 @@
 * University of Stuttgart. All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 2008-2018 University of Houston. All rights reserved.
 * Copyright (c) 2008-2019 University of Houston. All rights reserved.
 * Copyright (c) 2015-2018 Research Organization for Information Science
 * and Technology (RIST). All rights reserved.
 * $COPYRIGHT$

@ -143,12 +143,11 @@ int mca_io_ompio_file_write_all (ompi_file_t *fh,
    data = (mca_common_ompio_data_t *) fh->f_io_selected_data;

    OPAL_THREAD_LOCK(&fh->f_lock);
    ret = data->ompio_fh.
        f_fcoll->fcoll_file_write_all (&data->ompio_fh,
                                       buf,
                                       count,
                                       datatype,
                                       status);
    ret = mca_common_ompio_file_write_all (&data->ompio_fh,
                                           buf,
                                           count,
                                           datatype,
                                           status);
    OPAL_THREAD_UNLOCK(&fh->f_lock);
    if ( MPI_STATUS_IGNORE != status ) {
        size_t size;

@ -186,25 +185,15 @@ int mca_io_ompio_file_iwrite_all (ompi_file_t *fh,
{
    int ret = OMPI_SUCCESS;
    mca_common_ompio_data_t *data=NULL;
    ompio_file_t *fp=NULL;

    data = (mca_common_ompio_data_t *) fh->f_io_selected_data;
    fp = &data->ompio_fh;

    OPAL_THREAD_LOCK(&fh->f_lock);
    if ( NULL != fp->f_fcoll->fcoll_file_iwrite_all ) {
        ret = fp->f_fcoll->fcoll_file_iwrite_all (&data->ompio_fh,
                                                  buf,
                                                  count,
                                                  datatype,
                                                  request);
    }
    else {
        /* this fcoll component does not support non-blocking
           collective I/O operations. We fake it with
           individual non-blocking I/O operations. */
        ret = mca_common_ompio_file_iwrite ( fp, buf, count, datatype, request );
    }
    ret = mca_common_ompio_file_iwrite_all (&data->ompio_fh,
                                            buf,
                                            count,
                                            datatype,
                                            request);
    OPAL_THREAD_UNLOCK(&fh->f_lock);

    return ret;

@ -10,7 +10,7 @@
 * Copyright (c) 2004-2005 The Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 2008-2011 University of Houston. All rights reserved.
 * Copyright (c) 2016-2017 IBM Corporation. All rights reserved.
 * Copyright (c) 2016-2019 IBM Corporation. All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow

@ -20,7 +20,6 @@
#include "ompi_config.h"

#include "mpi.h"
#include "opal/threads/mutex.h"
#include "ompi/mca/io/io.h"
#include "io_ompio.h"