Merge pull request #6697 from edgargabriel/topic/external32-v2
Topic/external32 v2
Этот коммит содержится в:
Коммит
88b3b24101
@ -25,6 +25,7 @@ headers = \
|
||||
common_ompio_aggregators.h \
|
||||
common_ompio_print_queue.h \
|
||||
common_ompio_request.h \
|
||||
common_ompio_buffer.h \
|
||||
common_ompio.h
|
||||
|
||||
sources = \
|
||||
@ -34,6 +35,7 @@ sources = \
|
||||
common_ompio_file_open.c \
|
||||
common_ompio_file_view.c \
|
||||
common_ompio_file_read.c \
|
||||
common_ompio_buffer.c \
|
||||
common_ompio_file_write.c
|
||||
|
||||
|
||||
@ -74,10 +76,6 @@ else
|
||||
ompidir = $(includedir)
|
||||
endif
|
||||
|
||||
if OPAL_cuda_support
|
||||
headers += common_ompio_cuda.h
|
||||
sources += common_ompio_cuda.c
|
||||
endif
|
||||
|
||||
# These two rules will sym link the "noinst" libtool library filename
|
||||
# to the installable libtool library filename in the case where we are
|
||||
|
@ -10,7 +10,7 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2008-2016 University of Houston. All rights reserved.
|
||||
* Copyright (c) 2008-2019 University of Houston. All rights reserved.
|
||||
* Copyright (c) 2018 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2018 DataDirect Networks. All rights reserved.
|
||||
@ -66,7 +66,7 @@
|
||||
#define OMPIO_LOCK_ENTIRE_FILE 0x00000080
|
||||
#define OMPIO_LOCK_NEVER 0x00000100
|
||||
#define OMPIO_LOCK_NOT_THIS_OP 0x00000200
|
||||
|
||||
#define OMPIO_DATAREP_NATIVE 0x00000400
|
||||
|
||||
#define OMPIO_ROOT 0
|
||||
|
||||
@ -157,7 +157,8 @@ struct ompio_file_t {
|
||||
ompi_communicator_t *f_comm;
|
||||
const char *f_filename;
|
||||
char *f_datarep;
|
||||
opal_convertor_t *f_convertor;
|
||||
opal_convertor_t *f_mem_convertor;
|
||||
opal_convertor_t *f_file_convertor;
|
||||
opal_info_t *f_info;
|
||||
int32_t f_flags;
|
||||
void *f_fs_ptr;
|
||||
@ -253,10 +254,16 @@ OMPI_DECLSPEC int mca_common_ompio_file_iwrite_at (ompio_file_t *fh, OMPI_MPI_O
|
||||
const void *buf, int count, struct ompi_datatype_t *datatype,
|
||||
ompi_request_t **request);
|
||||
|
||||
OMPI_DECLSPEC int mca_common_ompio_file_write_all (ompio_file_t *fh, const void *buf,
|
||||
int count, struct ompi_datatype_t *datatype,
|
||||
ompi_status_public_t *status);
|
||||
|
||||
OMPI_DECLSPEC int mca_common_ompio_file_write_at_all (ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE offset, const void *buf,
|
||||
int count, struct ompi_datatype_t *datatype,
|
||||
ompi_status_public_t *status);
|
||||
|
||||
OMPI_DECLSPEC int mca_common_ompio_file_iwrite_all (ompio_file_t *fp, const void *buf,
|
||||
int count, struct ompi_datatype_t *datatype, ompi_request_t **request);
|
||||
|
||||
OMPI_DECLSPEC int mca_common_ompio_file_iwrite_at_all (ompio_file_t *fp, OMPI_MPI_OFFSET_TYPE offset, const void *buf,
|
||||
int count, struct ompi_datatype_t *datatype, ompi_request_t **request);
|
||||
@ -282,10 +289,16 @@ OMPI_DECLSPEC int mca_common_ompio_file_iread_at (ompio_file_t *fh, OMPI_MPI_OFF
|
||||
void *buf, int count, struct ompi_datatype_t *datatype,
|
||||
ompi_request_t **request);
|
||||
|
||||
OMPI_DECLSPEC int mca_common_ompio_file_read_all (ompio_file_t *fh, void *buf, int count, struct ompi_datatype_t *datatype,
|
||||
ompi_status_public_t * status);
|
||||
|
||||
OMPI_DECLSPEC int mca_common_ompio_file_read_at_all (ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE offset,
|
||||
void *buf, int count, struct ompi_datatype_t *datatype,
|
||||
ompi_status_public_t * status);
|
||||
|
||||
OMPI_DECLSPEC int mca_common_ompio_file_iread_all (ompio_file_t *fp, void *buf, int count, struct ompi_datatype_t *datatype,
|
||||
ompi_request_t **request);
|
||||
|
||||
OMPI_DECLSPEC int mca_common_ompio_file_iread_at_all (ompio_file_t *fp, OMPI_MPI_OFFSET_TYPE offset,
|
||||
void *buf, int count, struct ompi_datatype_t *datatype,
|
||||
ompi_request_t **request);
|
||||
@ -318,6 +331,7 @@ OMPI_DECLSPEC int mca_common_ompio_decode_datatype (struct ompio_file_t *fh,
|
||||
int count,
|
||||
const void *buf,
|
||||
size_t *max_data,
|
||||
opal_convertor_t *convertor,
|
||||
struct iovec **iov,
|
||||
uint32_t *iov_count);
|
||||
|
||||
|
@ -9,7 +9,7 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2008-2018 University of Houston. All rights reserved.
|
||||
* Copyright (c) 2008-2019 University of Houston. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -27,18 +27,19 @@
|
||||
#include "opal/mca/allocator/allocator.h"
|
||||
#include "opal/mca/allocator/base/base.h"
|
||||
#include "common_ompio.h"
|
||||
#include "common_ompio_cuda.h"
|
||||
#include "common_ompio_buffer.h"
|
||||
|
||||
|
||||
static opal_mutex_t mca_common_ompio_cuda_mutex; /* lock for thread safety */
|
||||
static opal_mutex_t mca_common_ompio_buffer_mutex; /* lock for thread safety */
|
||||
static mca_allocator_base_component_t* mca_common_ompio_allocator_component=NULL;
|
||||
static mca_allocator_base_module_t* mca_common_ompio_allocator=NULL;
|
||||
|
||||
static opal_atomic_int32_t mca_common_ompio_cuda_init = 0;
|
||||
static opal_atomic_int32_t mca_common_ompio_buffer_init = 0;
|
||||
static int32_t mca_common_ompio_pagesize=4096;
|
||||
static void* mca_common_ompio_cuda_alloc_seg ( void *ctx, size_t *size );
|
||||
static void mca_common_ompio_cuda_free_seg ( void *ctx, void *buf );
|
||||
static void* mca_common_ompio_buffer_alloc_seg ( void *ctx, size_t *size );
|
||||
static void mca_common_ompio_buffer_free_seg ( void *ctx, void *buf );
|
||||
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
void mca_common_ompio_check_gpu_buf ( ompio_file_t *fh, const void *buf, int *is_gpu,
|
||||
int *is_managed)
|
||||
{
|
||||
@ -57,8 +58,9 @@ void mca_common_ompio_check_gpu_buf ( ompio_file_t *fh, const void *buf, int *is
|
||||
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
static void* mca_common_ompio_cuda_alloc_seg ( void*ctx, size_t *size )
|
||||
static void* mca_common_ompio_buffer_alloc_seg ( void*ctx, size_t *size )
|
||||
{
|
||||
char *buf=NULL;
|
||||
size_t realsize, numpages;
|
||||
@ -67,64 +69,67 @@ static void* mca_common_ompio_cuda_alloc_seg ( void*ctx, size_t *size )
|
||||
realsize = numpages * mca_common_ompio_pagesize;
|
||||
|
||||
buf = malloc ( realsize);
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
if ( NULL != buf ) {
|
||||
mca_common_cuda_register ( ( char *)buf, realsize, NULL );
|
||||
}
|
||||
#endif
|
||||
*size = realsize;
|
||||
return buf;
|
||||
}
|
||||
|
||||
static void mca_common_ompio_cuda_free_seg ( void *ctx, void *buf )
|
||||
static void mca_common_ompio_buffer_free_seg ( void *ctx, void *buf )
|
||||
{
|
||||
if ( NULL != buf ) {
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
mca_common_cuda_unregister ( (char *) buf, NULL );
|
||||
#endif
|
||||
free ( buf );
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
int mca_common_ompio_cuda_alloc_init ( void )
|
||||
int mca_common_ompio_buffer_alloc_init ( void )
|
||||
{
|
||||
bool thread_safe=true;
|
||||
|
||||
if(OPAL_THREAD_ADD_FETCH32(&mca_common_ompio_cuda_init, 1) > 1)
|
||||
if(OPAL_THREAD_ADD_FETCH32(&mca_common_ompio_buffer_init, 1) > 1)
|
||||
return OMPI_SUCCESS;
|
||||
|
||||
/* initialize static objects */
|
||||
OBJ_CONSTRUCT(&mca_common_ompio_cuda_mutex, opal_mutex_t);
|
||||
OBJ_CONSTRUCT(&mca_common_ompio_buffer_mutex, opal_mutex_t);
|
||||
|
||||
OPAL_THREAD_LOCK (&mca_common_ompio_cuda_mutex );
|
||||
OPAL_THREAD_LOCK (&mca_common_ompio_buffer_mutex );
|
||||
/* lookup name of the allocator to use */
|
||||
if(NULL == (mca_common_ompio_allocator_component = mca_allocator_component_lookup("basic"))) {
|
||||
OPAL_THREAD_UNLOCK(&mca_common_ompio_cuda_mutex);
|
||||
OPAL_THREAD_UNLOCK(&mca_common_ompio_buffer_mutex);
|
||||
return OMPI_ERR_BUFFER;
|
||||
}
|
||||
|
||||
/* create an instance of the allocator */
|
||||
mca_common_ompio_allocator = mca_common_ompio_allocator_component->allocator_init(thread_safe,
|
||||
mca_common_ompio_cuda_alloc_seg,
|
||||
mca_common_ompio_cuda_free_seg,
|
||||
mca_common_ompio_buffer_alloc_seg,
|
||||
mca_common_ompio_buffer_free_seg,
|
||||
NULL);
|
||||
if(NULL == mca_common_ompio_allocator) {
|
||||
OPAL_THREAD_UNLOCK(&mca_common_ompio_cuda_mutex);
|
||||
OPAL_THREAD_UNLOCK(&mca_common_ompio_buffer_mutex);
|
||||
return OMPI_ERR_BUFFER;
|
||||
}
|
||||
|
||||
// mca_common_ompio_pagesize = sysconf(_SC_PAGESIZE);
|
||||
mca_common_ompio_pagesize = opal_getpagesize();
|
||||
|
||||
OPAL_THREAD_UNLOCK(&mca_common_ompio_cuda_mutex);
|
||||
OPAL_THREAD_UNLOCK(&mca_common_ompio_buffer_mutex);
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
int mca_common_ompio_cuda_alloc_fini ( void )
|
||||
int mca_common_ompio_buffer_alloc_fini ( void )
|
||||
{
|
||||
if ( NULL != mca_common_ompio_allocator ) {
|
||||
OPAL_THREAD_LOCK (&mca_common_ompio_cuda_mutex);
|
||||
OPAL_THREAD_LOCK (&mca_common_ompio_buffer_mutex);
|
||||
mca_common_ompio_allocator->alc_finalize(mca_common_ompio_allocator);
|
||||
mca_common_ompio_allocator=NULL;
|
||||
OPAL_THREAD_UNLOCK (&mca_common_ompio_cuda_mutex);
|
||||
OBJ_DESTRUCT (&mca_common_ompio_cuda_mutex);
|
||||
OPAL_THREAD_UNLOCK (&mca_common_ompio_buffer_mutex);
|
||||
OBJ_DESTRUCT (&mca_common_ompio_buffer_mutex);
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
@ -134,31 +139,31 @@ void *mca_common_ompio_alloc_buf ( ompio_file_t *fh, size_t bufsize )
|
||||
{
|
||||
char *tmp=NULL;
|
||||
|
||||
if ( !mca_common_ompio_cuda_init ){
|
||||
mca_common_ompio_cuda_alloc_init ();
|
||||
if ( !mca_common_ompio_buffer_init ){
|
||||
mca_common_ompio_buffer_alloc_init ();
|
||||
}
|
||||
|
||||
OPAL_THREAD_LOCK (&mca_common_ompio_cuda_mutex);
|
||||
OPAL_THREAD_LOCK (&mca_common_ompio_buffer_mutex);
|
||||
tmp = mca_common_ompio_allocator->alc_alloc (mca_common_ompio_allocator,
|
||||
bufsize, 0 );
|
||||
OPAL_THREAD_UNLOCK (&mca_common_ompio_cuda_mutex);
|
||||
OPAL_THREAD_UNLOCK (&mca_common_ompio_buffer_mutex);
|
||||
return tmp;
|
||||
}
|
||||
|
||||
void mca_common_ompio_release_buf ( ompio_file_t *fh, void *buf )
|
||||
{
|
||||
|
||||
if ( !mca_common_ompio_cuda_init ){
|
||||
if ( !mca_common_ompio_buffer_init ){
|
||||
/* Should not happen. You can not release a buf without
|
||||
** having it allocated first.
|
||||
*/
|
||||
opal_output (1, "error in mca_common_ompio_release_buf: allocator not initialized\n");
|
||||
}
|
||||
|
||||
OPAL_THREAD_LOCK (&mca_common_ompio_cuda_mutex);
|
||||
OPAL_THREAD_LOCK (&mca_common_ompio_buffer_mutex);
|
||||
mca_common_ompio_allocator->alc_free (mca_common_ompio_allocator,
|
||||
buf);
|
||||
OPAL_THREAD_UNLOCK (&mca_common_ompio_cuda_mutex);
|
||||
OPAL_THREAD_UNLOCK (&mca_common_ompio_buffer_mutex);
|
||||
|
||||
return;
|
||||
}
|
@ -10,7 +10,7 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2008-2018 University of Houston. All rights reserved.
|
||||
* Copyright (c) 2008-2019 University of Houston. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -22,9 +22,9 @@
|
||||
#define MCA_COMMON_OMPIO_CUDA_H
|
||||
|
||||
|
||||
#define OMPIO_CUDA_PREPARE_BUF(_fh,_buf,_count,_datatype,_tbuf,_convertor,_max_data,_decoded_iov,_iov_count){ \
|
||||
opal_convertor_clone ( _fh->f_convertor, _convertor, 0); \
|
||||
opal_convertor_prepare_for_send ( _convertor, &(_datatype->super), _count, _buf );\
|
||||
#define OMPIO_PREPARE_BUF(_fh,_buf,_count,_datatype,_tbuf,_convertor,_max_data,_decoded_iov,_iov_count){ \
|
||||
OBJ_CONSTRUCT( _convertor, opal_convertor_t); \
|
||||
opal_convertor_copy_and_prepare_for_send ( _fh->f_file_convertor, &(_datatype->super), _count, _buf, CONVERTOR_SEND_CONVERSION, _convertor ); \
|
||||
opal_convertor_get_packed_size( _convertor, &_max_data ); \
|
||||
_tbuf = mca_common_ompio_alloc_buf (_fh, _max_data); \
|
||||
if ( NULL == _tbuf ) { \
|
||||
@ -40,11 +40,30 @@
|
||||
_decoded_iov->iov_len = _max_data; \
|
||||
_iov_count=1;}
|
||||
|
||||
#define OMPIO_PREPARE_READ_BUF(_fh,_buf,_count,_datatype,_tbuf,_convertor,_max_data,_decoded_iov,_iov_count){ \
|
||||
OBJ_CONSTRUCT( _convertor, opal_convertor_t); \
|
||||
opal_convertor_copy_and_prepare_for_recv ( _fh->f_file_convertor, &(_datatype->super), _count, _buf, 0, _convertor ); \
|
||||
opal_convertor_get_packed_size( _convertor, &_max_data ); \
|
||||
_tbuf = mca_common_ompio_alloc_buf (_fh, _max_data); \
|
||||
if ( NULL == _tbuf ) { \
|
||||
opal_output(1, "common_ompio: error allocating memory\n"); \
|
||||
return OMPI_ERR_OUT_OF_RESOURCE; \
|
||||
} \
|
||||
_decoded_iov = (struct iovec *) malloc ( sizeof ( struct iovec )); \
|
||||
if ( NULL == _decoded_iov ) { \
|
||||
opal_output(1, "common_ompio: could not allocate memory.\n"); \
|
||||
return OMPI_ERR_OUT_OF_RESOURCE; \
|
||||
} \
|
||||
_decoded_iov->iov_base = _tbuf; \
|
||||
_decoded_iov->iov_len = _max_data; \
|
||||
_iov_count=1;}
|
||||
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
void mca_common_ompio_check_gpu_buf ( ompio_file_t *fh, const void *buf,
|
||||
int *is_gpu, int *is_managed);
|
||||
int mca_common_ompio_cuda_alloc_init ( void );
|
||||
int mca_common_ompio_cuda_alloc_fini ( void );
|
||||
#endif
|
||||
int mca_common_ompio_buffer_alloc_init ( void );
|
||||
int mca_common_ompio_buffer_alloc_fini ( void );
|
||||
|
||||
|
||||
void* mca_common_ompio_alloc_buf ( ompio_file_t *fh, size_t bufsize);
|
@ -9,7 +9,7 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2008-2018 University of Houston. All rights reserved.
|
||||
* Copyright (c) 2008-2019 University of Houston. All rights reserved.
|
||||
* Copyright (c) 2015-2018 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2016 Cisco Systems, Inc. All rights reserved.
|
||||
@ -75,7 +75,8 @@ int mca_common_ompio_file_open (ompi_communicator_t *comm,
|
||||
ompio_fh->f_rank = ompi_comm_rank (comm);
|
||||
ompio_fh->f_size = ompi_comm_size (comm);
|
||||
remote_arch = opal_local_arch;
|
||||
ompio_fh->f_convertor = opal_convertor_create (remote_arch, 0);
|
||||
ompio_fh->f_mem_convertor = opal_convertor_create (remote_arch, 0);
|
||||
ompio_fh->f_file_convertor = opal_convertor_create (remote_arch, 0);
|
||||
|
||||
if ( true == use_sharedfp ) {
|
||||
ret = ompi_comm_dup (comm, &ompio_fh->f_comm);
|
||||
@ -323,17 +324,23 @@ int mca_common_ompio_file_close (ompio_file_t *ompio_fh)
|
||||
ompio_fh->f_decoded_iov = NULL;
|
||||
}
|
||||
|
||||
if (NULL != ompio_fh->f_convertor) {
|
||||
free (ompio_fh->f_convertor);
|
||||
ompio_fh->f_convertor = NULL;
|
||||
if (NULL != ompio_fh->f_mem_convertor) {
|
||||
opal_convertor_cleanup (ompio_fh->f_mem_convertor);
|
||||
free (ompio_fh->f_mem_convertor);
|
||||
ompio_fh->f_mem_convertor = NULL;
|
||||
}
|
||||
|
||||
if (NULL != ompio_fh->f_file_convertor) {
|
||||
opal_convertor_cleanup (ompio_fh->f_file_convertor);
|
||||
free (ompio_fh->f_file_convertor);
|
||||
ompio_fh->f_file_convertor = NULL;
|
||||
}
|
||||
|
||||
if (NULL != ompio_fh->f_datarep) {
|
||||
free (ompio_fh->f_datarep);
|
||||
ompio_fh->f_datarep = NULL;
|
||||
}
|
||||
|
||||
|
||||
if ( NULL != ompio_fh->f_coll_write_time ) {
|
||||
free ( ompio_fh->f_coll_write_time );
|
||||
ompio_fh->f_coll_write_time = NULL;
|
||||
@ -384,6 +391,13 @@ int mca_common_ompio_file_get_position (ompio_file_t *fh,
|
||||
{
|
||||
OMPI_MPI_OFFSET_TYPE off;
|
||||
|
||||
if ( 0 == fh->f_view_extent ||
|
||||
0 == fh->f_view_size ||
|
||||
0 == fh->f_etype_size ) {
|
||||
/* not sure whether we should raise an error here */
|
||||
*offset = 0;
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
/* No. of copies of the entire file view */
|
||||
off = (fh->f_offset - fh->f_disp)/fh->f_view_extent;
|
||||
|
||||
@ -557,6 +571,7 @@ int mca_common_ompio_decode_datatype (struct ompio_file_t *fh,
|
||||
int count,
|
||||
const void *buf,
|
||||
size_t *max_data,
|
||||
opal_convertor_t *conv,
|
||||
struct iovec **iov,
|
||||
uint32_t *iovec_count)
|
||||
{
|
||||
@ -571,7 +586,7 @@ int mca_common_ompio_decode_datatype (struct ompio_file_t *fh,
|
||||
size_t temp_data;
|
||||
|
||||
|
||||
opal_convertor_clone (fh->f_convertor, &convertor, 0);
|
||||
opal_convertor_clone (conv, &convertor, 0);
|
||||
|
||||
if (OMPI_SUCCESS != opal_convertor_prepare_for_send (&convertor,
|
||||
&(datatype->super),
|
||||
@ -667,7 +682,8 @@ int mca_common_ompio_decode_datatype (struct ompio_file_t *fh,
|
||||
}
|
||||
|
||||
free (temp_iov);
|
||||
|
||||
opal_convertor_cleanup (&convertor);
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -33,12 +33,10 @@
|
||||
|
||||
#include "common_ompio.h"
|
||||
#include "common_ompio_request.h"
|
||||
#include "common_ompio_buffer.h"
|
||||
#include <unistd.h>
|
||||
#include <math.h>
|
||||
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
#include "common_ompio_cuda.h"
|
||||
#endif
|
||||
|
||||
/* Read and write routines are split into two interfaces.
|
||||
** The
|
||||
@ -90,35 +88,44 @@ int mca_common_ompio_file_read (ompio_file_t *fh,
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
bool need_to_copy = false;
|
||||
opal_convertor_t convertor;
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
int is_gpu, is_managed;
|
||||
opal_convertor_t convertor;
|
||||
mca_common_ompio_check_gpu_buf ( fh, buf, &is_gpu, &is_managed);
|
||||
if ( is_gpu && !is_managed ) {
|
||||
need_to_copy = true;
|
||||
}
|
||||
#endif
|
||||
|
||||
if ( !( fh->f_flags & OMPIO_DATAREP_NATIVE ) &&
|
||||
!(datatype == &ompi_mpi_byte.dt ||
|
||||
datatype == &ompi_mpi_char.dt )) {
|
||||
/* only need to copy if any of these conditions are given:
|
||||
1. buffer is an unmanaged CUDA buffer (checked above).
|
||||
2. Datarepresentation is anything other than 'native' and
|
||||
3. datatype is not byte or char (i.e it does require some actual
|
||||
work to be done e.g. for external32.
|
||||
*/
|
||||
need_to_copy = true;
|
||||
}
|
||||
|
||||
if ( need_to_copy ) {
|
||||
char *tbuf=NULL;
|
||||
|
||||
OMPIO_CUDA_PREPARE_BUF(fh,buf,count,datatype,tbuf,&convertor,max_data,decoded_iov,iov_count);
|
||||
|
||||
}
|
||||
OMPIO_PREPARE_READ_BUF(fh,buf,count,datatype,tbuf,&convertor,max_data,decoded_iov,iov_count);
|
||||
}
|
||||
else {
|
||||
mca_common_ompio_decode_datatype (fh,
|
||||
datatype,
|
||||
count,
|
||||
buf,
|
||||
&max_data,
|
||||
fh->f_mem_convertor,
|
||||
&decoded_iov,
|
||||
&iov_count);
|
||||
}
|
||||
#else
|
||||
mca_common_ompio_decode_datatype (fh,
|
||||
datatype,
|
||||
count,
|
||||
buf,
|
||||
&max_data,
|
||||
&decoded_iov,
|
||||
&iov_count);
|
||||
#endif
|
||||
|
||||
if ( 0 < max_data && 0 == fh->f_iov_count ) {
|
||||
if ( MPI_STATUS_IGNORE != status ) {
|
||||
status->_ucount = 0;
|
||||
@ -170,15 +177,14 @@ int mca_common_ompio_file_read (ompio_file_t *fh,
|
||||
}
|
||||
}
|
||||
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
if ( is_gpu && !is_managed ) {
|
||||
if ( need_to_copy ) {
|
||||
size_t pos=0;
|
||||
|
||||
opal_convertor_unpack (&convertor, decoded_iov, &iov_count, &pos );
|
||||
opal_convertor_cleanup (&convertor);
|
||||
mca_common_ompio_release_buf (fh, decoded_iov->iov_base);
|
||||
}
|
||||
#endif
|
||||
|
||||
if (NULL != decoded_iov) {
|
||||
free (decoded_iov);
|
||||
decoded_iov = NULL;
|
||||
@ -257,13 +263,32 @@ int mca_common_ompio_file_iread (ompio_file_t *fh,
|
||||
int i = 0; /* index into the decoded iovec of the buffer */
|
||||
int j = 0; /* index into the file vie iovec */
|
||||
|
||||
bool need_to_copy = false;
|
||||
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
int is_gpu, is_managed;
|
||||
mca_common_ompio_check_gpu_buf ( fh, buf, &is_gpu, &is_managed);
|
||||
if ( is_gpu && !is_managed ) {
|
||||
need_to_copy = true;
|
||||
}
|
||||
#endif
|
||||
|
||||
if ( !( fh->f_flags & OMPIO_DATAREP_NATIVE ) &&
|
||||
!(datatype == &ompi_mpi_byte.dt ||
|
||||
datatype == &ompi_mpi_char.dt )) {
|
||||
/* only need to copy if any of these conditions are given:
|
||||
1. buffer is an unmanaged CUDA buffer (checked above).
|
||||
2. Datarepresentation is anything other than 'native' and
|
||||
3. datatype is not byte or char (i.e it does require some actual
|
||||
work to be done e.g. for external32.
|
||||
*/
|
||||
need_to_copy = true;
|
||||
}
|
||||
|
||||
if ( need_to_copy ) {
|
||||
char *tbuf=NULL;
|
||||
|
||||
OMPIO_CUDA_PREPARE_BUF(fh,buf,count,datatype,tbuf,&ompio_req->req_convertor,max_data,decoded_iov,iov_count);
|
||||
OMPIO_PREPARE_READ_BUF(fh,buf,count,datatype,tbuf,&ompio_req->req_convertor,max_data,decoded_iov,iov_count);
|
||||
|
||||
ompio_req->req_tbuf = tbuf;
|
||||
ompio_req->req_size = max_data;
|
||||
@ -274,18 +299,11 @@ int mca_common_ompio_file_iread (ompio_file_t *fh,
|
||||
count,
|
||||
buf,
|
||||
&max_data,
|
||||
fh->f_mem_convertor,
|
||||
&decoded_iov,
|
||||
&iov_count);
|
||||
}
|
||||
#else
|
||||
mca_common_ompio_decode_datatype (fh,
|
||||
datatype,
|
||||
count,
|
||||
buf,
|
||||
&max_data,
|
||||
&decoded_iov,
|
||||
&iov_count);
|
||||
#endif
|
||||
|
||||
if ( 0 < max_data && 0 == fh->f_iov_count ) {
|
||||
ompio_req->req_ompi.req_status.MPI_ERROR = OMPI_SUCCESS;
|
||||
ompio_req->req_ompi.req_status._ucount = 0;
|
||||
@ -376,6 +394,62 @@ int mca_common_ompio_file_iread_at (ompio_file_t *fh,
|
||||
|
||||
|
||||
/* Infrastructure for collective operations */
|
||||
int mca_common_ompio_file_read_all (ompio_file_t *fh,
|
||||
void *buf,
|
||||
int count,
|
||||
struct ompi_datatype_t *datatype,
|
||||
ompi_status_public_t * status)
|
||||
{
|
||||
int ret = OMPI_SUCCESS;
|
||||
|
||||
|
||||
if ( !( fh->f_flags & OMPIO_DATAREP_NATIVE ) &&
|
||||
!(datatype == &ompi_mpi_byte.dt ||
|
||||
datatype == &ompi_mpi_char.dt )) {
|
||||
/* No need to check for GPU buffer for collective I/O.
|
||||
Most algorithms copy data from aggregators, and send/recv
|
||||
to/from GPU buffers works if ompi was compiled was GPU support.
|
||||
|
||||
If the individual fcoll component is used: there are no aggregators
|
||||
in that concept. However, since they call common_ompio_file_write,
|
||||
CUDA buffers are handled by that routine.
|
||||
|
||||
Thus, we only check for
|
||||
1. Datarepresentation is anything other than 'native' and
|
||||
2. datatype is not byte or char (i.e it does require some actual
|
||||
work to be done e.g. for external32.
|
||||
*/
|
||||
size_t pos=0, max_data=0;
|
||||
char *tbuf=NULL;
|
||||
opal_convertor_t convertor;
|
||||
struct iovec *decoded_iov = NULL;
|
||||
uint32_t iov_count = 0;
|
||||
|
||||
OMPIO_PREPARE_READ_BUF(fh,buf,count,datatype,tbuf,&convertor,max_data,decoded_iov,iov_count);
|
||||
ret = fh->f_fcoll->fcoll_file_read_all (fh,
|
||||
decoded_iov->iov_base,
|
||||
decoded_iov->iov_len,
|
||||
MPI_BYTE,
|
||||
status);
|
||||
opal_convertor_unpack (&convertor, decoded_iov, &iov_count, &pos );
|
||||
|
||||
opal_convertor_cleanup (&convertor);
|
||||
mca_common_ompio_release_buf (fh, decoded_iov->iov_base);
|
||||
if (NULL != decoded_iov) {
|
||||
free (decoded_iov);
|
||||
decoded_iov = NULL;
|
||||
}
|
||||
}
|
||||
else {
|
||||
ret = fh->f_fcoll->fcoll_file_read_all (fh,
|
||||
buf,
|
||||
count,
|
||||
datatype,
|
||||
status);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
int mca_common_ompio_file_read_at_all (ompio_file_t *fh,
|
||||
OMPI_MPI_OFFSET_TYPE offset,
|
||||
void *buf,
|
||||
@ -388,16 +462,41 @@ int mca_common_ompio_file_read_at_all (ompio_file_t *fh,
|
||||
mca_common_ompio_file_get_position (fh, &prev_offset );
|
||||
|
||||
mca_common_ompio_set_explicit_offset (fh, offset);
|
||||
ret = fh->f_fcoll->fcoll_file_read_all (fh,
|
||||
buf,
|
||||
count,
|
||||
datatype,
|
||||
status);
|
||||
|
||||
ret = mca_common_ompio_file_read_all (fh,
|
||||
buf,
|
||||
count,
|
||||
datatype,
|
||||
status);
|
||||
|
||||
mca_common_ompio_set_explicit_offset (fh, prev_offset);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int mca_common_ompio_file_iread_all (ompio_file_t *fp,
|
||||
void *buf,
|
||||
int count,
|
||||
struct ompi_datatype_t *datatype,
|
||||
ompi_request_t **request)
|
||||
{
|
||||
int ret = OMPI_SUCCESS;
|
||||
|
||||
if ( NULL != fp->f_fcoll->fcoll_file_iread_all ) {
|
||||
ret = fp->f_fcoll->fcoll_file_iread_all (fp,
|
||||
buf,
|
||||
count,
|
||||
datatype,
|
||||
request);
|
||||
}
|
||||
else {
|
||||
/* this fcoll component does not support non-blocking
|
||||
collective I/O operations. WE fake it with
|
||||
individual non-blocking I/O operations. */
|
||||
ret = mca_common_ompio_file_iread ( fp, buf, count, datatype, request );
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int mca_common_ompio_file_iread_at_all (ompio_file_t *fp,
|
||||
OMPI_MPI_OFFSET_TYPE offset,
|
||||
void *buf,
|
||||
@ -411,25 +510,17 @@ int mca_common_ompio_file_iread_at_all (ompio_file_t *fp,
|
||||
mca_common_ompio_file_get_position (fp, &prev_offset );
|
||||
mca_common_ompio_set_explicit_offset (fp, offset);
|
||||
|
||||
if ( NULL != fp->f_fcoll->fcoll_file_iread_all ) {
|
||||
ret = fp->f_fcoll->fcoll_file_iread_all (fp,
|
||||
buf,
|
||||
count,
|
||||
datatype,
|
||||
request);
|
||||
}
|
||||
else {
|
||||
/* this fcoll component does not support non-blocking
|
||||
collective I/O operations. WE fake it with
|
||||
individual non-blocking I/O operations. */
|
||||
ret = mca_common_ompio_file_iread ( fp, buf, count, datatype, request );
|
||||
}
|
||||
|
||||
|
||||
ret = mca_common_ompio_file_iread_all (fp,
|
||||
buf,
|
||||
count,
|
||||
datatype,
|
||||
request);
|
||||
|
||||
mca_common_ompio_set_explicit_offset (fp, prev_offset);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
int mca_common_ompio_set_explicit_offset (ompio_file_t *fh,
|
||||
OMPI_MPI_OFFSET_TYPE offset)
|
||||
{
|
||||
|
@ -9,7 +9,7 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2008-2018 University of Houston. All rights reserved.
|
||||
* Copyright (c) 2008-2019 University of Houston. All rights reserved.
|
||||
* Copyright (c) 2017-2018 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2017 IBM Corporation. All rights reserved.
|
||||
@ -91,6 +91,12 @@ int mca_common_ompio_set_view (ompio_file_t *fh,
|
||||
fh->f_datarep = NULL;
|
||||
}
|
||||
|
||||
if (NULL != fh->f_file_convertor) {
|
||||
opal_convertor_cleanup (fh->f_file_convertor);
|
||||
free (fh->f_file_convertor);
|
||||
fh->f_file_convertor = NULL;
|
||||
}
|
||||
|
||||
/* Reset the flags first */
|
||||
if ( fh->f_flags & OMPIO_CONTIGUOUS_FVIEW ) {
|
||||
fh->f_flags &= ~OMPIO_CONTIGUOUS_FVIEW;
|
||||
@ -98,9 +104,24 @@ int mca_common_ompio_set_view (ompio_file_t *fh,
|
||||
if ( fh->f_flags & OMPIO_UNIFORM_FVIEW ) {
|
||||
fh->f_flags &= ~OMPIO_UNIFORM_FVIEW;
|
||||
}
|
||||
if ( fh->f_flags & OMPIO_DATAREP_NATIVE ) {
|
||||
fh->f_flags &= ~OMPIO_DATAREP_NATIVE;
|
||||
}
|
||||
fh->f_datarep = strdup (datarep);
|
||||
datatype_duplicate (filetype, &fh->f_orig_filetype );
|
||||
|
||||
if ( !(strcmp(datarep, "external32") && strcmp(datarep, "EXTERNAL32"))) {
|
||||
fh->f_file_convertor = malloc ( sizeof(struct opal_convertor_t) );
|
||||
if ( NULL == fh->f_file_convertor ) {
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
opal_convertor_clone (ompi_mpi_external32_convertor, fh->f_file_convertor, 0);
|
||||
}
|
||||
else {
|
||||
fh->f_file_convertor = opal_convertor_create (opal_local_arch, 0);
|
||||
fh->f_flags |= OMPIO_DATAREP_NATIVE;
|
||||
}
|
||||
|
||||
datatype_duplicate (filetype, &fh->f_orig_filetype );
|
||||
opal_datatype_get_extent(&filetype->super, &lb, &ftype_extent);
|
||||
opal_datatype_type_size (&filetype->super, &ftype_size);
|
||||
|
||||
@ -129,6 +150,7 @@ int mca_common_ompio_set_view (ompio_file_t *fh,
|
||||
1,
|
||||
NULL,
|
||||
&max_data,
|
||||
fh->f_file_convertor,
|
||||
&fh->f_decoded_iov,
|
||||
&fh->f_iov_count);
|
||||
|
||||
|
@ -31,13 +31,10 @@
|
||||
|
||||
#include "common_ompio.h"
|
||||
#include "common_ompio_request.h"
|
||||
#include "common_ompio_buffer.h"
|
||||
#include <unistd.h>
|
||||
#include <math.h>
|
||||
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
#include "common_ompio_cuda.h"
|
||||
#endif
|
||||
|
||||
int mca_common_ompio_file_write (ompio_file_t *fh,
|
||||
const void *buf,
|
||||
int count,
|
||||
@ -72,16 +69,34 @@ int mca_common_ompio_file_write (ompio_file_t *fh,
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool need_to_copy = false;
|
||||
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
int is_gpu, is_managed;
|
||||
mca_common_ompio_check_gpu_buf ( fh, buf, &is_gpu, &is_managed);
|
||||
if ( is_gpu && !is_managed ) {
|
||||
need_to_copy = true;
|
||||
}
|
||||
#endif
|
||||
|
||||
if ( !( fh->f_flags & OMPIO_DATAREP_NATIVE ) &&
|
||||
!(datatype == &ompi_mpi_byte.dt ||
|
||||
datatype == &ompi_mpi_char.dt )) {
|
||||
/* only need to copy if any of these conditions are given:
|
||||
1. buffer is an unmanaged CUDA buffer (checked above).
|
||||
2. Datarepresentation is anything other than 'native' and
|
||||
3. datatype is not byte or char (i.e it does require some actual
|
||||
work to be done e.g. for external32.
|
||||
*/
|
||||
need_to_copy = true;
|
||||
}
|
||||
|
||||
if ( need_to_copy ) {
|
||||
size_t pos=0;
|
||||
char *tbuf=NULL;
|
||||
opal_convertor_t convertor;
|
||||
|
||||
OMPIO_CUDA_PREPARE_BUF(fh,buf,count,datatype,tbuf,&convertor,max_data,decoded_iov,iov_count);
|
||||
|
||||
OMPIO_PREPARE_BUF(fh,buf,count,datatype,tbuf,&convertor,max_data,decoded_iov,iov_count);
|
||||
opal_convertor_pack (&convertor, decoded_iov, &iov_count, &pos );
|
||||
opal_convertor_cleanup ( &convertor);
|
||||
}
|
||||
@ -91,18 +106,11 @@ int mca_common_ompio_file_write (ompio_file_t *fh,
|
||||
count,
|
||||
buf,
|
||||
&max_data,
|
||||
fh->f_mem_convertor,
|
||||
&decoded_iov,
|
||||
&iov_count);
|
||||
}
|
||||
#else
|
||||
mca_common_ompio_decode_datatype (fh,
|
||||
datatype,
|
||||
count,
|
||||
buf,
|
||||
&max_data,
|
||||
&decoded_iov,
|
||||
&iov_count);
|
||||
#endif
|
||||
|
||||
if ( 0 < max_data && 0 == fh->f_iov_count ) {
|
||||
if ( MPI_STATUS_IGNORE != status ) {
|
||||
status->_ucount = 0;
|
||||
@ -151,11 +159,11 @@ int mca_common_ompio_file_write (ompio_file_t *fh,
|
||||
fh->f_io_array = NULL;
|
||||
}
|
||||
}
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
if ( is_gpu && !is_managed ) {
|
||||
|
||||
if ( need_to_copy ) {
|
||||
mca_common_ompio_release_buf (fh, decoded_iov->iov_base);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
if (NULL != decoded_iov) {
|
||||
free (decoded_iov);
|
||||
@ -230,16 +238,34 @@ int mca_common_ompio_file_iwrite (ompio_file_t *fh,
|
||||
int i = 0; /* index into the decoded iovec of the buffer */
|
||||
int j = 0; /* index into the file vie iovec */
|
||||
|
||||
bool need_to_copy = false;
|
||||
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
int is_gpu, is_managed;
|
||||
mca_common_ompio_check_gpu_buf ( fh, buf, &is_gpu, &is_managed);
|
||||
if ( is_gpu && !is_managed ) {
|
||||
need_to_copy = true;
|
||||
}
|
||||
#endif
|
||||
|
||||
if ( !( fh->f_flags & OMPIO_DATAREP_NATIVE ) &&
|
||||
!(datatype == &ompi_mpi_byte.dt ||
|
||||
datatype == &ompi_mpi_char.dt )) {
|
||||
/* only need to copy if any of these conditions are given:
|
||||
1. buffer is an unmanaged CUDA buffer (checked above).
|
||||
2. Datarepresentation is anything other than 'native' and
|
||||
3. datatype is not byte or char (i.e it does require some actual
|
||||
work to be done e.g. for external32.
|
||||
*/
|
||||
need_to_copy = true;
|
||||
}
|
||||
|
||||
if ( need_to_copy ) {
|
||||
size_t pos=0;
|
||||
char *tbuf=NULL;
|
||||
opal_convertor_t convertor;
|
||||
|
||||
OMPIO_CUDA_PREPARE_BUF(fh,buf,count,datatype,tbuf,&convertor,max_data,decoded_iov,iov_count);
|
||||
|
||||
OMPIO_PREPARE_BUF(fh,buf,count,datatype,tbuf,&convertor,max_data,decoded_iov,iov_count);
|
||||
opal_convertor_pack (&convertor, decoded_iov, &iov_count, &pos );
|
||||
opal_convertor_cleanup (&convertor);
|
||||
|
||||
@ -252,18 +278,11 @@ int mca_common_ompio_file_iwrite (ompio_file_t *fh,
|
||||
count,
|
||||
buf,
|
||||
&max_data,
|
||||
fh->f_mem_convertor,
|
||||
&decoded_iov,
|
||||
&iov_count);
|
||||
}
|
||||
#else
|
||||
mca_common_ompio_decode_datatype (fh,
|
||||
datatype,
|
||||
count,
|
||||
buf,
|
||||
&max_data,
|
||||
&decoded_iov,
|
||||
&iov_count);
|
||||
#endif
|
||||
|
||||
if ( 0 < max_data && 0 == fh->f_iov_count ) {
|
||||
ompio_req->req_ompi.req_status.MPI_ERROR = OMPI_SUCCESS;
|
||||
ompio_req->req_ompi.req_status._ucount = 0;
|
||||
@ -352,6 +371,62 @@ int mca_common_ompio_file_iwrite_at (ompio_file_t *fh,
|
||||
|
||||
/* Collective operations */
|
||||
/******************************************************************/
|
||||
int mca_common_ompio_file_write_all (ompio_file_t *fh,
|
||||
const void *buf,
|
||||
int count,
|
||||
struct ompi_datatype_t *datatype,
|
||||
ompi_status_public_t *status)
|
||||
{
|
||||
int ret = OMPI_SUCCESS;
|
||||
|
||||
if ( !( fh->f_flags & OMPIO_DATAREP_NATIVE ) &&
|
||||
!(datatype == &ompi_mpi_byte.dt ||
|
||||
datatype == &ompi_mpi_char.dt )) {
|
||||
/* No need to check for GPU buffer for collective I/O.
|
||||
Most algorithms first copy data to aggregators, and send/recv
|
||||
to/from GPU buffers works if ompi was compiled was GPU support.
|
||||
|
||||
If the individual fcoll component is used: there are no aggregators
|
||||
in that concept. However, since they call common_ompio_file_write,
|
||||
CUDA buffers are handled by that routine.
|
||||
|
||||
Thus, we only check for
|
||||
1. Datarepresentation is anything other than 'native' and
|
||||
2. datatype is not byte or char (i.e it does require some actual
|
||||
work to be done e.g. for external32.
|
||||
*/
|
||||
size_t pos=0, max_data=0;
|
||||
char *tbuf=NULL;
|
||||
opal_convertor_t convertor;
|
||||
struct iovec *decoded_iov = NULL;
|
||||
uint32_t iov_count = 0;
|
||||
|
||||
OMPIO_PREPARE_BUF(fh,buf,count,datatype,tbuf,&convertor,max_data,decoded_iov,iov_count);
|
||||
opal_convertor_pack (&convertor, decoded_iov, &iov_count, &pos );
|
||||
opal_convertor_cleanup ( &convertor);
|
||||
|
||||
ret = fh->f_fcoll->fcoll_file_write_all (fh,
|
||||
decoded_iov->iov_base,
|
||||
decoded_iov->iov_len,
|
||||
MPI_BYTE,
|
||||
status);
|
||||
|
||||
|
||||
mca_common_ompio_release_buf (fh, decoded_iov->iov_base);
|
||||
if (NULL != decoded_iov) {
|
||||
free (decoded_iov);
|
||||
decoded_iov = NULL;
|
||||
}
|
||||
}
|
||||
else {
|
||||
ret = fh->f_fcoll->fcoll_file_write_all (fh,
|
||||
buf,
|
||||
count,
|
||||
datatype,
|
||||
status);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
int mca_common_ompio_file_write_at_all (ompio_file_t *fh,
|
||||
OMPI_MPI_OFFSET_TYPE offset,
|
||||
@ -365,30 +440,23 @@ int mca_common_ompio_file_write_at_all (ompio_file_t *fh,
|
||||
mca_common_ompio_file_get_position (fh, &prev_offset );
|
||||
|
||||
mca_common_ompio_set_explicit_offset (fh, offset);
|
||||
ret = fh->f_fcoll->fcoll_file_write_all (fh,
|
||||
buf,
|
||||
count,
|
||||
datatype,
|
||||
status);
|
||||
|
||||
ret = mca_common_ompio_file_write_all (fh,
|
||||
buf,
|
||||
count,
|
||||
datatype,
|
||||
status);
|
||||
|
||||
mca_common_ompio_set_explicit_offset (fh, prev_offset);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int mca_common_ompio_file_iwrite_at_all (ompio_file_t *fp,
|
||||
OMPI_MPI_OFFSET_TYPE offset,
|
||||
const void *buf,
|
||||
int count,
|
||||
struct ompi_datatype_t *datatype,
|
||||
ompi_request_t **request)
|
||||
int mca_common_ompio_file_iwrite_all (ompio_file_t *fp,
|
||||
const void *buf,
|
||||
int count,
|
||||
struct ompi_datatype_t *datatype,
|
||||
ompi_request_t **request)
|
||||
{
|
||||
|
||||
int ret = OMPI_SUCCESS;
|
||||
OMPI_MPI_OFFSET_TYPE prev_offset;
|
||||
|
||||
mca_common_ompio_file_get_position (fp, &prev_offset );
|
||||
|
||||
mca_common_ompio_set_explicit_offset (fp, offset);
|
||||
|
||||
if ( NULL != fp->f_fcoll->fcoll_file_iwrite_all ) {
|
||||
ret = fp->f_fcoll->fcoll_file_iwrite_all (fp,
|
||||
@ -404,11 +472,32 @@ int mca_common_ompio_file_iwrite_at_all (ompio_file_t *fp,
|
||||
ret = mca_common_ompio_file_iwrite ( fp, buf, count, datatype, request );
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
int mca_common_ompio_file_iwrite_at_all (ompio_file_t *fp,
|
||||
OMPI_MPI_OFFSET_TYPE offset,
|
||||
const void *buf,
|
||||
int count,
|
||||
struct ompi_datatype_t *datatype,
|
||||
ompi_request_t **request)
|
||||
{
|
||||
|
||||
int ret = OMPI_SUCCESS;
|
||||
OMPI_MPI_OFFSET_TYPE prev_offset;
|
||||
|
||||
mca_common_ompio_file_get_position (fp, &prev_offset );
|
||||
mca_common_ompio_set_explicit_offset (fp, offset);
|
||||
|
||||
ret = mca_common_ompio_file_iwrite_all ( fp, buf, count, datatype, request );
|
||||
|
||||
mca_common_ompio_set_explicit_offset (fp, prev_offset);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* Helper function used by both read and write operations */
|
||||
/**************************************************************/
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2008-2018 University of Houston. All rights reserved.
|
||||
* Copyright (c) 2008-2019 University of Houston. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -19,9 +19,7 @@
|
||||
*/
|
||||
|
||||
#include "common_ompio_request.h"
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
#include "common_ompio_cuda.h"
|
||||
#endif
|
||||
#include "common_ompio_buffer.h"
|
||||
|
||||
static void mca_common_ompio_request_construct(mca_ompio_request_t* req);
|
||||
static void mca_common_ompio_request_destruct(mca_ompio_request_t *req);
|
||||
@ -37,7 +35,6 @@ opal_list_t mca_common_ompio_pending_requests = {{0}};
|
||||
static int mca_common_ompio_request_free ( struct ompi_request_t **req)
|
||||
{
|
||||
mca_ompio_request_t *ompio_req = ( mca_ompio_request_t *)*req;
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
if ( NULL != ompio_req->req_tbuf ) {
|
||||
if ( MCA_OMPIO_REQUEST_READ == ompio_req->req_type ){
|
||||
struct iovec decoded_iov;
|
||||
@ -50,7 +47,6 @@ static int mca_common_ompio_request_free ( struct ompi_request_t **req)
|
||||
}
|
||||
mca_common_ompio_release_buf ( NULL, ompio_req->req_tbuf );
|
||||
}
|
||||
#endif
|
||||
if ( NULL != ompio_req->req_free_fn ) {
|
||||
ompio_req->req_free_fn (ompio_req );
|
||||
}
|
||||
@ -77,10 +73,8 @@ void mca_common_ompio_request_construct(mca_ompio_request_t* req)
|
||||
req->req_ompi.req_cancel = mca_common_ompio_request_cancel;
|
||||
req->req_ompi.req_type = OMPI_REQUEST_IO;
|
||||
req->req_data = NULL;
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
req->req_tbuf = NULL;
|
||||
req->req_size = 0;
|
||||
#endif
|
||||
req->req_progress_fn = NULL;
|
||||
req->req_free_fn = NULL;
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2008-2018 University of Houston. All rights reserved.
|
||||
* Copyright (c) 2008-2019 University of Houston. All rights reserved.
|
||||
* Copyright (c) 2018 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -52,11 +52,9 @@ struct mca_ompio_request_t {
|
||||
mca_ompio_request_type_t req_type;
|
||||
void *req_data;
|
||||
opal_list_item_t req_item;
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
void *req_tbuf;
|
||||
size_t req_size;
|
||||
opal_convertor_t req_convertor;
|
||||
#endif
|
||||
mca_fbtl_base_module_progress_fn_t req_progress_fn;
|
||||
mca_fbtl_base_module_request_free_fn_t req_free_fn;
|
||||
};
|
||||
|
@ -130,6 +130,7 @@ mca_fcoll_dynamic_file_read_all (ompio_file_t *fh,
|
||||
count,
|
||||
buf,
|
||||
&max_data,
|
||||
fh->f_mem_convertor,
|
||||
&decoded_iov,
|
||||
&iov_count);
|
||||
if (OMPI_SUCCESS != ret){
|
||||
|
@ -132,6 +132,7 @@ mca_fcoll_dynamic_file_write_all (ompio_file_t *fh,
|
||||
count,
|
||||
buf,
|
||||
&max_data,
|
||||
fh->f_mem_convertor,
|
||||
&decoded_iov,
|
||||
&iov_count);
|
||||
if (OMPI_SUCCESS != ret ){
|
||||
|
@ -130,6 +130,7 @@ mca_fcoll_dynamic_gen2_file_read_all (ompio_file_t *fh,
|
||||
count,
|
||||
buf,
|
||||
&max_data,
|
||||
fh->f_mem_convertor,
|
||||
&decoded_iov,
|
||||
&iov_count);
|
||||
if (OMPI_SUCCESS != ret){
|
||||
|
@ -170,6 +170,7 @@ int mca_fcoll_dynamic_gen2_file_write_all (ompio_file_t *fh,
|
||||
count,
|
||||
buf,
|
||||
&max_data,
|
||||
fh->f_mem_convertor,
|
||||
&decoded_iov,
|
||||
&iov_count);
|
||||
if (OMPI_SUCCESS != ret ){
|
||||
|
@ -155,6 +155,7 @@ mca_fcoll_two_phase_file_read_all (ompio_file_t *fh,
|
||||
count,
|
||||
buf,
|
||||
&max_data,
|
||||
fh->f_mem_convertor,
|
||||
&temp_iov,
|
||||
&iov_count);
|
||||
if (OMPI_SUCCESS != ret ){
|
||||
|
@ -185,6 +185,7 @@ mca_fcoll_two_phase_file_write_all (ompio_file_t *fh,
|
||||
count,
|
||||
buf,
|
||||
&max_data,
|
||||
fh->f_mem_convertor,
|
||||
&temp_iov,
|
||||
&iov_count);
|
||||
if (OMPI_SUCCESS != ret ){
|
||||
|
@ -129,6 +129,7 @@ mca_fcoll_vulcan_file_read_all (ompio_file_t *fh,
|
||||
count,
|
||||
buf,
|
||||
&max_data,
|
||||
fh->f_mem_convertor,
|
||||
&decoded_iov,
|
||||
&iov_count);
|
||||
if (OMPI_SUCCESS != ret){
|
||||
|
@ -185,6 +185,7 @@ int mca_fcoll_vulcan_file_write_all (ompio_file_t *fh,
|
||||
count,
|
||||
buf,
|
||||
&max_data,
|
||||
fh->f_mem_convertor,
|
||||
&decoded_iov,
|
||||
&iov_count);
|
||||
if (OMPI_SUCCESS != ret ){
|
||||
|
@ -10,7 +10,7 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2008-2018 University of Houston. All rights reserved.
|
||||
* Copyright (c) 2008-2019 University of Houston. All rights reserved.
|
||||
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2015-2018 Research Organization for Information Science
|
||||
@ -34,14 +34,12 @@
|
||||
#include "ompi/mca/fs/base/base.h"
|
||||
#include "io_ompio.h"
|
||||
#include "ompi/mca/common/ompio/common_ompio_request.h"
|
||||
#include "ompi/mca/common/ompio/common_ompio_buffer.h"
|
||||
|
||||
#ifdef HAVE_IME_NATIVE_H
|
||||
#include "ompi/mca/fs/ime/fs_ime.h"
|
||||
#endif
|
||||
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
#include "ompi/mca/common/ompio/common_ompio_cuda.h"
|
||||
#endif
|
||||
|
||||
int mca_io_ompio_cycle_buffer_size = OMPIO_DEFAULT_CYCLE_BUF_SIZE;
|
||||
int mca_io_ompio_bytes_per_agg = OMPIO_PREALLOC_MAX_BUF_SIZE;
|
||||
@ -280,11 +278,7 @@ static int open_component(void)
|
||||
static int close_component(void)
|
||||
{
|
||||
mca_common_ompio_request_fini ();
|
||||
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
mca_common_ompio_cuda_alloc_fini();
|
||||
#endif
|
||||
|
||||
mca_common_ompio_buffer_alloc_fini();
|
||||
OBJ_DESTRUCT(&mca_io_ompio_mutex);
|
||||
|
||||
#ifdef HAVE_IME_NATIVE_H
|
||||
|
@ -9,7 +9,7 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2008-2018 University of Houston. All rights reserved.
|
||||
* Copyright (c) 2008-2019 University of Houston. All rights reserved.
|
||||
* Copyright (c) 2017-2018 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -137,12 +137,11 @@ int mca_io_ompio_file_read_all (ompi_file_t *fh,
|
||||
data = (mca_common_ompio_data_t *) fh->f_io_selected_data;
|
||||
|
||||
OPAL_THREAD_LOCK(&fh->f_lock);
|
||||
ret = data->ompio_fh.
|
||||
f_fcoll->fcoll_file_read_all (&data->ompio_fh,
|
||||
buf,
|
||||
count,
|
||||
datatype,
|
||||
status);
|
||||
ret = mca_common_ompio_file_read_all (&data->ompio_fh,
|
||||
buf,
|
||||
count,
|
||||
datatype,
|
||||
status);
|
||||
OPAL_THREAD_UNLOCK(&fh->f_lock);
|
||||
if ( MPI_STATUS_IGNORE != status ) {
|
||||
size_t size;
|
||||
@ -168,19 +167,11 @@ int mca_io_ompio_file_iread_all (ompi_file_t *fh,
|
||||
fp = &data->ompio_fh;
|
||||
|
||||
OPAL_THREAD_LOCK(&fh->f_lock);
|
||||
if ( NULL != fp->f_fcoll->fcoll_file_iread_all ) {
|
||||
ret = fp->f_fcoll->fcoll_file_iread_all (&data->ompio_fh,
|
||||
buf,
|
||||
count,
|
||||
datatype,
|
||||
request);
|
||||
}
|
||||
else {
|
||||
/* this fcoll component does not support non-blocking
|
||||
collective I/O operations. WE fake it with
|
||||
individual non-blocking I/O operations. */
|
||||
ret = mca_common_ompio_file_iread ( fp, buf, count, datatype, request );
|
||||
}
|
||||
ret = mca_common_ompio_file_iread_all (&data->ompio_fh,
|
||||
buf,
|
||||
count,
|
||||
datatype,
|
||||
request);
|
||||
OPAL_THREAD_UNLOCK(&fh->f_lock);
|
||||
|
||||
return ret;
|
||||
|
@ -66,7 +66,8 @@ int mca_io_ompio_file_set_view (ompi_file_t *fp,
|
||||
mca_common_ompio_data_t *data;
|
||||
ompio_file_t *fh;
|
||||
|
||||
if ( (strcmp(datarep, "native") && strcmp(datarep, "NATIVE"))) {
|
||||
if ( (strcmp(datarep, "native") && strcmp(datarep, "NATIVE") &&
|
||||
strcmp(datarep, "external32") && strcmp(datarep, "EXTERNAL32"))) {
|
||||
return MPI_ERR_UNSUPPORTED_DATAREP;
|
||||
}
|
||||
|
||||
|
@ -9,7 +9,7 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2008-2018 University of Houston. All rights reserved.
|
||||
* Copyright (c) 2008-2019 University of Houston. All rights reserved.
|
||||
* Copyright (c) 2015-2018 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -143,12 +143,11 @@ int mca_io_ompio_file_write_all (ompi_file_t *fh,
|
||||
data = (mca_common_ompio_data_t *) fh->f_io_selected_data;
|
||||
|
||||
OPAL_THREAD_LOCK(&fh->f_lock);
|
||||
ret = data->ompio_fh.
|
||||
f_fcoll->fcoll_file_write_all (&data->ompio_fh,
|
||||
buf,
|
||||
count,
|
||||
datatype,
|
||||
status);
|
||||
ret = mca_common_ompio_file_write_all (&data->ompio_fh,
|
||||
buf,
|
||||
count,
|
||||
datatype,
|
||||
status);
|
||||
OPAL_THREAD_UNLOCK(&fh->f_lock);
|
||||
if ( MPI_STATUS_IGNORE != status ) {
|
||||
size_t size;
|
||||
@ -192,19 +191,11 @@ int mca_io_ompio_file_iwrite_all (ompi_file_t *fh,
|
||||
fp = &data->ompio_fh;
|
||||
|
||||
OPAL_THREAD_LOCK(&fh->f_lock);
|
||||
if ( NULL != fp->f_fcoll->fcoll_file_iwrite_all ) {
|
||||
ret = fp->f_fcoll->fcoll_file_iwrite_all (&data->ompio_fh,
|
||||
buf,
|
||||
count,
|
||||
datatype,
|
||||
request);
|
||||
}
|
||||
else {
|
||||
/* this fcoll component does not support non-blocking
|
||||
collective I/O operations. WE fake it with
|
||||
individual non-blocking I/O operations. */
|
||||
ret = mca_common_ompio_file_iwrite ( fp, buf, count, datatype, request );
|
||||
}
|
||||
ret = mca_common_ompio_file_iwrite_all (&data->ompio_fh,
|
||||
buf,
|
||||
count,
|
||||
datatype,
|
||||
request);
|
||||
OPAL_THREAD_UNLOCK(&fh->f_lock);
|
||||
|
||||
return ret;
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user