1
1

common/ompio: make individual read operations work.

Signed-off-by: Edgar Gabriel <egabriel@central.uh.edu>
Этот коммит содержится в:
Edgar Gabriel 2019-05-20 17:22:33 -05:00
родитель f6b3a0af52
Коммит ab56e6f0db
3 изменённых файла: 66 добавлений и 38 удалений

Просмотреть файл

@ -40,6 +40,24 @@
_decoded_iov->iov_len = _max_data; \
_iov_count=1;}
/*
 * OMPIO_PREPARE_READ_BUF: set up a temporary staging buffer for a read
 * that requires data conversion (e.g. a non-native data representation,
 * or an unmanaged device buffer in the caller's code path).
 *
 * Steps performed:
 *   1. Construct _convertor and prepare it to receive into _buf using the
 *      file-level convertor (_fh->f_file_convertor) for _count elements of
 *      _datatype.
 *   2. Query the packed size of the data into _max_data.
 *   3. Allocate a staging buffer _tbuf of _max_data bytes via
 *      mca_common_ompio_alloc_buf (caller releases it with
 *      mca_common_ompio_release_buf after unpacking).
 *   4. Allocate a single-element iovec (_decoded_iov) describing _tbuf, and
 *      set _iov_count to 1 (caller frees _decoded_iov).
 *
 * NOTE: on allocation failure this macro executes `return
 * OMPI_ERR_OUT_OF_RESOURCE;` from the ENCLOSING function — it may only be
 * used inside a function returning an int OMPI error code.
 */
#define OMPIO_PREPARE_READ_BUF(_fh,_buf,_count,_datatype,_tbuf,_convertor,_max_data,_decoded_iov,_iov_count){ \
OBJ_CONSTRUCT( _convertor, opal_convertor_t); \
opal_convertor_copy_and_prepare_for_recv ( _fh->f_file_convertor, &(_datatype->super), _count, _buf, 0, _convertor ); \
opal_convertor_get_packed_size( _convertor, &_max_data ); \
_tbuf = mca_common_ompio_alloc_buf (_fh, _max_data); \
if ( NULL == _tbuf ) { \
opal_output(1, "common_ompio: error allocating memory\n"); \
return OMPI_ERR_OUT_OF_RESOURCE; \
} \
_decoded_iov = (struct iovec *) malloc ( sizeof ( struct iovec )); \
if ( NULL == _decoded_iov ) { \
opal_output(1, "common_ompio: could not allocate memory.\n"); \
return OMPI_ERR_OUT_OF_RESOURCE; \
} \
_decoded_iov->iov_base = _tbuf; \
_decoded_iov->iov_len = _max_data; \
_iov_count=1;}
#if OPAL_CUDA_SUPPORT
void mca_common_ompio_check_gpu_buf ( ompio_file_t *fh, const void *buf,
int *is_gpu, int *is_managed);

Просмотреть файл

@ -88,17 +88,33 @@ int mca_common_ompio_file_read (ompio_file_t *fh,
return ret;
}
bool need_to_copy = false;
opal_convertor_t convertor;
#if OPAL_CUDA_SUPPORT
int is_gpu, is_managed;
opal_convertor_t convertor;
mca_common_ompio_check_gpu_buf ( fh, buf, &is_gpu, &is_managed);
if ( is_gpu && !is_managed ) {
need_to_copy = true;
}
#endif
if ( !( fh->f_flags & OMPIO_DATAREP_NATIVE ) &&
!(datatype == &ompi_mpi_byte.dt ||
datatype == &ompi_mpi_char.dt )) {
/* We only need to copy if either of these conditions holds:
1. the buffer is an unmanaged CUDA buffer (checked above), OR
2. the data representation is anything other than 'native' AND
the datatype is not byte or char (i.e. it does require some actual
conversion work to be done, e.g. for external32).
*/
need_to_copy = true;
}
if ( need_to_copy ) {
char *tbuf=NULL;
OMPIO_CUDA_PREPARE_BUF(fh,buf,count,datatype,tbuf,&convertor,max_data,decoded_iov,iov_count);
}
OMPIO_PREPARE_READ_BUF(fh,buf,count,datatype,tbuf,&convertor,max_data,decoded_iov,iov_count);
}
else {
mca_common_ompio_decode_datatype (fh,
datatype,
@ -109,16 +125,7 @@ int mca_common_ompio_file_read (ompio_file_t *fh,
&decoded_iov,
&iov_count);
}
#else
mca_common_ompio_decode_datatype (fh,
datatype,
count,
buf,
&max_data,
fh->f_mem_convertor,
&decoded_iov,
&iov_count);
#endif
if ( 0 < max_data && 0 == fh->f_iov_count ) {
if ( MPI_STATUS_IGNORE != status ) {
status->_ucount = 0;
@ -170,15 +177,14 @@ int mca_common_ompio_file_read (ompio_file_t *fh,
}
}
#if OPAL_CUDA_SUPPORT
if ( is_gpu && !is_managed ) {
if ( need_to_copy ) {
size_t pos=0;
opal_convertor_unpack (&convertor, decoded_iov, &iov_count, &pos );
opal_convertor_cleanup (&convertor);
mca_common_ompio_release_buf (fh, decoded_iov->iov_base);
}
#endif
if (NULL != decoded_iov) {
free (decoded_iov);
decoded_iov = NULL;
@ -257,13 +263,32 @@ int mca_common_ompio_file_iread (ompio_file_t *fh,
int i = 0; /* index into the decoded iovec of the buffer */
int j = 0; /* index into the file vie iovec */
bool need_to_copy = false;
#if OPAL_CUDA_SUPPORT
int is_gpu, is_managed;
mca_common_ompio_check_gpu_buf ( fh, buf, &is_gpu, &is_managed);
if ( is_gpu && !is_managed ) {
need_to_copy = true;
}
#endif
if ( !( fh->f_flags & OMPIO_DATAREP_NATIVE ) &&
!(datatype == &ompi_mpi_byte.dt ||
datatype == &ompi_mpi_char.dt )) {
/* We only need to copy if either of these conditions holds:
1. the buffer is an unmanaged CUDA buffer (checked above), OR
2. the data representation is anything other than 'native' AND
the datatype is not byte or char (i.e. it does require some actual
conversion work to be done, e.g. for external32).
*/
need_to_copy = true;
}
if ( need_to_copy ) {
char *tbuf=NULL;
OMPIO_CUDA_PREPARE_BUF(fh,buf,count,datatype,tbuf,&ompio_req->req_convertor,max_data,decoded_iov,iov_count);
OMPIO_PREPARE_READ_BUF(fh,buf,count,datatype,tbuf,&ompio_req->req_convertor,max_data,decoded_iov,iov_count);
ompio_req->req_tbuf = tbuf;
ompio_req->req_size = max_data;
@ -278,16 +303,7 @@ int mca_common_ompio_file_iread (ompio_file_t *fh,
&decoded_iov,
&iov_count);
}
#else
mca_common_ompio_decode_datatype (fh,
datatype,
count,
buf,
&max_data,
fh->f_mem_convertor,
&decoded_iov,
&iov_count);
#endif
if ( 0 < max_data && 0 == fh->f_iov_count ) {
ompio_req->req_ompi.req_status.MPI_ERROR = OMPI_SUCCESS;
ompio_req->req_ompi.req_status._ucount = 0;

Просмотреть файл

@ -10,7 +10,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008-2018 University of Houston. All rights reserved.
* Copyright (c) 2008-2019 University of Houston. All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2015-2018 Research Organization for Information Science
@ -34,14 +34,12 @@
#include "ompi/mca/fs/base/base.h"
#include "io_ompio.h"
#include "ompi/mca/common/ompio/common_ompio_request.h"
#include "ompi/mca/common/ompio/common_ompio_buffer.h"
#ifdef HAVE_IME_NATIVE_H
#include "ompi/mca/fs/ime/fs_ime.h"
#endif
#if OPAL_CUDA_SUPPORT
#include "ompi/mca/common/ompio/common_ompio_cuda.h"
#endif
int mca_io_ompio_cycle_buffer_size = OMPIO_DEFAULT_CYCLE_BUF_SIZE;
int mca_io_ompio_bytes_per_agg = OMPIO_PREALLOC_MAX_BUF_SIZE;
@ -280,11 +278,7 @@ static int open_component(void)
static int close_component(void)
{
mca_common_ompio_request_fini ();
#if OPAL_CUDA_SUPPORT
mca_common_ompio_cuda_alloc_fini();
#endif
mca_common_ompio_buffer_alloc_fini();
OBJ_DESTRUCT(&mca_io_ompio_mutex);
#ifdef HAVE_IME_NATIVE_H