common/ompio: individual write of external32 works
This covers both blocking and non-blocking writes. Collective write and read operations do not work yet. Signed-off-by: Edgar Gabriel <egabriel@central.uh.edu>
Этот коммит содержится в:
родитель
d955753cb8
Коммит
f6b3a0af52
@ -66,7 +66,7 @@
|
||||
#define OMPIO_LOCK_ENTIRE_FILE 0x00000080
|
||||
#define OMPIO_LOCK_NEVER 0x00000100
|
||||
#define OMPIO_LOCK_NOT_THIS_OP 0x00000200
|
||||
|
||||
#define OMPIO_DATAREP_NATIVE 0x00000400
|
||||
|
||||
#define OMPIO_ROOT 0
|
||||
|
||||
|
@ -23,8 +23,8 @@
|
||||
|
||||
|
||||
#define OMPIO_PREPARE_BUF(_fh,_buf,_count,_datatype,_tbuf,_convertor,_max_data,_decoded_iov,_iov_count){ \
|
||||
opal_convertor_clone ( _fh->f_file_convertor, _convertor, 0); \
|
||||
opal_convertor_prepare_for_send ( _convertor, &(_datatype->super), _count, _buf );\
|
||||
OBJ_CONSTRUCT( _convertor, opal_convertor_t); \
|
||||
opal_convertor_copy_and_prepare_for_send ( _fh->f_file_convertor, &(_datatype->super), _count, _buf, CONVERTOR_SEND_CONVERSION, _convertor ); \
|
||||
opal_convertor_get_packed_size( _convertor, &_max_data ); \
|
||||
_tbuf = mca_common_ompio_alloc_buf (_fh, _max_data); \
|
||||
if ( NULL == _tbuf ) { \
|
||||
|
@ -326,13 +326,13 @@ int mca_common_ompio_file_close (ompio_file_t *ompio_fh)
|
||||
|
||||
if (NULL != ompio_fh->f_mem_convertor) {
|
||||
opal_convertor_cleanup (ompio_fh->f_mem_convertor);
|
||||
//free (ompio_fh->f_mem_convertor);
|
||||
free (ompio_fh->f_mem_convertor);
|
||||
ompio_fh->f_mem_convertor = NULL;
|
||||
}
|
||||
|
||||
if (NULL != ompio_fh->f_file_convertor) {
|
||||
opal_convertor_cleanup (ompio_fh->f_file_convertor);
|
||||
//free (ompio_fh->f_file_convertor);
|
||||
free (ompio_fh->f_file_convertor);
|
||||
ompio_fh->f_file_convertor = NULL;
|
||||
}
|
||||
|
||||
@ -391,6 +391,13 @@ int mca_common_ompio_file_get_position (ompio_file_t *fh,
|
||||
{
|
||||
OMPI_MPI_OFFSET_TYPE off;
|
||||
|
||||
if ( 0 == fh->f_view_extent ||
|
||||
0 == fh->f_view_size ||
|
||||
0 == fh->f_etype_size ) {
|
||||
/* not sure whether we should raise an error here */
|
||||
*offset = 0;
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
/* No. of copies of the entire file view */
|
||||
off = (fh->f_offset - fh->f_disp)/fh->f_view_extent;
|
||||
|
||||
|
@ -93,7 +93,7 @@ int mca_common_ompio_set_view (ompio_file_t *fh,
|
||||
|
||||
if (NULL != fh->f_file_convertor) {
|
||||
opal_convertor_cleanup (fh->f_file_convertor);
|
||||
//free (fh->f_file_convertor);
|
||||
free (fh->f_file_convertor);
|
||||
fh->f_file_convertor = NULL;
|
||||
}
|
||||
|
||||
@ -104,10 +104,13 @@ int mca_common_ompio_set_view (ompio_file_t *fh,
|
||||
if ( fh->f_flags & OMPIO_UNIFORM_FVIEW ) {
|
||||
fh->f_flags &= ~OMPIO_UNIFORM_FVIEW;
|
||||
}
|
||||
if ( fh->f_flags & OMPIO_DATAREP_NATIVE ) {
|
||||
fh->f_flags &= ~OMPIO_DATAREP_NATIVE;
|
||||
}
|
||||
fh->f_datarep = strdup (datarep);
|
||||
|
||||
if ( !(strcmp(datarep, "external32") && strcmp(datarep, "EXTERNAL32"))) {
|
||||
fh->f_file_convertor = malloc (sizeof(opal_convertor_t));
|
||||
fh->f_file_convertor = malloc ( sizeof(struct opal_convertor_t) );
|
||||
if ( NULL == fh->f_file_convertor ) {
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
@ -115,6 +118,7 @@ int mca_common_ompio_set_view (ompio_file_t *fh,
|
||||
}
|
||||
else {
|
||||
fh->f_file_convertor = opal_convertor_create (opal_local_arch, 0);
|
||||
fh->f_flags |= OMPIO_DATAREP_NATIVE;
|
||||
}
|
||||
|
||||
datatype_duplicate (filetype, &fh->f_orig_filetype );
|
||||
|
@ -35,7 +35,6 @@
|
||||
#include <unistd.h>
|
||||
#include <math.h>
|
||||
|
||||
|
||||
int mca_common_ompio_file_write (ompio_file_t *fh,
|
||||
const void *buf,
|
||||
int count,
|
||||
@ -70,16 +69,34 @@ int mca_common_ompio_file_write (ompio_file_t *fh,
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool need_to_copy = false;
|
||||
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
int is_gpu, is_managed;
|
||||
mca_common_ompio_check_gpu_buf ( fh, buf, &is_gpu, &is_managed);
|
||||
if ( is_gpu && !is_managed ) {
|
||||
need_to_copy = true;
|
||||
}
|
||||
#endif
|
||||
|
||||
if ( !( fh->f_flags & OMPIO_DATAREP_NATIVE ) &&
|
||||
!(datatype == &ompi_mpi_byte.dt ||
|
||||
datatype == &ompi_mpi_char.dt )) {
|
||||
/* only need to copy if any of these conditions are given:
|
||||
1. buffer is an unmanaged CUDA buffer (checked above).
|
||||
2. Data representation is anything other than 'native' and
|
||||
3. datatype is not byte or char (i.e. it does require some actual
|
||||
work to be done, e.g. for external32).
|
||||
*/
|
||||
need_to_copy = true;
|
||||
}
|
||||
|
||||
if ( need_to_copy ) {
|
||||
size_t pos=0;
|
||||
char *tbuf=NULL;
|
||||
opal_convertor_t convertor;
|
||||
|
||||
OMPIO_CUDA_PREPARE_BUF(fh,buf,count,datatype,tbuf,&convertor,max_data,decoded_iov,iov_count);
|
||||
|
||||
OMPIO_PREPARE_BUF(fh,buf,count,datatype,tbuf,&convertor,max_data,decoded_iov,iov_count);
|
||||
opal_convertor_pack (&convertor, decoded_iov, &iov_count, &pos );
|
||||
opal_convertor_cleanup ( &convertor);
|
||||
}
|
||||
@ -93,16 +110,7 @@ int mca_common_ompio_file_write (ompio_file_t *fh,
|
||||
&decoded_iov,
|
||||
&iov_count);
|
||||
}
|
||||
#else
|
||||
mca_common_ompio_decode_datatype (fh,
|
||||
datatype,
|
||||
count,
|
||||
buf,
|
||||
&max_data,
|
||||
fh->f_mem_convertor,
|
||||
&decoded_iov,
|
||||
&iov_count);
|
||||
#endif
|
||||
|
||||
if ( 0 < max_data && 0 == fh->f_iov_count ) {
|
||||
if ( MPI_STATUS_IGNORE != status ) {
|
||||
status->_ucount = 0;
|
||||
@ -230,16 +238,34 @@ int mca_common_ompio_file_iwrite (ompio_file_t *fh,
|
||||
int i = 0; /* index into the decoded iovec of the buffer */
|
||||
int j = 0; /* index into the file view iovec */
|
||||
|
||||
bool need_to_copy = false;
|
||||
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
int is_gpu, is_managed;
|
||||
mca_common_ompio_check_gpu_buf ( fh, buf, &is_gpu, &is_managed);
|
||||
if ( is_gpu && !is_managed ) {
|
||||
need_to_copy = true;
|
||||
}
|
||||
#endif
|
||||
|
||||
if ( !( fh->f_flags & OMPIO_DATAREP_NATIVE ) &&
|
||||
!(datatype == &ompi_mpi_byte.dt ||
|
||||
datatype == &ompi_mpi_char.dt )) {
|
||||
/* only need to copy if any of these conditions are given:
|
||||
1. buffer is an unmanaged CUDA buffer (checked above).
|
||||
2. Data representation is anything other than 'native' and
|
||||
3. datatype is not byte or char (i.e. it does require some actual
|
||||
work to be done, e.g. for external32).
|
||||
*/
|
||||
need_to_copy = true;
|
||||
}
|
||||
|
||||
if ( need_to_copy ) {
|
||||
size_t pos=0;
|
||||
char *tbuf=NULL;
|
||||
opal_convertor_t convertor;
|
||||
|
||||
OMPIO_CUDA_PREPARE_BUF(fh,buf,count,datatype,tbuf,&convertor,max_data,decoded_iov,iov_count);
|
||||
|
||||
OMPIO_PREPARE_BUF(fh,buf,count,datatype,tbuf,&convertor,max_data,decoded_iov,iov_count);
|
||||
opal_convertor_pack (&convertor, decoded_iov, &iov_count, &pos );
|
||||
opal_convertor_cleanup (&convertor);
|
||||
|
||||
@ -256,16 +282,7 @@ int mca_common_ompio_file_iwrite (ompio_file_t *fh,
|
||||
&decoded_iov,
|
||||
&iov_count);
|
||||
}
|
||||
#else
|
||||
mca_common_ompio_decode_datatype (fh,
|
||||
datatype,
|
||||
count,
|
||||
buf,
|
||||
&max_data,
|
||||
fh->f_mem_convertor,
|
||||
&decoded_iov,
|
||||
&iov_count);
|
||||
#endif
|
||||
|
||||
if ( 0 < max_data && 0 == fh->f_iov_count ) {
|
||||
ompio_req->req_ompi.req_status.MPI_ERROR = OMPI_SUCCESS;
|
||||
ompio_req->req_ompi.req_status._ucount = 0;
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user