Provide a better fix for #6285.
The issue was a little complicated due to the internal stack used in the convertor. The main issue was that in the case where we ran out of iov space to save the raw description of the data while handling a repetition (loop), instead of saving the current position and bailing out directly, we went on to read the next predefined type element. It worked in most cases, except the one identified by the HDF5 test. However, the biggest issue here was the drop in performance for all ensuing calls to the convertor pack/unpack, as instead of handling contiguous loops as a whole (and minimizing the number of memory copies) we copied data description by data description. Signed-off-by: George Bosilca <bosilca@icl.utk.edu> (back-ported from commit open-mpi/ompi@5a82c4fd07)
Этот коммит содержится в:
родитель
f7327735a0
Коммит
8acdc53892
@ -102,7 +102,7 @@ opal_convertor_raw( opal_convertor_t* pConvertor,
|
||||
/* now here we have a basic datatype */
|
||||
OPAL_DATATYPE_SAFEGUARD_POINTER( source_base, blength, pConvertor->pBaseBuf,
|
||||
pConvertor->pDesc, pConvertor->count );
|
||||
DO_DEBUG( opal_output( 0, "raw 1. iov[%d] = {base %p, length %lu}\n",
|
||||
DO_DEBUG( opal_output( 0, "raw 1. iov[%d] = {base %p, length %" PRIsize_t "}\n",
|
||||
index, (void*)source_base, (unsigned long)blength ); );
|
||||
iov[index].iov_base = (IOVBASE_TYPE *) source_base;
|
||||
iov[index].iov_len = blength;
|
||||
@ -115,7 +115,7 @@ opal_convertor_raw( opal_convertor_t* pConvertor,
|
||||
for( i = count_desc; (i > 0) && (index < *iov_count); i--, index++ ) {
|
||||
OPAL_DATATYPE_SAFEGUARD_POINTER( source_base, blength, pConvertor->pBaseBuf,
|
||||
pConvertor->pDesc, pConvertor->count );
|
||||
DO_DEBUG( opal_output( 0, "raw 2. iov[%d] = {base %p, length %lu}\n",
|
||||
DO_DEBUG( opal_output( 0, "raw 2. iov[%d] = {base %p, length %" PRIsize_t "}\n",
|
||||
index, (void*)source_base, (unsigned long)blength ); );
|
||||
iov[index].iov_base = (IOVBASE_TYPE *) source_base;
|
||||
iov[index].iov_len = blength;
|
||||
@ -172,16 +172,7 @@ opal_convertor_raw( opal_convertor_t* pConvertor,
|
||||
if( pElem->loop.common.flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) {
|
||||
ptrdiff_t offset = end_loop->first_elem_disp;
|
||||
source_base += offset;
|
||||
for(uint32_t i = count_desc; i > 0; i--, index++ ) {
|
||||
if (index >= *iov_count) {
|
||||
dt_elem_desc_t* nElem = pElem + 1;
|
||||
while (nElem->elem.common.type == OPAL_DATATYPE_LOOP) {
|
||||
nElem++;
|
||||
}
|
||||
assert(OPAL_DATATYPE_END_LOOP != nElem->elem.common.type);
|
||||
offset = nElem->elem.disp;
|
||||
break;
|
||||
}
|
||||
for(uint32_t i = MIN(count_desc, *iov_count - index); i > 0; i--, index++ ) {
|
||||
OPAL_DATATYPE_SAFEGUARD_POINTER( source_base, end_loop->size, pConvertor->pBaseBuf,
|
||||
pConvertor->pDesc, pConvertor->count );
|
||||
iov[index].iov_base = (IOVBASE_TYPE *) source_base;
|
||||
@ -189,6 +180,10 @@ opal_convertor_raw( opal_convertor_t* pConvertor,
|
||||
source_base += pElem->loop.extent;
|
||||
raw_data += end_loop->size;
|
||||
count_desc--;
|
||||
DO_DEBUG( opal_output( 0, "raw contig loop generate iov[%d] = {base %p, length %" PRIsize_t "}"
|
||||
"space %lu [pos_desc %d]\n",
|
||||
index, iov[index].iov_base, iov[index].iov_len,
|
||||
(unsigned long)raw_data, pos_desc ); );
|
||||
}
|
||||
source_base -= offset;
|
||||
if( 0 == count_desc ) { /* completed */
|
||||
@ -196,6 +191,9 @@ opal_convertor_raw( opal_convertor_t* pConvertor,
|
||||
goto update_loop_description;
|
||||
}
|
||||
}
|
||||
if( index == *iov_count ) { /* all iov have been filled, we need to bail out */
|
||||
goto complete_loop;
|
||||
}
|
||||
local_disp = (ptrdiff_t)source_base - local_disp;
|
||||
PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, OPAL_DATATYPE_LOOP, count_desc,
|
||||
pStack->disp + local_disp);
|
||||
|
@ -1,3 +1,17 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2019 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2019 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
#include "ddt_lib.h"
|
||||
#include "opal/datatype/opal_convertor.h"
|
||||
@ -12,7 +26,8 @@
|
||||
#include <stdio.h>
|
||||
|
||||
|
||||
int mca_common_ompio_decode_datatype ( ompi_datatype_t *datatype,
|
||||
static int
|
||||
mca_common_ompio_decode_datatype ( ompi_datatype_t *datatype,
|
||||
int count,
|
||||
struct iovec **iov,
|
||||
uint32_t *iovec_count,
|
||||
@ -310,20 +325,35 @@ int main (int argc, char *argv[]) {
|
||||
datatype->super.opt_desc.used = 184;
|
||||
datatype->super.opt_desc.desc = descs;
|
||||
|
||||
uint32_t iovec_count = 0;
|
||||
struct iovec * iov = NULL;
|
||||
mca_common_ompio_decode_datatype ( datatype, 1, &iov, &iovec_count, 300);
|
||||
uint32_t iovec_count2 = 0;
|
||||
struct iovec * iov2 = NULL;
|
||||
mca_common_ompio_decode_datatype ( datatype, 1, &iov2, &iovec_count2, 100);
|
||||
/* Get the entire raw description of the datatype in a single call */
|
||||
uint32_t iovec_count_300 = 0;
|
||||
struct iovec * iov_300 = NULL;
|
||||
mca_common_ompio_decode_datatype ( datatype, 1, &iov_300, &iovec_count_300, 300);
|
||||
/* Get the raw description of the datatype 10 elements at the time. This stresses some
|
||||
* of the execution paths in the convertor raw.
|
||||
*/
|
||||
uint32_t iovec_count_10 = 0;
|
||||
struct iovec * iov_10 = NULL;
|
||||
mca_common_ompio_decode_datatype ( datatype, 1, &iov_10, &iovec_count_10, 10);
|
||||
/* Get the raw description of the datatype one element at the time. This stresses all
|
||||
* execution paths in the convertor raw.
|
||||
*/
|
||||
uint32_t iovec_count_1 = 0;
|
||||
struct iovec * iov_1 = NULL;
|
||||
mca_common_ompio_decode_datatype ( datatype, 1, &iov_1, &iovec_count_1, 1);
|
||||
|
||||
assert(iovec_count == iovec_count2);
|
||||
|
||||
assert(iovec_count_300 == iovec_count_10);
|
||||
assert(iovec_count_300 == iovec_count_1);
|
||||
// assert(iov[100].iov_base == iov2[100].iov_base);
|
||||
// assert(iov[100].iov_len == iov2[100].iov_len);
|
||||
for (int i=0; i<iovec_count; i++) {
|
||||
assert(iov[i].iov_base == iov2[i].iov_base);
|
||||
assert(iov[i].iov_len == iov2[i].iov_len);
|
||||
for (uint32_t i = 0; i < iovec_count_300; i++) {
|
||||
assert(iov_300[i].iov_base == iov_10[i].iov_base);
|
||||
assert(iov_300[i].iov_len == iov_10[i].iov_len);
|
||||
assert(iov_300[i].iov_base == iov_1[i].iov_base);
|
||||
assert(iov_300[i].iov_len == iov_1[i].iov_len);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user