1
1

osc: fix datatype related issues in the one-sided code

This commit fixes three issues:

 - osc/rdma: The target side of an accumulate was using the target datatype
   in the receive to the packed buffer. This was conflicting with the way
   the reduction is done into the target buffer. Changed the receive to use
   the primitive datatype.

 - osc/base: The copy table was completely wrong. Fixed the table to match
   the underlying datatypes (which are opal, not ompi, datatypes).

 - osc/base: There is a problem using the optimized description. Fall back
   on using the non-optimized description until we can understand what is
   going wrong.

cmr=v1.8:reviewer=jsquyres

This commit was SVN r31204.
Этот коммит содержится в:
Nathan Hjelm 2014-03-25 15:28:48 +00:00
родитель bc55276844
Коммит 949abe45cd
2 изменённых файлов: 82 добавлений и 78 удалений

Просмотреть файл

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
@ -7,7 +8,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Los Alamos National Security, LLC. All rights
* Copyright (c) 2007-2014 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved.
* $COPYRIGHT$
@ -97,83 +98,66 @@ static int copy_##TYPENAME( opal_convertor_t *pConvertor, uint32_t count, \
}
/* set up copy functions for the basic C MPI data types */
COPY_TYPE( char, char, 1 )
COPY_TYPE( short, short, 1 )
COPY_TYPE( int, int, 1 )
COPY_TYPE( long, long, 1 )
COPY_TYPE( long_long, long long, 1 )
COPY_TYPE( float, float, 1 )
COPY_TYPE( double, double, 1 )
COPY_TYPE( long_double, long double, 1 )
COPY_TYPE( complex_float, ompi_mpi_cxx_cplex, 1 )
COPY_TYPE( complex_double, ompi_mpi_cxx_dblcplex, 1 )
COPY_TYPE( complex_long_double, ompi_mpi_cxx_ldblcplex, 1 )
COPY_TYPE( int1, int8_t, 1)
COPY_TYPE( int2, int16_t, 1)
COPY_TYPE( int4, int32_t, 1)
COPY_TYPE( int8, int64_t, 1)
COPY_TYPE( bool, bool, 1)
COPY_TYPE( float, float, 1)
COPY_TYPE( double, double, 1)
#if HAVE_LONG_DOUBLE
COPY_TYPE( long_double, long double, 1)
#endif
COPY_TYPE( float_complex, float _Complex, 1)
COPY_TYPE( double_complex, double _Complex, 1)
#if HAVE_LONG_DOUBLE__COMPLEX
COPY_TYPE( long_double_complex, long double _Complex, 1)
#endif
/* table of predefined copy functions - one for each MPI type */
/* XXX TODO Adapt to new layout */
static conversion_fct_t ompi_osc_base_copy_functions[OMPI_DATATYPE_MAX_PREDEFINED] = {
(conversion_fct_t)NULL, /* DT_LOOP */
(conversion_fct_t)NULL, /* DT_END_LOOP */
(conversion_fct_t)NULL, /* DT_LB */
(conversion_fct_t)NULL, /* DT_UB */
(conversion_fct_t)copy_char, /* DT_CHAR */
(conversion_fct_t)copy_char, /* DT_CHARACTER */
(conversion_fct_t)copy_char, /* DT_UNSIGNED_CHAR */
(conversion_fct_t)copy_char, /* DT_SIGNED_CHAR */
(conversion_fct_t)copy_char, /* DT_BYTE */
(conversion_fct_t)copy_short, /* DT_SHORT */
(conversion_fct_t)copy_short, /* DT_UNSIGNED_SHORT */
(conversion_fct_t)copy_int, /* DT_INT */
(conversion_fct_t)copy_int, /* DT_UNSIGNED */
(conversion_fct_t)copy_long, /* DT_LONG */
(conversion_fct_t)copy_long, /* DT_UNSIGNED_LONG */
(conversion_fct_t)copy_long_long, /* DT_LONG_LONG */
(conversion_fct_t)copy_long_long, /* DT_UNSIGNED_LONG_LONG */
(conversion_fct_t)copy_float, /* DT_FLOAT */
(conversion_fct_t)copy_double, /* DT_DOUBLE */
(conversion_fct_t)copy_long_double, /* DT_LONG_DOUBLE */
(conversion_fct_t)NULL, /* DT_PACKED */
(conversion_fct_t)NULL, /* DT_WCHAR */
#if SIZEOF_BOOL == SIZEOF_CHAR
(conversion_fct_t)copy_char, /* DT_CXX_BOOL */
#elif SIZEOF_BOOL == SIZEOF_SHORT
(conversion_fct_t)copy_short, /* DT_CXX_BOOL */
#elif SIZEOF_BOOL == SIZEOF_INT
(conversion_fct_t)copy_int, /* DT_CXX_BOOL */
#elif SIZEOF_BOOL == SIZEOF_LONG
(conversion_fct_t)copy_long, /* DT_CXX_BOOL */
#else
(conversion_fct_t)NULL, /* DT_CXX_BOOL */
/* table of predefined copy functions - one for each opal basic type */
static conversion_fct_t ompi_osc_base_copy_functions[OPAL_DATATYPE_MAX_PREDEFINED] = {
[OPAL_DATATYPE_INT1] = (conversion_fct_t) copy_int1,
[OPAL_DATATYPE_UINT1] = (conversion_fct_t) copy_int1,
[OPAL_DATATYPE_INT2] = (conversion_fct_t) copy_int2,
[OPAL_DATATYPE_UINT2] = (conversion_fct_t) copy_int2,
[OPAL_DATATYPE_INT4] = (conversion_fct_t) copy_int4,
[OPAL_DATATYPE_UINT4] = (conversion_fct_t) copy_int4,
[OPAL_DATATYPE_INT8] = (conversion_fct_t) copy_int8,
[OPAL_DATATYPE_UINT8] = (conversion_fct_t) copy_int8,
#if SIZEOF_FLOAT == 2
[OPAL_DATATYPE_FLOAT2] = (conversion_fct_t) copy_float,
#elif SIZEOF_DOUBLE == 2
[OPAL_DATATYPE_FLOAT2] = (conversion_fct_t) copy_double,
#elif SIZEOF_LONG_DOUBLE == 2
[OPAL_DATATYPE_FLOAT2] = (conversion_fct_t) copy_long_double,
#endif
#if OMPI_SIZEOF_FORTRAN_LOGICAL == SIZEOF_CHAR
(conversion_fct_t)copy_char, /* DT_LOGIC */
#elif OMPI_SIZEOF_FORTRAN_LOGICAL == SIZEOF_SHORT
(conversion_fct_t)copy_short, /* DT_LOGIC */
#elif OMPI_SIZEOF_FORTRAN_LOGICAL == SIZEOF_INT
(conversion_fct_t)copy_int, /* DT_LOGIC */
#elif OMPI_SIZEOF_FORTRAN_LOGICAL == SIZEOF_LONG
(conversion_fct_t)copy_long, /* DT_LOGIC */
#else
(conversion_fct_t)NULL, /* DT_LOGIC */
#if SIZEOF_FLOAT == 4
[OPAL_DATATYPE_FLOAT4] = (conversion_fct_t) copy_float,
#elif SIZEOF_DOUBLE == 4
[OPAL_DATATYPE_FLOAT4] = (conversion_fct_t) copy_double,
#elif SIZEOF_LONG_DOUBLE == 4
[OPAL_DATATYPE_FLOAT4] = (conversion_fct_t) copy_long_double,
#endif
(conversion_fct_t)copy_int, /* DT_INTEGER */
(conversion_fct_t)copy_float, /* DT_REAL */
(conversion_fct_t)copy_double, /* DT_DBLPREC */
(conversion_fct_t)copy_complex_float, /* DT_COMPLEX_FLOAT */
(conversion_fct_t)copy_complex_double, /* DT_COMPLEX_DOUBLE */
(conversion_fct_t)copy_complex_long_double, /* DT_COMPLEX_LONG_DOUBLE */
(conversion_fct_t)NULL, /* DT_2INT */
(conversion_fct_t)NULL, /* DT_2INTEGER */
(conversion_fct_t)NULL, /* DT_2REAL */
(conversion_fct_t)NULL, /* DT_2DBLPREC */
(conversion_fct_t)NULL, /* DT_2COMPLEX */
(conversion_fct_t)NULL, /* DT_2DOUBLE_COMPLEX */
(conversion_fct_t)NULL, /* DT_FLOAT_INT */
(conversion_fct_t)NULL, /* DT_DOUBLE_INT */
(conversion_fct_t)NULL, /* DT_LONG_DOUBLE_INT */
(conversion_fct_t)NULL, /* DT_LONG_INT */
(conversion_fct_t)NULL, /* DT_SHORT_INT */
(conversion_fct_t)NULL, /* DT_UNAVAILABLE */
#if SIZEOF_FLOAT == 8
[OPAL_DATATYPE_FLOAT8] = (conversion_fct_t) copy_float,
#elif SIZEOF_DOUBLE == 8
[OPAL_DATATYPE_FLOAT8] = (conversion_fct_t) copy_double,
#elif SIZEOF_LONG_DOUBLE == 8
[OPAL_DATATYPE_FLOAT8] = (conversion_fct_t) copy_long_double,
#endif
#if SIZEOF_FLOAT == 16
[OPAL_DATATYPE_FLOAT16] = (conversion_fct_t) copy_float,
#elif SIZEOF_DOUBLE == 16
[OPAL_DATATYPE_FLOAT16] = (conversion_fct_t) copy_double,
#elif SIZEOF_LONG_DOUBLE == 16
[OPAL_DATATYPE_FLOAT16] = (conversion_fct_t) copy_long_double,
#endif
[OPAL_DATATYPE_FLOAT_COMPLEX] = (conversion_fct_t) copy_float_complex,
[OPAL_DATATYPE_DOUBLE_COMPLEX] = (conversion_fct_t) copy_double_complex,
#if HAVE_LONG_DOUBLE__COMPLEX
[OPAL_DATATYPE_LONG_DOUBLE_COMPLEX] = (conversion_fct_t) copy_long_double_complex,
#endif
[OPAL_DATATYPE_BOOL] = (conversion_fct_t) copy_bool,
};
int
@ -218,6 +202,8 @@ ompi_osc_base_process_op(void *outbuf,
master.pFunctions = (conversion_fct_t*) &ompi_osc_base_copy_functions;
convertor.convertor.master = &master;
convertor.convertor.fAdvance = opal_unpack_general;
/* there are issues with using the optimized description here */
convertor.convertor.use_desc = &datatype->super.desc;
iov.iov_len = inbuflen;
iov.iov_base = (IOVBASE_TYPE*) inbuf;

Просмотреть файл

@ -822,6 +822,8 @@ static int ompi_osc_rdma_acc_long_start (ompi_osc_rdma_module_t *module, int sou
void *target = (unsigned char*) module->baseptr +
((unsigned long) acc_header->displacement * module->disp_unit);
struct ompi_op_t *op = ompi_osc_base_op_create(acc_header->op);
ompi_datatype_t *primitive_datatype;
int primitive_count;
int ret;
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
@ -838,6 +840,13 @@ static int ompi_osc_rdma_acc_long_start (ompi_osc_rdma_module_t *module, int sou
break;
}
ret = ompi_osc_base_get_primitive_type_info (datatype, &primitive_datatype, &primitive_count);
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
break;
}
primitive_count *= acc_header->count;
buflen = datatype_buffer_length (datatype, acc_header->count);
/* allocate a temporary buffer to receive the accumulate data */
@ -854,8 +863,8 @@ static int ompi_osc_rdma_acc_long_start (ompi_osc_rdma_module_t *module, int sou
break;
}
ret = ompi_osc_rdma_irecv_w_cb (buffer, acc_header->count, datatype, source, acc_header->tag,
module->comm, NULL, accumulate_cb, acc_data);
ret = ompi_osc_rdma_irecv_w_cb (buffer, primitive_count, primitive_datatype, source,
acc_header->tag, module->comm, NULL, accumulate_cb, acc_data);
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
OBJ_RELEASE(acc_data);
}
@ -941,7 +950,9 @@ static int ompi_osc_gacc_long_start (ompi_osc_rdma_module_t *module, int source,
((unsigned long) get_acc_header->displacement * module->disp_unit);
struct ompi_op_t *op = ompi_osc_base_op_create(get_acc_header->op);
struct osc_rdma_accumulate_data_t *acc_data;
ompi_datatype_t *primitive_datatype;
ompi_request_t *recv_request;
int primitive_count;
ompi_proc_t *proc;
size_t buflen;
void *buffer;
@ -960,6 +971,13 @@ static int ompi_osc_gacc_long_start (ompi_osc_rdma_module_t *module, int source,
break;
}
ret = ompi_osc_base_get_primitive_type_info (datatype, &primitive_datatype, &primitive_count);
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
break;
}
primitive_count *= get_acc_header->count;
ret = osc_rdma_accumulate_allocate (module, source, target, buffer, buflen, proc, get_acc_header->count,
datatype, op, 2, &acc_data);
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
@ -973,7 +991,7 @@ static int ompi_osc_gacc_long_start (ompi_osc_rdma_module_t *module, int source,
break;
}
ret = ompi_osc_rdma_isend_w_cb (target, get_acc_header->count, datatype, source, get_acc_header->tag,
ret = ompi_osc_rdma_isend_w_cb (target, primitive_count, primitive_datatype, source, get_acc_header->tag,
module->comm, accumulate_cb, acc_data);
if (OPAL_UNLIKELY(OMPI_SUCCESS == ret)) {
/* cancel the receive and free the accumulate data */