osc: fix datatype-related issues in the one-sided code
This commit fixes three issues: (1) osc/rdma: The target side of an accumulate was using the target datatype in the receive into the packed buffer. This conflicted with the way the reduction is done into the target buffer. Changed the receive to use the primitive datatype. (2) osc/base: The copy table was completely wrong. Fixed the table to match the underlying datatypes (which are OPAL, not OMPI, datatypes). (3) osc/base: There is a problem using the optimized description. Fall back on using the non-optimized description until we can understand what is going wrong. cmr=v1.8:reviewer=jsquyres This commit was SVN r31204.
Этот коммит содержится в:
родитель
bc55276844
Коммит
949abe45cd
@ -1,3 +1,4 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University.
|
||||
* All rights reserved.
|
||||
@ -7,7 +8,7 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2007 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2007-2014 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -97,83 +98,66 @@ static int copy_##TYPENAME( opal_convertor_t *pConvertor, uint32_t count, \
|
||||
}
|
||||
|
||||
/* set up copy functions for the basic C MPI data types */
|
||||
COPY_TYPE( char, char, 1 )
|
||||
COPY_TYPE( short, short, 1 )
|
||||
COPY_TYPE( int, int, 1 )
|
||||
COPY_TYPE( long, long, 1 )
|
||||
COPY_TYPE( long_long, long long, 1 )
|
||||
COPY_TYPE( float, float, 1 )
|
||||
COPY_TYPE( double, double, 1 )
|
||||
COPY_TYPE( long_double, long double, 1 )
|
||||
COPY_TYPE( complex_float, ompi_mpi_cxx_cplex, 1 )
|
||||
COPY_TYPE( complex_double, ompi_mpi_cxx_dblcplex, 1 )
|
||||
COPY_TYPE( complex_long_double, ompi_mpi_cxx_ldblcplex, 1 )
|
||||
COPY_TYPE( int1, int8_t, 1)
|
||||
COPY_TYPE( int2, int16_t, 1)
|
||||
COPY_TYPE( int4, int32_t, 1)
|
||||
COPY_TYPE( int8, int64_t, 1)
|
||||
COPY_TYPE( bool, bool, 1)
|
||||
COPY_TYPE( float, float, 1)
|
||||
COPY_TYPE( double, double, 1)
|
||||
#if HAVE_LONG_DOUBLE
|
||||
COPY_TYPE( long_double, long double, 1)
|
||||
#endif
|
||||
COPY_TYPE( float_complex, float _Complex, 1)
|
||||
COPY_TYPE( double_complex, double _Complex, 1)
|
||||
#if HAVE_LONG_DOUBLE__COMPLEX
|
||||
COPY_TYPE( long_double_complex, long double _Complex, 1)
|
||||
#endif
|
||||
|
||||
/* table of predefined copy functions - one for each MPI type */
|
||||
/* XXX TODO Adapt to new layout */
|
||||
static conversion_fct_t ompi_osc_base_copy_functions[OMPI_DATATYPE_MAX_PREDEFINED] = {
|
||||
(conversion_fct_t)NULL, /* DT_LOOP */
|
||||
(conversion_fct_t)NULL, /* DT_END_LOOP */
|
||||
(conversion_fct_t)NULL, /* DT_LB */
|
||||
(conversion_fct_t)NULL, /* DT_UB */
|
||||
(conversion_fct_t)copy_char, /* DT_CHAR */
|
||||
(conversion_fct_t)copy_char, /* DT_CHARACTER */
|
||||
(conversion_fct_t)copy_char, /* DT_UNSIGNED_CHAR */
|
||||
(conversion_fct_t)copy_char, /* DT_SIGNED_CHAR */
|
||||
(conversion_fct_t)copy_char, /* DT_BYTE */
|
||||
(conversion_fct_t)copy_short, /* DT_SHORT */
|
||||
(conversion_fct_t)copy_short, /* DT_UNSIGNED_SHORT */
|
||||
(conversion_fct_t)copy_int, /* DT_INT */
|
||||
(conversion_fct_t)copy_int, /* DT_UNSIGNED */
|
||||
(conversion_fct_t)copy_long, /* DT_LONG */
|
||||
(conversion_fct_t)copy_long, /* DT_UNSIGNED_LONG */
|
||||
(conversion_fct_t)copy_long_long, /* DT_LONG_LONG */
|
||||
(conversion_fct_t)copy_long_long, /* DT_UNSIGNED_LONG_LONG */
|
||||
(conversion_fct_t)copy_float, /* DT_FLOAT */
|
||||
(conversion_fct_t)copy_double, /* DT_DOUBLE */
|
||||
(conversion_fct_t)copy_long_double, /* DT_LONG_DOUBLE */
|
||||
(conversion_fct_t)NULL, /* DT_PACKED */
|
||||
(conversion_fct_t)NULL, /* DT_WCHAR */
|
||||
#if SIZEOF_BOOL == SIZEOF_CHAR
|
||||
(conversion_fct_t)copy_char, /* DT_CXX_BOOL */
|
||||
#elif SIZEOF_BOOL == SIZEOF_SHORT
|
||||
(conversion_fct_t)copy_short, /* DT_CXX_BOOL */
|
||||
#elif SIZEOF_BOOL == SIZEOF_INT
|
||||
(conversion_fct_t)copy_int, /* DT_CXX_BOOL */
|
||||
#elif SIZEOF_BOOL == SIZEOF_LONG
|
||||
(conversion_fct_t)copy_long, /* DT_CXX_BOOL */
|
||||
#else
|
||||
(conversion_fct_t)NULL, /* DT_CXX_BOOL */
|
||||
/* table of predefined copy functions - one for each opal basic type */
|
||||
static conversion_fct_t ompi_osc_base_copy_functions[OPAL_DATATYPE_MAX_PREDEFINED] = {
|
||||
[OPAL_DATATYPE_INT1] = (conversion_fct_t) copy_int1,
|
||||
[OPAL_DATATYPE_UINT1] = (conversion_fct_t) copy_int1,
|
||||
[OPAL_DATATYPE_INT2] = (conversion_fct_t) copy_int2,
|
||||
[OPAL_DATATYPE_UINT2] = (conversion_fct_t) copy_int2,
|
||||
[OPAL_DATATYPE_INT4] = (conversion_fct_t) copy_int4,
|
||||
[OPAL_DATATYPE_UINT4] = (conversion_fct_t) copy_int4,
|
||||
[OPAL_DATATYPE_INT8] = (conversion_fct_t) copy_int8,
|
||||
[OPAL_DATATYPE_UINT8] = (conversion_fct_t) copy_int8,
|
||||
#if SIZEOF_FLOAT == 2
|
||||
[OPAL_DATATYPE_FLOAT2] = (conversion_fct_t) copy_float,
|
||||
#elif SIZEOF_DOUBLE == 2
|
||||
[OPAL_DATATYPE_FLOAT2] = (conversion_fct_t) copy_double,
|
||||
#elif SIZEOF_LONG_DOUBLE == 2
|
||||
[OPAL_DATATYPE_FLOAT2] = (conversion_fct_t) copy_long_double,
|
||||
#endif
|
||||
#if OMPI_SIZEOF_FORTRAN_LOGICAL == SIZEOF_CHAR
|
||||
(conversion_fct_t)copy_char, /* DT_LOGIC */
|
||||
#elif OMPI_SIZEOF_FORTRAN_LOGICAL == SIZEOF_SHORT
|
||||
(conversion_fct_t)copy_short, /* DT_LOGIC */
|
||||
#elif OMPI_SIZEOF_FORTRAN_LOGICAL == SIZEOF_INT
|
||||
(conversion_fct_t)copy_int, /* DT_LOGIC */
|
||||
#elif OMPI_SIZEOF_FORTRAN_LOGICAL == SIZEOF_LONG
|
||||
(conversion_fct_t)copy_long, /* DT_LOGIC */
|
||||
#else
|
||||
(conversion_fct_t)NULL, /* DT_LOGIC */
|
||||
#if SIZEOF_FLOAT == 4
|
||||
[OPAL_DATATYPE_FLOAT4] = (conversion_fct_t) copy_float,
|
||||
#elif SIZEOF_DOUBLE == 4
|
||||
[OPAL_DATATYPE_FLOAT4] = (conversion_fct_t) copy_double,
|
||||
#elif SIZEOF_LONG_DOUBLE == 4
|
||||
[OPAL_DATATYPE_FLOAT4] = (conversion_fct_t) copy_long_double,
|
||||
#endif
|
||||
(conversion_fct_t)copy_int, /* DT_INTEGER */
|
||||
(conversion_fct_t)copy_float, /* DT_REAL */
|
||||
(conversion_fct_t)copy_double, /* DT_DBLPREC */
|
||||
(conversion_fct_t)copy_complex_float, /* DT_COMPLEX_FLOAT */
|
||||
(conversion_fct_t)copy_complex_double, /* DT_COMPLEX_DOUBLE */
|
||||
(conversion_fct_t)copy_complex_long_double, /* DT_COMPLEX_LONG_DOUBLE */
|
||||
(conversion_fct_t)NULL, /* DT_2INT */
|
||||
(conversion_fct_t)NULL, /* DT_2INTEGER */
|
||||
(conversion_fct_t)NULL, /* DT_2REAL */
|
||||
(conversion_fct_t)NULL, /* DT_2DBLPREC */
|
||||
(conversion_fct_t)NULL, /* DT_2COMPLEX */
|
||||
(conversion_fct_t)NULL, /* DT_2DOUBLE_COMPLEX */
|
||||
(conversion_fct_t)NULL, /* DT_FLOAT_INT */
|
||||
(conversion_fct_t)NULL, /* DT_DOUBLE_INT */
|
||||
(conversion_fct_t)NULL, /* DT_LONG_DOUBLE_INT */
|
||||
(conversion_fct_t)NULL, /* DT_LONG_INT */
|
||||
(conversion_fct_t)NULL, /* DT_SHORT_INT */
|
||||
(conversion_fct_t)NULL, /* DT_UNAVAILABLE */
|
||||
#if SIZEOF_FLOAT == 8
|
||||
[OPAL_DATATYPE_FLOAT8] = (conversion_fct_t) copy_float,
|
||||
#elif SIZEOF_DOUBLE == 8
|
||||
[OPAL_DATATYPE_FLOAT8] = (conversion_fct_t) copy_double,
|
||||
#elif SIZEOF_LONG_DOUBLE == 8
|
||||
[OPAL_DATATYPE_FLOAT8] = (conversion_fct_t) copy_long_double,
|
||||
#endif
|
||||
#if SIZEOF_FLOAT == 16
|
||||
[OPAL_DATATYPE_FLOAT16] = (conversion_fct_t) copy_float,
|
||||
#elif SIZEOF_DOUBLE == 16
|
||||
[OPAL_DATATYPE_FLOAT16] = (conversion_fct_t) copy_double,
|
||||
#elif SIZEOF_LONG_DOUBLE == 16
|
||||
[OPAL_DATATYPE_FLOAT16] = (conversion_fct_t) copy_long_double,
|
||||
#endif
|
||||
[OPAL_DATATYPE_FLOAT_COMPLEX] = (conversion_fct_t) copy_float_complex,
|
||||
[OPAL_DATATYPE_DOUBLE_COMPLEX] = (conversion_fct_t) copy_double_complex,
|
||||
#if HAVE_LONG_DOUBLE__COMPLEX
|
||||
[OPAL_DATATYPE_LONG_DOUBLE_COMPLEX] = (conversion_fct_t) copy_long_double_complex,
|
||||
#endif
|
||||
[OPAL_DATATYPE_BOOL] = (conversion_fct_t) copy_bool,
|
||||
};
|
||||
|
||||
int
|
||||
@ -218,6 +202,8 @@ ompi_osc_base_process_op(void *outbuf,
|
||||
master.pFunctions = (conversion_fct_t*) &ompi_osc_base_copy_functions;
|
||||
convertor.convertor.master = &master;
|
||||
convertor.convertor.fAdvance = opal_unpack_general;
|
||||
/* there are issues with using the optimized description here */
|
||||
convertor.convertor.use_desc = &datatype->super.desc;
|
||||
|
||||
iov.iov_len = inbuflen;
|
||||
iov.iov_base = (IOVBASE_TYPE*) inbuf;
|
||||
|
@ -822,6 +822,8 @@ static int ompi_osc_rdma_acc_long_start (ompi_osc_rdma_module_t *module, int sou
|
||||
void *target = (unsigned char*) module->baseptr +
|
||||
((unsigned long) acc_header->displacement * module->disp_unit);
|
||||
struct ompi_op_t *op = ompi_osc_base_op_create(acc_header->op);
|
||||
ompi_datatype_t *primitive_datatype;
|
||||
int primitive_count;
|
||||
int ret;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
|
||||
@ -838,6 +840,13 @@ static int ompi_osc_rdma_acc_long_start (ompi_osc_rdma_module_t *module, int sou
|
||||
break;
|
||||
}
|
||||
|
||||
ret = ompi_osc_base_get_primitive_type_info (datatype, &primitive_datatype, &primitive_count);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
|
||||
break;
|
||||
}
|
||||
|
||||
primitive_count *= acc_header->count;
|
||||
|
||||
buflen = datatype_buffer_length (datatype, acc_header->count);
|
||||
|
||||
/* allocate a temporary buffer to receive the accumulate data */
|
||||
@ -854,8 +863,8 @@ static int ompi_osc_rdma_acc_long_start (ompi_osc_rdma_module_t *module, int sou
|
||||
break;
|
||||
}
|
||||
|
||||
ret = ompi_osc_rdma_irecv_w_cb (buffer, acc_header->count, datatype, source, acc_header->tag,
|
||||
module->comm, NULL, accumulate_cb, acc_data);
|
||||
ret = ompi_osc_rdma_irecv_w_cb (buffer, primitive_count, primitive_datatype, source,
|
||||
acc_header->tag, module->comm, NULL, accumulate_cb, acc_data);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
|
||||
OBJ_RELEASE(acc_data);
|
||||
}
|
||||
@ -941,7 +950,9 @@ static int ompi_osc_gacc_long_start (ompi_osc_rdma_module_t *module, int source,
|
||||
((unsigned long) get_acc_header->displacement * module->disp_unit);
|
||||
struct ompi_op_t *op = ompi_osc_base_op_create(get_acc_header->op);
|
||||
struct osc_rdma_accumulate_data_t *acc_data;
|
||||
ompi_datatype_t *primitive_datatype;
|
||||
ompi_request_t *recv_request;
|
||||
int primitive_count;
|
||||
ompi_proc_t *proc;
|
||||
size_t buflen;
|
||||
void *buffer;
|
||||
@ -960,6 +971,13 @@ static int ompi_osc_gacc_long_start (ompi_osc_rdma_module_t *module, int source,
|
||||
break;
|
||||
}
|
||||
|
||||
ret = ompi_osc_base_get_primitive_type_info (datatype, &primitive_datatype, &primitive_count);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
|
||||
break;
|
||||
}
|
||||
|
||||
primitive_count *= get_acc_header->count;
|
||||
|
||||
ret = osc_rdma_accumulate_allocate (module, source, target, buffer, buflen, proc, get_acc_header->count,
|
||||
datatype, op, 2, &acc_data);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
|
||||
@ -973,7 +991,7 @@ static int ompi_osc_gacc_long_start (ompi_osc_rdma_module_t *module, int source,
|
||||
break;
|
||||
}
|
||||
|
||||
ret = ompi_osc_rdma_isend_w_cb (target, get_acc_header->count, datatype, source, get_acc_header->tag,
|
||||
ret = ompi_osc_rdma_isend_w_cb (target, primitive_count, primitive_datatype, source, get_acc_header->tag,
|
||||
module->comm, accumulate_cb, acc_data);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS == ret)) {
|
||||
/* cancel the receive and free the accumulate data */
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user