1
1
openmpi/ompi/mca/osc/base/osc_base_obj_convert.c
Nathan Hjelm 949abe45cd osc: fix datatype related issues in the one-sided code
This commit fixes two issues:

 - osc/rdma: The target side of an accumulate was using the target datatype
   in the receive to the packed buffer. This was conflicting with the way
   the reduction is done into the target buffer. Changed the receive to use
   the primitive datatype.

 - osc/base: The copy table was completely wrong. Fixed the table to match
   the underlying datatypes (which are opal not ompi datatypes).

 - osc/base: There is a problem using the optimized description. Fall back
   on using the non-optimized description until we can understand what is
   going wrong.

cmr=v1.8:reviewer=jsquyres

This commit was SVN r31204.
2014-03-25 15:28:48 +00:00

290 строки
12 KiB
C

/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2006 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2008 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007-2014 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/*
* utility functions for dealing with remote datatype and op structures
*/
#include "ompi_config.h"
#include "opal/datatype/opal_convertor.h"
#include "opal/datatype/opal_convertor_internal.h"
#include "opal/datatype/opal_datatype_prototypes.h"
#include "ompi/op/op.h"
#include "ompi/datatype/ompi_datatype.h"
#include "ompi/datatype/ompi_datatype_internal.h"
#include "osc_base_obj_convert.h"
#include "ompi/memchecker.h"
int
ompi_osc_base_get_primitive_type_info(ompi_datatype_t *datatype,
ompi_datatype_t **prim_datatype,
uint32_t *prim_count)
{
ompi_datatype_t *primitive_datatype = NULL;
size_t datatype_size, primitive_size, primitive_count;
primitive_datatype = ompi_datatype_get_single_predefined_type_from_args(datatype);
if( NULL == primitive_datatype ) {
*prim_count = 0;
return OMPI_SUCCESS;
}
ompi_datatype_type_size( datatype, &datatype_size );
ompi_datatype_type_size( primitive_datatype, &primitive_size );
primitive_count = datatype_size / primitive_size;
#if OPAL_ENABLE_DEBUG
assert( 0 == (datatype_size % primitive_size) );
#endif /* OPAL_ENABLE_DEBUG */
/* We now have the count as a size_t, convert it to an uint32_t */
*prim_datatype = primitive_datatype;
*prim_count = (uint32_t)primitive_count;
return OMPI_SUCCESS;
}
struct ompi_osc_base_convertor_t {
opal_convertor_t convertor;
ompi_op_t *op;
ompi_datatype_t *datatype;
};
typedef struct ompi_osc_base_convertor_t ompi_osc_base_convertor_t;
static OBJ_CLASS_INSTANCE(ompi_osc_base_convertor_t, opal_convertor_t, NULL, NULL);
#define COPY_TYPE( TYPENAME, TYPE, COUNT ) \
static int copy_##TYPENAME( opal_convertor_t *pConvertor, uint32_t count, \
char* from, size_t from_len, ptrdiff_t from_extent, \
char* to, size_t to_len, ptrdiff_t to_extent, \
ptrdiff_t *advance) \
{ \
size_t remote_TYPE_size = sizeof(TYPE) * (COUNT); /* TODO */ \
size_t local_TYPE_size = (COUNT) * sizeof(TYPE); \
ompi_osc_base_convertor_t *osc_convertor = \
(ompi_osc_base_convertor_t*) pConvertor; \
\
if( (from_extent == (ptrdiff_t)local_TYPE_size) && \
(to_extent == (ptrdiff_t)remote_TYPE_size) ) { \
ompi_op_reduce(osc_convertor->op, from, to, count, osc_convertor->datatype); \
} else { \
uint32_t i; \
for( i = 0; i < count; i++ ) { \
ompi_op_reduce(osc_convertor->op, from, to, 1, osc_convertor->datatype); \
to += to_extent; \
from += from_extent; \
} \
} \
*advance = count * from_extent; \
return count; \
}
/* set up copy functions for the basic C MPI data types */
COPY_TYPE( int1, int8_t, 1)
COPY_TYPE( int2, int16_t, 1)
COPY_TYPE( int4, int32_t, 1)
COPY_TYPE( int8, int64_t, 1)
COPY_TYPE( bool, bool, 1)
COPY_TYPE( float, float, 1)
COPY_TYPE( double, double, 1)
#if HAVE_LONG_DOUBLE
COPY_TYPE( long_double, long double, 1)
#endif
COPY_TYPE( float_complex, float _Complex, 1)
COPY_TYPE( double_complex, double _Complex, 1)
#if HAVE_LONG_DOUBLE__COMPLEX
COPY_TYPE( long_double_complex, long double _Complex, 1)
#endif
/* table of predefined copy functions - one for each opal basic type */
static conversion_fct_t ompi_osc_base_copy_functions[OPAL_DATATYPE_MAX_PREDEFINED] = {
[OPAL_DATATYPE_INT1] = (conversion_fct_t) copy_int1,
[OPAL_DATATYPE_UINT1] = (conversion_fct_t) copy_int1,
[OPAL_DATATYPE_INT2] = (conversion_fct_t) copy_int2,
[OPAL_DATATYPE_UINT2] = (conversion_fct_t) copy_int2,
[OPAL_DATATYPE_INT4] = (conversion_fct_t) copy_int4,
[OPAL_DATATYPE_UINT4] = (conversion_fct_t) copy_int4,
[OPAL_DATATYPE_INT8] = (conversion_fct_t) copy_int8,
[OPAL_DATATYPE_UINT8] = (conversion_fct_t) copy_int8,
#if SIZEOF_FLOAT == 2
[OPAL_DATATYPE_FLOAT2] = (conversion_fct_t) copy_float,
#elif SIZEOF_DOUBLE == 2
[OPAL_DATATYPE_FLOAT2] = (conversion_fct_t) copy_double,
#elif SIZEOF_LONG_DOUBLE == 2
[OPAL_DATATYPE_FLOAT2] = (conversion_fct_t) copy_long_double,
#endif
#if SIZEOF_FLOAT == 4
[OPAL_DATATYPE_FLOAT4] = (conversion_fct_t) copy_float,
#elif SIZEOF_DOUBLE == 4
[OPAL_DATATYPE_FLOAT4] = (conversion_fct_t) copy_double,
#elif SIZEOF_LONG_DOUBLE == 4
[OPAL_DATATYPE_FLOAT4] = (conversion_fct_t) copy_long_double,
#endif
#if SIZEOF_FLOAT == 8
[OPAL_DATATYPE_FLOAT8] = (conversion_fct_t) copy_float,
#elif SIZEOF_DOUBLE == 8
[OPAL_DATATYPE_FLOAT8] = (conversion_fct_t) copy_double,
#elif SIZEOF_LONG_DOUBLE == 8
[OPAL_DATATYPE_FLOAT8] = (conversion_fct_t) copy_long_double,
#endif
#if SIZEOF_FLOAT == 16
[OPAL_DATATYPE_FLOAT16] = (conversion_fct_t) copy_float,
#elif SIZEOF_DOUBLE == 16
[OPAL_DATATYPE_FLOAT16] = (conversion_fct_t) copy_double,
#elif SIZEOF_LONG_DOUBLE == 16
[OPAL_DATATYPE_FLOAT16] = (conversion_fct_t) copy_long_double,
#endif
[OPAL_DATATYPE_FLOAT_COMPLEX] = (conversion_fct_t) copy_float_complex,
[OPAL_DATATYPE_DOUBLE_COMPLEX] = (conversion_fct_t) copy_double_complex,
#if HAVE_LONG_DOUBLE__COMPLEX
[OPAL_DATATYPE_LONG_DOUBLE_COMPLEX] = (conversion_fct_t) copy_long_double_complex,
#endif
[OPAL_DATATYPE_BOOL] = (conversion_fct_t) copy_bool,
};
int
ompi_osc_base_process_op(void *outbuf,
void *inbuf,
size_t inbuflen,
struct ompi_datatype_t *datatype,
int count,
ompi_op_t *op)
{
if (op == &ompi_mpi_op_replace.op) {
return OMPI_ERR_NOT_SUPPORTED;
}
if (ompi_datatype_is_predefined(datatype)) {
ompi_op_reduce(op, inbuf, outbuf, count, datatype);
} else {
struct ompi_datatype_t *primitive_datatype = NULL;
ompi_osc_base_convertor_t convertor;
struct iovec iov;
uint32_t iov_count = 1;
size_t max_data;
struct opal_convertor_master_t master = {NULL, 0, 0, 0, {0, }, NULL};
primitive_datatype = ompi_datatype_get_single_predefined_type_from_args(datatype);
/* create convertor */
OBJ_CONSTRUCT(&convertor, ompi_osc_base_convertor_t);
convertor.op = op;
convertor.datatype = primitive_datatype;
/* initialize convertor */
opal_convertor_copy_and_prepare_for_recv(ompi_proc_local()->proc_convertor,
&(datatype->super),
count,
outbuf,
0,
&convertor.convertor);
memcpy(&master, convertor.convertor.master, sizeof(struct opal_convertor_master_t));
master.next = convertor.convertor.master;
master.pFunctions = (conversion_fct_t*) &ompi_osc_base_copy_functions;
convertor.convertor.master = &master;
convertor.convertor.fAdvance = opal_unpack_general;
/* there are issues with using the optimized description here */
convertor.convertor.use_desc = &datatype->super.desc;
iov.iov_len = inbuflen;
iov.iov_base = (IOVBASE_TYPE*) inbuf;
max_data = iov.iov_len;
MEMCHECKER(
memchecker_convertor_call(&opal_memchecker_base_mem_defined,
&convertor.convertor);
);
opal_convertor_unpack(&convertor.convertor,
&iov,
&iov_count,
&max_data);
MEMCHECKER(
memchecker_convertor_call(&opal_memchecker_base_mem_noaccess,
&convertor.convertor);
);
OBJ_DESTRUCT(&convertor);
}
return OMPI_SUCCESS;
}
int
ompi_osc_base_sndrcv_op(void *origin,
int32_t origin_count,
struct ompi_datatype_t *origin_dt,
void *target,
int32_t target_count,
struct ompi_datatype_t *target_dt,
ompi_op_t *op)
{
if (ompi_datatype_is_predefined(origin_dt) && origin_dt == target_dt) {
ompi_op_reduce(op, origin, target, origin_count, origin_dt);
} else {
ompi_osc_base_convertor_t recv_convertor;
opal_convertor_t send_convertor;
struct iovec iov;
uint32_t iov_count = 1;
size_t max_data;
int completed, length;
struct opal_convertor_master_t master = {NULL, 0, 0, 0, {0, }, NULL};
/* initialize send convertor */
OBJ_CONSTRUCT(&send_convertor, opal_convertor_t);
opal_convertor_copy_and_prepare_for_send(ompi_proc_local()->proc_convertor,
&(origin_dt->super), origin_count, origin, 0,
&send_convertor);
/* initialize recv convertor */
OBJ_CONSTRUCT(&recv_convertor, ompi_osc_base_convertor_t);
recv_convertor.op = op;
recv_convertor.datatype = ompi_datatype_get_single_predefined_type_from_args(target_dt);
opal_convertor_copy_and_prepare_for_recv(ompi_proc_local()->proc_convertor,
&(target_dt->super), target_count,
target, 0, &recv_convertor.convertor);
memcpy(&master, recv_convertor.convertor.master, sizeof(struct opal_convertor_master_t));
master.next = recv_convertor.convertor.master;
master.pFunctions = (conversion_fct_t*) &ompi_osc_base_copy_functions;
recv_convertor.convertor.master = &master;
recv_convertor.convertor.fAdvance = opal_unpack_general;
/* copy */
iov.iov_len = length = 64 * 1024;
iov.iov_base = (IOVBASE_TYPE*)malloc( length * sizeof(char) );
completed = 0;
while(0 == completed) {
iov.iov_len = length;
iov_count = 1;
max_data = length;
completed |= opal_convertor_pack( &send_convertor, &iov, &iov_count, &max_data );
completed |= opal_convertor_unpack( &recv_convertor.convertor, &iov, &iov_count, &max_data );
}
free( iov.iov_base );
OBJ_DESTRUCT( &send_convertor );
OBJ_DESTRUCT( &recv_convertor );
}
return OMPI_SUCCESS;
}