
the case fix in ompi_osc_base_process_op in r31204. There are two cases that needed to be handled: - The target is a simple datatype (contiguous block of a primitive type) but the origin is not. In this case we still need to pack the origin data but we can not rely on the convertor to do the unpack (see r31204). - Both the origin and target datatypes are simple datatypes. In this case we can use ompi_op_reduce to do the accumulation without having to pack the origin data. cmr=v1.8:ticket=trac:4449 This commit was SVN r31231. The following SVN revision numbers were found above: r31204 --> open-mpi/ompi@949abe45cd The following Trac tickets were found above: Ticket 4449 --> https://svn.open-mpi.org/trac/ompi/ticket/4449
352 строки
14 KiB
C
352 строки
14 KiB
C
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
|
/*
|
|
* Copyright (c) 2004-2005 The Trustees of Indiana University.
|
|
* All rights reserved.
|
|
* Copyright (c) 2004-2006 The Trustees of the University of Tennessee.
|
|
* All rights reserved.
|
|
* Copyright (c) 2004-2008 High Performance Computing Center Stuttgart,
|
|
* University of Stuttgart. All rights reserved.
|
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
|
* All rights reserved.
|
|
* Copyright (c) 2007-2014 Los Alamos National Security, LLC. All rights
|
|
* reserved.
|
|
* Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved.
|
|
* $COPYRIGHT$
|
|
*
|
|
* Additional copyrights may follow
|
|
*
|
|
* $HEADER$
|
|
*/
|
|
|
|
/*
|
|
* utility functions for dealing with remote datatype and op structures
|
|
*/
|
|
|
|
#include "ompi_config.h"
|
|
|
|
#include "opal/datatype/opal_convertor.h"
|
|
#include "opal/datatype/opal_convertor_internal.h"
|
|
#include "opal/datatype/opal_datatype_prototypes.h"
|
|
|
|
#include "ompi/op/op.h"
|
|
#include "ompi/datatype/ompi_datatype.h"
|
|
#include "ompi/datatype/ompi_datatype_internal.h"
|
|
|
|
#include "osc_base_obj_convert.h"
|
|
#include "ompi/memchecker.h"
|
|
|
|
int
|
|
ompi_osc_base_get_primitive_type_info(ompi_datatype_t *datatype,
|
|
ompi_datatype_t **prim_datatype,
|
|
uint32_t *prim_count)
|
|
{
|
|
ompi_datatype_t *primitive_datatype = NULL;
|
|
size_t datatype_size, primitive_size, primitive_count;
|
|
|
|
primitive_datatype = ompi_datatype_get_single_predefined_type_from_args(datatype);
|
|
if( NULL == primitive_datatype ) {
|
|
*prim_count = 0;
|
|
return OMPI_SUCCESS;
|
|
}
|
|
ompi_datatype_type_size( datatype, &datatype_size );
|
|
ompi_datatype_type_size( primitive_datatype, &primitive_size );
|
|
primitive_count = datatype_size / primitive_size;
|
|
#if OPAL_ENABLE_DEBUG
|
|
assert( 0 == (datatype_size % primitive_size) );
|
|
#endif /* OPAL_ENABLE_DEBUG */
|
|
|
|
/* We now have the count as a size_t, convert it to an uint32_t */
|
|
*prim_datatype = primitive_datatype;
|
|
*prim_count = (uint32_t)primitive_count;
|
|
|
|
return OMPI_SUCCESS;
|
|
}
|
|
|
|
|
|
struct ompi_osc_base_convertor_t {
|
|
opal_convertor_t convertor;
|
|
ompi_op_t *op;
|
|
ompi_datatype_t *datatype;
|
|
};
|
|
typedef struct ompi_osc_base_convertor_t ompi_osc_base_convertor_t;
|
|
static OBJ_CLASS_INSTANCE(ompi_osc_base_convertor_t, opal_convertor_t, NULL, NULL);
|
|
|
|
#define COPY_TYPE( TYPENAME, TYPE, COUNT ) \
|
|
static int copy_##TYPENAME( opal_convertor_t *pConvertor, uint32_t count, \
|
|
char* from, size_t from_len, ptrdiff_t from_extent, \
|
|
char* to, size_t to_len, ptrdiff_t to_extent, \
|
|
ptrdiff_t *advance) \
|
|
{ \
|
|
size_t remote_TYPE_size = sizeof(TYPE) * (COUNT); /* TODO */ \
|
|
size_t local_TYPE_size = (COUNT) * sizeof(TYPE); \
|
|
ompi_osc_base_convertor_t *osc_convertor = \
|
|
(ompi_osc_base_convertor_t*) pConvertor; \
|
|
\
|
|
if( (from_extent == (ptrdiff_t)local_TYPE_size) && \
|
|
(to_extent == (ptrdiff_t)remote_TYPE_size) ) { \
|
|
ompi_op_reduce(osc_convertor->op, from, to, count, osc_convertor->datatype); \
|
|
} else { \
|
|
uint32_t i; \
|
|
for( i = 0; i < count; i++ ) { \
|
|
ompi_op_reduce(osc_convertor->op, from, to, 1, osc_convertor->datatype); \
|
|
to += to_extent; \
|
|
from += from_extent; \
|
|
} \
|
|
} \
|
|
*advance = count * from_extent; \
|
|
return count; \
|
|
}
|
|
|
|
/* set up copy functions for the basic C MPI data types */
|
|
COPY_TYPE( int1, int8_t, 1)
|
|
COPY_TYPE( int2, int16_t, 1)
|
|
COPY_TYPE( int4, int32_t, 1)
|
|
COPY_TYPE( int8, int64_t, 1)
|
|
COPY_TYPE( bool, bool, 1)
|
|
COPY_TYPE( float, float, 1)
|
|
COPY_TYPE( double, double, 1)
|
|
#if HAVE_LONG_DOUBLE
|
|
COPY_TYPE( long_double, long double, 1)
|
|
#endif
|
|
COPY_TYPE( float_complex, float _Complex, 1)
|
|
COPY_TYPE( double_complex, double _Complex, 1)
|
|
#if HAVE_LONG_DOUBLE__COMPLEX
|
|
COPY_TYPE( long_double_complex, long double _Complex, 1)
|
|
#endif
|
|
|
|
/* table of predefined copy functions - one for each opal basic type */
|
|
static conversion_fct_t ompi_osc_base_copy_functions[OPAL_DATATYPE_MAX_PREDEFINED] = {
|
|
[OPAL_DATATYPE_INT1] = (conversion_fct_t) copy_int1,
|
|
[OPAL_DATATYPE_UINT1] = (conversion_fct_t) copy_int1,
|
|
[OPAL_DATATYPE_INT2] = (conversion_fct_t) copy_int2,
|
|
[OPAL_DATATYPE_UINT2] = (conversion_fct_t) copy_int2,
|
|
[OPAL_DATATYPE_INT4] = (conversion_fct_t) copy_int4,
|
|
[OPAL_DATATYPE_UINT4] = (conversion_fct_t) copy_int4,
|
|
[OPAL_DATATYPE_INT8] = (conversion_fct_t) copy_int8,
|
|
[OPAL_DATATYPE_UINT8] = (conversion_fct_t) copy_int8,
|
|
#if SIZEOF_FLOAT == 2
|
|
[OPAL_DATATYPE_FLOAT2] = (conversion_fct_t) copy_float,
|
|
#elif SIZEOF_DOUBLE == 2
|
|
[OPAL_DATATYPE_FLOAT2] = (conversion_fct_t) copy_double,
|
|
#elif SIZEOF_LONG_DOUBLE == 2
|
|
[OPAL_DATATYPE_FLOAT2] = (conversion_fct_t) copy_long_double,
|
|
#endif
|
|
#if SIZEOF_FLOAT == 4
|
|
[OPAL_DATATYPE_FLOAT4] = (conversion_fct_t) copy_float,
|
|
#elif SIZEOF_DOUBLE == 4
|
|
[OPAL_DATATYPE_FLOAT4] = (conversion_fct_t) copy_double,
|
|
#elif SIZEOF_LONG_DOUBLE == 4
|
|
[OPAL_DATATYPE_FLOAT4] = (conversion_fct_t) copy_long_double,
|
|
#endif
|
|
#if SIZEOF_FLOAT == 8
|
|
[OPAL_DATATYPE_FLOAT8] = (conversion_fct_t) copy_float,
|
|
#elif SIZEOF_DOUBLE == 8
|
|
[OPAL_DATATYPE_FLOAT8] = (conversion_fct_t) copy_double,
|
|
#elif SIZEOF_LONG_DOUBLE == 8
|
|
[OPAL_DATATYPE_FLOAT8] = (conversion_fct_t) copy_long_double,
|
|
#endif
|
|
#if SIZEOF_FLOAT == 16
|
|
[OPAL_DATATYPE_FLOAT16] = (conversion_fct_t) copy_float,
|
|
#elif SIZEOF_DOUBLE == 16
|
|
[OPAL_DATATYPE_FLOAT16] = (conversion_fct_t) copy_double,
|
|
#elif SIZEOF_LONG_DOUBLE == 16
|
|
[OPAL_DATATYPE_FLOAT16] = (conversion_fct_t) copy_long_double,
|
|
#endif
|
|
[OPAL_DATATYPE_FLOAT_COMPLEX] = (conversion_fct_t) copy_float_complex,
|
|
[OPAL_DATATYPE_DOUBLE_COMPLEX] = (conversion_fct_t) copy_double_complex,
|
|
#if HAVE_LONG_DOUBLE__COMPLEX
|
|
[OPAL_DATATYPE_LONG_DOUBLE_COMPLEX] = (conversion_fct_t) copy_long_double_complex,
|
|
#endif
|
|
[OPAL_DATATYPE_BOOL] = (conversion_fct_t) copy_bool,
|
|
};
|
|
|
|
int
|
|
ompi_osc_base_process_op(void *outbuf,
|
|
void *inbuf,
|
|
size_t inbuflen,
|
|
struct ompi_datatype_t *datatype,
|
|
int count,
|
|
ompi_op_t *op)
|
|
{
|
|
if (op == &ompi_mpi_op_replace.op) {
|
|
return OMPI_ERR_NOT_SUPPORTED;
|
|
}
|
|
|
|
if (ompi_datatype_is_predefined(datatype)) {
|
|
ompi_op_reduce(op, inbuf, outbuf, count, datatype);
|
|
} else {
|
|
struct ompi_datatype_t *primitive_datatype = NULL;
|
|
ompi_osc_base_convertor_t convertor;
|
|
struct iovec iov;
|
|
uint32_t iov_count = 1;
|
|
size_t max_data;
|
|
struct opal_convertor_master_t master = {NULL, 0, 0, 0, {0, }, NULL};
|
|
|
|
primitive_datatype = ompi_datatype_get_single_predefined_type_from_args(datatype);
|
|
if (ompi_datatype_is_contiguous_memory_layout (datatype, count) &&
|
|
1 == datatype->super.desc.used) {
|
|
/* NTH: the datatype is made up of a contiguous block of the primitive
|
|
* datatype. do not use the convertor in this case since opal_unpack_general
|
|
can not handle it */
|
|
count *= datatype->super.desc.desc[0].elem.count;
|
|
ompi_op_reduce(op, inbuf, outbuf, count, primitive_datatype);
|
|
return OMPI_SUCCESS;
|
|
}
|
|
|
|
/* create convertor */
|
|
OBJ_CONSTRUCT(&convertor, ompi_osc_base_convertor_t);
|
|
convertor.op = op;
|
|
convertor.datatype = primitive_datatype;
|
|
|
|
/* initialize convertor */
|
|
opal_convertor_copy_and_prepare_for_recv(ompi_proc_local()->proc_convertor,
|
|
&(datatype->super),
|
|
count,
|
|
outbuf,
|
|
0,
|
|
&convertor.convertor);
|
|
|
|
memcpy(&master, convertor.convertor.master, sizeof(struct opal_convertor_master_t));
|
|
master.next = convertor.convertor.master;
|
|
master.pFunctions = (conversion_fct_t*) &ompi_osc_base_copy_functions;
|
|
convertor.convertor.master = &master;
|
|
convertor.convertor.fAdvance = opal_unpack_general;
|
|
/* there are issues with using the optimized description here */
|
|
convertor.convertor.use_desc = &datatype->super.desc;
|
|
|
|
iov.iov_len = inbuflen;
|
|
iov.iov_base = (IOVBASE_TYPE*) inbuf;
|
|
max_data = iov.iov_len;
|
|
MEMCHECKER(
|
|
memchecker_convertor_call(&opal_memchecker_base_mem_defined,
|
|
&convertor.convertor);
|
|
);
|
|
opal_convertor_unpack(&convertor.convertor,
|
|
&iov,
|
|
&iov_count,
|
|
&max_data);
|
|
MEMCHECKER(
|
|
memchecker_convertor_call(&opal_memchecker_base_mem_noaccess,
|
|
&convertor.convertor);
|
|
);
|
|
OBJ_DESTRUCT(&convertor);
|
|
}
|
|
|
|
return OMPI_SUCCESS;
|
|
}
|
|
|
|
|
|
|
|
int
|
|
ompi_osc_base_sndrcv_op(void *origin,
|
|
int32_t origin_count,
|
|
struct ompi_datatype_t *origin_dt,
|
|
void *target,
|
|
int32_t target_count,
|
|
struct ompi_datatype_t *target_dt,
|
|
ompi_op_t *op)
|
|
{
|
|
ompi_datatype_t *origin_primitive, *target_primitive;
|
|
|
|
if (ompi_datatype_is_predefined(origin_dt) && origin_dt == target_dt) {
|
|
ompi_op_reduce(op, origin, target, origin_count, origin_dt);
|
|
} else {
|
|
ompi_osc_base_convertor_t recv_convertor;
|
|
opal_convertor_t send_convertor;
|
|
struct iovec iov;
|
|
uint32_t iov_count = 1;
|
|
size_t max_data;
|
|
int completed, length;
|
|
bool contiguous_target = false;
|
|
struct opal_convertor_master_t master = {NULL, 0, 0, 0, {0, }, NULL};
|
|
|
|
origin_primitive = ompi_datatype_get_single_predefined_type_from_args(origin_dt);
|
|
target_primitive = ompi_datatype_get_single_predefined_type_from_args(target_dt);
|
|
|
|
/* check that the two primitives are the same */
|
|
if (OPAL_UNLIKELY(origin_primitive != target_primitive)) {
|
|
return OMPI_ERR_RMA_SYNC;
|
|
}
|
|
|
|
if (ompi_datatype_is_contiguous_memory_layout (target_dt, target_count) &&
|
|
1 == target_dt->super.desc.used) {
|
|
/* NTH: the target datatype is made up of a contiguous block of the primitive
|
|
* datatype. do not use the convertor in this case since opal_unpack_general
|
|
can not handle it */
|
|
target_count *= target_dt->super.desc.desc[0].elem.count;
|
|
contiguous_target = true;
|
|
}
|
|
|
|
if (ompi_datatype_is_contiguous_memory_layout (origin_dt, origin_count) &&
|
|
1 == origin_dt->super.desc.used && contiguous_target) {
|
|
/* NTH: both datatypes are contiguous blocks of the same primitive datatype */
|
|
origin_count *= origin_dt->super.desc.desc[0].elem.count;
|
|
|
|
/* NTH: should we proper checks for this case */
|
|
assert (origin_count <= target_count);
|
|
|
|
ompi_op_reduce(op, origin, target, origin_count, origin_primitive);
|
|
return OMPI_SUCCESS;
|
|
}
|
|
|
|
|
|
/* initialize send convertor */
|
|
OBJ_CONSTRUCT(&send_convertor, opal_convertor_t);
|
|
opal_convertor_copy_and_prepare_for_send(ompi_proc_local()->proc_convertor,
|
|
&(origin_dt->super), origin_count, origin, 0,
|
|
&send_convertor);
|
|
|
|
/* initialize recv convertor */
|
|
if (!contiguous_target) {
|
|
OBJ_CONSTRUCT(&recv_convertor, ompi_osc_base_convertor_t);
|
|
recv_convertor.op = op;
|
|
recv_convertor.datatype = ompi_datatype_get_single_predefined_type_from_args(target_dt);
|
|
opal_convertor_copy_and_prepare_for_recv(ompi_proc_local()->proc_convertor,
|
|
&(target_dt->super), target_count,
|
|
target, 0, &recv_convertor.convertor);
|
|
|
|
memcpy(&master, recv_convertor.convertor.master, sizeof(struct opal_convertor_master_t));
|
|
master.next = recv_convertor.convertor.master;
|
|
master.pFunctions = (conversion_fct_t*) &ompi_osc_base_copy_functions;
|
|
recv_convertor.convertor.master = &master;
|
|
recv_convertor.convertor.fAdvance = opal_unpack_general;
|
|
}
|
|
|
|
/* copy */
|
|
iov.iov_len = length = 64 * 1024;
|
|
iov.iov_base = (IOVBASE_TYPE*)malloc( length * sizeof(char) );
|
|
|
|
completed = 0;
|
|
while(0 == completed) {
|
|
iov.iov_len = length;
|
|
iov_count = 1;
|
|
max_data = length;
|
|
completed |= opal_convertor_pack( &send_convertor, &iov, &iov_count, &max_data );
|
|
|
|
if (contiguous_target) {
|
|
int packed_count = (int) (max_data / target_primitive->super.size);
|
|
|
|
ompi_op_reduce(op, iov.iov_base, target, packed_count, target_primitive);
|
|
target = (void *)((uintptr_t) target + max_data);
|
|
|
|
target_count -= packed_count;
|
|
|
|
/* NTH: at this time it is erroneous for this function to be called if the
|
|
* target is not large enough to fit the data */
|
|
assert (target_count >= 0);
|
|
completed |= (0 == target_count);
|
|
} else {
|
|
completed |= opal_convertor_unpack( &recv_convertor.convertor, &iov, &iov_count, &max_data );
|
|
}
|
|
}
|
|
free( iov.iov_base );
|
|
OBJ_DESTRUCT( &send_convertor );
|
|
|
|
if (!contiguous_target) {
|
|
OBJ_DESTRUCT( &recv_convertor );
|
|
}
|
|
}
|
|
|
|
return OMPI_SUCCESS;
|
|
}
|