Support reduction operations other than MPI_REPLACE for user-defined
datatypes with MPI_ACCUMULATE This commit was SVN r15418.
Этот коммит содержится в:
родитель
bfe682b6c7
Коммит
7a9a8c7e17
@ -15,9 +15,11 @@
|
|||||||
#
|
#
|
||||||
|
|
||||||
headers += \
|
headers += \
|
||||||
base/base.h
|
base/base.h \
|
||||||
|
base/osc_base_obj_convert.h
|
||||||
|
|
||||||
libmca_osc_la_SOURCES += \
|
libmca_osc_la_SOURCES += \
|
||||||
base/osc_base_close.c \
|
base/osc_base_close.c \
|
||||||
base/osc_base_open.c \
|
base/osc_base_open.c \
|
||||||
base/osc_base_init.c
|
base/osc_base_init.c \
|
||||||
|
base/osc_base_obj_convert.c
|
||||||
|
201
ompi/mca/osc/base/osc_base_obj_convert.c
Обычный файл
201
ompi/mca/osc/base/osc_base_obj_convert.c
Обычный файл
@ -0,0 +1,201 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2004-2005 The Trustees of Indiana University.
|
||||||
|
* All rights reserved.
|
||||||
|
* Copyright (c) 2004-2006 The Trustees of the University of Tennessee.
|
||||||
|
* All rights reserved.
|
||||||
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||||
|
* University of Stuttgart. All rights reserved.
|
||||||
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||||
|
* All rights reserved.
|
||||||
|
* Copyright (c) 2007 Los Alamos National Security, LLC. All rights
|
||||||
|
* reserved.
|
||||||
|
* $COPYRIGHT$
|
||||||
|
*
|
||||||
|
* Additional copyrights may follow
|
||||||
|
*
|
||||||
|
* $HEADER$
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* utility functions for dealing with remote datatype and op structures
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "ompi_config.h"
|
||||||
|
|
||||||
|
#include "ompi/op/op.h"
|
||||||
|
#include "ompi/datatype/datatype.h"
|
||||||
|
#include "ompi/datatype/datatype_internal.h"
|
||||||
|
#include "ompi/datatype/convertor.h"
|
||||||
|
#include "ompi/datatype/convertor_internal.h"
|
||||||
|
#include "ompi/datatype/datatype_prototypes.h"
|
||||||
|
|
||||||
|
#include "osc_base_obj_convert.h"
|
||||||
|
|
||||||
|
struct ompi_osc_base_convertor_t {
|
||||||
|
ompi_convertor_t convertor;
|
||||||
|
ompi_op_t *op;
|
||||||
|
ompi_datatype_t *datatype;
|
||||||
|
};
|
||||||
|
typedef struct ompi_osc_base_convertor_t ompi_osc_base_convertor_t;
|
||||||
|
static OBJ_CLASS_INSTANCE(ompi_osc_base_convertor_t, ompi_convertor_t, NULL, NULL);
|
||||||
|
|
||||||
|
#define COPY_TYPE( TYPENAME, TYPE, COUNT ) \
|
||||||
|
static int copy_##TYPENAME( ompi_convertor_t *pConvertor, uint32_t count, \
|
||||||
|
char* from, size_t from_len, ptrdiff_t from_extent, \
|
||||||
|
char* to, size_t to_len, ptrdiff_t to_extent, \
|
||||||
|
ptrdiff_t *advance) \
|
||||||
|
{ \
|
||||||
|
ompi_osc_base_convertor_t *osc_convertor = \
|
||||||
|
(ompi_osc_base_convertor_t*) pConvertor; \
|
||||||
|
\
|
||||||
|
ompi_op_reduce(osc_convertor->op, from, to, count, osc_convertor->datatype); \
|
||||||
|
*advance = count * from_extent; \
|
||||||
|
return count; \
|
||||||
|
}
|
||||||
|
|
||||||
|
/* set up copy functions for the basic C MPI data types */
|
||||||
|
COPY_TYPE( char, char, 1 )
|
||||||
|
COPY_TYPE( short, short, 1 )
|
||||||
|
COPY_TYPE( int, int, 1 )
|
||||||
|
COPY_TYPE( long, long, 1 )
|
||||||
|
COPY_TYPE( long_long, long long, 1 )
|
||||||
|
COPY_TYPE( float, float, 1 )
|
||||||
|
COPY_TYPE( double, double, 1 )
|
||||||
|
COPY_TYPE( long_double, long double, 1 )
|
||||||
|
COPY_TYPE( complex_float, ompi_complex_float_t, 1 )
|
||||||
|
COPY_TYPE( complex_double, ompi_complex_double_t, 1 )
|
||||||
|
COPY_TYPE( complex_long_double, ompi_complex_long_double_t, 1 )
|
||||||
|
|
||||||
|
/* table of predefined copy functions - one for each MPI type */
|
||||||
|
static conversion_fct_t ompi_osc_base_copy_functions[DT_MAX_PREDEFINED] = {
|
||||||
|
(conversion_fct_t)NULL, /* DT_LOOP */
|
||||||
|
(conversion_fct_t)NULL, /* DT_END_LOOP */
|
||||||
|
(conversion_fct_t)NULL, /* DT_LB */
|
||||||
|
(conversion_fct_t)NULL, /* DT_UB */
|
||||||
|
(conversion_fct_t)copy_char, /* DT_CHAR */
|
||||||
|
(conversion_fct_t)copy_char, /* DT_CHARACTER */
|
||||||
|
(conversion_fct_t)copy_char, /* DT_UNSIGNED_CHAR */
|
||||||
|
(conversion_fct_t)copy_char, /* DT_SIGNED_CHAR */
|
||||||
|
(conversion_fct_t)copy_char, /* DT_BYTE */
|
||||||
|
(conversion_fct_t)copy_short, /* DT_SHORT */
|
||||||
|
(conversion_fct_t)copy_short, /* DT_UNSIGNED_SHORT */
|
||||||
|
(conversion_fct_t)copy_int, /* DT_INT */
|
||||||
|
(conversion_fct_t)copy_int, /* DT_UNSIGNED_INT */
|
||||||
|
(conversion_fct_t)copy_long, /* DT_LONG */
|
||||||
|
(conversion_fct_t)copy_long, /* DT_UNSIGNED_LONG */
|
||||||
|
(conversion_fct_t)copy_long_long, /* DT_LONG_LONG_INT */
|
||||||
|
(conversion_fct_t)copy_long_long, /* DT_UNSIGNED_LONG_LONG */
|
||||||
|
(conversion_fct_t)copy_float, /* DT_FLOAT */
|
||||||
|
(conversion_fct_t)copy_double, /* DT_DOUBLE */
|
||||||
|
(conversion_fct_t)copy_long_double, /* DT_LONG_DOUBLE */
|
||||||
|
(conversion_fct_t)NULL, /* DT_PACKED */
|
||||||
|
(conversion_fct_t)NULL, /* DT_WCHAR */
|
||||||
|
#if SIZEOF_BOOL == SIZEOF_CHAR
|
||||||
|
(conversion_fct_t)copy_char, /* DT_CXX_BOOL */
|
||||||
|
#elif SIZEOF_BOOL == SIZEOF_SHORT
|
||||||
|
(conversion_fct_t)copy_short, /* DT_CXX_BOOL */
|
||||||
|
#elif SIZEOF_BOOL == SIZEOF_INT
|
||||||
|
(conversion_fct_t)copy_int, /* DT_CXX_BOOL */
|
||||||
|
#elif SIZEOF_BOOL == SIZEOF_LONG
|
||||||
|
(conversion_fct_t)copy_long, /* DT_CXX_BOOL */
|
||||||
|
#else
|
||||||
|
(conversion_fct_t)NULL, /* DT_CXX_BOOL */
|
||||||
|
#endif
|
||||||
|
#if OMPI_SIZEOF_FORTRAN_LOGICAL == SIZEOF_CHAR
|
||||||
|
(conversion_fct_t)copy_char, /* DT_LOGIC */
|
||||||
|
#elif OMPI_SIZEOF_FORTRAN_LOGICAL == SIZEOF_SHORT
|
||||||
|
(conversion_fct_t)copy_short, /* DT_LOGIC */
|
||||||
|
#elif OMPI_SIZEOF_FORTRAN_LOGICAL == SIZEOF_INT
|
||||||
|
(conversion_fct_t)copy_int, /* DT_LOGIC */
|
||||||
|
#elif OMPI_SIZEOF_FORTRAN_LOGICAL == SIZEOF_LONG
|
||||||
|
(conversion_fct_t)copy_long, /* DT_LOGIC */
|
||||||
|
#else
|
||||||
|
(conversion_fct_t)NULL, /* DT_LOGIC */
|
||||||
|
#endif
|
||||||
|
(conversion_fct_t)copy_int, /* DT_INTEGER */
|
||||||
|
(conversion_fct_t)copy_float, /* DT_REAL */
|
||||||
|
(conversion_fct_t)copy_double, /* DT_DBLPREC */
|
||||||
|
(conversion_fct_t)copy_complex_float, /* DT_COMPLEX_FLOAT */
|
||||||
|
(conversion_fct_t)copy_complex_double, /* DT_COMPLEX_DOUBLE */
|
||||||
|
(conversion_fct_t)copy_complex_long_double, /* DT_COMPLEX_LONG_DOUBLE */
|
||||||
|
(conversion_fct_t)NULL, /* DT_2INT */
|
||||||
|
(conversion_fct_t)NULL, /* DT_2INTEGER */
|
||||||
|
(conversion_fct_t)NULL, /* DT_2REAL */
|
||||||
|
(conversion_fct_t)NULL, /* DT_2DBLPREC */
|
||||||
|
(conversion_fct_t)NULL, /* DT_2COMPLEX */
|
||||||
|
(conversion_fct_t)NULL, /* DT_2DOUBLE_COMPLEX */
|
||||||
|
(conversion_fct_t)NULL, /* DT_FLOAT_INT */
|
||||||
|
(conversion_fct_t)NULL, /* DT_DOUBLE_INT */
|
||||||
|
(conversion_fct_t)NULL, /* DT_LONG_DOUBLE_INT */
|
||||||
|
(conversion_fct_t)NULL, /* DT_LONG_INT */
|
||||||
|
(conversion_fct_t)NULL, /* DT_SHORT_INT */
|
||||||
|
(conversion_fct_t)NULL, /* DT_UNAVAILABLE */
|
||||||
|
};
|
||||||
|
|
||||||
|
int
|
||||||
|
ompi_osc_base_process_op(void *outbuf,
|
||||||
|
void *inbuf,
|
||||||
|
size_t inbuflen,
|
||||||
|
struct ompi_datatype_t *datatype,
|
||||||
|
int count,
|
||||||
|
ompi_op_t *op)
|
||||||
|
{
|
||||||
|
if (op == &ompi_mpi_op_replace) {
|
||||||
|
return OMPI_ERR_NOT_SUPPORTED;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ompi_ddt_is_predefined(datatype)) {
|
||||||
|
ompi_op_reduce(op, inbuf, outbuf, count, datatype);
|
||||||
|
} else {
|
||||||
|
struct ompi_datatype_t *primitive_datatype = NULL;
|
||||||
|
uint32_t primitive_count;
|
||||||
|
ompi_osc_base_convertor_t convertor;
|
||||||
|
struct iovec iov;
|
||||||
|
uint32_t iov_count = 1;
|
||||||
|
size_t max_data;
|
||||||
|
struct ompi_convertor_master_t master;
|
||||||
|
int i, found_index = -1;
|
||||||
|
uint64_t mask = 1;
|
||||||
|
|
||||||
|
for (i = 0 ; i < DT_MAX_PREDEFINED ; ++i) {
|
||||||
|
if (datatype->bdt_used & mask) {
|
||||||
|
found_index = i;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
mask *= 2;
|
||||||
|
}
|
||||||
|
primitive_datatype = (ompi_datatype_t*)
|
||||||
|
ompi_ddt_basicDatatypes[found_index];
|
||||||
|
primitive_count = datatype->nbElems;
|
||||||
|
|
||||||
|
/* create convertor */
|
||||||
|
OBJ_CONSTRUCT(&convertor, ompi_osc_base_convertor_t);
|
||||||
|
convertor.op = op;
|
||||||
|
convertor.datatype = primitive_datatype;
|
||||||
|
|
||||||
|
/* initialize convertor */
|
||||||
|
ompi_convertor_copy_and_prepare_for_recv(ompi_proc_local()->proc_convertor,
|
||||||
|
datatype,
|
||||||
|
count,
|
||||||
|
outbuf,
|
||||||
|
0,
|
||||||
|
&convertor.convertor);
|
||||||
|
|
||||||
|
memcpy(&master, convertor.convertor.master, sizeof(struct ompi_convertor_master_t));
|
||||||
|
master.next = convertor.convertor.master;
|
||||||
|
master.pFunctions = (conversion_fct_t*) &ompi_osc_base_copy_functions;
|
||||||
|
convertor.convertor.master = &master;
|
||||||
|
convertor.convertor.fAdvance = ompi_unpack_general;
|
||||||
|
|
||||||
|
iov.iov_len = inbuflen;
|
||||||
|
iov.iov_base = (IOVBASE_TYPE*) inbuf;
|
||||||
|
max_data = iov.iov_len;
|
||||||
|
ompi_convertor_unpack(&convertor.convertor,
|
||||||
|
&iov,
|
||||||
|
&iov_count,
|
||||||
|
&max_data);
|
||||||
|
OBJ_DESTRUCT(&convertor);
|
||||||
|
}
|
||||||
|
|
||||||
|
return OMPI_SUCCESS;
|
||||||
|
}
|
149
ompi/mca/osc/base/osc_base_obj_convert.h
Обычный файл
149
ompi/mca/osc/base/osc_base_obj_convert.h
Обычный файл
@ -0,0 +1,149 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2004-2005 The Trustees of Indiana University.
|
||||||
|
* All rights reserved.
|
||||||
|
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
|
||||||
|
* All rights reserved.
|
||||||
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||||
|
* University of Stuttgart. All rights reserved.
|
||||||
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||||
|
* All rights reserved.
|
||||||
|
* $COPYRIGHT$
|
||||||
|
*
|
||||||
|
* Additional copyrights may follow
|
||||||
|
*
|
||||||
|
* $HEADER$
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @file
|
||||||
|
*
|
||||||
|
* Utility functions for Open MPI object manipulation by the One-sided code
|
||||||
|
*
|
||||||
|
* Utility functions for creating / finding handles for Open MPI
|
||||||
|
* objects, usually based on indexes sent from remote peers.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "ompi/datatype/datatype.h"
|
||||||
|
#include "ompi/datatype/datatype_internal.h"
|
||||||
|
#include "ompi/proc/proc.h"
|
||||||
|
#include "ompi/op/op.h"
|
||||||
|
|
||||||
|
BEGIN_C_DECLS
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create datatype based on packed payload
|
||||||
|
*
|
||||||
|
* Create a useable MPI datatype based on it's packed description.
|
||||||
|
* The datatype is owned by the calling process and must be
|
||||||
|
* OBJ_RELEASEd when no longer in use.
|
||||||
|
*
|
||||||
|
* @param remote_proc The ompi_proc_t pointer for the remote process
|
||||||
|
* @param payload A pointer to the pointer to the payload. The
|
||||||
|
* pointer to the payload will be moved past the
|
||||||
|
* datatype information upon successful return.
|
||||||
|
*
|
||||||
|
* @retval NULL A failure occrred
|
||||||
|
* @retval non-NULL A fully operational datatype
|
||||||
|
*/
|
||||||
|
static inline
|
||||||
|
struct ompi_datatype_t*
|
||||||
|
ompi_osc_base_datatype_create(ompi_proc_t *remote_proc, void **payload)
|
||||||
|
{
|
||||||
|
struct ompi_datatype_t *datatype =
|
||||||
|
ompi_ddt_create_from_packed_description(payload, remote_proc);
|
||||||
|
if (NULL == datatype) return NULL;
|
||||||
|
if (ompi_ddt_is_predefined(datatype)) OBJ_RETAIN(datatype);
|
||||||
|
return datatype;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create datatype based on Fortran Index
|
||||||
|
*
|
||||||
|
* Create a useable MPI datatype based on it's Fortran index, which is
|
||||||
|
* globally the same for predefined operations. The op handle is
|
||||||
|
* owned by the calling process and must be OBJ_RELEASEd when no
|
||||||
|
* longer in use.
|
||||||
|
*
|
||||||
|
* @param op_id The fortran index for the operaton
|
||||||
|
*
|
||||||
|
* @retval NULL A failure occrred
|
||||||
|
* @retval non-NULL An op handle
|
||||||
|
*/
|
||||||
|
static inline
|
||||||
|
ompi_op_t *
|
||||||
|
ompi_osc_base_op_create(int op_id)
|
||||||
|
{
|
||||||
|
ompi_op_t *op = MPI_Op_f2c(op_id);
|
||||||
|
OBJ_RETAIN(op);
|
||||||
|
return op;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the primitive datatype information for a legal one-sided accumulate datatype
|
||||||
|
*
|
||||||
|
* Get the primitive datatype information for a legal one-sided
|
||||||
|
* accumulate datatype. This includes the primitive datatype used to
|
||||||
|
* build the datatype (there can be only one) and the number of
|
||||||
|
* instances of that primitive datatype in the datatype (there can be
|
||||||
|
* many).
|
||||||
|
*
|
||||||
|
* @param datatype legal one-sided datatype
|
||||||
|
* @param prim_datatype The primitive datatype used to build datatype
|
||||||
|
* @param prim_count Number of instances of prim_datattpe in datatype
|
||||||
|
*
|
||||||
|
* @retval OMPI_SUCCESS Success
|
||||||
|
*/
|
||||||
|
static inline int
|
||||||
|
ompi_osc_base_get_primitive_type_info(ompi_datatype_t *datatype,
|
||||||
|
ompi_datatype_t **prim_datatype,
|
||||||
|
uint32_t *prim_count)
|
||||||
|
{
|
||||||
|
struct ompi_datatype_t *primitive_datatype = NULL;
|
||||||
|
uint32_t primitive_count;
|
||||||
|
|
||||||
|
/* get underlying type... */
|
||||||
|
if (ompi_ddt_is_predefined(datatype)) {
|
||||||
|
primitive_datatype = datatype;
|
||||||
|
primitive_count = 1;
|
||||||
|
} else {
|
||||||
|
int i, found_index = -1;
|
||||||
|
uint64_t mask = 1;
|
||||||
|
for (i = 0 ; i < DT_MAX_PREDEFINED ; ++i) {
|
||||||
|
if (datatype->bdt_used & mask) {
|
||||||
|
found_index = i;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
mask *= 2;
|
||||||
|
}
|
||||||
|
primitive_datatype = (ompi_datatype_t*)
|
||||||
|
ompi_ddt_basicDatatypes[found_index];
|
||||||
|
primitive_count = datatype->nbElems;
|
||||||
|
}
|
||||||
|
|
||||||
|
*prim_datatype = primitive_datatype;
|
||||||
|
*prim_count = primitive_count;
|
||||||
|
|
||||||
|
return OMPI_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Apply the operation specified from inbuf to outbut
|
||||||
|
*
|
||||||
|
* Apply the specified reduction operation from inbuf to outbuf.
|
||||||
|
* inbuf must contain count instances of datatype, in the local
|
||||||
|
* process's binary mode.
|
||||||
|
*
|
||||||
|
* @retval OMPI_SUCCESS Success
|
||||||
|
* @retval OMPI_ERR_NOT_SUPPORTED Called with op == ompi_mpi_op_replace
|
||||||
|
*/
|
||||||
|
int ompi_osc_base_process_op(void *outbuf,
|
||||||
|
void *inbuf,
|
||||||
|
size_t inbuflen,
|
||||||
|
struct ompi_datatype_t *datatype,
|
||||||
|
int count,
|
||||||
|
ompi_op_t *op);
|
||||||
|
|
||||||
|
END_C_DECLS
|
@ -32,8 +32,6 @@ pt2pt_sources = \
|
|||||||
osc_pt2pt_longreq.c \
|
osc_pt2pt_longreq.c \
|
||||||
osc_pt2pt_mpireq.h \
|
osc_pt2pt_mpireq.h \
|
||||||
osc_pt2pt_mpireq.c \
|
osc_pt2pt_mpireq.c \
|
||||||
osc_pt2pt_obj_convert.h \
|
|
||||||
osc_pt2pt_obj_convert.c \
|
|
||||||
osc_pt2pt_replyreq.h \
|
osc_pt2pt_replyreq.h \
|
||||||
osc_pt2pt_replyreq.c \
|
osc_pt2pt_replyreq.c \
|
||||||
osc_pt2pt_sendreq.h \
|
osc_pt2pt_sendreq.h \
|
||||||
|
@ -60,14 +60,6 @@ ompi_osc_pt2pt_module_accumulate(void *origin_addr, int origin_count,
|
|||||||
OMPI_WIN_EXPOSE_EPOCH);
|
OMPI_WIN_EXPOSE_EPOCH);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (op != &ompi_mpi_op_replace &&
|
|
||||||
!ompi_ddt_is_predefined(target_dt)) {
|
|
||||||
fprintf(stderr, "MPI_Accumulate currently does not support reductions\n");
|
|
||||||
fprintf(stderr, "with any user-defined types. This will be rectified\n");
|
|
||||||
fprintf(stderr, "in a future release.\n");
|
|
||||||
return MPI_ERR_UNSUPPORTED_OPERATION;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* shortcut 0 count case */
|
/* shortcut 0 count case */
|
||||||
if (0 == origin_count || 0 == target_count) {
|
if (0 == origin_count || 0 == target_count) {
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
|
@ -24,7 +24,6 @@
|
|||||||
#include "osc_pt2pt_sendreq.h"
|
#include "osc_pt2pt_sendreq.h"
|
||||||
#include "osc_pt2pt_replyreq.h"
|
#include "osc_pt2pt_replyreq.h"
|
||||||
#include "osc_pt2pt_header.h"
|
#include "osc_pt2pt_header.h"
|
||||||
#include "osc_pt2pt_obj_convert.h"
|
|
||||||
#include "osc_pt2pt_data_move.h"
|
#include "osc_pt2pt_data_move.h"
|
||||||
#include "osc_pt2pt_buffer.h"
|
#include "osc_pt2pt_buffer.h"
|
||||||
|
|
||||||
@ -33,6 +32,7 @@
|
|||||||
#include "ompi/communicator/communicator.h"
|
#include "ompi/communicator/communicator.h"
|
||||||
#include "ompi/mca/osc/osc.h"
|
#include "ompi/mca/osc/osc.h"
|
||||||
#include "ompi/mca/osc/base/base.h"
|
#include "ompi/mca/osc/base/base.h"
|
||||||
|
#include "ompi/mca/osc/base/osc_base_obj_convert.h"
|
||||||
#include "ompi/mca/pml/pml.h"
|
#include "ompi/mca/pml/pml.h"
|
||||||
#include "ompi/datatype/dt_arch.h"
|
#include "ompi/datatype/dt_arch.h"
|
||||||
|
|
||||||
@ -535,7 +535,7 @@ component_fragment_cb(ompi_osc_pt2pt_mpireq_t *mpireq)
|
|||||||
|
|
||||||
/* create or get a pointer to our datatype */
|
/* create or get a pointer to our datatype */
|
||||||
proc = ompi_comm_peer_lookup( module->p2p_comm, header->hdr_origin );
|
proc = ompi_comm_peer_lookup( module->p2p_comm, header->hdr_origin );
|
||||||
datatype = ompi_osc_pt2pt_datatype_create(proc, &payload);
|
datatype = ompi_osc_base_datatype_create(proc, &payload);
|
||||||
|
|
||||||
if (NULL == datatype) {
|
if (NULL == datatype) {
|
||||||
opal_output(ompi_osc_base_output,
|
opal_output(ompi_osc_base_output,
|
||||||
|
@ -20,7 +20,6 @@
|
|||||||
#include "osc_pt2pt_sendreq.h"
|
#include "osc_pt2pt_sendreq.h"
|
||||||
#include "osc_pt2pt_header.h"
|
#include "osc_pt2pt_header.h"
|
||||||
#include "osc_pt2pt_data_move.h"
|
#include "osc_pt2pt_data_move.h"
|
||||||
#include "osc_pt2pt_obj_convert.h"
|
|
||||||
#include "osc_pt2pt_buffer.h"
|
#include "osc_pt2pt_buffer.h"
|
||||||
|
|
||||||
#include "opal/util/output.h"
|
#include "opal/util/output.h"
|
||||||
@ -29,6 +28,7 @@
|
|||||||
#include "ompi/datatype/datatype.h"
|
#include "ompi/datatype/datatype.h"
|
||||||
#include "ompi/datatype/dt_arch.h"
|
#include "ompi/datatype/dt_arch.h"
|
||||||
#include "ompi/mca/osc/base/base.h"
|
#include "ompi/mca/osc/base/base.h"
|
||||||
|
#include "ompi/mca/osc/base/osc_base_obj_convert.h"
|
||||||
|
|
||||||
|
|
||||||
static inline int32_t
|
static inline int32_t
|
||||||
@ -525,7 +525,7 @@ ompi_osc_pt2pt_sendreq_recv_put(ompi_osc_pt2pt_module_t *module,
|
|||||||
(header->hdr_target_disp * module->p2p_win->w_disp_unit);
|
(header->hdr_target_disp * module->p2p_win->w_disp_unit);
|
||||||
ompi_proc_t *proc = ompi_comm_peer_lookup( module->p2p_comm, header->hdr_origin );
|
ompi_proc_t *proc = ompi_comm_peer_lookup( module->p2p_comm, header->hdr_origin );
|
||||||
struct ompi_datatype_t *datatype =
|
struct ompi_datatype_t *datatype =
|
||||||
ompi_osc_pt2pt_datatype_create(proc, &inbuf);
|
ompi_osc_base_datatype_create(proc, &inbuf);
|
||||||
|
|
||||||
if (NULL == datatype) {
|
if (NULL == datatype) {
|
||||||
opal_output(ompi_osc_base_output,
|
opal_output(ompi_osc_base_output,
|
||||||
@ -599,21 +599,51 @@ ompi_osc_pt2pt_sendreq_recv_accum_long_cb(ompi_osc_pt2pt_mpireq_t *mpireq)
|
|||||||
{
|
{
|
||||||
ompi_osc_pt2pt_longreq_t *longreq =
|
ompi_osc_pt2pt_longreq_t *longreq =
|
||||||
(ompi_osc_pt2pt_longreq_t*) mpireq;
|
(ompi_osc_pt2pt_longreq_t*) mpireq;
|
||||||
|
ompi_osc_pt2pt_module_t *module = longreq->req_module;
|
||||||
ompi_osc_pt2pt_send_header_t *header =
|
ompi_osc_pt2pt_send_header_t *header =
|
||||||
(ompi_osc_pt2pt_send_header_t*) mpireq->cbdata;
|
(ompi_osc_pt2pt_send_header_t*) mpireq->cbdata;
|
||||||
void *payload = (void*) (header + 1);
|
void *payload = (void*) (header + 1);
|
||||||
int ret;
|
int ret;
|
||||||
|
void *target = (unsigned char*) module->p2p_win->w_baseptr +
|
||||||
|
(header->hdr_target_disp * module->p2p_win->w_disp_unit);
|
||||||
|
|
||||||
/* lock the window for accumulates */
|
/* lock the window for accumulates */
|
||||||
OPAL_THREAD_LOCK(&longreq->req_module->p2p_acc_lock);
|
OPAL_THREAD_LOCK(&longreq->req_module->p2p_acc_lock);
|
||||||
|
|
||||||
/* copy the data from the temporary buffer into the user window */
|
if (longreq->req_op == &ompi_mpi_op_replace) {
|
||||||
ret = ompi_osc_pt2pt_process_op(longreq->req_module,
|
ompi_convertor_t convertor;
|
||||||
header,
|
struct iovec iov;
|
||||||
longreq->req_datatype,
|
uint32_t iov_count = 1;
|
||||||
longreq->req_op,
|
size_t max_data;
|
||||||
payload,
|
|
||||||
header->hdr_msg_length);
|
/* create convertor */
|
||||||
|
OBJ_CONSTRUCT(&convertor, ompi_convertor_t);
|
||||||
|
|
||||||
|
/* initialize convertor */
|
||||||
|
ompi_convertor_copy_and_prepare_for_recv(ompi_proc_local()->proc_convertor,
|
||||||
|
longreq->req_datatype,
|
||||||
|
header->hdr_target_count,
|
||||||
|
target,
|
||||||
|
0,
|
||||||
|
&convertor);
|
||||||
|
|
||||||
|
iov.iov_len = header->hdr_msg_length;
|
||||||
|
iov.iov_base = (IOVBASE_TYPE*) payload;
|
||||||
|
max_data = iov.iov_len;
|
||||||
|
ompi_convertor_unpack(&convertor,
|
||||||
|
&iov,
|
||||||
|
&iov_count,
|
||||||
|
&max_data);
|
||||||
|
OBJ_DESTRUCT(&convertor);
|
||||||
|
} else {
|
||||||
|
/* copy the data from the temporary buffer into the user window */
|
||||||
|
ret = ompi_osc_base_process_op(target,
|
||||||
|
payload,
|
||||||
|
header->hdr_msg_length,
|
||||||
|
longreq->req_datatype,
|
||||||
|
header->hdr_target_count,
|
||||||
|
longreq->req_op);
|
||||||
|
}
|
||||||
|
|
||||||
/* unlock the window for accumulates */
|
/* unlock the window for accumulates */
|
||||||
OPAL_THREAD_UNLOCK(&longreq->req_module->p2p_acc_lock);
|
OPAL_THREAD_UNLOCK(&longreq->req_module->p2p_acc_lock);
|
||||||
@ -642,10 +672,12 @@ ompi_osc_pt2pt_sendreq_recv_accum(ompi_osc_pt2pt_module_t *module,
|
|||||||
void *payload)
|
void *payload)
|
||||||
{
|
{
|
||||||
int ret = OMPI_SUCCESS;
|
int ret = OMPI_SUCCESS;
|
||||||
struct ompi_op_t *op = ompi_osc_pt2pt_op_create(header->hdr_target_op);
|
struct ompi_op_t *op = ompi_osc_base_op_create(header->hdr_target_op);
|
||||||
ompi_proc_t *proc = ompi_comm_peer_lookup( module->p2p_comm, header->hdr_origin );
|
ompi_proc_t *proc = ompi_comm_peer_lookup( module->p2p_comm, header->hdr_origin );
|
||||||
struct ompi_datatype_t *datatype =
|
struct ompi_datatype_t *datatype =
|
||||||
ompi_osc_pt2pt_datatype_create(proc, &payload);
|
ompi_osc_base_datatype_create(proc, &payload);
|
||||||
|
void *target = (unsigned char*) module->p2p_win->w_baseptr +
|
||||||
|
(header->hdr_target_disp * module->p2p_win->w_disp_unit);
|
||||||
|
|
||||||
if (NULL == datatype) {
|
if (NULL == datatype) {
|
||||||
opal_output(ompi_osc_base_output,
|
opal_output(ompi_osc_base_output,
|
||||||
@ -657,9 +689,92 @@ ompi_osc_pt2pt_sendreq_recv_accum(ompi_osc_pt2pt_module_t *module,
|
|||||||
/* lock the window for accumulates */
|
/* lock the window for accumulates */
|
||||||
OPAL_THREAD_LOCK(&module->p2p_acc_lock);
|
OPAL_THREAD_LOCK(&module->p2p_acc_lock);
|
||||||
|
|
||||||
/* copy the data from the temporary buffer into the user window */
|
if (op == &ompi_mpi_op_replace) {
|
||||||
ret = ompi_osc_pt2pt_process_op(module, header, datatype, op, payload,
|
ompi_convertor_t convertor;
|
||||||
header->hdr_msg_length);
|
struct iovec iov;
|
||||||
|
uint32_t iov_count = 1;
|
||||||
|
size_t max_data;
|
||||||
|
|
||||||
|
/* create convertor */
|
||||||
|
OBJ_CONSTRUCT(&convertor, ompi_convertor_t);
|
||||||
|
|
||||||
|
/* initialize convertor */
|
||||||
|
ompi_convertor_copy_and_prepare_for_recv(proc->proc_convertor,
|
||||||
|
datatype,
|
||||||
|
header->hdr_target_count,
|
||||||
|
target,
|
||||||
|
0,
|
||||||
|
&convertor);
|
||||||
|
|
||||||
|
iov.iov_len = header->hdr_msg_length;
|
||||||
|
iov.iov_base = (IOVBASE_TYPE*)payload;
|
||||||
|
max_data = iov.iov_len;
|
||||||
|
ompi_convertor_unpack(&convertor,
|
||||||
|
&iov,
|
||||||
|
&iov_count,
|
||||||
|
&max_data);
|
||||||
|
OBJ_DESTRUCT(&convertor);
|
||||||
|
} else {
|
||||||
|
void *buffer = NULL;
|
||||||
|
|
||||||
|
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT
|
||||||
|
if (proc->proc_arch != ompi_proc_local()->proc_arch) {
|
||||||
|
ompi_convertor_t convertor;
|
||||||
|
struct iovec iov;
|
||||||
|
uint32_t iov_count = 1;
|
||||||
|
size_t max_data;
|
||||||
|
struct ompi_datatype_t *primitive_datatype = NULL;
|
||||||
|
uint32_t primitive_count;
|
||||||
|
size_t buflen;
|
||||||
|
|
||||||
|
ompi_osc_base_get_primitive_type_info(datatype, &primitive_datatype, &primitive_count);
|
||||||
|
primitive_count *= header->hdr_target_count;
|
||||||
|
|
||||||
|
/* figure out how big a buffer we need */
|
||||||
|
ompi_ddt_type_size(primitive_datatype, &buflen);
|
||||||
|
buflen *= primitive_count;
|
||||||
|
|
||||||
|
/* create convertor */
|
||||||
|
OBJ_CONSTRUCT(&convertor, ompi_convertor_t);
|
||||||
|
|
||||||
|
payload = (void*) malloc(buflen);
|
||||||
|
|
||||||
|
/* initialize convertor */
|
||||||
|
ompi_convertor_copy_and_prepare_for_recv(proc->proc_convertor,
|
||||||
|
primitive_datatype,
|
||||||
|
primitive_count,
|
||||||
|
buffer,
|
||||||
|
0,
|
||||||
|
&convertor);
|
||||||
|
|
||||||
|
iov.iov_len = header->hdr_msg_length;
|
||||||
|
iov.iov_base = (IOVBASE_TYPE*)payload;
|
||||||
|
max_data = iov.iov_len;
|
||||||
|
ompi_convertor_unpack(&convertor,
|
||||||
|
&iov,
|
||||||
|
&iov_count,
|
||||||
|
&max_data);
|
||||||
|
OBJ_DESTRUCT(&convertor);
|
||||||
|
} else {
|
||||||
|
buffer = payload;
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
buffer = payload;
|
||||||
|
#endif
|
||||||
|
/* copy the data from the temporary buffer into the user window */
|
||||||
|
ret = ompi_osc_base_process_op(target,
|
||||||
|
buffer,
|
||||||
|
header->hdr_msg_length,
|
||||||
|
datatype,
|
||||||
|
header->hdr_target_count,
|
||||||
|
op);
|
||||||
|
|
||||||
|
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT
|
||||||
|
if (proc->proc_arch != ompi_proc_local()->proc_arch) {
|
||||||
|
if (NULL == buffer) free(buffer);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
/* unlock the window for accumulates */
|
/* unlock the window for accumulates */
|
||||||
OPAL_THREAD_UNLOCK(&module->p2p_acc_lock);
|
OPAL_THREAD_UNLOCK(&module->p2p_acc_lock);
|
||||||
@ -676,13 +791,17 @@ ompi_osc_pt2pt_sendreq_recv_accum(ompi_osc_pt2pt_module_t *module,
|
|||||||
header->hdr_origin));
|
header->hdr_origin));
|
||||||
} else {
|
} else {
|
||||||
ompi_osc_pt2pt_longreq_t *longreq;
|
ompi_osc_pt2pt_longreq_t *longreq;
|
||||||
ptrdiff_t lb, extent, true_lb, true_extent;
|
|
||||||
size_t buflen;
|
size_t buflen;
|
||||||
|
struct ompi_datatype_t *primitive_datatype = NULL;
|
||||||
|
uint32_t primitive_count;
|
||||||
|
|
||||||
|
/* get underlying type... */
|
||||||
|
ompi_osc_base_get_primitive_type_info(datatype, &primitive_datatype, &primitive_count);
|
||||||
|
primitive_count *= header->hdr_target_count;
|
||||||
|
|
||||||
/* figure out how big a buffer we need */
|
/* figure out how big a buffer we need */
|
||||||
ompi_ddt_get_extent(datatype, &lb, &extent);
|
ompi_ddt_type_size(primitive_datatype, &buflen);
|
||||||
ompi_ddt_get_true_extent(datatype, &true_lb, &true_extent);
|
buflen *= primitive_count;
|
||||||
buflen = true_extent + (header->hdr_target_count - 1) * extent;
|
|
||||||
|
|
||||||
/* get a longreq and fill it in */
|
/* get a longreq and fill it in */
|
||||||
ompi_osc_pt2pt_longreq_alloc(&longreq);
|
ompi_osc_pt2pt_longreq_alloc(&longreq);
|
||||||
@ -702,8 +821,8 @@ ompi_osc_pt2pt_sendreq_recv_accum(ompi_osc_pt2pt_module_t *module,
|
|||||||
((ompi_osc_pt2pt_send_header_t*) longreq->mpireq.cbdata)->hdr_msg_length = buflen;
|
((ompi_osc_pt2pt_send_header_t*) longreq->mpireq.cbdata)->hdr_msg_length = buflen;
|
||||||
|
|
||||||
ret = mca_pml.pml_irecv(((char*) longreq->mpireq.cbdata) + sizeof(ompi_osc_pt2pt_send_header_t),
|
ret = mca_pml.pml_irecv(((char*) longreq->mpireq.cbdata) + sizeof(ompi_osc_pt2pt_send_header_t),
|
||||||
header->hdr_target_count,
|
primitive_count,
|
||||||
datatype,
|
primitive_datatype,
|
||||||
header->hdr_origin,
|
header->hdr_origin,
|
||||||
header->hdr_origin_tag,
|
header->hdr_origin_tag,
|
||||||
module->p2p_comm,
|
module->p2p_comm,
|
||||||
|
@ -1,96 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2004-2005 The Trustees of Indiana University.
|
|
||||||
* All rights reserved.
|
|
||||||
* Copyright (c) 2004-2006 The Trustees of the University of Tennessee.
|
|
||||||
* All rights reserved.
|
|
||||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
||||||
* University of Stuttgart. All rights reserved.
|
|
||||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
|
||||||
* All rights reserved.
|
|
||||||
* $COPYRIGHT$
|
|
||||||
*
|
|
||||||
* Additional copyrights may follow
|
|
||||||
*
|
|
||||||
* $HEADER$
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
|
||||||
* utility functions for dealing with remote datatype and op structures
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "ompi_config.h"
|
|
||||||
|
|
||||||
#include "ompi/op/op.h"
|
|
||||||
|
|
||||||
#include "osc_pt2pt.h"
|
|
||||||
#include "osc_pt2pt_sendreq.h"
|
|
||||||
#include "osc_pt2pt_header.h"
|
|
||||||
#include "osc_pt2pt_obj_convert.h"
|
|
||||||
|
|
||||||
int
|
|
||||||
ompi_osc_pt2pt_process_op(ompi_osc_pt2pt_module_t *module,
|
|
||||||
ompi_osc_pt2pt_send_header_t *header,
|
|
||||||
struct ompi_datatype_t *datatype,
|
|
||||||
ompi_op_t *op,
|
|
||||||
void *inbuf,
|
|
||||||
size_t inbuflen)
|
|
||||||
{
|
|
||||||
unsigned char *target_buffer;
|
|
||||||
|
|
||||||
/* compute target buffer location */
|
|
||||||
target_buffer = (unsigned char*) module->p2p_win->w_baseptr +
|
|
||||||
(header->hdr_target_disp * module->p2p_win->w_disp_unit);
|
|
||||||
|
|
||||||
/* BWB - fix me - change back to the pointer comparison when the
|
|
||||||
replace o_f_to_c_index is set properly */
|
|
||||||
/* if (op == &ompi_mpi_op_replace) { */
|
|
||||||
if (header->hdr_target_op == ompi_mpi_op_replace.o_f_to_c_index) {
|
|
||||||
ompi_convertor_t convertor;
|
|
||||||
struct iovec iov;
|
|
||||||
uint32_t iov_count = 1;
|
|
||||||
size_t max_data;
|
|
||||||
ompi_proc_t *proc;
|
|
||||||
|
|
||||||
/* create convertor */
|
|
||||||
OBJ_CONSTRUCT(&convertor, ompi_convertor_t);
|
|
||||||
|
|
||||||
/* initialize convertor */
|
|
||||||
proc = ompi_comm_peer_lookup(module->p2p_comm, header->hdr_origin);
|
|
||||||
ompi_convertor_copy_and_prepare_for_recv(proc->proc_convertor,
|
|
||||||
datatype,
|
|
||||||
header->hdr_target_count,
|
|
||||||
target_buffer,
|
|
||||||
0,
|
|
||||||
&convertor);
|
|
||||||
|
|
||||||
/* short circuit the reduction operation MPI_REPLACE - it just
|
|
||||||
replaces the data, so push it out into the user's buffer.
|
|
||||||
This lets us avoid both the overhead of using the op
|
|
||||||
invocation and dealing with non-contiguous reductions
|
|
||||||
(since there are never user-defined reductions in
|
|
||||||
MPI_ACCUMULATE) */
|
|
||||||
iov.iov_len = inbuflen;
|
|
||||||
iov.iov_base = (IOVBASE_TYPE*)inbuf;
|
|
||||||
max_data = iov.iov_len;
|
|
||||||
ompi_convertor_unpack(&convertor,
|
|
||||||
&iov,
|
|
||||||
&iov_count,
|
|
||||||
&max_data);
|
|
||||||
OBJ_DESTRUCT(&convertor);
|
|
||||||
} else {
|
|
||||||
/* reductions other than MPI_REPLACE. Since user-defined
|
|
||||||
reductions aren't allowed, these all have to be over
|
|
||||||
contigous data. We make sure to only send complete
|
|
||||||
datatypes in these cases, so we can unpack directly from
|
|
||||||
the user buffer*/
|
|
||||||
/* BWB - FIX ME - this won't work if endianness is different.
|
|
||||||
Talk to George about a ddt function that allows us to fix
|
|
||||||
endianness "in place' or what else we could do here to keep
|
|
||||||
performance from sucking... */
|
|
||||||
|
|
||||||
ompi_op_reduce(op, inbuf, target_buffer, header->hdr_target_count,
|
|
||||||
datatype);
|
|
||||||
}
|
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
|
||||||
}
|
|
@ -1,51 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2004-2005 The Trustees of Indiana University.
|
|
||||||
* All rights reserved.
|
|
||||||
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
|
|
||||||
* All rights reserved.
|
|
||||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
||||||
* University of Stuttgart. All rights reserved.
|
|
||||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
|
||||||
* All rights reserved.
|
|
||||||
* $COPYRIGHT$
|
|
||||||
*
|
|
||||||
* Additional copyrights may follow
|
|
||||||
*
|
|
||||||
* $HEADER$
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
|
||||||
* utility functions for dealing with remote datatype and op structures
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "mpi.h"
|
|
||||||
|
|
||||||
#include "ompi/datatype/datatype.h"
|
|
||||||
|
|
||||||
static inline
|
|
||||||
struct ompi_datatype_t*
|
|
||||||
ompi_osc_pt2pt_datatype_create(ompi_proc_t *remote_proc, void **payload)
|
|
||||||
{
|
|
||||||
struct ompi_datatype_t *datatype =
|
|
||||||
ompi_ddt_create_from_packed_description(payload, remote_proc);
|
|
||||||
if (NULL == datatype) return NULL;
|
|
||||||
if (ompi_ddt_is_predefined(datatype)) OBJ_RETAIN(datatype);
|
|
||||||
return datatype;
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline
|
|
||||||
ompi_op_t *
|
|
||||||
ompi_osc_pt2pt_op_create(int op_id)
|
|
||||||
{
|
|
||||||
ompi_op_t *op = MPI_Op_f2c(op_id);
|
|
||||||
OBJ_RETAIN(op);
|
|
||||||
return op;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
int ompi_osc_pt2pt_process_op(ompi_osc_pt2pt_module_t *module,
|
|
||||||
ompi_osc_pt2pt_send_header_t *header,
|
|
||||||
struct ompi_datatype_t *datatype,
|
|
||||||
ompi_op_t *op,
|
|
||||||
void *inbuf,
|
|
||||||
size_t inbuflen);
|
|
@ -29,7 +29,6 @@ pt2pt_sources = \
|
|||||||
osc_rdma_longreq.h \
|
osc_rdma_longreq.h \
|
||||||
osc_rdma_longreq.c \
|
osc_rdma_longreq.c \
|
||||||
osc_rdma_obj_convert.h \
|
osc_rdma_obj_convert.h \
|
||||||
osc_rdma_obj_convert.c \
|
|
||||||
osc_rdma_replyreq.h \
|
osc_rdma_replyreq.h \
|
||||||
osc_rdma_replyreq.c \
|
osc_rdma_replyreq.c \
|
||||||
osc_rdma_sendreq.h \
|
osc_rdma_sendreq.h \
|
||||||
|
@ -62,14 +62,6 @@ ompi_osc_rdma_module_accumulate(void *origin_addr, int origin_count,
|
|||||||
OMPI_WIN_EXPOSE_EPOCH);
|
OMPI_WIN_EXPOSE_EPOCH);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (op != &ompi_mpi_op_replace &&
|
|
||||||
!ompi_ddt_is_predefined(target_dt)) {
|
|
||||||
fprintf(stderr, "MPI_Accumulate currently does not support reductions\n");
|
|
||||||
fprintf(stderr, "with any user-defined types. This will be rectified\n");
|
|
||||||
fprintf(stderr, "in a future release.\n");
|
|
||||||
return MPI_ERR_UNSUPPORTED_OPERATION;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* shortcut 0 count case */
|
/* shortcut 0 count case */
|
||||||
if (0 == origin_count || 0 == target_count) {
|
if (0 == origin_count || 0 == target_count) {
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
|
@ -24,8 +24,8 @@
|
|||||||
#include "osc_rdma_sendreq.h"
|
#include "osc_rdma_sendreq.h"
|
||||||
#include "osc_rdma_replyreq.h"
|
#include "osc_rdma_replyreq.h"
|
||||||
#include "osc_rdma_header.h"
|
#include "osc_rdma_header.h"
|
||||||
#include "osc_rdma_obj_convert.h"
|
|
||||||
#include "osc_rdma_data_move.h"
|
#include "osc_rdma_data_move.h"
|
||||||
|
#include "osc_rdma_obj_convert.h"
|
||||||
|
|
||||||
#include "opal/threads/condition.h"
|
#include "opal/threads/condition.h"
|
||||||
#include "opal/threads/mutex.h"
|
#include "opal/threads/mutex.h"
|
||||||
@ -33,6 +33,7 @@
|
|||||||
#include "ompi/communicator/communicator.h"
|
#include "ompi/communicator/communicator.h"
|
||||||
#include "ompi/mca/osc/osc.h"
|
#include "ompi/mca/osc/osc.h"
|
||||||
#include "ompi/mca/osc/base/base.h"
|
#include "ompi/mca/osc/base/base.h"
|
||||||
|
#include "ompi/mca/osc/base/osc_base_obj_convert.h"
|
||||||
#include "ompi/mca/btl/btl.h"
|
#include "ompi/mca/btl/btl.h"
|
||||||
#include "ompi/mca/bml/bml.h"
|
#include "ompi/mca/bml/bml.h"
|
||||||
#include "ompi/mca/pml/pml.h"
|
#include "ompi/mca/pml/pml.h"
|
||||||
@ -613,7 +614,7 @@ component_fragment_cb(struct mca_btl_base_module_t *btl,
|
|||||||
|
|
||||||
/* create or get a pointer to our datatype */
|
/* create or get a pointer to our datatype */
|
||||||
proc = ompi_comm_peer_lookup( module->m_comm, header->hdr_origin );
|
proc = ompi_comm_peer_lookup( module->m_comm, header->hdr_origin );
|
||||||
datatype = ompi_osc_rdma_datatype_create(proc, &payload);
|
datatype = ompi_osc_base_datatype_create(proc, &payload);
|
||||||
|
|
||||||
if (NULL == datatype) {
|
if (NULL == datatype) {
|
||||||
opal_output(ompi_osc_base_output,
|
opal_output(ompi_osc_base_output,
|
||||||
|
@ -30,6 +30,7 @@
|
|||||||
#include "ompi/mca/bml/base/base.h"
|
#include "ompi/mca/bml/base/base.h"
|
||||||
#include "ompi/mca/btl/btl.h"
|
#include "ompi/mca/btl/btl.h"
|
||||||
#include "ompi/mca/osc/base/base.h"
|
#include "ompi/mca/osc/base/base.h"
|
||||||
|
#include "ompi/mca/osc/base/osc_base_obj_convert.h"
|
||||||
#include "ompi/datatype/datatype.h"
|
#include "ompi/datatype/datatype.h"
|
||||||
#include "ompi/datatype/dt_arch.h"
|
#include "ompi/datatype/dt_arch.h"
|
||||||
|
|
||||||
@ -803,7 +804,7 @@ ompi_osc_rdma_sendreq_recv_put(ompi_osc_rdma_module_t *module,
|
|||||||
(header->hdr_target_disp * module->m_win->w_disp_unit);
|
(header->hdr_target_disp * module->m_win->w_disp_unit);
|
||||||
ompi_proc_t *proc = ompi_comm_peer_lookup( module->m_comm, header->hdr_origin );
|
ompi_proc_t *proc = ompi_comm_peer_lookup( module->m_comm, header->hdr_origin );
|
||||||
struct ompi_datatype_t *datatype =
|
struct ompi_datatype_t *datatype =
|
||||||
ompi_osc_rdma_datatype_create(proc, inbuf);
|
ompi_osc_base_datatype_create(proc, inbuf);
|
||||||
|
|
||||||
if (NULL == datatype) {
|
if (NULL == datatype) {
|
||||||
opal_output(ompi_osc_base_output,
|
opal_output(ompi_osc_base_output,
|
||||||
@ -887,6 +888,8 @@ ompi_osc_rdma_sendreq_recv_put(ompi_osc_rdma_module_t *module,
|
|||||||
* Receive an accumulate on the target side
|
* Receive an accumulate on the target side
|
||||||
*
|
*
|
||||||
**********************************************************************/
|
**********************************************************************/
|
||||||
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
ompi_osc_rdma_sendreq_recv_accum_long_cb(ompi_osc_rdma_longreq_t *longreq)
|
ompi_osc_rdma_sendreq_recv_accum_long_cb(ompi_osc_rdma_longreq_t *longreq)
|
||||||
{
|
{
|
||||||
@ -894,17 +897,48 @@ ompi_osc_rdma_sendreq_recv_accum_long_cb(ompi_osc_rdma_longreq_t *longreq)
|
|||||||
(ompi_osc_rdma_send_header_t*) longreq->cbdata;
|
(ompi_osc_rdma_send_header_t*) longreq->cbdata;
|
||||||
void *payload = (void*) (header + 1);
|
void *payload = (void*) (header + 1);
|
||||||
int ret;
|
int ret;
|
||||||
|
ompi_osc_rdma_module_t *module = longreq->req_module;
|
||||||
|
unsigned char *target_buffer =
|
||||||
|
(unsigned char*) module->m_win->w_baseptr +
|
||||||
|
(header->hdr_target_disp * module->m_win->w_disp_unit);
|
||||||
|
|
||||||
/* lock the window for accumulates */
|
/* lock the window for accumulates */
|
||||||
OPAL_THREAD_LOCK(&longreq->req_module->m_acc_lock);
|
OPAL_THREAD_LOCK(&longreq->req_module->m_acc_lock);
|
||||||
|
|
||||||
/* copy the data from the temporary buffer into the user window */
|
if (longreq->req_op == &ompi_mpi_op_replace) {
|
||||||
ret = ompi_osc_rdma_process_op(longreq->req_module,
|
ompi_convertor_t convertor;
|
||||||
header,
|
struct iovec iov;
|
||||||
longreq->req_datatype,
|
uint32_t iov_count = 1;
|
||||||
longreq->req_op,
|
size_t max_data;
|
||||||
payload,
|
|
||||||
header->hdr_msg_length);
|
/* create convertor */
|
||||||
|
OBJ_CONSTRUCT(&convertor, ompi_convertor_t);
|
||||||
|
|
||||||
|
/* initialize convertor */
|
||||||
|
ompi_convertor_copy_and_prepare_for_recv(ompi_proc_local()->proc_convertor,
|
||||||
|
longreq->req_datatype,
|
||||||
|
header->hdr_target_count,
|
||||||
|
target_buffer,
|
||||||
|
0,
|
||||||
|
&convertor);
|
||||||
|
|
||||||
|
iov.iov_len = header->hdr_msg_length;
|
||||||
|
iov.iov_base = (IOVBASE_TYPE*) payload;
|
||||||
|
max_data = iov.iov_len;
|
||||||
|
ompi_convertor_unpack(&convertor,
|
||||||
|
&iov,
|
||||||
|
&iov_count,
|
||||||
|
&max_data);
|
||||||
|
OBJ_DESTRUCT(&convertor);
|
||||||
|
} else {
|
||||||
|
/* copy the data from the temporary buffer into the user window */
|
||||||
|
ret = ompi_osc_base_process_op(target_buffer,
|
||||||
|
payload,
|
||||||
|
header->hdr_msg_length,
|
||||||
|
longreq->req_datatype,
|
||||||
|
header->hdr_target_count,
|
||||||
|
longreq->req_op);
|
||||||
|
}
|
||||||
|
|
||||||
/* unlock the window for accumulates */
|
/* unlock the window for accumulates */
|
||||||
OPAL_THREAD_UNLOCK(&longreq->req_module->m_acc_lock);
|
OPAL_THREAD_UNLOCK(&longreq->req_module->m_acc_lock);
|
||||||
@ -933,10 +967,10 @@ ompi_osc_rdma_sendreq_recv_accum(ompi_osc_rdma_module_t *module,
|
|||||||
void **payload)
|
void **payload)
|
||||||
{
|
{
|
||||||
int ret = OMPI_SUCCESS;
|
int ret = OMPI_SUCCESS;
|
||||||
struct ompi_op_t *op = ompi_osc_rdma_op_create(header->hdr_target_op);
|
struct ompi_op_t *op = ompi_osc_base_op_create(header->hdr_target_op);
|
||||||
ompi_proc_t *proc = ompi_comm_peer_lookup( module->m_comm, header->hdr_origin );
|
ompi_proc_t *proc = ompi_comm_peer_lookup( module->m_comm, header->hdr_origin );
|
||||||
struct ompi_datatype_t *datatype =
|
struct ompi_datatype_t *datatype =
|
||||||
ompi_osc_rdma_datatype_create(proc, payload);
|
ompi_osc_base_datatype_create(proc, payload);
|
||||||
|
|
||||||
if (NULL == datatype) {
|
if (NULL == datatype) {
|
||||||
opal_output(ompi_osc_base_output,
|
opal_output(ompi_osc_base_output,
|
||||||
@ -945,12 +979,100 @@ ompi_osc_rdma_sendreq_recv_accum(ompi_osc_rdma_module_t *module,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (header->hdr_msg_length > 0) {
|
if (header->hdr_msg_length > 0) {
|
||||||
|
unsigned char *target_buffer;
|
||||||
|
|
||||||
|
target_buffer = (unsigned char*) module->m_win->w_baseptr +
|
||||||
|
(header->hdr_target_disp * module->m_win->w_disp_unit);
|
||||||
|
|
||||||
/* lock the window for accumulates */
|
/* lock the window for accumulates */
|
||||||
OPAL_THREAD_LOCK(&module->m_acc_lock);
|
OPAL_THREAD_LOCK(&module->m_acc_lock);
|
||||||
|
|
||||||
/* copy the data from the temporary buffer into the user window */
|
if (op == &ompi_mpi_op_replace) {
|
||||||
ret = ompi_osc_rdma_process_op(module, header, datatype, op, *payload,
|
ompi_convertor_t convertor;
|
||||||
header->hdr_msg_length);
|
struct iovec iov;
|
||||||
|
uint32_t iov_count = 1;
|
||||||
|
size_t max_data;
|
||||||
|
|
||||||
|
/* create convertor */
|
||||||
|
OBJ_CONSTRUCT(&convertor, ompi_convertor_t);
|
||||||
|
|
||||||
|
/* initialize convertor */
|
||||||
|
ompi_convertor_copy_and_prepare_for_recv(proc->proc_convertor,
|
||||||
|
datatype,
|
||||||
|
header->hdr_target_count,
|
||||||
|
target_buffer,
|
||||||
|
0,
|
||||||
|
&convertor);
|
||||||
|
|
||||||
|
iov.iov_len = header->hdr_msg_length;
|
||||||
|
iov.iov_base = (IOVBASE_TYPE*)*payload;
|
||||||
|
max_data = iov.iov_len;
|
||||||
|
ompi_convertor_unpack(&convertor,
|
||||||
|
&iov,
|
||||||
|
&iov_count,
|
||||||
|
&max_data);
|
||||||
|
OBJ_DESTRUCT(&convertor);
|
||||||
|
} else {
|
||||||
|
void *buffer = NULL;
|
||||||
|
|
||||||
|
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT
|
||||||
|
if (proc->proc_arch != ompi_proc_local()->proc_arch) {
|
||||||
|
ompi_convertor_t convertor;
|
||||||
|
struct iovec iov;
|
||||||
|
uint32_t iov_count = 1;
|
||||||
|
size_t max_data;
|
||||||
|
struct ompi_datatype_t *primitive_datatype = NULL;
|
||||||
|
uint32_t primitive_count;
|
||||||
|
size_t buflen;
|
||||||
|
|
||||||
|
ompi_osc_base_get_primitive_type_info(datatype, &primitive_datatype, &primitive_count);
|
||||||
|
primitive_count *= header->hdr_target_count;
|
||||||
|
|
||||||
|
/* figure out how big a buffer we need */
|
||||||
|
ompi_ddt_type_size(primitive_datatype, &buflen);
|
||||||
|
buflen *= primitive_count;
|
||||||
|
|
||||||
|
/* create convertor */
|
||||||
|
OBJ_CONSTRUCT(&convertor, ompi_convertor_t);
|
||||||
|
|
||||||
|
payload = (void*) malloc(buflen);
|
||||||
|
|
||||||
|
/* initialize convertor */
|
||||||
|
ompi_convertor_copy_and_prepare_for_recv(proc->proc_convertor,
|
||||||
|
primitive_datatype,
|
||||||
|
primitive_count,
|
||||||
|
buffer,
|
||||||
|
0,
|
||||||
|
&convertor);
|
||||||
|
|
||||||
|
iov.iov_len = header->hdr_msg_length;
|
||||||
|
iov.iov_base = (IOVBASE_TYPE*)*payload;
|
||||||
|
max_data = iov.iov_len;
|
||||||
|
ompi_convertor_unpack(&convertor,
|
||||||
|
&iov,
|
||||||
|
&iov_count,
|
||||||
|
&max_data);
|
||||||
|
OBJ_DESTRUCT(&convertor);
|
||||||
|
} else {
|
||||||
|
buffer = *payload;
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
buffer = *payload;
|
||||||
|
#endif
|
||||||
|
/* copy the data from the temporary buffer into the user window */
|
||||||
|
ret = ompi_osc_base_process_op(target_buffer,
|
||||||
|
buffer,
|
||||||
|
header->hdr_msg_length,
|
||||||
|
datatype,
|
||||||
|
header->hdr_target_count,
|
||||||
|
op);
|
||||||
|
|
||||||
|
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT
|
||||||
|
if (proc->proc_arch != ompi_proc_local()->proc_arch) {
|
||||||
|
if (NULL == buffer) free(buffer);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
/* unlock the window for accumulates */
|
/* unlock the window for accumulates */
|
||||||
OPAL_THREAD_UNLOCK(&module->m_acc_lock);
|
OPAL_THREAD_UNLOCK(&module->m_acc_lock);
|
||||||
@ -969,13 +1091,17 @@ ompi_osc_rdma_sendreq_recv_accum(ompi_osc_rdma_module_t *module,
|
|||||||
|
|
||||||
} else {
|
} else {
|
||||||
ompi_osc_rdma_longreq_t *longreq;
|
ompi_osc_rdma_longreq_t *longreq;
|
||||||
ptrdiff_t lb, extent, true_lb, true_extent;
|
|
||||||
size_t buflen;
|
size_t buflen;
|
||||||
|
struct ompi_datatype_t *primitive_datatype = NULL;
|
||||||
|
uint32_t primitive_count;
|
||||||
|
|
||||||
|
/* get underlying type... */
|
||||||
|
ompi_osc_base_get_primitive_type_info(datatype, &primitive_datatype, &primitive_count);
|
||||||
|
primitive_count *= header->hdr_target_count;
|
||||||
|
|
||||||
/* figure out how big a buffer we need */
|
/* figure out how big a buffer we need */
|
||||||
ompi_ddt_get_extent(datatype, &lb, &extent);
|
ompi_ddt_type_size(primitive_datatype, &buflen);
|
||||||
ompi_ddt_get_true_extent(datatype, &true_lb, &true_extent);
|
buflen *= primitive_count;
|
||||||
buflen = true_extent + (header->hdr_target_count - 1) * extent;
|
|
||||||
|
|
||||||
/* get a longreq and fill it in */
|
/* get a longreq and fill it in */
|
||||||
ompi_osc_rdma_longreq_alloc(&longreq);
|
ompi_osc_rdma_longreq_alloc(&longreq);
|
||||||
@ -995,8 +1121,8 @@ ompi_osc_rdma_sendreq_recv_accum(ompi_osc_rdma_module_t *module,
|
|||||||
((ompi_osc_rdma_send_header_t*) longreq->cbdata)->hdr_msg_length = buflen;
|
((ompi_osc_rdma_send_header_t*) longreq->cbdata)->hdr_msg_length = buflen;
|
||||||
|
|
||||||
ret = mca_pml.pml_irecv(((char*) longreq->cbdata) + sizeof(ompi_osc_rdma_send_header_t),
|
ret = mca_pml.pml_irecv(((char*) longreq->cbdata) + sizeof(ompi_osc_rdma_send_header_t),
|
||||||
header->hdr_target_count,
|
primitive_count,
|
||||||
datatype,
|
primitive_datatype,
|
||||||
header->hdr_origin,
|
header->hdr_origin,
|
||||||
header->hdr_origin_tag,
|
header->hdr_origin_tag,
|
||||||
module->m_comm,
|
module->m_comm,
|
||||||
|
@ -1,98 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2004-2005 The Trustees of Indiana University.
|
|
||||||
* All rights reserved.
|
|
||||||
* Copyright (c) 2004-2006 The Trustees of the University of Tennessee.
|
|
||||||
* All rights reserved.
|
|
||||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
||||||
* University of Stuttgart. All rights reserved.
|
|
||||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
|
||||||
* All rights reserved.
|
|
||||||
* Copyright (c) 2007 Los Alamos National Security, LLC. All rights
|
|
||||||
* reserved.
|
|
||||||
* $COPYRIGHT$
|
|
||||||
*
|
|
||||||
* Additional copyrights may follow
|
|
||||||
*
|
|
||||||
* $HEADER$
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
|
||||||
* utility functions for dealing with remote datatype and op structures
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "ompi_config.h"
|
|
||||||
|
|
||||||
#include "ompi/op/op.h"
|
|
||||||
|
|
||||||
#include "osc_rdma.h"
|
|
||||||
#include "osc_rdma_sendreq.h"
|
|
||||||
#include "osc_rdma_header.h"
|
|
||||||
#include "osc_rdma_obj_convert.h"
|
|
||||||
|
|
||||||
int
|
|
||||||
ompi_osc_rdma_process_op(ompi_osc_rdma_module_t *module,
|
|
||||||
ompi_osc_rdma_send_header_t *header,
|
|
||||||
struct ompi_datatype_t *datatype,
|
|
||||||
ompi_op_t *op,
|
|
||||||
void *inbuf,
|
|
||||||
size_t inbuflen)
|
|
||||||
{
|
|
||||||
unsigned char *target_buffer;
|
|
||||||
|
|
||||||
/* compute target buffer location */
|
|
||||||
target_buffer = (unsigned char*) module->m_win->w_baseptr +
|
|
||||||
(header->hdr_target_disp * module->m_win->w_disp_unit);
|
|
||||||
|
|
||||||
/* BWB - fix me - change back to the pointer comparison when the
|
|
||||||
replace o_f_to_c_index is set properly */
|
|
||||||
/* if (op == &ompi_mpi_op_replace) { */
|
|
||||||
if (header->hdr_target_op == ompi_mpi_op_replace.o_f_to_c_index) {
|
|
||||||
ompi_convertor_t convertor;
|
|
||||||
struct iovec iov;
|
|
||||||
uint32_t iov_count = 1;
|
|
||||||
size_t max_data;
|
|
||||||
ompi_proc_t *proc;
|
|
||||||
|
|
||||||
/* create convertor */
|
|
||||||
OBJ_CONSTRUCT(&convertor, ompi_convertor_t);
|
|
||||||
|
|
||||||
/* initialize convertor */
|
|
||||||
proc = ompi_comm_peer_lookup(module->m_comm, header->hdr_origin);
|
|
||||||
ompi_convertor_copy_and_prepare_for_recv(proc->proc_convertor,
|
|
||||||
datatype,
|
|
||||||
header->hdr_target_count,
|
|
||||||
target_buffer,
|
|
||||||
0,
|
|
||||||
&convertor);
|
|
||||||
|
|
||||||
/* short circuit the reduction operation MPI_REPLACE - it just
|
|
||||||
replaces the data, so push it out into the user's buffer.
|
|
||||||
This lets us avoid both the overhead of using the op
|
|
||||||
invocation and dealing with non-contiguous reductions
|
|
||||||
(since there are never user-defined reductions in
|
|
||||||
MPI_ACCUMULATE) */
|
|
||||||
iov.iov_len = inbuflen;
|
|
||||||
iov.iov_base = (IOVBASE_TYPE*)inbuf;
|
|
||||||
max_data = iov.iov_len;
|
|
||||||
ompi_convertor_unpack(&convertor,
|
|
||||||
&iov,
|
|
||||||
&iov_count,
|
|
||||||
&max_data);
|
|
||||||
OBJ_DESTRUCT(&convertor);
|
|
||||||
} else {
|
|
||||||
/* reductions other than MPI_REPLACE. Since user-defined
|
|
||||||
reductions aren't allowed, these all have to be over
|
|
||||||
contigous data. We make sure to only send complete
|
|
||||||
datatypes in these cases, so we can unpack directly from
|
|
||||||
the user buffer*/
|
|
||||||
/* BWB - FIX ME - this won't work if endianness is different.
|
|
||||||
Talk to George about a ddt function that allows us to fix
|
|
||||||
endianness "in place' or what else we could do here to keep
|
|
||||||
performance from sucking... */
|
|
||||||
|
|
||||||
ompi_op_reduce(op, inbuf, target_buffer, header->hdr_target_count,
|
|
||||||
datatype);
|
|
||||||
}
|
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
|
||||||
}
|
|
@ -20,39 +20,6 @@
|
|||||||
* utility functions for dealing with remote datatype and op structures
|
* utility functions for dealing with remote datatype and op structures
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "mpi.h"
|
|
||||||
|
|
||||||
#include "ompi/datatype/datatype.h"
|
|
||||||
|
|
||||||
static inline
|
|
||||||
struct ompi_datatype_t*
|
|
||||||
ompi_osc_rdma_datatype_create(ompi_proc_t *remote_proc, void **payload)
|
|
||||||
{
|
|
||||||
struct ompi_datatype_t *datatype =
|
|
||||||
ompi_ddt_create_from_packed_description(payload, remote_proc);
|
|
||||||
if (NULL == datatype) return NULL;
|
|
||||||
if (ompi_ddt_is_predefined(datatype)) OBJ_RETAIN(datatype);
|
|
||||||
return datatype;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
static inline
|
|
||||||
ompi_op_t *
|
|
||||||
ompi_osc_rdma_op_create(int op_id)
|
|
||||||
{
|
|
||||||
ompi_op_t *op = MPI_Op_f2c(op_id);
|
|
||||||
OBJ_RETAIN(op);
|
|
||||||
return op;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
int ompi_osc_rdma_process_op(ompi_osc_rdma_module_t *module,
|
|
||||||
ompi_osc_rdma_send_header_t *header,
|
|
||||||
struct ompi_datatype_t *datatype,
|
|
||||||
ompi_op_t *op,
|
|
||||||
void *inbuf,
|
|
||||||
size_t inbuflen);
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Convert a window index number into a module instance.
|
* Convert a window index number into a module instance.
|
||||||
*/
|
*/
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user