1
1

Support reduction operations other than MPI_REPLACE for user-defined

datatypes with MPI_ACCUMULATE

This commit was SVN r15418.
Этот коммит содержится в:
Brian Barrett 2007-07-13 20:46:12 +00:00
родитель bfe682b6c7
Коммит 7a9a8c7e17
15 изменённых файлов: 643 добавлений и 342 удалений

Просмотреть файл

@ -15,9 +15,11 @@
#
headers += \
base/base.h
base/base.h \
base/osc_base_obj_convert.h
libmca_osc_la_SOURCES += \
base/osc_base_close.c \
base/osc_base_open.c \
base/osc_base_init.c
base/osc_base_init.c \
base/osc_base_obj_convert.c

201
ompi/mca/osc/base/osc_base_obj_convert.c Обычный файл
Просмотреть файл

@ -0,0 +1,201 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2006 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/*
* utility functions for dealing with remote datatype and op structures
*/
#include "ompi_config.h"
#include "ompi/op/op.h"
#include "ompi/datatype/datatype.h"
#include "ompi/datatype/datatype_internal.h"
#include "ompi/datatype/convertor.h"
#include "ompi/datatype/convertor_internal.h"
#include "ompi/datatype/datatype_prototypes.h"
#include "osc_base_obj_convert.h"
struct ompi_osc_base_convertor_t {
ompi_convertor_t convertor;
ompi_op_t *op;
ompi_datatype_t *datatype;
};
typedef struct ompi_osc_base_convertor_t ompi_osc_base_convertor_t;
static OBJ_CLASS_INSTANCE(ompi_osc_base_convertor_t, ompi_convertor_t, NULL, NULL);
#define COPY_TYPE( TYPENAME, TYPE, COUNT ) \
static int copy_##TYPENAME( ompi_convertor_t *pConvertor, uint32_t count, \
char* from, size_t from_len, ptrdiff_t from_extent, \
char* to, size_t to_len, ptrdiff_t to_extent, \
ptrdiff_t *advance) \
{ \
ompi_osc_base_convertor_t *osc_convertor = \
(ompi_osc_base_convertor_t*) pConvertor; \
\
ompi_op_reduce(osc_convertor->op, from, to, count, osc_convertor->datatype); \
*advance = count * from_extent; \
return count; \
}
/* set up copy functions for the basic C MPI data types */
COPY_TYPE( char, char, 1 )
COPY_TYPE( short, short, 1 )
COPY_TYPE( int, int, 1 )
COPY_TYPE( long, long, 1 )
COPY_TYPE( long_long, long long, 1 )
COPY_TYPE( float, float, 1 )
COPY_TYPE( double, double, 1 )
COPY_TYPE( long_double, long double, 1 )
COPY_TYPE( complex_float, ompi_complex_float_t, 1 )
COPY_TYPE( complex_double, ompi_complex_double_t, 1 )
COPY_TYPE( complex_long_double, ompi_complex_long_double_t, 1 )
/* table of predefined copy functions - one for each MPI type */
static conversion_fct_t ompi_osc_base_copy_functions[DT_MAX_PREDEFINED] = {
(conversion_fct_t)NULL, /* DT_LOOP */
(conversion_fct_t)NULL, /* DT_END_LOOP */
(conversion_fct_t)NULL, /* DT_LB */
(conversion_fct_t)NULL, /* DT_UB */
(conversion_fct_t)copy_char, /* DT_CHAR */
(conversion_fct_t)copy_char, /* DT_CHARACTER */
(conversion_fct_t)copy_char, /* DT_UNSIGNED_CHAR */
(conversion_fct_t)copy_char, /* DT_SIGNED_CHAR */
(conversion_fct_t)copy_char, /* DT_BYTE */
(conversion_fct_t)copy_short, /* DT_SHORT */
(conversion_fct_t)copy_short, /* DT_UNSIGNED_SHORT */
(conversion_fct_t)copy_int, /* DT_INT */
(conversion_fct_t)copy_int, /* DT_UNSIGNED_INT */
(conversion_fct_t)copy_long, /* DT_LONG */
(conversion_fct_t)copy_long, /* DT_UNSIGNED_LONG */
(conversion_fct_t)copy_long_long, /* DT_LONG_LONG_INT */
(conversion_fct_t)copy_long_long, /* DT_UNSIGNED_LONG_LONG */
(conversion_fct_t)copy_float, /* DT_FLOAT */
(conversion_fct_t)copy_double, /* DT_DOUBLE */
(conversion_fct_t)copy_long_double, /* DT_LONG_DOUBLE */
(conversion_fct_t)NULL, /* DT_PACKED */
(conversion_fct_t)NULL, /* DT_WCHAR */
#if SIZEOF_BOOL == SIZEOF_CHAR
(conversion_fct_t)copy_char, /* DT_CXX_BOOL */
#elif SIZEOF_BOOL == SIZEOF_SHORT
(conversion_fct_t)copy_short, /* DT_CXX_BOOL */
#elif SIZEOF_BOOL == SIZEOF_INT
(conversion_fct_t)copy_int, /* DT_CXX_BOOL */
#elif SIZEOF_BOOL == SIZEOF_LONG
(conversion_fct_t)copy_long, /* DT_CXX_BOOL */
#else
(conversion_fct_t)NULL, /* DT_CXX_BOOL */
#endif
#if OMPI_SIZEOF_FORTRAN_LOGICAL == SIZEOF_CHAR
(conversion_fct_t)copy_char, /* DT_LOGIC */
#elif OMPI_SIZEOF_FORTRAN_LOGICAL == SIZEOF_SHORT
(conversion_fct_t)copy_short, /* DT_LOGIC */
#elif OMPI_SIZEOF_FORTRAN_LOGICAL == SIZEOF_INT
(conversion_fct_t)copy_int, /* DT_LOGIC */
#elif OMPI_SIZEOF_FORTRAN_LOGICAL == SIZEOF_LONG
(conversion_fct_t)copy_long, /* DT_LOGIC */
#else
(conversion_fct_t)NULL, /* DT_LOGIC */
#endif
(conversion_fct_t)copy_int, /* DT_INTEGER */
(conversion_fct_t)copy_float, /* DT_REAL */
(conversion_fct_t)copy_double, /* DT_DBLPREC */
(conversion_fct_t)copy_complex_float, /* DT_COMPLEX_FLOAT */
(conversion_fct_t)copy_complex_double, /* DT_COMPLEX_DOUBLE */
(conversion_fct_t)copy_complex_long_double, /* DT_COMPLEX_LONG_DOUBLE */
(conversion_fct_t)NULL, /* DT_2INT */
(conversion_fct_t)NULL, /* DT_2INTEGER */
(conversion_fct_t)NULL, /* DT_2REAL */
(conversion_fct_t)NULL, /* DT_2DBLPREC */
(conversion_fct_t)NULL, /* DT_2COMPLEX */
(conversion_fct_t)NULL, /* DT_2DOUBLE_COMPLEX */
(conversion_fct_t)NULL, /* DT_FLOAT_INT */
(conversion_fct_t)NULL, /* DT_DOUBLE_INT */
(conversion_fct_t)NULL, /* DT_LONG_DOUBLE_INT */
(conversion_fct_t)NULL, /* DT_LONG_INT */
(conversion_fct_t)NULL, /* DT_SHORT_INT */
(conversion_fct_t)NULL, /* DT_UNAVAILABLE */
};
int
ompi_osc_base_process_op(void *outbuf,
void *inbuf,
size_t inbuflen,
struct ompi_datatype_t *datatype,
int count,
ompi_op_t *op)
{
if (op == &ompi_mpi_op_replace) {
return OMPI_ERR_NOT_SUPPORTED;
}
if (ompi_ddt_is_predefined(datatype)) {
ompi_op_reduce(op, inbuf, outbuf, count, datatype);
} else {
struct ompi_datatype_t *primitive_datatype = NULL;
uint32_t primitive_count;
ompi_osc_base_convertor_t convertor;
struct iovec iov;
uint32_t iov_count = 1;
size_t max_data;
struct ompi_convertor_master_t master;
int i, found_index = -1;
uint64_t mask = 1;
for (i = 0 ; i < DT_MAX_PREDEFINED ; ++i) {
if (datatype->bdt_used & mask) {
found_index = i;
break;
}
mask *= 2;
}
primitive_datatype = (ompi_datatype_t*)
ompi_ddt_basicDatatypes[found_index];
primitive_count = datatype->nbElems;
/* create convertor */
OBJ_CONSTRUCT(&convertor, ompi_osc_base_convertor_t);
convertor.op = op;
convertor.datatype = primitive_datatype;
/* initialize convertor */
ompi_convertor_copy_and_prepare_for_recv(ompi_proc_local()->proc_convertor,
datatype,
count,
outbuf,
0,
&convertor.convertor);
memcpy(&master, convertor.convertor.master, sizeof(struct ompi_convertor_master_t));
master.next = convertor.convertor.master;
master.pFunctions = (conversion_fct_t*) &ompi_osc_base_copy_functions;
convertor.convertor.master = &master;
convertor.convertor.fAdvance = ompi_unpack_general;
iov.iov_len = inbuflen;
iov.iov_base = (IOVBASE_TYPE*) inbuf;
max_data = iov.iov_len;
ompi_convertor_unpack(&convertor.convertor,
&iov,
&iov_count,
&max_data);
OBJ_DESTRUCT(&convertor);
}
return OMPI_SUCCESS;
}

149
ompi/mca/osc/base/osc_base_obj_convert.h Обычный файл
Просмотреть файл

@ -0,0 +1,149 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*
* Utility functions for Open MPI object manipulation by the One-sided code
*
* Utility functions for creating / finding handles for Open MPI
* objects, usually based on indexes sent from remote peers.
*/
#include "ompi/datatype/datatype.h"
#include "ompi/datatype/datatype_internal.h"
#include "ompi/proc/proc.h"
#include "ompi/op/op.h"
BEGIN_C_DECLS
/**
* Create datatype based on packed payload
*
* Create a useable MPI datatype based on it's packed description.
* The datatype is owned by the calling process and must be
* OBJ_RELEASEd when no longer in use.
*
* @param remote_proc The ompi_proc_t pointer for the remote process
* @param payload A pointer to the pointer to the payload. The
* pointer to the payload will be moved past the
* datatype information upon successful return.
*
* @retval NULL A failure occrred
* @retval non-NULL A fully operational datatype
*/
static inline
struct ompi_datatype_t*
ompi_osc_base_datatype_create(ompi_proc_t *remote_proc, void **payload)
{
struct ompi_datatype_t *datatype =
ompi_ddt_create_from_packed_description(payload, remote_proc);
if (NULL == datatype) return NULL;
if (ompi_ddt_is_predefined(datatype)) OBJ_RETAIN(datatype);
return datatype;
}
/**
* Create datatype based on Fortran Index
*
* Create a useable MPI datatype based on it's Fortran index, which is
* globally the same for predefined operations. The op handle is
* owned by the calling process and must be OBJ_RELEASEd when no
* longer in use.
*
* @param op_id The fortran index for the operaton
*
* @retval NULL A failure occrred
* @retval non-NULL An op handle
*/
static inline
ompi_op_t *
ompi_osc_base_op_create(int op_id)
{
ompi_op_t *op = MPI_Op_f2c(op_id);
OBJ_RETAIN(op);
return op;
}
/**
* Get the primitive datatype information for a legal one-sided accumulate datatype
*
* Get the primitive datatype information for a legal one-sided
* accumulate datatype. This includes the primitive datatype used to
* build the datatype (there can be only one) and the number of
* instances of that primitive datatype in the datatype (there can be
* many).
*
* @param datatype legal one-sided datatype
* @param prim_datatype The primitive datatype used to build datatype
* @param prim_count Number of instances of prim_datattpe in datatype
*
* @retval OMPI_SUCCESS Success
*/
static inline int
ompi_osc_base_get_primitive_type_info(ompi_datatype_t *datatype,
ompi_datatype_t **prim_datatype,
uint32_t *prim_count)
{
struct ompi_datatype_t *primitive_datatype = NULL;
uint32_t primitive_count;
/* get underlying type... */
if (ompi_ddt_is_predefined(datatype)) {
primitive_datatype = datatype;
primitive_count = 1;
} else {
int i, found_index = -1;
uint64_t mask = 1;
for (i = 0 ; i < DT_MAX_PREDEFINED ; ++i) {
if (datatype->bdt_used & mask) {
found_index = i;
break;
}
mask *= 2;
}
primitive_datatype = (ompi_datatype_t*)
ompi_ddt_basicDatatypes[found_index];
primitive_count = datatype->nbElems;
}
*prim_datatype = primitive_datatype;
*prim_count = primitive_count;
return OMPI_SUCCESS;
}
/**
* Apply the operation specified from inbuf to outbut
*
* Apply the specified reduction operation from inbuf to outbuf.
* inbuf must contain count instances of datatype, in the local
* process's binary mode.
*
* @retval OMPI_SUCCESS Success
* @retval OMPI_ERR_NOT_SUPPORTED Called with op == ompi_mpi_op_replace
*/
int ompi_osc_base_process_op(void *outbuf,
void *inbuf,
size_t inbuflen,
struct ompi_datatype_t *datatype,
int count,
ompi_op_t *op);
END_C_DECLS

Просмотреть файл

@ -32,8 +32,6 @@ pt2pt_sources = \
osc_pt2pt_longreq.c \
osc_pt2pt_mpireq.h \
osc_pt2pt_mpireq.c \
osc_pt2pt_obj_convert.h \
osc_pt2pt_obj_convert.c \
osc_pt2pt_replyreq.h \
osc_pt2pt_replyreq.c \
osc_pt2pt_sendreq.h \

Просмотреть файл

@ -60,14 +60,6 @@ ompi_osc_pt2pt_module_accumulate(void *origin_addr, int origin_count,
OMPI_WIN_EXPOSE_EPOCH);
}
if (op != &ompi_mpi_op_replace &&
!ompi_ddt_is_predefined(target_dt)) {
fprintf(stderr, "MPI_Accumulate currently does not support reductions\n");
fprintf(stderr, "with any user-defined types. This will be rectified\n");
fprintf(stderr, "in a future release.\n");
return MPI_ERR_UNSUPPORTED_OPERATION;
}
/* shortcut 0 count case */
if (0 == origin_count || 0 == target_count) {
return OMPI_SUCCESS;

Просмотреть файл

@ -24,7 +24,6 @@
#include "osc_pt2pt_sendreq.h"
#include "osc_pt2pt_replyreq.h"
#include "osc_pt2pt_header.h"
#include "osc_pt2pt_obj_convert.h"
#include "osc_pt2pt_data_move.h"
#include "osc_pt2pt_buffer.h"
@ -33,6 +32,7 @@
#include "ompi/communicator/communicator.h"
#include "ompi/mca/osc/osc.h"
#include "ompi/mca/osc/base/base.h"
#include "ompi/mca/osc/base/osc_base_obj_convert.h"
#include "ompi/mca/pml/pml.h"
#include "ompi/datatype/dt_arch.h"
@ -535,7 +535,7 @@ component_fragment_cb(ompi_osc_pt2pt_mpireq_t *mpireq)
/* create or get a pointer to our datatype */
proc = ompi_comm_peer_lookup( module->p2p_comm, header->hdr_origin );
datatype = ompi_osc_pt2pt_datatype_create(proc, &payload);
datatype = ompi_osc_base_datatype_create(proc, &payload);
if (NULL == datatype) {
opal_output(ompi_osc_base_output,

Просмотреть файл

@ -20,7 +20,6 @@
#include "osc_pt2pt_sendreq.h"
#include "osc_pt2pt_header.h"
#include "osc_pt2pt_data_move.h"
#include "osc_pt2pt_obj_convert.h"
#include "osc_pt2pt_buffer.h"
#include "opal/util/output.h"
@ -29,6 +28,7 @@
#include "ompi/datatype/datatype.h"
#include "ompi/datatype/dt_arch.h"
#include "ompi/mca/osc/base/base.h"
#include "ompi/mca/osc/base/osc_base_obj_convert.h"
static inline int32_t
@ -525,7 +525,7 @@ ompi_osc_pt2pt_sendreq_recv_put(ompi_osc_pt2pt_module_t *module,
(header->hdr_target_disp * module->p2p_win->w_disp_unit);
ompi_proc_t *proc = ompi_comm_peer_lookup( module->p2p_comm, header->hdr_origin );
struct ompi_datatype_t *datatype =
ompi_osc_pt2pt_datatype_create(proc, &inbuf);
ompi_osc_base_datatype_create(proc, &inbuf);
if (NULL == datatype) {
opal_output(ompi_osc_base_output,
@ -599,21 +599,51 @@ ompi_osc_pt2pt_sendreq_recv_accum_long_cb(ompi_osc_pt2pt_mpireq_t *mpireq)
{
ompi_osc_pt2pt_longreq_t *longreq =
(ompi_osc_pt2pt_longreq_t*) mpireq;
ompi_osc_pt2pt_module_t *module = longreq->req_module;
ompi_osc_pt2pt_send_header_t *header =
(ompi_osc_pt2pt_send_header_t*) mpireq->cbdata;
void *payload = (void*) (header + 1);
int ret;
void *target = (unsigned char*) module->p2p_win->w_baseptr +
(header->hdr_target_disp * module->p2p_win->w_disp_unit);
/* lock the window for accumulates */
OPAL_THREAD_LOCK(&longreq->req_module->p2p_acc_lock);
/* copy the data from the temporary buffer into the user window */
ret = ompi_osc_pt2pt_process_op(longreq->req_module,
header,
longreq->req_datatype,
longreq->req_op,
payload,
header->hdr_msg_length);
if (longreq->req_op == &ompi_mpi_op_replace) {
ompi_convertor_t convertor;
struct iovec iov;
uint32_t iov_count = 1;
size_t max_data;
/* create convertor */
OBJ_CONSTRUCT(&convertor, ompi_convertor_t);
/* initialize convertor */
ompi_convertor_copy_and_prepare_for_recv(ompi_proc_local()->proc_convertor,
longreq->req_datatype,
header->hdr_target_count,
target,
0,
&convertor);
iov.iov_len = header->hdr_msg_length;
iov.iov_base = (IOVBASE_TYPE*) payload;
max_data = iov.iov_len;
ompi_convertor_unpack(&convertor,
&iov,
&iov_count,
&max_data);
OBJ_DESTRUCT(&convertor);
} else {
/* copy the data from the temporary buffer into the user window */
ret = ompi_osc_base_process_op(target,
payload,
header->hdr_msg_length,
longreq->req_datatype,
header->hdr_target_count,
longreq->req_op);
}
/* unlock the window for accumulates */
OPAL_THREAD_UNLOCK(&longreq->req_module->p2p_acc_lock);
@ -642,10 +672,12 @@ ompi_osc_pt2pt_sendreq_recv_accum(ompi_osc_pt2pt_module_t *module,
void *payload)
{
int ret = OMPI_SUCCESS;
struct ompi_op_t *op = ompi_osc_pt2pt_op_create(header->hdr_target_op);
struct ompi_op_t *op = ompi_osc_base_op_create(header->hdr_target_op);
ompi_proc_t *proc = ompi_comm_peer_lookup( module->p2p_comm, header->hdr_origin );
struct ompi_datatype_t *datatype =
ompi_osc_pt2pt_datatype_create(proc, &payload);
ompi_osc_base_datatype_create(proc, &payload);
void *target = (unsigned char*) module->p2p_win->w_baseptr +
(header->hdr_target_disp * module->p2p_win->w_disp_unit);
if (NULL == datatype) {
opal_output(ompi_osc_base_output,
@ -657,9 +689,92 @@ ompi_osc_pt2pt_sendreq_recv_accum(ompi_osc_pt2pt_module_t *module,
/* lock the window for accumulates */
OPAL_THREAD_LOCK(&module->p2p_acc_lock);
/* copy the data from the temporary buffer into the user window */
ret = ompi_osc_pt2pt_process_op(module, header, datatype, op, payload,
header->hdr_msg_length);
if (op == &ompi_mpi_op_replace) {
ompi_convertor_t convertor;
struct iovec iov;
uint32_t iov_count = 1;
size_t max_data;
/* create convertor */
OBJ_CONSTRUCT(&convertor, ompi_convertor_t);
/* initialize convertor */
ompi_convertor_copy_and_prepare_for_recv(proc->proc_convertor,
datatype,
header->hdr_target_count,
target,
0,
&convertor);
iov.iov_len = header->hdr_msg_length;
iov.iov_base = (IOVBASE_TYPE*)payload;
max_data = iov.iov_len;
ompi_convertor_unpack(&convertor,
&iov,
&iov_count,
&max_data);
OBJ_DESTRUCT(&convertor);
} else {
void *buffer = NULL;
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT
if (proc->proc_arch != ompi_proc_local()->proc_arch) {
ompi_convertor_t convertor;
struct iovec iov;
uint32_t iov_count = 1;
size_t max_data;
struct ompi_datatype_t *primitive_datatype = NULL;
uint32_t primitive_count;
size_t buflen;
ompi_osc_base_get_primitive_type_info(datatype, &primitive_datatype, &primitive_count);
primitive_count *= header->hdr_target_count;
/* figure out how big a buffer we need */
ompi_ddt_type_size(primitive_datatype, &buflen);
buflen *= primitive_count;
/* create convertor */
OBJ_CONSTRUCT(&convertor, ompi_convertor_t);
payload = (void*) malloc(buflen);
/* initialize convertor */
ompi_convertor_copy_and_prepare_for_recv(proc->proc_convertor,
primitive_datatype,
primitive_count,
buffer,
0,
&convertor);
iov.iov_len = header->hdr_msg_length;
iov.iov_base = (IOVBASE_TYPE*)payload;
max_data = iov.iov_len;
ompi_convertor_unpack(&convertor,
&iov,
&iov_count,
&max_data);
OBJ_DESTRUCT(&convertor);
} else {
buffer = payload;
}
#else
buffer = payload;
#endif
/* copy the data from the temporary buffer into the user window */
ret = ompi_osc_base_process_op(target,
buffer,
header->hdr_msg_length,
datatype,
header->hdr_target_count,
op);
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT
if (proc->proc_arch != ompi_proc_local()->proc_arch) {
if (NULL == buffer) free(buffer);
}
#endif
}
/* unlock the window for accumulates */
OPAL_THREAD_UNLOCK(&module->p2p_acc_lock);
@ -676,13 +791,17 @@ ompi_osc_pt2pt_sendreq_recv_accum(ompi_osc_pt2pt_module_t *module,
header->hdr_origin));
} else {
ompi_osc_pt2pt_longreq_t *longreq;
ptrdiff_t lb, extent, true_lb, true_extent;
size_t buflen;
struct ompi_datatype_t *primitive_datatype = NULL;
uint32_t primitive_count;
/* get underlying type... */
ompi_osc_base_get_primitive_type_info(datatype, &primitive_datatype, &primitive_count);
primitive_count *= header->hdr_target_count;
/* figure out how big a buffer we need */
ompi_ddt_get_extent(datatype, &lb, &extent);
ompi_ddt_get_true_extent(datatype, &true_lb, &true_extent);
buflen = true_extent + (header->hdr_target_count - 1) * extent;
ompi_ddt_type_size(primitive_datatype, &buflen);
buflen *= primitive_count;
/* get a longreq and fill it in */
ompi_osc_pt2pt_longreq_alloc(&longreq);
@ -702,8 +821,8 @@ ompi_osc_pt2pt_sendreq_recv_accum(ompi_osc_pt2pt_module_t *module,
((ompi_osc_pt2pt_send_header_t*) longreq->mpireq.cbdata)->hdr_msg_length = buflen;
ret = mca_pml.pml_irecv(((char*) longreq->mpireq.cbdata) + sizeof(ompi_osc_pt2pt_send_header_t),
header->hdr_target_count,
datatype,
primitive_count,
primitive_datatype,
header->hdr_origin,
header->hdr_origin_tag,
module->p2p_comm,

Просмотреть файл

@ -1,96 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2006 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/*
* utility functions for dealing with remote datatype and op structures
*/
#include "ompi_config.h"
#include "ompi/op/op.h"
#include "osc_pt2pt.h"
#include "osc_pt2pt_sendreq.h"
#include "osc_pt2pt_header.h"
#include "osc_pt2pt_obj_convert.h"
int
ompi_osc_pt2pt_process_op(ompi_osc_pt2pt_module_t *module,
ompi_osc_pt2pt_send_header_t *header,
struct ompi_datatype_t *datatype,
ompi_op_t *op,
void *inbuf,
size_t inbuflen)
{
unsigned char *target_buffer;
/* compute target buffer location */
target_buffer = (unsigned char*) module->p2p_win->w_baseptr +
(header->hdr_target_disp * module->p2p_win->w_disp_unit);
/* BWB - fix me - change back to the pointer comparison when the
replace o_f_to_c_index is set properly */
/* if (op == &ompi_mpi_op_replace) { */
if (header->hdr_target_op == ompi_mpi_op_replace.o_f_to_c_index) {
ompi_convertor_t convertor;
struct iovec iov;
uint32_t iov_count = 1;
size_t max_data;
ompi_proc_t *proc;
/* create convertor */
OBJ_CONSTRUCT(&convertor, ompi_convertor_t);
/* initialize convertor */
proc = ompi_comm_peer_lookup(module->p2p_comm, header->hdr_origin);
ompi_convertor_copy_and_prepare_for_recv(proc->proc_convertor,
datatype,
header->hdr_target_count,
target_buffer,
0,
&convertor);
/* short circuit the reduction operation MPI_REPLACE - it just
replaces the data, so push it out into the user's buffer.
This lets us avoid both the overhead of using the op
invocation and dealing with non-contiguous reductions
(since there are never user-defined reductions in
MPI_ACCUMULATE) */
iov.iov_len = inbuflen;
iov.iov_base = (IOVBASE_TYPE*)inbuf;
max_data = iov.iov_len;
ompi_convertor_unpack(&convertor,
&iov,
&iov_count,
&max_data);
OBJ_DESTRUCT(&convertor);
} else {
/* reductions other than MPI_REPLACE. Since user-defined
reductions aren't allowed, these all have to be over
contigous data. We make sure to only send complete
datatypes in these cases, so we can unpack directly from
the user buffer*/
/* BWB - FIX ME - this won't work if endianness is different.
Talk to George about a ddt function that allows us to fix
endianness "in place' or what else we could do here to keep
performance from sucking... */
ompi_op_reduce(op, inbuf, target_buffer, header->hdr_target_count,
datatype);
}
return OMPI_SUCCESS;
}

Просмотреть файл

@ -1,51 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/*
* utility functions for dealing with remote datatype and op structures
*/
#include "mpi.h"
#include "ompi/datatype/datatype.h"
static inline
struct ompi_datatype_t*
ompi_osc_pt2pt_datatype_create(ompi_proc_t *remote_proc, void **payload)
{
struct ompi_datatype_t *datatype =
ompi_ddt_create_from_packed_description(payload, remote_proc);
if (NULL == datatype) return NULL;
if (ompi_ddt_is_predefined(datatype)) OBJ_RETAIN(datatype);
return datatype;
}
static inline
ompi_op_t *
ompi_osc_pt2pt_op_create(int op_id)
{
ompi_op_t *op = MPI_Op_f2c(op_id);
OBJ_RETAIN(op);
return op;
}
int ompi_osc_pt2pt_process_op(ompi_osc_pt2pt_module_t *module,
ompi_osc_pt2pt_send_header_t *header,
struct ompi_datatype_t *datatype,
ompi_op_t *op,
void *inbuf,
size_t inbuflen);

Просмотреть файл

@ -29,7 +29,6 @@ pt2pt_sources = \
osc_rdma_longreq.h \
osc_rdma_longreq.c \
osc_rdma_obj_convert.h \
osc_rdma_obj_convert.c \
osc_rdma_replyreq.h \
osc_rdma_replyreq.c \
osc_rdma_sendreq.h \

Просмотреть файл

@ -62,14 +62,6 @@ ompi_osc_rdma_module_accumulate(void *origin_addr, int origin_count,
OMPI_WIN_EXPOSE_EPOCH);
}
if (op != &ompi_mpi_op_replace &&
!ompi_ddt_is_predefined(target_dt)) {
fprintf(stderr, "MPI_Accumulate currently does not support reductions\n");
fprintf(stderr, "with any user-defined types. This will be rectified\n");
fprintf(stderr, "in a future release.\n");
return MPI_ERR_UNSUPPORTED_OPERATION;
}
/* shortcut 0 count case */
if (0 == origin_count || 0 == target_count) {
return OMPI_SUCCESS;

Просмотреть файл

@ -24,8 +24,8 @@
#include "osc_rdma_sendreq.h"
#include "osc_rdma_replyreq.h"
#include "osc_rdma_header.h"
#include "osc_rdma_obj_convert.h"
#include "osc_rdma_data_move.h"
#include "osc_rdma_obj_convert.h"
#include "opal/threads/condition.h"
#include "opal/threads/mutex.h"
@ -33,6 +33,7 @@
#include "ompi/communicator/communicator.h"
#include "ompi/mca/osc/osc.h"
#include "ompi/mca/osc/base/base.h"
#include "ompi/mca/osc/base/osc_base_obj_convert.h"
#include "ompi/mca/btl/btl.h"
#include "ompi/mca/bml/bml.h"
#include "ompi/mca/pml/pml.h"
@ -613,7 +614,7 @@ component_fragment_cb(struct mca_btl_base_module_t *btl,
/* create or get a pointer to our datatype */
proc = ompi_comm_peer_lookup( module->m_comm, header->hdr_origin );
datatype = ompi_osc_rdma_datatype_create(proc, &payload);
datatype = ompi_osc_base_datatype_create(proc, &payload);
if (NULL == datatype) {
opal_output(ompi_osc_base_output,

Просмотреть файл

@ -30,6 +30,7 @@
#include "ompi/mca/bml/base/base.h"
#include "ompi/mca/btl/btl.h"
#include "ompi/mca/osc/base/base.h"
#include "ompi/mca/osc/base/osc_base_obj_convert.h"
#include "ompi/datatype/datatype.h"
#include "ompi/datatype/dt_arch.h"
@ -803,7 +804,7 @@ ompi_osc_rdma_sendreq_recv_put(ompi_osc_rdma_module_t *module,
(header->hdr_target_disp * module->m_win->w_disp_unit);
ompi_proc_t *proc = ompi_comm_peer_lookup( module->m_comm, header->hdr_origin );
struct ompi_datatype_t *datatype =
ompi_osc_rdma_datatype_create(proc, inbuf);
ompi_osc_base_datatype_create(proc, inbuf);
if (NULL == datatype) {
opal_output(ompi_osc_base_output,
@ -887,6 +888,8 @@ ompi_osc_rdma_sendreq_recv_put(ompi_osc_rdma_module_t *module,
* Receive an accumulate on the target side
*
**********************************************************************/
static void
ompi_osc_rdma_sendreq_recv_accum_long_cb(ompi_osc_rdma_longreq_t *longreq)
{
@ -894,17 +897,48 @@ ompi_osc_rdma_sendreq_recv_accum_long_cb(ompi_osc_rdma_longreq_t *longreq)
(ompi_osc_rdma_send_header_t*) longreq->cbdata;
void *payload = (void*) (header + 1);
int ret;
ompi_osc_rdma_module_t *module = longreq->req_module;
unsigned char *target_buffer =
(unsigned char*) module->m_win->w_baseptr +
(header->hdr_target_disp * module->m_win->w_disp_unit);
/* lock the window for accumulates */
OPAL_THREAD_LOCK(&longreq->req_module->m_acc_lock);
/* copy the data from the temporary buffer into the user window */
ret = ompi_osc_rdma_process_op(longreq->req_module,
header,
longreq->req_datatype,
longreq->req_op,
payload,
header->hdr_msg_length);
if (longreq->req_op == &ompi_mpi_op_replace) {
ompi_convertor_t convertor;
struct iovec iov;
uint32_t iov_count = 1;
size_t max_data;
/* create convertor */
OBJ_CONSTRUCT(&convertor, ompi_convertor_t);
/* initialize convertor */
ompi_convertor_copy_and_prepare_for_recv(ompi_proc_local()->proc_convertor,
longreq->req_datatype,
header->hdr_target_count,
target_buffer,
0,
&convertor);
iov.iov_len = header->hdr_msg_length;
iov.iov_base = (IOVBASE_TYPE*) payload;
max_data = iov.iov_len;
ompi_convertor_unpack(&convertor,
&iov,
&iov_count,
&max_data);
OBJ_DESTRUCT(&convertor);
} else {
/* copy the data from the temporary buffer into the user window */
ret = ompi_osc_base_process_op(target_buffer,
payload,
header->hdr_msg_length,
longreq->req_datatype,
header->hdr_target_count,
longreq->req_op);
}
/* unlock the window for accumulates */
OPAL_THREAD_UNLOCK(&longreq->req_module->m_acc_lock);
@ -933,10 +967,10 @@ ompi_osc_rdma_sendreq_recv_accum(ompi_osc_rdma_module_t *module,
void **payload)
{
int ret = OMPI_SUCCESS;
struct ompi_op_t *op = ompi_osc_rdma_op_create(header->hdr_target_op);
struct ompi_op_t *op = ompi_osc_base_op_create(header->hdr_target_op);
ompi_proc_t *proc = ompi_comm_peer_lookup( module->m_comm, header->hdr_origin );
struct ompi_datatype_t *datatype =
ompi_osc_rdma_datatype_create(proc, payload);
ompi_osc_base_datatype_create(proc, payload);
if (NULL == datatype) {
opal_output(ompi_osc_base_output,
@ -945,12 +979,100 @@ ompi_osc_rdma_sendreq_recv_accum(ompi_osc_rdma_module_t *module,
}
if (header->hdr_msg_length > 0) {
unsigned char *target_buffer;
target_buffer = (unsigned char*) module->m_win->w_baseptr +
(header->hdr_target_disp * module->m_win->w_disp_unit);
/* lock the window for accumulates */
OPAL_THREAD_LOCK(&module->m_acc_lock);
/* copy the data from the temporary buffer into the user window */
ret = ompi_osc_rdma_process_op(module, header, datatype, op, *payload,
header->hdr_msg_length);
if (op == &ompi_mpi_op_replace) {
ompi_convertor_t convertor;
struct iovec iov;
uint32_t iov_count = 1;
size_t max_data;
/* create convertor */
OBJ_CONSTRUCT(&convertor, ompi_convertor_t);
/* initialize convertor */
ompi_convertor_copy_and_prepare_for_recv(proc->proc_convertor,
datatype,
header->hdr_target_count,
target_buffer,
0,
&convertor);
iov.iov_len = header->hdr_msg_length;
iov.iov_base = (IOVBASE_TYPE*)*payload;
max_data = iov.iov_len;
ompi_convertor_unpack(&convertor,
&iov,
&iov_count,
&max_data);
OBJ_DESTRUCT(&convertor);
} else {
void *buffer = NULL;
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT
if (proc->proc_arch != ompi_proc_local()->proc_arch) {
ompi_convertor_t convertor;
struct iovec iov;
uint32_t iov_count = 1;
size_t max_data;
struct ompi_datatype_t *primitive_datatype = NULL;
uint32_t primitive_count;
size_t buflen;
ompi_osc_base_get_primitive_type_info(datatype, &primitive_datatype, &primitive_count);
primitive_count *= header->hdr_target_count;
/* figure out how big a buffer we need */
ompi_ddt_type_size(primitive_datatype, &buflen);
buflen *= primitive_count;
/* create convertor */
OBJ_CONSTRUCT(&convertor, ompi_convertor_t);
payload = (void*) malloc(buflen);
/* initialize convertor */
ompi_convertor_copy_and_prepare_for_recv(proc->proc_convertor,
primitive_datatype,
primitive_count,
buffer,
0,
&convertor);
iov.iov_len = header->hdr_msg_length;
iov.iov_base = (IOVBASE_TYPE*)*payload;
max_data = iov.iov_len;
ompi_convertor_unpack(&convertor,
&iov,
&iov_count,
&max_data);
OBJ_DESTRUCT(&convertor);
} else {
buffer = *payload;
}
#else
buffer = *payload;
#endif
/* copy the data from the temporary buffer into the user window */
ret = ompi_osc_base_process_op(target_buffer,
buffer,
header->hdr_msg_length,
datatype,
header->hdr_target_count,
op);
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT
if (proc->proc_arch != ompi_proc_local()->proc_arch) {
if (NULL == buffer) free(buffer);
}
#endif
}
/* unlock the window for accumulates */
OPAL_THREAD_UNLOCK(&module->m_acc_lock);
@ -969,13 +1091,17 @@ ompi_osc_rdma_sendreq_recv_accum(ompi_osc_rdma_module_t *module,
} else {
ompi_osc_rdma_longreq_t *longreq;
ptrdiff_t lb, extent, true_lb, true_extent;
size_t buflen;
struct ompi_datatype_t *primitive_datatype = NULL;
uint32_t primitive_count;
/* get underlying type... */
ompi_osc_base_get_primitive_type_info(datatype, &primitive_datatype, &primitive_count);
primitive_count *= header->hdr_target_count;
/* figure out how big a buffer we need */
ompi_ddt_get_extent(datatype, &lb, &extent);
ompi_ddt_get_true_extent(datatype, &true_lb, &true_extent);
buflen = true_extent + (header->hdr_target_count - 1) * extent;
ompi_ddt_type_size(primitive_datatype, &buflen);
buflen *= primitive_count;
/* get a longreq and fill it in */
ompi_osc_rdma_longreq_alloc(&longreq);
@ -995,8 +1121,8 @@ ompi_osc_rdma_sendreq_recv_accum(ompi_osc_rdma_module_t *module,
((ompi_osc_rdma_send_header_t*) longreq->cbdata)->hdr_msg_length = buflen;
ret = mca_pml.pml_irecv(((char*) longreq->cbdata) + sizeof(ompi_osc_rdma_send_header_t),
header->hdr_target_count,
datatype,
primitive_count,
primitive_datatype,
header->hdr_origin,
header->hdr_origin_tag,
module->m_comm,

Просмотреть файл

@ -1,98 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2006 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/*
* utility functions for dealing with remote datatype and op structures
*/
#include "ompi_config.h"
#include "ompi/op/op.h"
#include "osc_rdma.h"
#include "osc_rdma_sendreq.h"
#include "osc_rdma_header.h"
#include "osc_rdma_obj_convert.h"
int
ompi_osc_rdma_process_op(ompi_osc_rdma_module_t *module,
ompi_osc_rdma_send_header_t *header,
struct ompi_datatype_t *datatype,
ompi_op_t *op,
void *inbuf,
size_t inbuflen)
{
unsigned char *target_buffer;
/* compute target buffer location */
target_buffer = (unsigned char*) module->m_win->w_baseptr +
(header->hdr_target_disp * module->m_win->w_disp_unit);
/* BWB - fix me - change back to the pointer comparison when the
replace o_f_to_c_index is set properly */
/* if (op == &ompi_mpi_op_replace) { */
if (header->hdr_target_op == ompi_mpi_op_replace.o_f_to_c_index) {
ompi_convertor_t convertor;
struct iovec iov;
uint32_t iov_count = 1;
size_t max_data;
ompi_proc_t *proc;
/* create convertor */
OBJ_CONSTRUCT(&convertor, ompi_convertor_t);
/* initialize convertor */
proc = ompi_comm_peer_lookup(module->m_comm, header->hdr_origin);
ompi_convertor_copy_and_prepare_for_recv(proc->proc_convertor,
datatype,
header->hdr_target_count,
target_buffer,
0,
&convertor);
/* short circuit the reduction operation MPI_REPLACE - it just
replaces the data, so push it out into the user's buffer.
This lets us avoid both the overhead of using the op
invocation and dealing with non-contiguous reductions
(since there are never user-defined reductions in
MPI_ACCUMULATE) */
iov.iov_len = inbuflen;
iov.iov_base = (IOVBASE_TYPE*)inbuf;
max_data = iov.iov_len;
ompi_convertor_unpack(&convertor,
&iov,
&iov_count,
&max_data);
OBJ_DESTRUCT(&convertor);
} else {
/* reductions other than MPI_REPLACE. Since user-defined
reductions aren't allowed, these all have to be over
contigous data. We make sure to only send complete
datatypes in these cases, so we can unpack directly from
the user buffer*/
/* BWB - FIX ME - this won't work if endianness is different.
Talk to George about a ddt function that allows us to fix
endianness "in place' or what else we could do here to keep
performance from sucking... */
ompi_op_reduce(op, inbuf, target_buffer, header->hdr_target_count,
datatype);
}
return OMPI_SUCCESS;
}

Просмотреть файл

@ -20,39 +20,6 @@
* utility functions for dealing with remote datatype and op structures
*/
#include "mpi.h"
#include "ompi/datatype/datatype.h"
static inline
struct ompi_datatype_t*
ompi_osc_rdma_datatype_create(ompi_proc_t *remote_proc, void **payload)
{
struct ompi_datatype_t *datatype =
ompi_ddt_create_from_packed_description(payload, remote_proc);
if (NULL == datatype) return NULL;
if (ompi_ddt_is_predefined(datatype)) OBJ_RETAIN(datatype);
return datatype;
}
static inline
ompi_op_t *
ompi_osc_rdma_op_create(int op_id)
{
ompi_op_t *op = MPI_Op_f2c(op_id);
OBJ_RETAIN(op);
return op;
}
int ompi_osc_rdma_process_op(ompi_osc_rdma_module_t *module,
ompi_osc_rdma_send_header_t *header,
struct ompi_datatype_t *datatype,
ompi_op_t *op,
void *inbuf,
size_t inbuflen);
/**
* Convert a window index number into a module instance.
*/