Implement MPI_TYPE_CREATE_DARRAY function. Works with the MPICH2 darray-pack
test, Sun's darray test, and an internal LANL test code. I would not assume it will work properly on other codes, as I'm still not sure I completely understand what the standard says this function is supposed to do. Refs trac:65. This commit was SVN r13967. The following Trac tickets were found above: Ticket 65 --> https://svn.open-mpi.org/trac/ompi/ticket/65
Этот коммит содержится в:
родитель
01512d6950
Коммит
e926bed69f
2
NEWS
2
NEWS
@ -47,6 +47,8 @@ Trunk (not on release branches yet)
|
||||
--> Expected 1.2.x
|
||||
- Recognize zsh in rsh pls
|
||||
--> Expected 1.2.x
|
||||
- Implement MPI_TYPE_CREATE_DARRAY function
|
||||
--> Expected 1.2.x
|
||||
|
||||
|
||||
1.2
|
||||
|
@ -10,6 +10,8 @@
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2007 Los Alamos National Security, LLC. All rights
|
||||
# reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
@ -41,7 +43,6 @@ libdatatype_reliable_la_CFLAGS = -DCHECKSUM $(AM_CFLAGS)
|
||||
libdatatype_la_SOURCES = \
|
||||
dt_add.c \
|
||||
dt_create.c \
|
||||
dt_create_array.c \
|
||||
dt_create_dup.c \
|
||||
dt_create_indexed.c \
|
||||
dt_create_struct.c \
|
||||
|
@ -10,6 +10,8 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2007 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -160,14 +162,6 @@ OMPI_DECLSPEC int32_t ompi_ddt_create_indexed_block( int count, int bLength, con
|
||||
OMPI_DECLSPEC int32_t ompi_ddt_create_struct( int count, const int* pBlockLength, const MPI_Aint* pDisp,
|
||||
ompi_datatype_t* const* pTypes, ompi_datatype_t** newType );
|
||||
OMPI_DECLSPEC int32_t ompi_ddt_create_resized( const ompi_datatype_t* oldType, MPI_Aint lb, MPI_Aint extent, ompi_datatype_t** newType );
|
||||
OMPI_DECLSPEC int32_t ompi_ddt_create_subarray( int ndims, const int* pSizes,
|
||||
const int* pSubSizes, const int* pStarts,
|
||||
int order, const ompi_datatype_t* oldType, ompi_datatype_t** newType );
|
||||
OMPI_DECLSPEC int32_t ompi_ddt_create_darray( int size, int rank, int ndims,
|
||||
const int* pGSizes, const int *pDistrib,
|
||||
const int* pDArgs, const int* pPSizes,
|
||||
int order, const ompi_datatype_t* oldType,
|
||||
ompi_datatype_t** newType );
|
||||
|
||||
OMPI_DECLSPEC int32_t ompi_ddt_add( ompi_datatype_t* pdtBase, const ompi_datatype_t* pdtAdd, uint32_t count,
|
||||
ptrdiff_t disp, ptrdiff_t extent );
|
||||
|
@ -1,35 +0,0 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; -*- */
|
||||
/*
|
||||
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
#include "ompi/datatype/datatype.h"
|
||||
|
||||
int32_t ompi_ddt_create_subarray( int ndims, const int* pSizes, const int* pSubSizes, const int* pStarts,
|
||||
int order, const ompi_datatype_t* oldType, ompi_datatype_t** newType )
|
||||
{
|
||||
return OMPI_ERR_NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
int32_t ompi_ddt_create_darray( int size, int rank, int ndims, const int* pGSizes, const int *pDistrib,
|
||||
const int* pDArgs, const int* pPSizes, int order, const ompi_datatype_t* oldType,
|
||||
ompi_datatype_t** newType )
|
||||
{
|
||||
return OMPI_ERR_NOT_IMPLEMENTED;
|
||||
}
|
||||
|
@ -9,6 +9,8 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2007 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -31,35 +33,15 @@
|
||||
|
||||
static const char FUNC_NAME[] = "MPI_Type_create_darray";
|
||||
|
||||
static ompi_datatype_t* cyclic( int32_t darg, int32_t gsize, int32_t r, int32_t psize, ompi_datatype_t* oldtype )
|
||||
{
|
||||
int count, darg_last;
|
||||
static int block(const int *array_of_gsizes, int dim, int ndims, int nprocs,
|
||||
int rank, int darg, int order, ptrdiff_t orig_extent,
|
||||
ompi_datatype_t *type_old, ompi_datatype_t **type_new,
|
||||
ptrdiff_t *st_offset);
|
||||
static int cyclic(const int *array_of_gsizes, int dim, int ndims, int nprocs,
|
||||
int rank, int darg, int order, ptrdiff_t orig_extent,
|
||||
ompi_datatype_t* type_old, ompi_datatype_t **type_new,
|
||||
ptrdiff_t *st_offset);
|
||||
|
||||
{ /* compute the count */
|
||||
int nblocks, left_over;
|
||||
nblocks = (gsize + (darg - 1)) / darg;
|
||||
count = nblocks / psize;
|
||||
left_over = nblocks - count * psize;
|
||||
if( r < left_over )
|
||||
count++;
|
||||
}
|
||||
{ /* compute the darg_last */
|
||||
int32_t num_in_last_cyclic;
|
||||
if( 0 == (num_in_last_cyclic = gsize % (psize * darg)) ) {
|
||||
darg_last = darg;
|
||||
} else {
|
||||
darg_last = num_in_last_cyclic - darg * r;
|
||||
if( darg_last > darg )
|
||||
darg_last = darg;
|
||||
if( darg_last <= 0 )
|
||||
darg_last = darg;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
return &ompi_mpi_datatype_null;
|
||||
}
|
||||
|
||||
int MPI_Type_create_darray(int size,
|
||||
int rank,
|
||||
@ -73,8 +55,10 @@ int MPI_Type_create_darray(int size,
|
||||
MPI_Datatype *newtype)
|
||||
|
||||
{
|
||||
int32_t i, darg_i, step, end_loop, *r;
|
||||
ompi_datatype_t* temptype;
|
||||
ompi_datatype_t *lastType;
|
||||
ptrdiff_t orig_extent, *st_offsets = NULL;
|
||||
int i, start_loop, end_loop, step;
|
||||
int *coords = NULL, rc = OMPI_SUCCESS;
|
||||
|
||||
if (MPI_PARAM_CHECK) {
|
||||
int prod_psize = 1;
|
||||
@ -93,7 +77,8 @@ int MPI_Type_create_darray(int size,
|
||||
return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, FUNC_NAME);
|
||||
}
|
||||
for( i = 0; i < ndims; i++ ) {
|
||||
if( (MPI_DISTRIBUTE_BLOCK != distrib_array[i]) && (MPI_DISTRIBUTE_CYCLIC != distrib_array[i]) &&
|
||||
if( (MPI_DISTRIBUTE_BLOCK != distrib_array[i]) &&
|
||||
(MPI_DISTRIBUTE_CYCLIC != distrib_array[i]) &&
|
||||
(MPI_DISTRIBUTE_NONE != distrib_array[i]) ) {
|
||||
return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, FUNC_NAME);
|
||||
} else if( (gsize_array[i] < 1) || (psize_array[i] < 0) ||
|
||||
@ -110,50 +95,112 @@ int MPI_Type_create_darray(int size,
|
||||
if( prod_psize != size )
|
||||
return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, FUNC_NAME);
|
||||
}
|
||||
if( ndims < 1 ) {
|
||||
*newtype = &ompi_mpi_datatype_null;
|
||||
|
||||
/* speedy corner case */
|
||||
if (ndims < 1) {
|
||||
/* Don't just return MPI_DATATYPE_NULL as that can't be
|
||||
MPI_TYPE_FREE()ed, and that seems bad */
|
||||
*newtype = ompi_ddt_create(0);
|
||||
ompi_ddt_add(*newtype, &ompi_mpi_datatype_null, 0, 0, 0);
|
||||
return MPI_SUCCESS;
|
||||
}
|
||||
|
||||
r = (int*)malloc( ndims * sizeof(int) );
|
||||
rc = ompi_ddt_type_extent(oldtype, &orig_extent);
|
||||
if (MPI_SUCCESS != rc) goto cleanup;
|
||||
|
||||
/* calculate position in grid using row-major ordering */
|
||||
{
|
||||
int t_rank = rank;
|
||||
int t_size = size; /* if prod(psize_array) != size it's a user ERROR */
|
||||
for( i = 0; i < ndims; i++ ) {
|
||||
t_size = t_size / psize_array[i];
|
||||
r[i] = t_rank / t_size;
|
||||
t_rank = t_rank % t_size;
|
||||
int tmp_rank = rank, procs = size;
|
||||
|
||||
coords = (int *) malloc(ndims * sizeof(int));
|
||||
for (i = 0 ; i < ndims ; i++) {
|
||||
procs = procs / psize_array[i];
|
||||
coords[i] = tmp_rank / procs;
|
||||
tmp_rank = tmp_rank % procs;
|
||||
}
|
||||
}
|
||||
if( MPI_ORDER_FORTRAN == order ) {
|
||||
i = 0;
|
||||
step = 1;
|
||||
end_loop = ndims;
|
||||
|
||||
st_offsets = (ptrdiff_t *) malloc(ndims * sizeof(ptrdiff_t));
|
||||
|
||||
/* duplicate the type here to 1) deal with constness without
|
||||
casting and 2) eliminate the need for a conditional destroy below.
|
||||
Lame, yes. But cleaner code all around. */
|
||||
rc = ompi_ddt_duplicate(oldtype, &lastType);
|
||||
if (OMPI_SUCCESS != rc) goto cleanup;
|
||||
|
||||
/* figure out ordering issues */
|
||||
if (MPI_ORDER_C == order) {
|
||||
start_loop = ndims - 1 ; step = -1; end_loop = -1;
|
||||
} else {
|
||||
i = ndims - 1;
|
||||
step = -1;
|
||||
end_loop = -1;
|
||||
start_loop = 0 ; step = 1; end_loop = ndims;
|
||||
}
|
||||
|
||||
do {
|
||||
darg_i = darg_array[i];
|
||||
if( distrib_array[i] == MPI_DISTRIBUTE_BLOCK ) {
|
||||
if( darg_array[i] == MPI_DISTRIBUTE_DFLT_DARG )
|
||||
darg_i = (gsize_array[i] + psize_array[i] - 1) / psize_array[i];
|
||||
} else if( distrib_array[i] == MPI_DISTRIBUTE_NONE ) {
|
||||
darg_i = gsize_array[i];
|
||||
} else if( distrib_array[i] == MPI_DISTRIBUTE_CYCLIC ) {
|
||||
if( darg_array[i] == MPI_DISTRIBUTE_DFLT_DARG )
|
||||
darg_i = 1;
|
||||
}
|
||||
|
||||
*newtype = cyclic( darg_i, gsize_array[i], r[i], psize_array[i], temptype );
|
||||
ompi_ddt_destroy( &temptype );
|
||||
temptype = *newtype;
|
||||
i += step;
|
||||
} while( i != end_loop );
|
||||
/* Build up array */
|
||||
for (i = start_loop ; i != end_loop; i += step) {
|
||||
int nprocs, rank;
|
||||
|
||||
free( r );
|
||||
switch(distrib_array[i]) {
|
||||
case MPI_DISTRIBUTE_BLOCK:
|
||||
rc = block(gsize_array, i, ndims, psize_array[i], coords[i],
|
||||
darg_array[i], order, orig_extent,
|
||||
lastType, newtype, st_offsets+i);
|
||||
break;
|
||||
case MPI_DISTRIBUTE_CYCLIC:
|
||||
rc = cyclic(gsize_array, i, ndims, psize_array[i], coords[i],
|
||||
darg_array[i], order, orig_extent,
|
||||
lastType, newtype, st_offsets+i);
|
||||
break;
|
||||
case MPI_DISTRIBUTE_NONE:
|
||||
/* treat it as a block distribution on 1 process */
|
||||
if (order == MPI_ORDER_C) {
|
||||
nprocs = psize_array[i]; rank = coords[i];
|
||||
} else {
|
||||
nprocs = 1; rank = 0;
|
||||
}
|
||||
|
||||
rc = block(gsize_array, i, ndims, nprocs, rank,
|
||||
MPI_DISTRIBUTE_DFLT_DARG, order, orig_extent,
|
||||
lastType, newtype, st_offsets+i);
|
||||
break;
|
||||
default:
|
||||
rc = MPI_ERR_ARG;
|
||||
}
|
||||
ompi_ddt_destroy(&lastType);
|
||||
/* need to destroy the old type even in error condition, so
|
||||
don't check return code from above until after cleanup. */
|
||||
if (MPI_SUCCESS != rc) goto cleanup;
|
||||
lastType = *newtype;
|
||||
}
|
||||
|
||||
|
||||
/* set displacement and UB correctly. Use struct instead of
|
||||
resized for same reason as subarray */
|
||||
{
|
||||
ptrdiff_t displs[3];
|
||||
ompi_datatype_t *types[3];
|
||||
int tmp_size, blength[3] = { 1, 1, 1};
|
||||
|
||||
displs[1] = st_offsets[start_loop];
|
||||
tmp_size = 1;
|
||||
for (i = start_loop + step ; i != end_loop ; i += step) {
|
||||
tmp_size *= gsize_array[i - step];
|
||||
displs[1] += tmp_size * st_offsets[i];
|
||||
}
|
||||
|
||||
displs[0] = 0;
|
||||
displs[1] *= orig_extent;
|
||||
displs[2] = orig_extent;
|
||||
for (i = 0 ; i < ndims ; i++) {
|
||||
displs[2] *= gsize_array[i];
|
||||
}
|
||||
types[0] = MPI_LB; types[1] = lastType; types[2] = MPI_UB;
|
||||
|
||||
rc = ompi_ddt_create_struct(3, blength, displs, types, newtype);
|
||||
ompi_ddt_destroy(&lastType);
|
||||
/* need to destroy the old type even in error condition, so
|
||||
don't check return code from above until after cleanup. */
|
||||
if (MPI_SUCCESS != rc) goto cleanup;
|
||||
}
|
||||
|
||||
{
|
||||
int* a_i[8];
|
||||
@ -171,5 +218,149 @@ int MPI_Type_create_darray(int size,
|
||||
MPI_COMBINER_DARRAY );
|
||||
}
|
||||
|
||||
return MPI_SUCCESS;
|
||||
cleanup:
|
||||
if (NULL != st_offsets) free(st_offsets);
|
||||
if (NULL != coords) free(coords);
|
||||
|
||||
OMPI_ERRHANDLER_RETURN(rc, MPI_COMM_WORLD, rc, FUNC_NAME);
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
block(const int *gsize_array, int dim, int ndims, int nprocs,
|
||||
int rank, int darg, int order, ptrdiff_t orig_extent,
|
||||
ompi_datatype_t *type_old, ompi_datatype_t **type_new,
|
||||
ptrdiff_t *st_offset)
|
||||
{
|
||||
int blksize, global_size, mysize, i, j, rc, start_loop, step;
|
||||
ptrdiff_t stride;
|
||||
|
||||
global_size = gsize_array[dim];
|
||||
|
||||
if (darg == MPI_DISTRIBUTE_DFLT_DARG)
|
||||
blksize = (global_size + nprocs - 1) / nprocs;
|
||||
else {
|
||||
blksize = darg;
|
||||
}
|
||||
|
||||
j = global_size - blksize*rank;
|
||||
mysize = blksize < j ? blksize : j;
|
||||
if (mysize < 0) mysize = 0;
|
||||
|
||||
if (MPI_ORDER_C == order) {
|
||||
start_loop = ndims - 1 ; step = -1;
|
||||
} else {
|
||||
start_loop = 0 ; step = 1;
|
||||
}
|
||||
|
||||
stride = orig_extent;
|
||||
if (dim == start_loop) {
|
||||
rc = ompi_ddt_create_contiguous(mysize, type_old, type_new);
|
||||
if (OMPI_SUCCESS != rc) return rc;
|
||||
} else {
|
||||
for (i = start_loop ; i != dim ; i += step) {
|
||||
stride *= gsize_array[i];
|
||||
}
|
||||
rc = ompi_ddt_create_hvector(mysize, 1, stride, type_old, type_new);
|
||||
if (OMPI_SUCCESS != rc) return rc;
|
||||
}
|
||||
|
||||
*st_offset = blksize * rank;
|
||||
/* in terms of no. of elements of type oldtype in this dimension */
|
||||
if (mysize == 0) *st_offset = 0;
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
cyclic(const int *gsize_array, int dim, int ndims, int nprocs,
|
||||
int rank, int darg, int order, ptrdiff_t orig_extent,
|
||||
ompi_datatype_t* type_old, ompi_datatype_t **type_new,
|
||||
ptrdiff_t *st_offset)
|
||||
{
|
||||
int blksize, i, blklens[2], st_index, end_index, local_size, rem, count, rc;
|
||||
ptrdiff_t stride, disps[2];
|
||||
ompi_datatype_t *type_tmp, *types[2];
|
||||
|
||||
if (darg == MPI_DISTRIBUTE_DFLT_DARG) {
|
||||
blksize = 1;
|
||||
} else {
|
||||
blksize = darg;
|
||||
}
|
||||
|
||||
st_index = rank * blksize;
|
||||
end_index = gsize_array[dim] - 1;
|
||||
|
||||
if (end_index < st_index) {
|
||||
local_size = 0;
|
||||
} else {
|
||||
local_size = ((end_index - st_index + 1)/(nprocs*blksize))*blksize;
|
||||
rem = (end_index - st_index + 1) % (nprocs*blksize);
|
||||
local_size += rem < blksize ? rem : blksize;
|
||||
}
|
||||
|
||||
count = local_size / blksize;
|
||||
rem = local_size % blksize;
|
||||
|
||||
stride = nprocs*blksize*orig_extent;
|
||||
if (order == MPI_ORDER_FORTRAN) {
|
||||
for (i=0; i<dim; i++) {
|
||||
stride *= gsize_array[i];
|
||||
}
|
||||
} else {
|
||||
for (i=ndims-1; i>dim; i--) {
|
||||
stride *= gsize_array[i];
|
||||
}
|
||||
}
|
||||
|
||||
rc = ompi_ddt_create_hvector(count, blksize, stride, type_old, type_new);
|
||||
if (OMPI_SUCCESS != rc) return rc;
|
||||
|
||||
if (rem) {
|
||||
/* if the last block is of size less than blksize, include
|
||||
it separately using MPI_Type_struct */
|
||||
|
||||
types[0] = *type_new;
|
||||
types[1] = type_old;
|
||||
disps[0] = 0;
|
||||
disps[1] = count*stride;
|
||||
blklens[0] = 1;
|
||||
blklens[1] = rem;
|
||||
|
||||
rc = ompi_ddt_create_struct(2, blklens, disps, types, &type_tmp);
|
||||
ompi_ddt_destroy(type_new);
|
||||
/* even in error condition, need to destroy type_new, so check
|
||||
for error after destroy. */
|
||||
if (OMPI_SUCCESS != rc) return rc;
|
||||
*type_new = type_tmp;
|
||||
}
|
||||
|
||||
/* need to set the UB for block-cyclic to work */
|
||||
types[0] = *type_new;
|
||||
types[1] = MPI_UB;
|
||||
disps[0] = 0;
|
||||
disps[1] = orig_extent;
|
||||
if (order == MPI_ORDER_FORTRAN) {
|
||||
for (i=0; i<=dim; i++) {
|
||||
disps[1] *= gsize_array[i];
|
||||
}
|
||||
} else {
|
||||
for (i=ndims-1; i>=dim; i--) {
|
||||
disps[1] *= gsize_array[i];
|
||||
}
|
||||
}
|
||||
blklens[0] = blklens[1] = 1;
|
||||
rc = ompi_ddt_create_struct(2, blklens, disps, types, &type_tmp);
|
||||
ompi_ddt_destroy(type_new);
|
||||
/* even in error condition, need to destroy type_new, so check
|
||||
for error after destroy. */
|
||||
if (OMPI_SUCCESS != rc) return rc;
|
||||
*type_new = type_tmp;
|
||||
|
||||
*st_offset = rank * blksize;
|
||||
/* in terms of no. of elements of type oldtype in this dimension */
|
||||
if (local_size == 0) *st_offset = 0;
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user