1
1

Implement MPI_TYPE_CREATE_DARRAY function. Works with MPICH2 darray-pack
test, Sun's darray test, and an internal LANL test code.  I would not
assume it will work properly on other codes, as I'm still not sure I
completely understand what the standard says this function is supposed to
do.

Refs trac:65

This commit was SVN r13967.

The following Trac tickets were found above:
  Ticket 65 --> https://svn.open-mpi.org/trac/ompi/ticket/65
Этот коммит содержится в:
Brian Barrett 2007-03-08 16:33:08 +00:00
родитель 01512d6950
Коммит e926bed69f
5 изменённых файлов: 263 добавлений и 110 удалений

2
NEWS
Просмотреть файл

@ -47,6 +47,8 @@ Trunk (not on release branches yet)
--> Expected 1.2.x
- Recognize zsh in rsh pls
--> Expected 1.2.x
- Implement MPI_TYPE_CREATE_DARRAY function
--> Expected 1.2.x
1.2

Просмотреть файл

@ -10,6 +10,8 @@
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2007 Los Alamos National Security, LLC. All rights
# reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
@ -41,7 +43,6 @@ libdatatype_reliable_la_CFLAGS = -DCHECKSUM $(AM_CFLAGS)
libdatatype_la_SOURCES = \
dt_add.c \
dt_create.c \
dt_create_array.c \
dt_create_dup.c \
dt_create_indexed.c \
dt_create_struct.c \

Просмотреть файл

@ -10,6 +10,8 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2006 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -160,14 +162,6 @@ OMPI_DECLSPEC int32_t ompi_ddt_create_indexed_block( int count, int bLength, con
OMPI_DECLSPEC int32_t ompi_ddt_create_struct( int count, const int* pBlockLength, const MPI_Aint* pDisp,
ompi_datatype_t* const* pTypes, ompi_datatype_t** newType );
OMPI_DECLSPEC int32_t ompi_ddt_create_resized( const ompi_datatype_t* oldType, MPI_Aint lb, MPI_Aint extent, ompi_datatype_t** newType );
OMPI_DECLSPEC int32_t ompi_ddt_create_subarray( int ndims, const int* pSizes,
const int* pSubSizes, const int* pStarts,
int order, const ompi_datatype_t* oldType, ompi_datatype_t** newType );
OMPI_DECLSPEC int32_t ompi_ddt_create_darray( int size, int rank, int ndims,
const int* pGSizes, const int *pDistrib,
const int* pDArgs, const int* pPSizes,
int order, const ompi_datatype_t* oldType,
ompi_datatype_t** newType );
OMPI_DECLSPEC int32_t ompi_ddt_add( ompi_datatype_t* pdtBase, const ompi_datatype_t* pdtAdd, uint32_t count,
ptrdiff_t disp, ptrdiff_t extent );

Просмотреть файл

@ -1,35 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2006 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "ompi/datatype/datatype.h"
/*
 * Placeholder for subarray datatype construction.
 *
 * None of the arguments are examined and *newType is never written;
 * the call unconditionally reports OMPI_ERR_NOT_IMPLEMENTED.
 */
int32_t
ompi_ddt_create_subarray( int ndims,
                          const int* pSizes,
                          const int* pSubSizes,
                          const int* pStarts,
                          int order,
                          const ompi_datatype_t* oldType,
                          ompi_datatype_t** newType )
{
    /* no implementation yet: always fail with the same code */
    return OMPI_ERR_NOT_IMPLEMENTED;
}
/*
 * Placeholder for distributed-array datatype construction.
 *
 * None of the arguments are examined and *newType is never written;
 * the call unconditionally reports OMPI_ERR_NOT_IMPLEMENTED.
 */
int32_t
ompi_ddt_create_darray( int size,
                        int rank,
                        int ndims,
                        const int* pGSizes,
                        const int *pDistrib,
                        const int* pDArgs,
                        const int* pPSizes,
                        int order,
                        const ompi_datatype_t* oldType,
                        ompi_datatype_t** newType )
{
    /* no implementation yet: always fail with the same code */
    return OMPI_ERR_NOT_IMPLEMENTED;
}

Просмотреть файл

@ -9,6 +9,8 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -31,35 +33,15 @@
static const char FUNC_NAME[] = "MPI_Type_create_darray";
static ompi_datatype_t* cyclic( int32_t darg, int32_t gsize, int32_t r, int32_t psize, ompi_datatype_t* oldtype )
{
int count, darg_last;
static int block(const int *array_of_gsizes, int dim, int ndims, int nprocs,
int rank, int darg, int order, ptrdiff_t orig_extent,
ompi_datatype_t *type_old, ompi_datatype_t **type_new,
ptrdiff_t *st_offset);
static int cyclic(const int *array_of_gsizes, int dim, int ndims, int nprocs,
int rank, int darg, int order, ptrdiff_t orig_extent,
ompi_datatype_t* type_old, ompi_datatype_t **type_new,
ptrdiff_t *st_offset);
{ /* compute the count */
int nblocks, left_over;
nblocks = (gsize + (darg - 1)) / darg;
count = nblocks / psize;
left_over = nblocks - count * psize;
if( r < left_over )
count++;
}
{ /* compute the darg_last */
int32_t num_in_last_cyclic;
if( 0 == (num_in_last_cyclic = gsize % (psize * darg)) ) {
darg_last = darg;
} else {
darg_last = num_in_last_cyclic - darg * r;
if( darg_last > darg )
darg_last = darg;
if( darg_last <= 0 )
darg_last = darg;
}
}
return &ompi_mpi_datatype_null;
}
int MPI_Type_create_darray(int size,
int rank,
@ -73,8 +55,10 @@ int MPI_Type_create_darray(int size,
MPI_Datatype *newtype)
{
int32_t i, darg_i, step, end_loop, *r;
ompi_datatype_t* temptype;
ompi_datatype_t *lastType;
ptrdiff_t orig_extent, *st_offsets = NULL;
int i, start_loop, end_loop, step;
int *coords = NULL, rc = OMPI_SUCCESS;
if (MPI_PARAM_CHECK) {
int prod_psize = 1;
@ -93,7 +77,8 @@ int MPI_Type_create_darray(int size,
return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, FUNC_NAME);
}
for( i = 0; i < ndims; i++ ) {
if( (MPI_DISTRIBUTE_BLOCK != distrib_array[i]) && (MPI_DISTRIBUTE_CYCLIC != distrib_array[i]) &&
if( (MPI_DISTRIBUTE_BLOCK != distrib_array[i]) &&
(MPI_DISTRIBUTE_CYCLIC != distrib_array[i]) &&
(MPI_DISTRIBUTE_NONE != distrib_array[i]) ) {
return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, FUNC_NAME);
} else if( (gsize_array[i] < 1) || (psize_array[i] < 0) ||
@ -110,50 +95,112 @@ int MPI_Type_create_darray(int size,
if( prod_psize != size )
return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, FUNC_NAME);
}
if( ndims < 1 ) {
*newtype = &ompi_mpi_datatype_null;
/* speedy corner case */
if (ndims < 1) {
/* Don't just return MPI_DATATYPE_NULL as that can't be
MPI_TYPE_FREE()ed, and that seems bad */
*newtype = ompi_ddt_create(0);
ompi_ddt_add(*newtype, &ompi_mpi_datatype_null, 0, 0, 0);
return MPI_SUCCESS;
}
r = (int*)malloc( ndims * sizeof(int) );
rc = ompi_ddt_type_extent(oldtype, &orig_extent);
if (MPI_SUCCESS != rc) goto cleanup;
/* calculate position in grid using row-major ordering */
{
int t_rank = rank;
int t_size = size; /* if prod(psize_array) != size it's a user ERROR */
for( i = 0; i < ndims; i++ ) {
t_size = t_size / psize_array[i];
r[i] = t_rank / t_size;
t_rank = t_rank % t_size;
int tmp_rank = rank, procs = size;
coords = (int *) malloc(ndims * sizeof(int));
for (i = 0 ; i < ndims ; i++) {
procs = procs / psize_array[i];
coords[i] = tmp_rank / procs;
tmp_rank = tmp_rank % procs;
}
}
if( MPI_ORDER_FORTRAN == order ) {
i = 0;
step = 1;
end_loop = ndims;
st_offsets = (ptrdiff_t *) malloc(ndims * sizeof(ptrdiff_t));
/* Duplicate the type here to 1) deal with constness without
casting and 2) eliminate the need for a conditional destroy below.
Lame, yes. But cleaner code all around. */
rc = ompi_ddt_duplicate(oldtype, &lastType);
if (OMPI_SUCCESS != rc) goto cleanup;
/* figure out ordering issues */
if (MPI_ORDER_C == order) {
start_loop = ndims - 1 ; step = -1; end_loop = -1;
} else {
i = ndims - 1;
step = -1;
end_loop = -1;
start_loop = 0 ; step = 1; end_loop = ndims;
}
do {
darg_i = darg_array[i];
if( distrib_array[i] == MPI_DISTRIBUTE_BLOCK ) {
if( darg_array[i] == MPI_DISTRIBUTE_DFLT_DARG )
darg_i = (gsize_array[i] + psize_array[i] - 1) / psize_array[i];
} else if( distrib_array[i] == MPI_DISTRIBUTE_NONE ) {
darg_i = gsize_array[i];
} else if( distrib_array[i] == MPI_DISTRIBUTE_CYCLIC ) {
if( darg_array[i] == MPI_DISTRIBUTE_DFLT_DARG )
darg_i = 1;
}
*newtype = cyclic( darg_i, gsize_array[i], r[i], psize_array[i], temptype );
ompi_ddt_destroy( &temptype );
temptype = *newtype;
i += step;
} while( i != end_loop );
/* Build up array */
for (i = start_loop ; i != end_loop; i += step) {
int nprocs, rank;
free( r );
switch(distrib_array[i]) {
case MPI_DISTRIBUTE_BLOCK:
rc = block(gsize_array, i, ndims, psize_array[i], coords[i],
darg_array[i], order, orig_extent,
lastType, newtype, st_offsets+i);
break;
case MPI_DISTRIBUTE_CYCLIC:
rc = cyclic(gsize_array, i, ndims, psize_array[i], coords[i],
darg_array[i], order, orig_extent,
lastType, newtype, st_offsets+i);
break;
case MPI_DISTRIBUTE_NONE:
/* treat it as a block distribution on 1 process */
if (order == MPI_ORDER_C) {
nprocs = psize_array[i]; rank = coords[i];
} else {
nprocs = 1; rank = 0;
}
rc = block(gsize_array, i, ndims, nprocs, rank,
MPI_DISTRIBUTE_DFLT_DARG, order, orig_extent,
lastType, newtype, st_offsets+i);
break;
default:
rc = MPI_ERR_ARG;
}
ompi_ddt_destroy(&lastType);
/* need to destroy the old type even in error condition, so
don't check return code from above until after cleanup. */
if (MPI_SUCCESS != rc) goto cleanup;
lastType = *newtype;
}
/* set displacement and UB correctly. Use struct instead of
resized for same reason as subarray */
{
ptrdiff_t displs[3];
ompi_datatype_t *types[3];
int tmp_size, blength[3] = { 1, 1, 1};
displs[1] = st_offsets[start_loop];
tmp_size = 1;
for (i = start_loop + step ; i != end_loop ; i += step) {
tmp_size *= gsize_array[i - step];
displs[1] += tmp_size * st_offsets[i];
}
displs[0] = 0;
displs[1] *= orig_extent;
displs[2] = orig_extent;
for (i = 0 ; i < ndims ; i++) {
displs[2] *= gsize_array[i];
}
types[0] = MPI_LB; types[1] = lastType; types[2] = MPI_UB;
rc = ompi_ddt_create_struct(3, blength, displs, types, newtype);
ompi_ddt_destroy(&lastType);
/* need to destroy the old type even in error condition, so
don't check return code from above until after cleanup. */
if (MPI_SUCCESS != rc) goto cleanup;
}
{
int* a_i[8];
@ -171,5 +218,149 @@ int MPI_Type_create_darray(int size,
MPI_COMBINER_DARRAY );
}
return MPI_SUCCESS;
cleanup:
if (NULL != st_offsets) free(st_offsets);
if (NULL != coords) free(coords);
OMPI_ERRHANDLER_RETURN(rc, MPI_COMM_WORLD, rc, FUNC_NAME);
}
/*
 * Create the datatype for one block-distributed dimension.
 *
 * gsize_array  global array size in every dimension
 * dim          dimension being processed
 * ndims        number of dimensions in gsize_array
 * nprocs       number of processes along dimension dim
 * rank         coordinate of this process along dimension dim
 * darg         block size, or MPI_DISTRIBUTE_DFLT_DARG to use
 *              ceil(gsize_array[dim] / nprocs)
 * order        MPI_ORDER_C walks the dimensions from ndims-1 downwards,
 *              any other value walks them from 0 upwards
 * orig_extent  extent of the original element type
 * type_old     type to replicate along this dimension
 * type_new     receives the created type
 * st_offset    receives the starting offset of the local block, counted
 *              in elements of the old type along this dimension
 *
 * Returns OMPI_SUCCESS, or the error code reported by the datatype
 * constructor (in which case *st_offset is left untouched).
 */
static int
block(const int *gsize_array, int dim, int ndims, int nprocs,
      int rank, int darg, int order, ptrdiff_t orig_extent,
      ompi_datatype_t *type_old, ompi_datatype_t **type_new,
      ptrdiff_t *st_offset)
{
    const int dim_extent = gsize_array[dim];
    const int first_dim = (MPI_ORDER_C == order) ? (ndims - 1) : 0;
    const int direction = (MPI_ORDER_C == order) ? -1 : 1;
    int chunk, remaining, local_count, d, ret;
    ptrdiff_t byte_stride = orig_extent;

    /* block size: explicit, or an even split rounded up */
    chunk = (MPI_DISTRIBUTE_DFLT_DARG == darg)
        ? (dim_extent + nprocs - 1) / nprocs
        : darg;

    /* elements left from this rank's start; clamp to [0, chunk] */
    remaining = dim_extent - chunk * rank;
    local_count = (chunk < remaining) ? chunk : remaining;
    if (local_count < 0) {
        local_count = 0;
    }

    if (first_dim == dim) {
        /* first dimension visited in this ordering: elements are adjacent */
        ret = ompi_ddt_create_contiguous(local_count, type_old, type_new);
    } else {
        /* stride spans every dimension visited before this one */
        for (d = first_dim; d != dim; d += direction) {
            byte_stride *= gsize_array[d];
        }
        ret = ompi_ddt_create_hvector(local_count, 1, byte_stride,
                                      type_old, type_new);
    }
    if (OMPI_SUCCESS != ret) {
        return ret;
    }

    /* an empty local block always starts at offset 0 */
    *st_offset = (0 == local_count) ? 0 : chunk * rank;

    return OMPI_SUCCESS;
}
/*
 * Create the datatype for one cyclically (block-cyclic) distributed
 * dimension.
 *
 * gsize_array  global array size in every dimension
 * dim          dimension being processed
 * ndims        number of dimensions in gsize_array
 * nprocs       number of processes along dimension dim
 * rank         coordinate of this process along dimension dim
 * darg         block size, or MPI_DISTRIBUTE_DFLT_DARG for a block size of 1
 * order        MPI_ORDER_FORTRAN walks the dimensions from 0 upwards,
 *              any other value walks them from ndims-1 downwards
 * orig_extent  extent of the original element type
 * type_old     type to replicate along this dimension
 * type_new     receives the created type
 * st_offset    receives the starting offset of the first local block,
 *              counted in elements of the old type along this dimension
 *
 * Returns OMPI_SUCCESS on success, MPI_ERR_ARG when the block size or
 * the process count is not positive, or the error code reported by a
 * datatype constructor.
 */
static int
cyclic(const int *gsize_array, int dim, int ndims, int nprocs,
       int rank, int darg, int order, ptrdiff_t orig_extent,
       ompi_datatype_t* type_old, ompi_datatype_t **type_new,
       ptrdiff_t *st_offset)
{
    int blksize, i, blklens[2], st_index, end_index, local_size, rem, count, rc;
    ptrdiff_t stride, disps[2];
    ompi_datatype_t *type_tmp, *types[2];

    if (darg == MPI_DISTRIBUTE_DFLT_DARG) {
        blksize = 1;
    } else {
        blksize = darg;
    }

    /* blksize and nprocs*blksize are used as divisors below; a zero
       (or negative) value would divide by zero or give a meaningless
       layout, so reject it before doing any arithmetic. */
    if (blksize <= 0 || nprocs <= 0) {
        return MPI_ERR_ARG;
    }

    /* index range of this dimension that starts at our first block */
    st_index = rank * blksize;
    end_index = gsize_array[dim] - 1;

    if (end_index < st_index) {
        local_size = 0;
    } else {
        /* one block per complete cycle of nprocs*blksize elements,
           plus whatever part of our block fits in the final cycle */
        local_size = ((end_index - st_index + 1)/(nprocs*blksize))*blksize;
        rem = (end_index - st_index + 1) % (nprocs*blksize);
        local_size += rem < blksize ? rem : blksize;
    }

    count = local_size / blksize;   /* number of full local blocks */
    rem = local_size % blksize;     /* size of a trailing partial block */

    /* distance between two consecutive local blocks; widen before
       multiplying so the int product nprocs*blksize cannot overflow */
    stride = (ptrdiff_t) nprocs * blksize * orig_extent;
    if (order == MPI_ORDER_FORTRAN) {
        for (i=0; i<dim; i++) {
            stride *= gsize_array[i];
        }
    } else {
        for (i=ndims-1; i>dim; i--) {
            stride *= gsize_array[i];
        }
    }

    rc = ompi_ddt_create_hvector(count, blksize, stride, type_old, type_new);
    if (OMPI_SUCCESS != rc) return rc;

    if (rem) {
        /* if the last block is of size less than blksize, include
           it separately using MPI_Type_struct */
        types[0] = *type_new;
        types[1] = type_old;
        disps[0] = 0;
        disps[1] = count*stride;
        blklens[0] = 1;
        blklens[1] = rem;

        rc = ompi_ddt_create_struct(2, blklens, disps, types, &type_tmp);
        ompi_ddt_destroy(type_new);
        /* even in error condition, need to destroy type_new, so check
           for error after destroy. */
        if (OMPI_SUCCESS != rc) return rc;
        *type_new = type_tmp;
    }

    /* need to set the UB for block-cyclic to work: place MPI_UB at
       orig_extent times the global sizes of every dimension visited
       up to and including this one */
    types[0] = *type_new;
    types[1] = MPI_UB;
    disps[0] = 0;
    disps[1] = orig_extent;
    if (order == MPI_ORDER_FORTRAN) {
        for (i=0; i<=dim; i++) {
            disps[1] *= gsize_array[i];
        }
    } else {
        for (i=ndims-1; i>=dim; i--) {
            disps[1] *= gsize_array[i];
        }
    }
    blklens[0] = blklens[1] = 1;

    rc = ompi_ddt_create_struct(2, blklens, disps, types, &type_tmp);
    ompi_ddt_destroy(type_new);
    /* even in error condition, need to destroy type_new, so check
       for error after destroy. */
    if (OMPI_SUCCESS != rc) return rc;
    *type_new = type_tmp;

    *st_offset = rank * blksize;
    /* in terms of no. of elements of type oldtype in this dimension */
    if (local_size == 0) *st_offset = 0;

    return OMPI_SUCCESS;
}