darray and subarray are now first class citizens in Open MPI. They can be stored
in packed form and reloaded, as any other type (this is mainly for one sided). This commit was SVN r24480.
Этот коммит содержится в:
родитель
95f4e0b502
Коммит
79b13f36ba
@ -3,7 +3,7 @@
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2009 The University of Tennessee and The University
|
||||
# Copyright (c) 2004-2011 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
@ -35,6 +35,8 @@ libdatatype_la_SOURCES = \
|
||||
ompi_datatype_create_indexed.c \
|
||||
ompi_datatype_create_struct.c \
|
||||
ompi_datatype_create_vector.c \
|
||||
ompi_datatype_create_darray.c \
|
||||
ompi_datatype_create_subarray.c \
|
||||
ompi_datatype_external32.c \
|
||||
ompi_datatype_match_size.c \
|
||||
ompi_datatype_module.c \
|
||||
|
@ -209,6 +209,13 @@ OMPI_DECLSPEC int32_t ompi_datatype_create_indexed_block( int count, int bLength
|
||||
const ompi_datatype_t* oldType, ompi_datatype_t** newType );
|
||||
OMPI_DECLSPEC int32_t ompi_datatype_create_struct( int count, const int* pBlockLength, const OPAL_PTRDIFF_TYPE* pDisp,
|
||||
ompi_datatype_t* const* pTypes, ompi_datatype_t** newType );
|
||||
OMPI_DECLSPEC int32_t ompi_datatype_create_darray( int size, int rank, int ndims, int const* gsize_array,
|
||||
int const* distrib_array, int const* darg_array,
|
||||
int const* psize_array, int order, const ompi_datatype_t* oldtype,
|
||||
ompi_datatype_t** newtype);
|
||||
OMPI_DECLSPEC int32_t ompi_datatype_create_subarray(int ndims, int const* size_array, int const* subsize_array,
|
||||
int const* start_array, int order,
|
||||
const ompi_datatype_t* oldtype, ompi_datatype_t** newtype);
|
||||
static inline int32_t
|
||||
ompi_datatype_create_resized( const ompi_datatype_t* oldType, OPAL_PTRDIFF_TYPE lb, OPAL_PTRDIFF_TYPE extent, ompi_datatype_t** newType )
|
||||
{
|
||||
|
@ -705,48 +705,26 @@ static ompi_datatype_t* __ompi_datatype_create_from_args( int32_t* i, MPI_Aint*
|
||||
break;
|
||||
/******************************************************************/
|
||||
case MPI_COMBINER_SUBARRAY:
|
||||
/*pos = 1;
|
||||
pArgs->i[0] = i[0][0];
|
||||
memcpy( pArgs->i + pos, i[1], pArgs->i[0] * sizeof(int) );
|
||||
pos += pArgs->i[0];
|
||||
memcpy( pArgs->i + pos, i[2], pArgs->i[0] * sizeof(int) );
|
||||
pos += pArgs->i[0];
|
||||
memcpy( pArgs->i + pos, i[3], pArgs->i[0] * sizeof(int) );
|
||||
pos += pArgs->i[0];
|
||||
pArgs->i[pos] = i[4][0];
|
||||
*/
|
||||
#if 0
|
||||
ompi_datatype_create_subarray( i[0], &i[1 + 0 * i[0]], &i[1 + 1 * i[0]],
|
||||
&i[1 + 2 * i[0]], i[1 + 3 * i[0]],
|
||||
d[0], &datatype );
|
||||
{
|
||||
int* a_i[5]; a_i[0] = &i[0]; a_i[1] = &i[1 + 0 * i[0]]; a_i[2] = &i[1 + 1 * i[0]]; a_i[3] = &i[1 + 2 * i[0]];
|
||||
int* a_i[5]; a_i[0] = &i[0]; a_i[1] = &i[1 + 0 * i[0]]; a_i[2] = &i[1 + 1 * i[0]]; a_i[3] = &i[1 + 2 * i[0]]; a_i[4] = &i[1 + 3 * i[0]];
|
||||
ompi_datatype_set_args( datatype, 3 * i[0] + 2, a_i, 0, NULL, 1, d, MPI_COMBINER_SUBARRAY);
|
||||
}
|
||||
#endif
|
||||
break;
|
||||
/******************************************************************/
|
||||
case MPI_COMBINER_DARRAY:
|
||||
/*pos = 3;
|
||||
pArgs->i[0] = i[0][0];
|
||||
pArgs->i[1] = i[1][0];
|
||||
pArgs->i[2] = i[2][0];
|
||||
|
||||
memcpy( pArgs->i + pos, i[3], i[2][0] * sizeof(int) );
|
||||
pos += i[2][0];
|
||||
memcpy( pArgs->i + pos, i[4], i[2][0] * sizeof(int) );
|
||||
pos += i[2][0];
|
||||
memcpy( pArgs->i + pos, i[5], i[2][0] * sizeof(int) );
|
||||
pos += i[2][0];
|
||||
memcpy( pArgs->i + pos, i[6], i[2][0] * sizeof(int) );
|
||||
pos += i[2][0];
|
||||
pArgs->i[pos] = i[7][0];
|
||||
*/
|
||||
#if 0
|
||||
ompi_datatype_create_darray( i[0] /* size */, i[1] /* rank */, i[2] /* ndims */,
|
||||
&i[3 + 0 * i[0]], &i[3 + 1 * i[0]],
|
||||
&i[3 + 2 * i[0]], &i[3 + 3 * i[0]],
|
||||
i[3 + 4 * i[0]], d[0], &datatype );
|
||||
{
|
||||
int* a_i[8]; a_i[0] = &i[0]; a_i[1] = &i[1]; a_i[2] = &i[2];
|
||||
a_i[3] = &i[1 + 0 * i[0]]; a_i[4] = &i[1 + 1 * i[0]]; a_i[5] = &i[1 + 2 * i[0]];
|
||||
a_i[6] = &i[1 + 3 * i[0]]; a_i[7] = &i[1 + 4 * i[0]];
|
||||
a_i[3] = &i[3 + 0 * i[0]]; a_i[4] = &i[3 + 1 * i[0]]; a_i[5] = &i[3 + 2 * i[0]];
|
||||
a_i[6] = &i[3 + 3 * i[0]]; a_i[7] = &i[3 + 4 * i[0]];
|
||||
ompi_datatype_set_args( datatype, 4 * i[0] + 4,a_i, 0, NULL, 1, d, MPI_COMBINER_DARRAY);
|
||||
}
|
||||
#endif
|
||||
break;
|
||||
/******************************************************************/
|
||||
case MPI_COMBINER_F90_REAL:
|
||||
|
295
ompi/datatype/ompi_datatype_create_darray.c
Обычный файл
295
ompi/datatype/ompi_datatype_create_darray.c
Обычный файл
@ -0,0 +1,295 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; -*- */
|
||||
/*
|
||||
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2011 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright (c) 2009 Oak Ridge National Labs. All rights reserved.
|
||||
* Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include "ompi/constants.h"
|
||||
#include "ompi/datatype/ompi_datatype.h"
|
||||
|
||||
static int
|
||||
block(const int *gsize_array, int dim, int ndims, int nprocs,
|
||||
int rank, int darg, int order, ptrdiff_t orig_extent,
|
||||
ompi_datatype_t *type_old, ompi_datatype_t **type_new,
|
||||
ptrdiff_t *st_offset)
|
||||
{
|
||||
int blksize, global_size, mysize, i, j, rc, start_loop, step;
|
||||
ptrdiff_t stride;
|
||||
|
||||
global_size = gsize_array[dim];
|
||||
|
||||
if (darg == MPI_DISTRIBUTE_DFLT_DARG)
|
||||
blksize = (global_size + nprocs - 1) / nprocs;
|
||||
else {
|
||||
blksize = darg;
|
||||
}
|
||||
|
||||
j = global_size - blksize*rank;
|
||||
mysize = blksize < j ? blksize : j;
|
||||
if (mysize < 0) mysize = 0;
|
||||
|
||||
if (MPI_ORDER_C == order) {
|
||||
start_loop = ndims - 1 ; step = -1;
|
||||
} else {
|
||||
start_loop = 0 ; step = 1;
|
||||
}
|
||||
|
||||
stride = orig_extent;
|
||||
if (dim == start_loop) {
|
||||
rc = ompi_datatype_create_contiguous(mysize, type_old, type_new);
|
||||
if (OMPI_SUCCESS != rc) return rc;
|
||||
} else {
|
||||
for (i = start_loop ; i != dim ; i += step) {
|
||||
stride *= gsize_array[i];
|
||||
}
|
||||
rc = ompi_datatype_create_hvector(mysize, 1, stride, type_old, type_new);
|
||||
if (OMPI_SUCCESS != rc) return rc;
|
||||
}
|
||||
|
||||
*st_offset = blksize * rank;
|
||||
/* in terms of no. of elements of type oldtype in this dimension */
|
||||
if (mysize == 0) *st_offset = 0;
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
cyclic(const int *gsize_array, int dim, int ndims, int nprocs,
|
||||
int rank, int darg, int order, ptrdiff_t orig_extent,
|
||||
ompi_datatype_t* type_old, ompi_datatype_t **type_new,
|
||||
ptrdiff_t *st_offset)
|
||||
{
|
||||
int blksize, i, blklens[2], st_index, end_index, local_size, rem, count, rc;
|
||||
ptrdiff_t stride, disps[2];
|
||||
ompi_datatype_t *type_tmp, *types[2];
|
||||
|
||||
if (darg == MPI_DISTRIBUTE_DFLT_DARG) {
|
||||
blksize = 1;
|
||||
} else {
|
||||
blksize = darg;
|
||||
}
|
||||
|
||||
st_index = rank * blksize;
|
||||
end_index = gsize_array[dim] - 1;
|
||||
|
||||
if (end_index < st_index) {
|
||||
local_size = 0;
|
||||
} else {
|
||||
local_size = ((end_index - st_index + 1)/(nprocs*blksize))*blksize;
|
||||
rem = (end_index - st_index + 1) % (nprocs*blksize);
|
||||
local_size += rem < blksize ? rem : blksize;
|
||||
}
|
||||
|
||||
count = local_size / blksize;
|
||||
rem = local_size % blksize;
|
||||
|
||||
stride = nprocs*blksize*orig_extent;
|
||||
if (order == MPI_ORDER_FORTRAN) {
|
||||
for (i=0; i<dim; i++) {
|
||||
stride *= gsize_array[i];
|
||||
}
|
||||
} else {
|
||||
for (i=ndims-1; i>dim; i--) {
|
||||
stride *= gsize_array[i];
|
||||
}
|
||||
}
|
||||
|
||||
rc = ompi_datatype_create_hvector(count, blksize, stride, type_old, type_new);
|
||||
if (OMPI_SUCCESS != rc) return rc;
|
||||
|
||||
if (rem) {
|
||||
/* if the last block is of size less than blksize, include
|
||||
it separately using MPI_Type_struct */
|
||||
|
||||
types[0] = *type_new;
|
||||
types[1] = type_old;
|
||||
disps[0] = 0;
|
||||
disps[1] = count*stride;
|
||||
blklens[0] = 1;
|
||||
blklens[1] = rem;
|
||||
|
||||
rc = ompi_datatype_create_struct(2, blklens, disps, types, &type_tmp);
|
||||
ompi_datatype_destroy(type_new);
|
||||
/* even in error condition, need to destroy type_new, so check
|
||||
for error after destroy. */
|
||||
if (OMPI_SUCCESS != rc) return rc;
|
||||
*type_new = type_tmp;
|
||||
}
|
||||
|
||||
/* need to set the UB for block-cyclic to work */
|
||||
types[0] = *type_new;
|
||||
types[1] = MPI_UB;
|
||||
disps[0] = 0;
|
||||
disps[1] = orig_extent;
|
||||
if (order == MPI_ORDER_FORTRAN) {
|
||||
for (i=0; i<=dim; i++) {
|
||||
disps[1] *= gsize_array[i];
|
||||
}
|
||||
} else {
|
||||
for (i=ndims-1; i>=dim; i--) {
|
||||
disps[1] *= gsize_array[i];
|
||||
}
|
||||
}
|
||||
blklens[0] = blklens[1] = 1;
|
||||
rc = ompi_datatype_create_struct(2, blklens, disps, types, &type_tmp);
|
||||
ompi_datatype_destroy(type_new);
|
||||
/* even in error condition, need to destroy type_new, so check
|
||||
for error after destroy. */
|
||||
if (OMPI_SUCCESS != rc) return rc;
|
||||
*type_new = type_tmp;
|
||||
|
||||
*st_offset = rank * blksize;
|
||||
/* in terms of no. of elements of type oldtype in this dimension */
|
||||
if (local_size == 0) *st_offset = 0;
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
int32_t ompi_datatype_create_darray(int size,
|
||||
int rank,
|
||||
int ndims,
|
||||
int const* gsize_array,
|
||||
int const* distrib_array,
|
||||
int const* darg_array,
|
||||
int const* psize_array,
|
||||
int order,
|
||||
const ompi_datatype_t* oldtype,
|
||||
ompi_datatype_t** newtype)
|
||||
{
|
||||
ompi_datatype_t *lastType;
|
||||
ptrdiff_t orig_extent, *st_offsets = NULL;
|
||||
int i, start_loop, end_loop, step;
|
||||
int *coords = NULL, rc = OMPI_SUCCESS;
|
||||
|
||||
/* speedy corner case */
|
||||
if (ndims < 1) {
|
||||
/* Don't just return MPI_DATATYPE_NULL as that can't be
|
||||
MPI_TYPE_FREE()ed, and that seems bad */
|
||||
*newtype = ompi_datatype_create(0);
|
||||
ompi_datatype_add(*newtype, &ompi_mpi_datatype_null.dt, 0, 0, 0);
|
||||
return MPI_SUCCESS;
|
||||
}
|
||||
|
||||
rc = ompi_datatype_type_extent(oldtype, &orig_extent);
|
||||
if (MPI_SUCCESS != rc) goto cleanup;
|
||||
|
||||
/* calculate position in grid using row-major ordering */
|
||||
{
|
||||
int tmp_rank = rank, procs = size;
|
||||
|
||||
coords = (int *) malloc(ndims * sizeof(int));
|
||||
for (i = 0 ; i < ndims ; i++) {
|
||||
procs = procs / psize_array[i];
|
||||
coords[i] = tmp_rank / procs;
|
||||
tmp_rank = tmp_rank % procs;
|
||||
}
|
||||
}
|
||||
|
||||
st_offsets = (ptrdiff_t *) malloc(ndims * sizeof(ptrdiff_t));
|
||||
|
||||
/* duplicate type to here to 1) deal with constness without
|
||||
casting and 2) eliminate need to for conditional destroy below.
|
||||
Lame, yes. But cleaner code all around. */
|
||||
rc = ompi_datatype_duplicate(oldtype, &lastType);
|
||||
if (OMPI_SUCCESS != rc) goto cleanup;
|
||||
|
||||
/* figure out ordering issues */
|
||||
if (MPI_ORDER_C == order) {
|
||||
start_loop = ndims - 1 ; step = -1; end_loop = -1;
|
||||
} else {
|
||||
start_loop = 0 ; step = 1; end_loop = ndims;
|
||||
}
|
||||
|
||||
/* Build up array */
|
||||
for (i = start_loop ; i != end_loop; i += step) {
|
||||
int nprocs, tmp_rank;
|
||||
|
||||
switch(distrib_array[i]) {
|
||||
case MPI_DISTRIBUTE_BLOCK:
|
||||
rc = block(gsize_array, i, ndims, psize_array[i], coords[i],
|
||||
darg_array[i], order, orig_extent,
|
||||
lastType, newtype, st_offsets+i);
|
||||
break;
|
||||
case MPI_DISTRIBUTE_CYCLIC:
|
||||
rc = cyclic(gsize_array, i, ndims, psize_array[i], coords[i],
|
||||
darg_array[i], order, orig_extent,
|
||||
lastType, newtype, st_offsets+i);
|
||||
break;
|
||||
case MPI_DISTRIBUTE_NONE:
|
||||
/* treat it as a block distribution on 1 process */
|
||||
if (order == MPI_ORDER_C) {
|
||||
nprocs = psize_array[i]; tmp_rank = coords[i];
|
||||
} else {
|
||||
nprocs = 1; tmp_rank = 0;
|
||||
}
|
||||
|
||||
rc = block(gsize_array, i, ndims, nprocs, tmp_rank,
|
||||
MPI_DISTRIBUTE_DFLT_DARG, order, orig_extent,
|
||||
lastType, newtype, st_offsets+i);
|
||||
break;
|
||||
default:
|
||||
rc = MPI_ERR_ARG;
|
||||
}
|
||||
ompi_datatype_destroy(&lastType);
|
||||
/* need to destroy the old type even in error condition, so
|
||||
don't check return code from above until after cleanup. */
|
||||
if (MPI_SUCCESS != rc) goto cleanup;
|
||||
lastType = *newtype;
|
||||
}
|
||||
|
||||
|
||||
/* set displacement and UB correctly. Use struct instead of
|
||||
resized for same reason as subarray */
|
||||
{
|
||||
ptrdiff_t displs[3];
|
||||
ompi_datatype_t *types[3];
|
||||
int tmp_size, blength[3] = { 1, 1, 1};
|
||||
|
||||
displs[1] = st_offsets[start_loop];
|
||||
tmp_size = 1;
|
||||
for (i = start_loop + step ; i != end_loop ; i += step) {
|
||||
tmp_size *= gsize_array[i - step];
|
||||
displs[1] += tmp_size * st_offsets[i];
|
||||
}
|
||||
|
||||
displs[0] = 0;
|
||||
displs[1] *= orig_extent;
|
||||
displs[2] = orig_extent;
|
||||
for (i = 0 ; i < ndims ; i++) {
|
||||
displs[2] *= gsize_array[i];
|
||||
}
|
||||
types[0] = MPI_LB; types[1] = lastType; types[2] = MPI_UB;
|
||||
|
||||
rc = ompi_datatype_create_struct(3, blength, displs, types, newtype);
|
||||
ompi_datatype_destroy(&lastType);
|
||||
/* need to destroy the old type even in error condition, so
|
||||
don't check return code from above until after cleanup. */
|
||||
if (MPI_SUCCESS != rc) goto cleanup;
|
||||
}
|
||||
|
||||
cleanup:
|
||||
if (NULL != st_offsets) free(st_offsets);
|
||||
if (NULL != coords) free(coords);
|
||||
return OMPI_SUCCESS;
|
||||
}
|
104
ompi/datatype/ompi_datatype_create_subarray.c
Обычный файл
104
ompi/datatype/ompi_datatype_create_subarray.c
Обычный файл
@ -0,0 +1,104 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; -*- */
|
||||
/*
|
||||
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2011 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright (c) 2009 Oak Ridge National Labs. All rights reserved.
|
||||
* Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include "ompi/constants.h"
|
||||
#include "ompi/datatype/ompi_datatype.h"
|
||||
|
||||
int32_t ompi_datatype_create_subarray(int ndims,
|
||||
int const* size_array,
|
||||
int const* subsize_array,
|
||||
int const* start_array,
|
||||
int order,
|
||||
const ompi_datatype_t* oldtype,
|
||||
ompi_datatype_t** newtype)
|
||||
{
|
||||
MPI_Datatype last_type;
|
||||
int32_t i, step, end_loop;
|
||||
MPI_Aint size, displ, extent;
|
||||
|
||||
ompi_datatype_type_extent( oldtype, &extent );
|
||||
|
||||
/* If the ndims is zero then return the NULL datatype */
|
||||
if( ndims < 2 ) {
|
||||
if( 0 == ndims ) {
|
||||
*newtype = &ompi_mpi_datatype_null.dt;
|
||||
return MPI_SUCCESS;
|
||||
}
|
||||
ompi_datatype_create_contiguous( subsize_array[0], oldtype, &last_type );
|
||||
size = size_array[0];
|
||||
displ = start_array[0];
|
||||
goto replace_subarray_type;
|
||||
}
|
||||
|
||||
if( MPI_ORDER_C == order ) {
|
||||
i = ndims - 1;
|
||||
step = -1;
|
||||
end_loop = -1;
|
||||
} else {
|
||||
i = 0;
|
||||
step = 1;
|
||||
end_loop = ndims;
|
||||
}
|
||||
|
||||
/* As we know that the ndims is at least 1 we can start by creating the
|
||||
* first dimension data outside the loop, such that we dont have to create
|
||||
* a duplicate of the oldtype just to be able to free it.
|
||||
*/
|
||||
ompi_datatype_create_vector( subsize_array[i+step], subsize_array[i], size_array[i],
|
||||
oldtype, newtype );
|
||||
|
||||
last_type = *newtype;
|
||||
size = size_array[i] * size_array[i+step];
|
||||
displ = start_array[i] + start_array[i+step] * size_array[i];
|
||||
for( i += 2 * step; i != end_loop; i += step ) {
|
||||
ompi_datatype_create_hvector( subsize_array[i], 1, size * extent,
|
||||
last_type, newtype );
|
||||
ompi_datatype_destroy( &last_type );
|
||||
displ += size * start_array[i];
|
||||
size *= size_array[i];
|
||||
last_type = *newtype;
|
||||
}
|
||||
|
||||
replace_subarray_type:
|
||||
/**
|
||||
* We cannot use resized here. Resized will only set the soft lb and ub markers
|
||||
* without moving the real data inside. What we need is to force the displacement
|
||||
* of the data create upward to the right position AND set the LB and UB. A type
|
||||
* struct is the function we need.
|
||||
*/
|
||||
{
|
||||
MPI_Aint displs[3];
|
||||
MPI_Datatype types[3];
|
||||
int blength[3] = { 1, 1, 1 };
|
||||
|
||||
displs[0] = 0; displs[1] = displ * extent; displs[2] = size * extent;
|
||||
types[0] = MPI_LB; types[1] = last_type; types[2] = MPI_UB;
|
||||
ompi_datatype_create_struct( 3, blength, displs, types, newtype );
|
||||
}
|
||||
ompi_datatype_destroy( &last_type );
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
@ -2,7 +2,7 @@
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2011 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2008 High Performance Computing Center Stuttgart,
|
||||
@ -38,16 +38,6 @@
|
||||
|
||||
static const char FUNC_NAME[] = "MPI_Type_create_darray";
|
||||
|
||||
static int block(const int *array_of_gsizes, int dim, int ndims, int nprocs,
|
||||
int rank, int darg, int order, ptrdiff_t orig_extent,
|
||||
ompi_datatype_t *type_old, ompi_datatype_t **type_new,
|
||||
ptrdiff_t *st_offset);
|
||||
static int cyclic(const int *array_of_gsizes, int dim, int ndims, int nprocs,
|
||||
int rank, int darg, int order, ptrdiff_t orig_extent,
|
||||
ompi_datatype_t* type_old, ompi_datatype_t **type_new,
|
||||
ptrdiff_t *st_offset);
|
||||
|
||||
|
||||
int MPI_Type_create_darray(int size,
|
||||
int rank,
|
||||
int ndims,
|
||||
@ -60,10 +50,7 @@ int MPI_Type_create_darray(int size,
|
||||
MPI_Datatype *newtype)
|
||||
|
||||
{
|
||||
ompi_datatype_t *lastType;
|
||||
ptrdiff_t orig_extent, *st_offsets = NULL;
|
||||
int i, start_loop, end_loop, step;
|
||||
int *coords = NULL, rc = OMPI_SUCCESS;
|
||||
int i, rc;
|
||||
|
||||
MEMCHECKER(
|
||||
memchecker_datatype(oldtype);
|
||||
@ -107,114 +94,10 @@ int MPI_Type_create_darray(int size,
|
||||
|
||||
OPAL_CR_ENTER_LIBRARY();
|
||||
|
||||
/* speedy corner case */
|
||||
if (ndims < 1) {
|
||||
/* Don't just return MPI_DATATYPE_NULL as that can't be
|
||||
MPI_TYPE_FREE()ed, and that seems bad */
|
||||
*newtype = ompi_datatype_create(0);
|
||||
ompi_datatype_add(*newtype, &ompi_mpi_datatype_null.dt, 0, 0, 0);
|
||||
OPAL_CR_EXIT_LIBRARY();
|
||||
return MPI_SUCCESS;
|
||||
}
|
||||
|
||||
rc = ompi_datatype_type_extent(oldtype, &orig_extent);
|
||||
if (MPI_SUCCESS != rc) goto cleanup;
|
||||
|
||||
/* calculate position in grid using row-major ordering */
|
||||
{
|
||||
int tmp_rank = rank, procs = size;
|
||||
|
||||
coords = (int *) malloc(ndims * sizeof(int));
|
||||
for (i = 0 ; i < ndims ; i++) {
|
||||
procs = procs / psize_array[i];
|
||||
coords[i] = tmp_rank / procs;
|
||||
tmp_rank = tmp_rank % procs;
|
||||
}
|
||||
}
|
||||
|
||||
st_offsets = (ptrdiff_t *) malloc(ndims * sizeof(ptrdiff_t));
|
||||
|
||||
/* duplicate type to here to 1) deal with constness without
|
||||
casting and 2) eliminate need to for conditional destroy below.
|
||||
Lame, yes. But cleaner code all around. */
|
||||
rc = ompi_datatype_duplicate(oldtype, &lastType);
|
||||
if (OMPI_SUCCESS != rc) goto cleanup;
|
||||
|
||||
/* figure out ordering issues */
|
||||
if (MPI_ORDER_C == order) {
|
||||
start_loop = ndims - 1 ; step = -1; end_loop = -1;
|
||||
} else {
|
||||
start_loop = 0 ; step = 1; end_loop = ndims;
|
||||
}
|
||||
|
||||
/* Build up array */
|
||||
for (i = start_loop ; i != end_loop; i += step) {
|
||||
int nprocs, tmp_rank;
|
||||
|
||||
switch(distrib_array[i]) {
|
||||
case MPI_DISTRIBUTE_BLOCK:
|
||||
rc = block(gsize_array, i, ndims, psize_array[i], coords[i],
|
||||
darg_array[i], order, orig_extent,
|
||||
lastType, newtype, st_offsets+i);
|
||||
break;
|
||||
case MPI_DISTRIBUTE_CYCLIC:
|
||||
rc = cyclic(gsize_array, i, ndims, psize_array[i], coords[i],
|
||||
darg_array[i], order, orig_extent,
|
||||
lastType, newtype, st_offsets+i);
|
||||
break;
|
||||
case MPI_DISTRIBUTE_NONE:
|
||||
/* treat it as a block distribution on 1 process */
|
||||
if (order == MPI_ORDER_C) {
|
||||
nprocs = psize_array[i]; tmp_rank = coords[i];
|
||||
} else {
|
||||
nprocs = 1; tmp_rank = 0;
|
||||
}
|
||||
|
||||
rc = block(gsize_array, i, ndims, nprocs, tmp_rank,
|
||||
MPI_DISTRIBUTE_DFLT_DARG, order, orig_extent,
|
||||
lastType, newtype, st_offsets+i);
|
||||
break;
|
||||
default:
|
||||
rc = MPI_ERR_ARG;
|
||||
}
|
||||
ompi_datatype_destroy(&lastType);
|
||||
/* need to destroy the old type even in error condition, so
|
||||
don't check return code from above until after cleanup. */
|
||||
if (MPI_SUCCESS != rc) goto cleanup;
|
||||
lastType = *newtype;
|
||||
}
|
||||
|
||||
|
||||
/* set displacement and UB correctly. Use struct instead of
|
||||
resized for same reason as subarray */
|
||||
{
|
||||
ptrdiff_t displs[3];
|
||||
ompi_datatype_t *types[3];
|
||||
int tmp_size, blength[3] = { 1, 1, 1};
|
||||
|
||||
displs[1] = st_offsets[start_loop];
|
||||
tmp_size = 1;
|
||||
for (i = start_loop + step ; i != end_loop ; i += step) {
|
||||
tmp_size *= gsize_array[i - step];
|
||||
displs[1] += tmp_size * st_offsets[i];
|
||||
}
|
||||
|
||||
displs[0] = 0;
|
||||
displs[1] *= orig_extent;
|
||||
displs[2] = orig_extent;
|
||||
for (i = 0 ; i < ndims ; i++) {
|
||||
displs[2] *= gsize_array[i];
|
||||
}
|
||||
types[0] = MPI_LB; types[1] = lastType; types[2] = MPI_UB;
|
||||
|
||||
rc = ompi_datatype_create_struct(3, blength, displs, types, newtype);
|
||||
ompi_datatype_destroy(&lastType);
|
||||
/* need to destroy the old type even in error condition, so
|
||||
don't check return code from above until after cleanup. */
|
||||
if (MPI_SUCCESS != rc) goto cleanup;
|
||||
}
|
||||
|
||||
{
|
||||
rc = ompi_datatype_create_darray( size, rank, ndims,
|
||||
gsize_array, distrib_array, darg_array, psize_array,
|
||||
order, oldtype, newtype );
|
||||
if( OMPI_SUCCESS == rc ) {
|
||||
int* a_i[8];
|
||||
|
||||
a_i[0] = &size;
|
||||
@ -230,151 +113,7 @@ int MPI_Type_create_darray(int size,
|
||||
MPI_COMBINER_DARRAY );
|
||||
}
|
||||
|
||||
cleanup:
|
||||
if (NULL != st_offsets) free(st_offsets);
|
||||
if (NULL != coords) free(coords);
|
||||
|
||||
OPAL_CR_EXIT_LIBRARY();
|
||||
|
||||
OMPI_ERRHANDLER_RETURN(rc, MPI_COMM_WORLD, rc, FUNC_NAME);
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
block(const int *gsize_array, int dim, int ndims, int nprocs,
|
||||
int rank, int darg, int order, ptrdiff_t orig_extent,
|
||||
ompi_datatype_t *type_old, ompi_datatype_t **type_new,
|
||||
ptrdiff_t *st_offset)
|
||||
{
|
||||
int blksize, global_size, mysize, i, j, rc, start_loop, step;
|
||||
ptrdiff_t stride;
|
||||
|
||||
global_size = gsize_array[dim];
|
||||
|
||||
if (darg == MPI_DISTRIBUTE_DFLT_DARG)
|
||||
blksize = (global_size + nprocs - 1) / nprocs;
|
||||
else {
|
||||
blksize = darg;
|
||||
}
|
||||
|
||||
j = global_size - blksize*rank;
|
||||
mysize = blksize < j ? blksize : j;
|
||||
if (mysize < 0) mysize = 0;
|
||||
|
||||
if (MPI_ORDER_C == order) {
|
||||
start_loop = ndims - 1 ; step = -1;
|
||||
} else {
|
||||
start_loop = 0 ; step = 1;
|
||||
}
|
||||
|
||||
stride = orig_extent;
|
||||
if (dim == start_loop) {
|
||||
rc = ompi_datatype_create_contiguous(mysize, type_old, type_new);
|
||||
if (OMPI_SUCCESS != rc) return rc;
|
||||
} else {
|
||||
for (i = start_loop ; i != dim ; i += step) {
|
||||
stride *= gsize_array[i];
|
||||
}
|
||||
rc = ompi_datatype_create_hvector(mysize, 1, stride, type_old, type_new);
|
||||
if (OMPI_SUCCESS != rc) return rc;
|
||||
}
|
||||
|
||||
*st_offset = blksize * rank;
|
||||
/* in terms of no. of elements of type oldtype in this dimension */
|
||||
if (mysize == 0) *st_offset = 0;
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
cyclic(const int *gsize_array, int dim, int ndims, int nprocs,
|
||||
int rank, int darg, int order, ptrdiff_t orig_extent,
|
||||
ompi_datatype_t* type_old, ompi_datatype_t **type_new,
|
||||
ptrdiff_t *st_offset)
|
||||
{
|
||||
int blksize, i, blklens[2], st_index, end_index, local_size, rem, count, rc;
|
||||
ptrdiff_t stride, disps[2];
|
||||
ompi_datatype_t *type_tmp, *types[2];
|
||||
|
||||
if (darg == MPI_DISTRIBUTE_DFLT_DARG) {
|
||||
blksize = 1;
|
||||
} else {
|
||||
blksize = darg;
|
||||
}
|
||||
|
||||
st_index = rank * blksize;
|
||||
end_index = gsize_array[dim] - 1;
|
||||
|
||||
if (end_index < st_index) {
|
||||
local_size = 0;
|
||||
} else {
|
||||
local_size = ((end_index - st_index + 1)/(nprocs*blksize))*blksize;
|
||||
rem = (end_index - st_index + 1) % (nprocs*blksize);
|
||||
local_size += rem < blksize ? rem : blksize;
|
||||
}
|
||||
|
||||
count = local_size / blksize;
|
||||
rem = local_size % blksize;
|
||||
|
||||
stride = nprocs*blksize*orig_extent;
|
||||
if (order == MPI_ORDER_FORTRAN) {
|
||||
for (i=0; i<dim; i++) {
|
||||
stride *= gsize_array[i];
|
||||
}
|
||||
} else {
|
||||
for (i=ndims-1; i>dim; i--) {
|
||||
stride *= gsize_array[i];
|
||||
}
|
||||
}
|
||||
|
||||
rc = ompi_datatype_create_hvector(count, blksize, stride, type_old, type_new);
|
||||
if (OMPI_SUCCESS != rc) return rc;
|
||||
|
||||
if (rem) {
|
||||
/* if the last block is of size less than blksize, include
|
||||
it separately using MPI_Type_struct */
|
||||
|
||||
types[0] = *type_new;
|
||||
types[1] = type_old;
|
||||
disps[0] = 0;
|
||||
disps[1] = count*stride;
|
||||
blklens[0] = 1;
|
||||
blklens[1] = rem;
|
||||
|
||||
rc = ompi_datatype_create_struct(2, blklens, disps, types, &type_tmp);
|
||||
ompi_datatype_destroy(type_new);
|
||||
/* even in error condition, need to destroy type_new, so check
|
||||
for error after destroy. */
|
||||
if (OMPI_SUCCESS != rc) return rc;
|
||||
*type_new = type_tmp;
|
||||
}
|
||||
|
||||
/* need to set the UB for block-cyclic to work */
|
||||
types[0] = *type_new;
|
||||
types[1] = MPI_UB;
|
||||
disps[0] = 0;
|
||||
disps[1] = orig_extent;
|
||||
if (order == MPI_ORDER_FORTRAN) {
|
||||
for (i=0; i<=dim; i++) {
|
||||
disps[1] *= gsize_array[i];
|
||||
}
|
||||
} else {
|
||||
for (i=ndims-1; i>=dim; i--) {
|
||||
disps[1] *= gsize_array[i];
|
||||
}
|
||||
}
|
||||
blklens[0] = blklens[1] = 1;
|
||||
rc = ompi_datatype_create_struct(2, blklens, disps, types, &type_tmp);
|
||||
ompi_datatype_destroy(type_new);
|
||||
/* even in error condition, need to destroy type_new, so check
|
||||
for error after destroy. */
|
||||
if (OMPI_SUCCESS != rc) return rc;
|
||||
*type_new = type_tmp;
|
||||
|
||||
*st_offset = rank * blksize;
|
||||
/* in terms of no. of elements of type oldtype in this dimension */
|
||||
if (local_size == 0) *st_offset = 0;
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
@ -2,7 +2,7 @@
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2011 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2008 High Performance Computing Center Stuttgart,
|
||||
@ -44,11 +44,8 @@ int MPI_Type_create_subarray(int ndims,
|
||||
int order,
|
||||
MPI_Datatype oldtype,
|
||||
MPI_Datatype *newtype)
|
||||
|
||||
{
|
||||
MPI_Datatype last_type;
|
||||
int32_t i, step, end_loop;
|
||||
MPI_Aint size, displ, extent;
|
||||
int32_t i, rc;
|
||||
|
||||
MEMCHECKER(
|
||||
memchecker_datatype(oldtype);
|
||||
@ -76,69 +73,9 @@ int MPI_Type_create_subarray(int ndims,
|
||||
|
||||
OPAL_CR_ENTER_LIBRARY();
|
||||
|
||||
ompi_datatype_type_extent( oldtype, &extent );
|
||||
|
||||
/* If the ndims is zero then return the NULL datatype */
|
||||
if( ndims < 2 ) {
|
||||
if( 0 == ndims ) {
|
||||
*newtype = &ompi_mpi_datatype_null.dt;
|
||||
OPAL_CR_EXIT_LIBRARY();
|
||||
return MPI_SUCCESS;
|
||||
}
|
||||
ompi_datatype_create_contiguous( subsize_array[0], oldtype, &last_type );
|
||||
size = size_array[0];
|
||||
displ = start_array[0];
|
||||
goto replace_subarray_type;
|
||||
}
|
||||
|
||||
if( MPI_ORDER_C == order ) {
|
||||
i = ndims - 1;
|
||||
step = -1;
|
||||
end_loop = -1;
|
||||
} else {
|
||||
i = 0;
|
||||
step = 1;
|
||||
end_loop = ndims;
|
||||
}
|
||||
|
||||
/* As we know that the ndims is at least 1 we can start by creating the
|
||||
* first dimension data outside the loop, such that we dont have to create
|
||||
* a duplicate of the oldtype just to be able to free it.
|
||||
*/
|
||||
ompi_datatype_create_vector( subsize_array[i+step], subsize_array[i], size_array[i],
|
||||
oldtype, newtype );
|
||||
|
||||
last_type = *newtype;
|
||||
size = size_array[i] * size_array[i+step];
|
||||
displ = start_array[i] + start_array[i+step] * size_array[i];
|
||||
for( i += 2 * step; i != end_loop; i += step ) {
|
||||
ompi_datatype_create_hvector( subsize_array[i], 1, size * extent,
|
||||
last_type, newtype );
|
||||
ompi_datatype_destroy( &last_type );
|
||||
displ += size * start_array[i];
|
||||
size *= size_array[i];
|
||||
last_type = *newtype;
|
||||
}
|
||||
|
||||
replace_subarray_type:
|
||||
/**
|
||||
* We cannot use resized here. Resized will only set the soft lb and ub markers
|
||||
* without moving the real data inside. What we need is to force the displacement
|
||||
* of the data create upward to the right position AND set the LB and UB. A type
|
||||
* struct is the function we need.
|
||||
*/
|
||||
{
|
||||
MPI_Aint displs[3];
|
||||
MPI_Datatype types[3];
|
||||
int blength[3] = { 1, 1, 1 };
|
||||
|
||||
displs[0] = 0; displs[1] = displ * extent; displs[2] = size * extent;
|
||||
types[0] = MPI_LB; types[1] = last_type; types[2] = MPI_UB;
|
||||
ompi_datatype_create_struct( 3, blength, displs, types, newtype );
|
||||
}
|
||||
ompi_datatype_destroy( &last_type );
|
||||
|
||||
{
|
||||
rc = ompi_datatype_create_subarray( ndims, size_array, subsize_array, start_array,
|
||||
order, oldtype, newtype);
|
||||
if( OMPI_SUCCESS == rc ) {
|
||||
int* a_i[5];
|
||||
|
||||
a_i[0] = &ndims;
|
||||
@ -148,10 +85,10 @@ int MPI_Type_create_subarray(int ndims,
|
||||
a_i[4] = &order;
|
||||
|
||||
ompi_datatype_set_args( *newtype, 3 * ndims + 2, a_i, 0, NULL, 1, &oldtype,
|
||||
MPI_COMBINER_SUBARRAY );
|
||||
MPI_COMBINER_SUBARRAY );
|
||||
}
|
||||
|
||||
OPAL_CR_EXIT_LIBRARY();
|
||||
|
||||
return MPI_SUCCESS;
|
||||
OMPI_ERRHANDLER_RETURN(rc, MPI_COMM_WORLD, rc, FUNC_NAME);
|
||||
}
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user