snapshot as I move machines
do not use This commit was SVN r7103.
Этот коммит содержится в:
родитель
d64a702a5b
Коммит
1caec16018
0
ompi/mca/coll/tuned/.ompi_ignore
Обычный файл
0
ompi/mca/coll/tuned/.ompi_ignore
Обычный файл
1
ompi/mca/coll/tuned/.ompi_unignore
Обычный файл
1
ompi/mca/coll/tuned/.ompi_unignore
Обычный файл
@ -0,0 +1 @@
|
|||||||
|
gef
|
49
ompi/mca/coll/tuned/Makefile.am
Обычный файл
49
ompi/mca/coll/tuned/Makefile.am
Обычный файл
@ -0,0 +1,49 @@
|
|||||||
|
#
|
||||||
|
# Copyright (c) 2004-2005 The Trustees of Indiana University.
|
||||||
|
# All rights reserved.
|
||||||
|
# Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
|
||||||
|
# All rights reserved.
|
||||||
|
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||||
|
# University of Stuttgart. All rights reserved.
|
||||||
|
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||||
|
# All rights reserved.
|
||||||
|
# $COPYRIGHT$
|
||||||
|
#
|
||||||
|
# Additional copyrights may follow
|
||||||
|
#
|
||||||
|
# $HEADER$
|
||||||
|
#
|
||||||
|
|
||||||
|
# Use the top-level Makefile.options
|
||||||
|
|
||||||
|
include $(top_ompi_srcdir)/config/Makefile.options
|
||||||
|
|
||||||
|
sources = \
|
||||||
|
coll_tuned.h \
|
||||||
|
coll_tuned_topo.h \
|
||||||
|
coll_tuned_topo.c \
|
||||||
|
coll_tuned_bcast_decision.c \
|
||||||
|
coll_tuned_bcast.c \
|
||||||
|
coll_tuned_component.c \
|
||||||
|
coll_tuned_module.c
|
||||||
|
|
||||||
|
# Make the output library in this directory, and name it either
|
||||||
|
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
|
||||||
|
# (for static builds).
|
||||||
|
|
||||||
|
if OMPI_BUILD_coll_tuned_DSO
|
||||||
|
component_noinst =
|
||||||
|
component_install = mca_coll_tuned.la
|
||||||
|
else
|
||||||
|
component_noinst = libmca_coll_tuned.la
|
||||||
|
component_install =
|
||||||
|
endif
|
||||||
|
|
||||||
|
mcacomponentdir = $(libdir)/openmpi
|
||||||
|
mcacomponent_LTLIBRARIES = $(component_install)
|
||||||
|
mca_coll_tuned_la_SOURCES = $(sources)
|
||||||
|
mca_coll_tuned_la_LDFLAGS = -module -avoid-version
|
||||||
|
|
||||||
|
noinst_LTLIBRARIES = $(component_noinst)
|
||||||
|
libmca_coll_tuned_la_SOURCES =$(sources)
|
||||||
|
libmca_coll_tuned_la_LDFLAGS = -module -avoid-version
|
307
ompi/mca/coll/tuned/coll_tuned.h
Обычный файл
307
ompi/mca/coll/tuned/coll_tuned.h
Обычный файл
@ -0,0 +1,307 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2004-2005 The Trustees of Indiana University.
|
||||||
|
* All rights reserved.
|
||||||
|
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
|
||||||
|
* All rights reserved.
|
||||||
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||||
|
* University of Stuttgart. All rights reserved.
|
||||||
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||||
|
* All rights reserved.
|
||||||
|
* $COPYRIGHT$
|
||||||
|
*
|
||||||
|
* Additional copyrights may follow
|
||||||
|
*
|
||||||
|
* $HEADER$
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef MCA_COLL_TUNED_EXPORT_H
|
||||||
|
#define MCA_COLL_TUNED_EXPORT_H
|
||||||
|
|
||||||
|
#include "ompi_config.h"
|
||||||
|
|
||||||
|
#include "mpi.h"
|
||||||
|
#include "mca/mca.h"
|
||||||
|
#include "mca/coll/coll.h"
|
||||||
|
#include "request/request.h"
|
||||||
|
#include "mca/pml/pml.h"
|
||||||
|
|
||||||
|
/* need to include our own topo prototypes so we can malloc data on the comm correctly */
|
||||||
|
#include "coll_tuned_topo.h"
|
||||||
|
|
||||||
|
#if defined(c_plusplus) || defined(__cplusplus)
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Globally exported variable
|
||||||
|
*/
|
||||||
|
|
||||||
|
OMPI_COMP_EXPORT extern const mca_coll_base_component_1_0_0_t mca_coll_tuned_component;
|
||||||
|
OMPI_COMP_EXPORT extern int mca_coll_tuned_priority_param;
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* coll API functions
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
/* API functions */
|
||||||
|
|
||||||
|
int mca_coll_tuned_init_query(bool enable_progress_threads,
|
||||||
|
bool enable_mpi_threads);
|
||||||
|
const struct mca_coll_base_module_1_0_0_t *
|
||||||
|
mca_coll_tuned_comm_query(struct ompi_communicator_t *comm, int *priority,
|
||||||
|
struct mca_coll_base_comm_t **data);
|
||||||
|
|
||||||
|
const struct mca_coll_base_module_1_0_0_t *
|
||||||
|
mca_coll_tuned_module_init(struct ompi_communicator_t *comm);
|
||||||
|
int mca_coll_tuned_module_finalize(struct ompi_communicator_t *comm);
|
||||||
|
|
||||||
|
/* API functions of decision functions and any implementations */
|
||||||
|
|
||||||
|
int mca_coll_tuned_allgather_intra_dec(void *sbuf, int scount,
|
||||||
|
struct ompi_datatype_t *sdtype,
|
||||||
|
void *rbuf, int rcount,
|
||||||
|
struct ompi_datatype_t *rdtype,
|
||||||
|
struct ompi_communicator_t *comm);
|
||||||
|
int mca_coll_tuned_allgather_inter_dec(void *sbuf, int scount,
|
||||||
|
struct ompi_datatype_t *sdtype,
|
||||||
|
void *rbuf, int rcount,
|
||||||
|
struct ompi_datatype_t *rdtype,
|
||||||
|
struct ompi_communicator_t *comm);
|
||||||
|
|
||||||
|
int mca_coll_tuned_allgatherv_intra_dec(void *sbuf, int scount,
|
||||||
|
struct ompi_datatype_t *sdtype,
|
||||||
|
void * rbuf, int *rcounts, int *disps,
|
||||||
|
struct ompi_datatype_t *rdtype,
|
||||||
|
struct ompi_communicator_t *comm);
|
||||||
|
int mca_coll_tuned_allgatherv_inter_dec(void *sbuf, int scount,
|
||||||
|
struct ompi_datatype_t *sdtype,
|
||||||
|
void * rbuf, int *rcounts, int *disps,
|
||||||
|
struct ompi_datatype_t *rdtype,
|
||||||
|
struct ompi_communicator_t *comm);
|
||||||
|
|
||||||
|
int mca_coll_tuned_allreduce_intra_dec(void *sbuf, void *rbuf, int count,
|
||||||
|
struct ompi_datatype_t *dtype,
|
||||||
|
struct ompi_op_t *op,
|
||||||
|
struct ompi_communicator_t *comm);
|
||||||
|
int mca_coll_tuned_allreduce_inter_dec(void *sbuf, void *rbuf, int count,
|
||||||
|
struct ompi_datatype_t *dtype,
|
||||||
|
struct ompi_op_t *op,
|
||||||
|
struct ompi_communicator_t *comm);
|
||||||
|
|
||||||
|
int mca_coll_tuned_alltoall_intra_dec(void *sbuf, int scount,
|
||||||
|
struct ompi_datatype_t *sdtype,
|
||||||
|
void* rbuf, int rcount,
|
||||||
|
struct ompi_datatype_t *rdtype,
|
||||||
|
struct ompi_communicator_t *comm);
|
||||||
|
int mca_coll_tuned_alltoall_inter_dec(void *sbuf, int scount,
|
||||||
|
struct ompi_datatype_t *sdtype,
|
||||||
|
void* rbuf, int rcount,
|
||||||
|
struct ompi_datatype_t *rdtype,
|
||||||
|
struct ompi_communicator_t *comm);
|
||||||
|
|
||||||
|
int mca_coll_tuned_alltoallv_intra_dec(void *sbuf, int *scounts, int *sdisps,
|
||||||
|
struct ompi_datatype_t *sdtype,
|
||||||
|
void *rbuf, int *rcounts, int *rdisps,
|
||||||
|
struct ompi_datatype_t *rdtype,
|
||||||
|
struct ompi_communicator_t *comm);
|
||||||
|
int mca_coll_tuned_alltoallv_inter_dec(void *sbuf, int *scounts, int *sdisps,
|
||||||
|
struct ompi_datatype_t *sdtype,
|
||||||
|
void *rbuf, int *rcounts, int *rdisps,
|
||||||
|
struct ompi_datatype_t *rdtype,
|
||||||
|
struct ompi_communicator_t *comm);
|
||||||
|
|
||||||
|
int mca_coll_tuned_alltoallw_intra_dec(void *sbuf, int *scounts, int *sdisps,
|
||||||
|
struct ompi_datatype_t **sdtypes,
|
||||||
|
void *rbuf, int *rcounts, int *rdisps,
|
||||||
|
struct ompi_datatype_t **rdtypes,
|
||||||
|
struct ompi_communicator_t *comm);
|
||||||
|
int mca_coll_tuned_alltoallw_inter_dec(void *sbuf, int *scounts, int *sdisps,
|
||||||
|
struct ompi_datatype_t **sdtypes,
|
||||||
|
void *rbuf, int *rcounts, int *rdisps,
|
||||||
|
struct ompi_datatype_t **rdtypes,
|
||||||
|
struct ompi_communicator_t *comm);
|
||||||
|
|
||||||
|
int mca_coll_tuned_barrier_intra_dec(struct ompi_communicator_t *comm);
|
||||||
|
int mca_coll_tuned_barrier_inter_dec(struct ompi_communicator_t *comm);
|
||||||
|
|
||||||
|
|
||||||
|
int mca_coll_tuned_bcast_intra_dec(void *buff, int count,
|
||||||
|
struct ompi_datatype_t *datatype,
|
||||||
|
int root,
|
||||||
|
struct ompi_communicator_t *comm);
|
||||||
|
|
||||||
|
int mca_coll_tuned_bcast_intra_linear(void *buff, int count,
|
||||||
|
struct ompi_datatype_t *datatype,
|
||||||
|
int root,
|
||||||
|
struct ompi_communicator_t *comm);
|
||||||
|
|
||||||
|
int mca_coll_tuned_bcast_intra_chain(void *buff, int count,
|
||||||
|
struct ompi_datatype_t *datatype,
|
||||||
|
int root,
|
||||||
|
struct ompi_communicator_t *comm,
|
||||||
|
uint32_t segsize, int32_t chains);
|
||||||
|
|
||||||
|
int mca_coll_tuned_bcast_intra_pipeline(void *buff, int count,
|
||||||
|
struct ompi_datatype_t *datatype,
|
||||||
|
int root,
|
||||||
|
struct ompi_communicator_t *comm,
|
||||||
|
uint32_t segsize);
|
||||||
|
|
||||||
|
int mca_coll_tuned_bcast_intra_bmtree(void *buff, int count,
|
||||||
|
struct ompi_datatype_t *datatype,
|
||||||
|
int root,
|
||||||
|
struct ompi_communicator_t *comm,
|
||||||
|
uint32_t segsize, int32_t chains);
|
||||||
|
|
||||||
|
int mca_coll_tuned_bcast_intra_bintree(void *buff, int count,
|
||||||
|
struct ompi_datatype_t *datatype,
|
||||||
|
int root,
|
||||||
|
struct ompi_communicator_t *comm,
|
||||||
|
uint32_t segsize);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
int mca_coll_tuned_bcast_inter_dec(void *buff, int count,
|
||||||
|
struct ompi_datatype_t *datatype,
|
||||||
|
int root,
|
||||||
|
struct ompi_communicator_t *comm);
|
||||||
|
|
||||||
|
int mca_coll_tuned_exscan_intra_dec(void *sbuf, void *rbuf, int count,
|
||||||
|
struct ompi_datatype_t *dtype,
|
||||||
|
struct ompi_op_t *op,
|
||||||
|
struct ompi_communicator_t *comm);
|
||||||
|
int mca_coll_tuned_exscan_inter_dec(void *sbuf, void *rbuf, int count,
|
||||||
|
struct ompi_datatype_t *dtype,
|
||||||
|
struct ompi_op_t *op,
|
||||||
|
struct ompi_communicator_t *comm);
|
||||||
|
|
||||||
|
int mca_coll_tuned_gather_intra_dec(void *sbuf, int scount,
|
||||||
|
struct ompi_datatype_t *sdtype, void *rbuf,
|
||||||
|
int rcount, struct ompi_datatype_t *rdtype,
|
||||||
|
int root, struct ompi_communicator_t *comm);
|
||||||
|
int mca_coll_tuned_gather_inter_dec(void *sbuf, int scount,
|
||||||
|
struct ompi_datatype_t *sdtype, void *rbuf,
|
||||||
|
int rcount, struct ompi_datatype_t *rdtype,
|
||||||
|
int root, struct ompi_communicator_t *comm);
|
||||||
|
|
||||||
|
int mca_coll_tuned_gatherv_intra_dec(void *sbuf, int scount,
|
||||||
|
struct ompi_datatype_t *sdtype, void *rbuf,
|
||||||
|
int *rcounts, int *disps,
|
||||||
|
struct ompi_datatype_t *rdtype, int root,
|
||||||
|
struct ompi_communicator_t *comm);
|
||||||
|
int mca_coll_tuned_gatherv_inter_dec(void *sbuf, int scount,
|
||||||
|
struct ompi_datatype_t *sdtype, void *rbuf,
|
||||||
|
int *rcounts, int *disps,
|
||||||
|
struct ompi_datatype_t *rdtype, int root,
|
||||||
|
struct ompi_communicator_t *comm);
|
||||||
|
|
||||||
|
int mca_coll_tuned_reduce_intra_dec(void *sbuf, void* rbuf, int count,
|
||||||
|
struct ompi_datatype_t *dtype,
|
||||||
|
struct ompi_op_t *op,
|
||||||
|
int root,
|
||||||
|
struct ompi_communicator_t *comm);
|
||||||
|
int mca_coll_tuned_reduce_inter_dec(void *sbuf, void* rbuf, int count,
|
||||||
|
struct ompi_datatype_t *dtype,
|
||||||
|
struct ompi_op_t *op,
|
||||||
|
int root,
|
||||||
|
struct ompi_communicator_t *comm);
|
||||||
|
|
||||||
|
int mca_coll_tuned_reduce_scatter_intra_dec(void *sbuf, void *rbuf,
|
||||||
|
int *rcounts,
|
||||||
|
struct ompi_datatype_t *dtype,
|
||||||
|
struct ompi_op_t *op,
|
||||||
|
struct ompi_communicator_t *comm);
|
||||||
|
int mca_coll_tuned_reduce_scatter_inter_dec(void *sbuf, void *rbuf,
|
||||||
|
int *rcounts,
|
||||||
|
struct ompi_datatype_t *dtype,
|
||||||
|
struct ompi_op_t *op,
|
||||||
|
struct ompi_communicator_t *comm);
|
||||||
|
|
||||||
|
int mca_coll_tuned_scan_intra_dec(void *sbuf, void *rbuf, int count,
|
||||||
|
struct ompi_datatype_t *dtype,
|
||||||
|
struct ompi_op_t *op,
|
||||||
|
struct ompi_communicator_t *comm);
|
||||||
|
int mca_coll_tuned_scan_inter_dec(void *sbuf, void *rbuf, int count,
|
||||||
|
struct ompi_datatype_t *dtype,
|
||||||
|
struct ompi_op_t *op,
|
||||||
|
struct ompi_communicator_t *comm);
|
||||||
|
|
||||||
|
int mca_coll_tuned_scatter_intra_dec(void *sbuf, int scount,
|
||||||
|
struct ompi_datatype_t *sdtype, void *rbuf,
|
||||||
|
int rcount, struct ompi_datatype_t *rdtype,
|
||||||
|
int root, struct ompi_communicator_t *comm);
|
||||||
|
int mca_coll_tuned_scatter_inter_dec(void *sbuf, int scount,
|
||||||
|
struct ompi_datatype_t *sdtype, void *rbuf,
|
||||||
|
int rcount, struct ompi_datatype_t *rdtype,
|
||||||
|
int root, struct ompi_communicator_t *comm);
|
||||||
|
|
||||||
|
int mca_coll_tuned_scatterv_intra_dec(void *sbuf, int *scounts, int *disps,
|
||||||
|
struct ompi_datatype_t *sdtype,
|
||||||
|
void* rbuf, int rcount,
|
||||||
|
struct ompi_datatype_t *rdtype, int root,
|
||||||
|
struct ompi_communicator_t *comm);
|
||||||
|
int mca_coll_tuned_scatterv_inter_dec(void *sbuf, int *scounts, int *disps,
|
||||||
|
struct ompi_datatype_t *sdtype,
|
||||||
|
void* rbuf, int rcount,
|
||||||
|
struct ompi_datatype_t *rdtype, int root,
|
||||||
|
struct ompi_communicator_t *comm);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* Utility functions */
|
||||||
|
|
||||||
|
static inline void mca_coll_tuned_free_reqs(ompi_request_t **reqs, int count)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
for (i = 0; i < count; ++i)
|
||||||
|
ompi_request_free(&reqs[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* decision table declaraion */
|
||||||
|
/* currently a place holder */
|
||||||
|
typedef struct rule_s {
|
||||||
|
} rule_t;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Data structure for hanging data off the communicator
|
||||||
|
*/
|
||||||
|
struct mca_coll_base_comm_t {
|
||||||
|
/* standard data for requests and PML usage */
|
||||||
|
|
||||||
|
/* we need to keep this here for now incase we fall through to the basic functions
|
||||||
|
* that expect these fields/and memory to be avaliable (GEF something for JS?)
|
||||||
|
*/
|
||||||
|
ompi_request_t **mccb_reqs;
|
||||||
|
int mccb_num_reqs;
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* tuned topo information caching per communicator
|
||||||
|
*
|
||||||
|
* for each communicator we cache the topo information so we can reuse without regenerating
|
||||||
|
* if we change the root, [or fanout] then regenerate and recache this information
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
ompi_coll_tree_t *cached_tree;
|
||||||
|
int cached_tree_root;
|
||||||
|
int cached_tree_fanout;
|
||||||
|
|
||||||
|
ompi_coll_bmtree_t *cached_bmtree;
|
||||||
|
int cached_bmtree_root;
|
||||||
|
|
||||||
|
ompi_coll_chain_t *cached_chain;
|
||||||
|
int cached_chain_root;
|
||||||
|
int cached_chain_fanout;
|
||||||
|
|
||||||
|
/* extra data required by the decision functions */
|
||||||
|
rule_t* decision_table;
|
||||||
|
};
|
||||||
|
|
||||||
|
#if defined(c_plusplus) || defined(__cplusplus)
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#endif /* MCA_COLL_TUNED_EXPORT_H */
|
465
ompi/mca/coll/tuned/coll_tuned_bcast.c
Обычный файл
465
ompi/mca/coll/tuned/coll_tuned_bcast.c
Обычный файл
@ -0,0 +1,465 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2004-2005 The Trustees of Indiana University.
|
||||||
|
* All rights reserved.
|
||||||
|
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
|
||||||
|
* All rights reserved.
|
||||||
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||||
|
* University of Stuttgart. All rights reserved.
|
||||||
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||||
|
* All rights reserved.
|
||||||
|
* $COPYRIGHT$
|
||||||
|
*
|
||||||
|
* Additional copyrights may follow
|
||||||
|
*
|
||||||
|
* $HEADER$
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "ompi_config.h"
|
||||||
|
#include "coll_tuned.h"
|
||||||
|
|
||||||
|
#include "mpi.h"
|
||||||
|
#include "ompi/include/constants.h"
|
||||||
|
#include "datatype/datatype.h"
|
||||||
|
#include "communicator/communicator.h"
|
||||||
|
#include "mca/coll/coll.h"
|
||||||
|
#include "mca/coll/base/coll_tags.h"
|
||||||
|
#include "mca/pml/pml.h"
|
||||||
|
|
||||||
|
/* external prototype we need */
|
||||||
|
extern int ompi_mpi_sendrecv( void* sendbuf, int scount, ompi_datatype_t* sdatatype,
|
||||||
|
int dest, int stag,
|
||||||
|
void* recvbuf, int rcount, ompi_datatype_t* rdata,
|
||||||
|
int source, int rtag,
|
||||||
|
struct ompi_communicator_t* comm,
|
||||||
|
ompi_status_public_t* status );
|
||||||
|
|
||||||
|
int
|
||||||
|
mca_coll_tuned_bcast_intra_chain ( void *buff, int count,
|
||||||
|
struct ompi_datatype_t *datatype,
|
||||||
|
int root,
|
||||||
|
struct ompi_communicator_t *comm,
|
||||||
|
uint32_t segsize, int32_t chains )
|
||||||
|
{
|
||||||
|
int err = 0, line, rank, size, segindex, i;
|
||||||
|
int segcount; /* Number of elements sent with each segment */
|
||||||
|
int num_segments; /* Number of segmenets */
|
||||||
|
int sendcount; /* the same like segcount, except for the last segment */
|
||||||
|
int new_sendcount; /* used to mane the size for the next pipelined receive */
|
||||||
|
int realsegsize;
|
||||||
|
char *tmpbuf = (char*)buff;
|
||||||
|
long ext;
|
||||||
|
int typelng;
|
||||||
|
MPI_Request base_req, new_req;
|
||||||
|
ompi_coll_chain_t* chain;
|
||||||
|
|
||||||
|
size = ompi_comm_size(comm);
|
||||||
|
rank = ompi_comm_rank(comm);
|
||||||
|
|
||||||
|
printf("mca_coll_tuned_bcast_intra_chain rank %d root %d\n", rank, root);
|
||||||
|
|
||||||
|
if( size == 1 ) {
|
||||||
|
return MPI_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* setup the chain topology.
|
||||||
|
* if the previous chain topology is the same, then use this cached copy
|
||||||
|
* other wise recreate it.
|
||||||
|
*/
|
||||||
|
|
||||||
|
if ((comm->c_coll_selected_data->cached_chain) && (comm->c_coll_selected_data->cached_chain_root == root)
|
||||||
|
&& (comm->c_coll_selected_data->cached_chain_fanout == chains)) {
|
||||||
|
chain = comm->c_coll_selected_data->cached_chain;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
if (comm->c_coll_selected_data->cached_chain) { /* destroy previous chain if defined */
|
||||||
|
ompi_coll_tuned_topo_destroy_chain (&comm->c_coll_selected_data->cached_chain);
|
||||||
|
}
|
||||||
|
comm->c_coll_selected_data->cached_chain = chain = ompi_coll_tuned_topo_build_chain( chains, comm, root );
|
||||||
|
comm->c_coll_selected_data->cached_chain_root = root;
|
||||||
|
comm->c_coll_selected_data->cached_chain_fanout = chains;
|
||||||
|
}
|
||||||
|
|
||||||
|
ompi_ddt_type_size( datatype, &typelng );
|
||||||
|
|
||||||
|
/* Determine number of segments and number of elements
|
||||||
|
* sent per operation */
|
||||||
|
if( segsize == 0 ) {
|
||||||
|
/* no segmentation */
|
||||||
|
segcount = count;
|
||||||
|
num_segments = 1;
|
||||||
|
} else {
|
||||||
|
/* segment the message */
|
||||||
|
segcount = segsize / typelng;
|
||||||
|
num_segments = count / segcount;
|
||||||
|
if ((count % segcount)!= 0) num_segments++;
|
||||||
|
}
|
||||||
|
realsegsize = segcount*ext;
|
||||||
|
/* set the buffer pointer */
|
||||||
|
tmpbuf = (char *)buff;
|
||||||
|
|
||||||
|
/* root code */
|
||||||
|
if( rank == root ) {
|
||||||
|
/* for each segment */
|
||||||
|
sendcount = segcount;
|
||||||
|
for (segindex = 0; segindex < num_segments; segindex++) {
|
||||||
|
/* determine how many elements are being sent in this round */
|
||||||
|
if( segindex == (num_segments - 1) )
|
||||||
|
sendcount = count - segindex*segcount;
|
||||||
|
for( i = 0; i < chain->chain_nextsize; i++ ) {
|
||||||
|
err = MCA_PML_CALL(send(tmpbuf, sendcount, datatype,
|
||||||
|
chain->chain_next[i],
|
||||||
|
MCA_COLL_BASE_TAG_BCAST,
|
||||||
|
MCA_PML_BASE_SEND_STANDARD,comm));
|
||||||
|
if( MPI_SUCCESS != err ) { line = __LINE__; goto error_hndl; }
|
||||||
|
}
|
||||||
|
/* update tmp buffer */
|
||||||
|
tmpbuf += realsegsize;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* intermediate nodes code */
|
||||||
|
else if (chain->chain_nextsize > 0) {
|
||||||
|
/* Create the pipeline. We first post the first receive, then in the loop we
|
||||||
|
* post the next receive and after that wait for the previous receive to
|
||||||
|
* complete and we disseminating the data to all children.
|
||||||
|
*/
|
||||||
|
new_sendcount = sendcount = segcount;
|
||||||
|
err = MCA_PML_CALL(recv( tmpbuf, sendcount, datatype,
|
||||||
|
chain->chain_prev, MCA_COLL_BASE_TAG_BCAST,
|
||||||
|
comm, MPI_STATUS_IGNORE));
|
||||||
|
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
|
||||||
|
|
||||||
|
for( segindex = 1; segindex < num_segments; segindex++ ) {
|
||||||
|
/* determine how many elements to expect in this round */
|
||||||
|
if( segindex == (num_segments - 1))
|
||||||
|
new_sendcount = count - segindex*segcount;
|
||||||
|
/* post new irecv */
|
||||||
|
err = MCA_PML_CALL(irecv( tmpbuf + realsegsize, new_sendcount,
|
||||||
|
datatype, chain->chain_prev,
|
||||||
|
MCA_COLL_BASE_TAG_BCAST, comm, &new_req));
|
||||||
|
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
|
||||||
|
|
||||||
|
/* wait for and forward current segment */
|
||||||
|
err = ompi_request_wait_all( 1, &base_req, MPI_STATUSES_IGNORE );
|
||||||
|
for( i = 0; i < chain->chain_nextsize; i++ ) {
|
||||||
|
/* send data to children */
|
||||||
|
err = MCA_PML_CALL(send( tmpbuf, sendcount, datatype,
|
||||||
|
chain->chain_next[i],
|
||||||
|
MCA_COLL_BASE_TAG_BCAST,
|
||||||
|
MCA_PML_BASE_SEND_STANDARD, comm));
|
||||||
|
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
|
||||||
|
} /* end of for each child */
|
||||||
|
/* upate the base request */
|
||||||
|
base_req = new_req;
|
||||||
|
/* go to the next buffer (ie. the one corresponding to the next recv) */
|
||||||
|
tmpbuf += realsegsize;
|
||||||
|
sendcount = new_sendcount;
|
||||||
|
} /* end of for segindex */
|
||||||
|
|
||||||
|
/* wait for the last segment and forward current segment */
|
||||||
|
err = ompi_request_wait_all( 1, &base_req, MPI_STATUSES_IGNORE );
|
||||||
|
for( i = 0; i < chain->chain_nextsize; i++ ) {
|
||||||
|
/* send data to children */
|
||||||
|
err = MCA_PML_CALL(send( tmpbuf, sendcount, datatype,
|
||||||
|
chain->chain_next[i],
|
||||||
|
MCA_COLL_BASE_TAG_BCAST,
|
||||||
|
MCA_PML_BASE_SEND_STANDARD, comm));
|
||||||
|
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
|
||||||
|
} /* end of for each child */
|
||||||
|
}
|
||||||
|
|
||||||
|
/* leaf nodes */
|
||||||
|
else {
|
||||||
|
sendcount = segcount;
|
||||||
|
for (segindex = 0; segindex < num_segments; segindex++) {
|
||||||
|
/* determine how many elements to expect in this round */
|
||||||
|
if (segindex == (num_segments - 1))
|
||||||
|
sendcount = count - segindex*segcount;
|
||||||
|
/* receive segments */
|
||||||
|
err = MCA_PML_CALL(recv( tmpbuf, sendcount, datatype,
|
||||||
|
chain->chain_prev, MCA_COLL_BASE_TAG_BCAST,
|
||||||
|
comm, MPI_STATUS_IGNORE));
|
||||||
|
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
|
||||||
|
/* update the initial pointer to the buffer */
|
||||||
|
tmpbuf += realsegsize;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return (MPI_SUCCESS);
|
||||||
|
error_hndl:
|
||||||
|
fprintf(stderr,"%s:%d: Error %d occurred\n",__FILE__,line,err);
|
||||||
|
return (err);
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
mca_coll_tuned_bcast_intra_pipeline ( void *buffer,
|
||||||
|
int count,
|
||||||
|
struct ompi_datatype_t *datatype,
|
||||||
|
int root,
|
||||||
|
struct ompi_communicator_t *comm,
|
||||||
|
uint32_t segsize )
|
||||||
|
{
|
||||||
|
int rank; /* remove when removing print statement */
|
||||||
|
rank = ompi_comm_rank(comm); /* remove when removing print statement */
|
||||||
|
printf("mca_coll_tuned_bcast_intra_pipeline rank %d root %d\n", rank, root);
|
||||||
|
|
||||||
|
return mca_coll_tuned_bcast_intra_chain ( buffer, count, datatype, root, comm,
|
||||||
|
segsize, 1 );
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int
|
||||||
|
mca_coll_tuned_bcast_intra_bintree ( void* buffer,
|
||||||
|
int count,
|
||||||
|
struct ompi_datatype_t* datatype,
|
||||||
|
int root,
|
||||||
|
struct ompi_communicator_t* comm,
|
||||||
|
uint32_t segsize )
|
||||||
|
{
|
||||||
|
int err=0, line;
|
||||||
|
int rank, size;
|
||||||
|
int segindex, i, lr, pair;
|
||||||
|
int segcount[2]; /* Number of elements sent with each segment */
|
||||||
|
uint32_t counts[2];
|
||||||
|
int num_segments[2]; /* Number of segmenets */
|
||||||
|
int sendcount[2]; /* the same like segcount, except for the last segment */
|
||||||
|
int realsegsize[2];
|
||||||
|
char *tmpbuf[2];
|
||||||
|
int type_size;
|
||||||
|
long type_extent;
|
||||||
|
MPI_Request base_req, new_req;
|
||||||
|
ompi_coll_tree_t *tree;
|
||||||
|
|
||||||
|
size = ompi_comm_size(comm);
|
||||||
|
rank = ompi_comm_rank(comm);
|
||||||
|
|
||||||
|
printf("mca_coll_tuned_bcast_intra_bintree rank %d root %d\n", rank, root);
|
||||||
|
|
||||||
|
if (size == 1) {
|
||||||
|
return MPI_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* setup the tree topology.
|
||||||
|
* if the previous tree topology is the same, then use this cached copy
|
||||||
|
* other wise recreate it.
|
||||||
|
*/
|
||||||
|
|
||||||
|
if ((comm->c_coll_selected_data->cached_tree) && (comm->c_coll_selected_data->cached_tree_root == root)
|
||||||
|
&& (comm->c_coll_selected_data->cached_tree_fanout == 2)) {
|
||||||
|
tree = comm->c_coll_selected_data->cached_tree;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
if (comm->c_coll_selected_data->cached_tree) { /* destroy previous chain if defined */
|
||||||
|
ompi_coll_tuned_topo_destroy_tree (&comm->c_coll_selected_data->cached_tree);
|
||||||
|
}
|
||||||
|
comm->c_coll_selected_data->cached_tree = tree = ompi_coll_tuned_topo_build_tree( 2, comm, root );
|
||||||
|
comm->c_coll_selected_data->cached_tree_root = root;
|
||||||
|
comm->c_coll_selected_data->cached_tree_fanout = 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
err = ompi_ddt_type_size( datatype, &type_size );
|
||||||
|
|
||||||
|
/* Determine number of segments and number of elements per segment */
|
||||||
|
counts[0] = count/2;
|
||||||
|
if (count % 2 != 0) counts[0]++;
|
||||||
|
counts[1] = count - counts[0];
|
||||||
|
if ( segsize > 0 ) {
|
||||||
|
segcount[0] = segcount[1] = segsize / type_size;
|
||||||
|
num_segments[0] = counts[0]/segcount[0];
|
||||||
|
if ((counts[0] % segcount[0]) != 0) num_segments[0]++;
|
||||||
|
num_segments[1] = counts[1]/segcount[1];
|
||||||
|
if ((counts[1] % segcount[1]) != 0) num_segments[1]++;
|
||||||
|
} else {
|
||||||
|
segcount[0] = counts[0];
|
||||||
|
segcount[1] = counts[1];
|
||||||
|
num_segments[0] = num_segments[1] = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* if the message is too small to be split into segments */
|
||||||
|
if( (counts[0] == 0 || counts[1] == 0) ||
|
||||||
|
(segsize > counts[0] * type_size) ||
|
||||||
|
(segsize > counts[1] * type_size) ) {
|
||||||
|
/* call linear version here ! */
|
||||||
|
return (mca_coll_tuned_bcast_intra_bintree( buffer, count, datatype,
|
||||||
|
root, comm, segsize ));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Determine real segment size */
|
||||||
|
realsegsize[0] = segcount[0] * type_extent;
|
||||||
|
realsegsize[1] = segcount[1] * type_extent;
|
||||||
|
|
||||||
|
/* set the buffer pointers */
|
||||||
|
tmpbuf[0] = (char *) buffer;
|
||||||
|
tmpbuf[1] = (char *) buffer+counts[0] * type_extent;
|
||||||
|
|
||||||
|
/* Step 1:
|
||||||
|
Root splits the buffer in 2 and sends segmented message down the branches.
|
||||||
|
Left subtree of the tree receives first half od the buffer, while right
|
||||||
|
subtree receives the remaining message.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* determine if I am left (0) or right (1), (root is right) */
|
||||||
|
lr = ((rank + size - root)%size + 1)%2;
|
||||||
|
|
||||||
|
/* root code */
|
||||||
|
if( rank == root ) {
|
||||||
|
/* determine segment count */
|
||||||
|
sendcount[0] = segcount[0];
|
||||||
|
sendcount[1] = segcount[1];
|
||||||
|
/* for each segment */
|
||||||
|
for (segindex = 0; segindex < num_segments[0]; segindex++) {
|
||||||
|
/* for each child */
|
||||||
|
for( i = 0; i < tree->tree_nextsize && i < 2; i++ ) {
|
||||||
|
if (segindex >= num_segments[i]) { /* no more segments */
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
/* determine how many elements are being sent in this round */
|
||||||
|
if(segindex == (num_segments[i] - 1))
|
||||||
|
sendcount[i] = counts[i] - segindex*segcount[i];
|
||||||
|
/* send data */
|
||||||
|
MCA_PML_CALL(send(tmpbuf[i], sendcount[i], datatype,
|
||||||
|
tree->tree_next[i], MCA_COLL_BASE_TAG_BCAST,
|
||||||
|
MCA_PML_BASE_SEND_STANDARD, comm));
|
||||||
|
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
|
||||||
|
/* update tmp buffer */
|
||||||
|
tmpbuf[i] += realsegsize[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* intermediate nodes code */
|
||||||
|
else if( tree->tree_nextsize > 0 ) {
|
||||||
|
/* Intermediate nodes:
|
||||||
|
* It will receive segments only from one half od the data.
|
||||||
|
* Which one is determined by whether the node belongs to the "left" or "right"
|
||||||
|
* subtree. Topoloby building function builds binary tree such that
|
||||||
|
* odd "shifted ranks" ((rank + size - root)%size) are on the left subtree,
|
||||||
|
* and even on the right subtree.
|
||||||
|
*
|
||||||
|
* Create the pipeline. We first post the first receive, then in the loop we
|
||||||
|
* post the next receive and after that wait for the previous receive to complete
|
||||||
|
* and we disseminating the data to all children.
|
||||||
|
*/
|
||||||
|
sendcount[lr] = segcount[lr];
|
||||||
|
MCA_PML_CALL(irecv(tmpbuf[lr], sendcount[lr], datatype,
|
||||||
|
tree->tree_prev, MCA_COLL_BASE_TAG_BCAST,
|
||||||
|
comm, &base_req));
|
||||||
|
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
|
||||||
|
|
||||||
|
for( segindex = 1; segindex < num_segments[lr]; segindex++ ) {
|
||||||
|
/* determine how many elements to expect in this round */
|
||||||
|
if( segindex == (num_segments[lr] - 1))
|
||||||
|
sendcount[lr] = counts[lr] - segindex*segcount[lr];
|
||||||
|
/* post new irecv */
|
||||||
|
MCA_PML_CALL(irecv( tmpbuf[lr] + realsegsize[lr], sendcount[lr],
|
||||||
|
datatype, tree->tree_prev, MCA_COLL_BASE_TAG_BCAST,
|
||||||
|
comm, &new_req));
|
||||||
|
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
|
||||||
|
|
||||||
|
/* wait for and forward current segment */
|
||||||
|
err = ompi_request_wait_all( 1, &base_req, MPI_STATUSES_IGNORE );
|
||||||
|
for( i = 0; i < tree->tree_nextsize; i++ ) { /* send data to children (segcount[lr]) */
|
||||||
|
MCA_PML_CALL(send( tmpbuf[lr], segcount[lr], datatype,
|
||||||
|
tree->tree_next[i], MCA_COLL_BASE_TAG_BCAST,
|
||||||
|
MCA_PML_BASE_SEND_STANDARD, comm));
|
||||||
|
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
|
||||||
|
} /* end of for each child */
|
||||||
|
|
||||||
|
/* upate the base request */
|
||||||
|
base_req = new_req;
|
||||||
|
/* go to the next buffer (ie. the one corresponding to the next recv) */
|
||||||
|
tmpbuf[lr] += realsegsize[lr];
|
||||||
|
} /* end of for segindex */
|
||||||
|
|
||||||
|
/* wait for the last segment and forward current segment */
|
||||||
|
err = ompi_request_wait_all( 1, &base_req, MPI_STATUSES_IGNORE );
|
||||||
|
for( i = 0; i < tree->tree_nextsize; i++ ) { /* send data to children */
|
||||||
|
MCA_PML_CALL(send(tmpbuf[lr], sendcount[lr], datatype,
|
||||||
|
tree->tree_next[i], MCA_COLL_BASE_TAG_BCAST,
|
||||||
|
MCA_PML_BASE_SEND_STANDARD, comm));
|
||||||
|
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
|
||||||
|
} /* end of for each child */
|
||||||
|
}
|
||||||
|
|
||||||
|
/* leaf nodes */
|
||||||
|
else {
|
||||||
|
/* Create the pipeline. We first post the first receive, then in the loop we
|
||||||
|
* post the next receive and after that wait for the previous receive to complete
|
||||||
|
* and we disseminating the data to all children.
|
||||||
|
*/
|
||||||
|
sendcount[lr] = segcount[lr];
|
||||||
|
for (segindex = 0; segindex < num_segments[lr]; segindex++) {
|
||||||
|
/* determine how many elements to expect in this round */
|
||||||
|
if (segindex == (num_segments[lr] - 1)) sendcount[lr] = counts[lr] - segindex*segcount[lr];
|
||||||
|
/* receive segments */
|
||||||
|
MCA_PML_CALL(recv(tmpbuf[lr], sendcount[lr], datatype,
|
||||||
|
tree->tree_prev, MCA_COLL_BASE_TAG_BCAST,
|
||||||
|
comm, MPI_STATUS_IGNORE));
|
||||||
|
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
|
||||||
|
/* update the initial pointer to the buffer */
|
||||||
|
tmpbuf[lr] += realsegsize[lr];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* reset the buffer pointers */
|
||||||
|
tmpbuf[0] = (char *) buffer;
|
||||||
|
tmpbuf[1] = (char *) buffer+counts[0] * type_extent;
|
||||||
|
|
||||||
|
/* Step 2:
|
||||||
|
Find your immediate pair (identical node in opposite subtree) and SendRecv
|
||||||
|
data buffer with them.
|
||||||
|
The tree building function ensures that
|
||||||
|
if (we are not root)
|
||||||
|
if we are in the left subtree (lr == 0) our pair is (rank+1)%size.
|
||||||
|
if we are in the right subtree (lr == 1) our pair is (rank-1)%size
|
||||||
|
If we have even number of nodes the rank (size-1) will pair up with root.
|
||||||
|
*/
|
||||||
|
if (lr == 0) {
|
||||||
|
pair = (rank+1)%size;
|
||||||
|
} else {
|
||||||
|
pair = (rank+size-1)%size;
|
||||||
|
}
|
||||||
|
if ( (size%2) != 0 && rank != root) {
|
||||||
|
|
||||||
|
err = ompi_mpi_sendrecv( tmpbuf[lr], counts[lr], datatype,
|
||||||
|
pair, MCA_COLL_BASE_TAG_BCAST,
|
||||||
|
tmpbuf[(lr+1)%2], counts[(lr+1)%2], datatype,
|
||||||
|
pair, MCA_COLL_BASE_TAG_BCAST,
|
||||||
|
comm, MPI_STATUS_IGNORE );
|
||||||
|
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
|
||||||
|
} else if ( (size%2) == 0 ) {
|
||||||
|
/* root sends right buffer to the last node */
|
||||||
|
if( rank == root ) {
|
||||||
|
MCA_PML_CALL(send(tmpbuf[1], counts[1], datatype,
|
||||||
|
(root+size-1)%size, MCA_COLL_BASE_TAG_BCAST,
|
||||||
|
MCA_PML_BASE_SEND_STANDARD, comm));
|
||||||
|
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
|
||||||
|
|
||||||
|
}
|
||||||
|
/* last node receives right buffer from the root */
|
||||||
|
else if (rank == size - 1) {
|
||||||
|
MCA_PML_CALL(recv(tmpbuf[1], counts[1], datatype,
|
||||||
|
root, MCA_COLL_BASE_TAG_BCAST,
|
||||||
|
comm, MPI_STATUS_IGNORE));
|
||||||
|
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
|
||||||
|
}
|
||||||
|
/* everyone else exchanges buffers */
|
||||||
|
else {
|
||||||
|
err = ompi_mpi_sendrecv( tmpbuf[lr], counts[lr], datatype,
|
||||||
|
pair, MCA_COLL_BASE_TAG_BCAST,
|
||||||
|
tmpbuf[(lr+1)%2], counts[(lr+1)%2], datatype,
|
||||||
|
pair, MCA_COLL_BASE_TAG_BCAST,
|
||||||
|
comm, MPI_STATUS_IGNORE );
|
||||||
|
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return (MPI_SUCCESS);
|
||||||
|
|
||||||
|
error_hndl:
|
||||||
|
fprintf(stderr,"[%d]%s:%d: Error %d occurred\n",rank,__FILE__,line,err);
|
||||||
|
return (err);
|
||||||
|
}
|
||||||
|
|
62
ompi/mca/coll/tuned/coll_tuned_bcast_decision.c
Обычный файл
62
ompi/mca/coll/tuned/coll_tuned_bcast_decision.c
Обычный файл
@ -0,0 +1,62 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2004-2005 The Trustees of Indiana University.
|
||||||
|
* All rights reserved.
|
||||||
|
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
|
||||||
|
* All rights reserved.
|
||||||
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||||
|
* University of Stuttgart. All rights reserved.
|
||||||
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||||
|
* All rights reserved.
|
||||||
|
* $COPYRIGHT$
|
||||||
|
*
|
||||||
|
* Additional copyrights may follow
|
||||||
|
*
|
||||||
|
* $HEADER$
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "ompi_config.h"
|
||||||
|
#include "coll_tuned.h"
|
||||||
|
|
||||||
|
#include "mpi.h"
|
||||||
|
#include "include/constants.h"
|
||||||
|
#include "datatype/datatype.h"
|
||||||
|
#include "communicator/communicator.h"
|
||||||
|
#include "mca/coll/coll.h"
|
||||||
|
#include "mca/coll/base/coll_tags.h"
|
||||||
|
#include "coll_tuned.h"
|
||||||
|
#include "mca/pml/pml.h"
|
||||||
|
#include "opal/util/bit_ops.h"
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* bcast_intra_dec
|
||||||
|
*
|
||||||
|
* Function: - seletects broadcast algorithm to use
|
||||||
|
* Accepts: - same arguments as MPI_Bcast()
|
||||||
|
* Returns: - MPI_SUCCESS or error code (passed from the bcast implementation)
|
||||||
|
*/
|
||||||
|
int mca_coll_tuned_bcast_intra_dec(void *buff, int count,
|
||||||
|
struct ompi_datatype_t *datatype, int root,
|
||||||
|
struct ompi_communicator_t *comm)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
int size;
|
||||||
|
int rank;
|
||||||
|
int err;
|
||||||
|
int contig;
|
||||||
|
int dsize;
|
||||||
|
|
||||||
|
printf("mca_coll_tuned_bcast_intra_dec\n");
|
||||||
|
|
||||||
|
size = ompi_comm_size(comm);
|
||||||
|
rank = ompi_comm_rank(comm);
|
||||||
|
|
||||||
|
/* err = mca_coll_tuned_bcast_intra_linear (buff, count, datatype, root, comm); */
|
||||||
|
/* err = mca_coll_tuned_bcast_intra_pipeline (buff, count, datatype, root, comm, (8192)); */
|
||||||
|
/* err = mca_coll_tuned_bcast_intra_bmtree (buff, count, datatype, root, comm, (8192)); */
|
||||||
|
err = mca_coll_tuned_bcast_intra_bintree (buff, count, datatype, root, comm, (8192));
|
||||||
|
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
125
ompi/mca/coll/tuned/coll_tuned_component.c
Обычный файл
125
ompi/mca/coll/tuned/coll_tuned_component.c
Обычный файл
@ -0,0 +1,125 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2004-2005 The Trustees of Indiana University.
|
||||||
|
* All rights reserved.
|
||||||
|
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
|
||||||
|
* All rights reserved.
|
||||||
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||||
|
* University of Stuttgart. All rights reserved.
|
||||||
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||||
|
* All rights reserved.
|
||||||
|
* $COPYRIGHT$
|
||||||
|
*
|
||||||
|
* Additional copyrights may follow
|
||||||
|
*
|
||||||
|
* $HEADER$
|
||||||
|
*
|
||||||
|
* These symbols are in a file by themselves to provide nice linker
|
||||||
|
* semantics. Since linkers generally pull in symbols by object
|
||||||
|
* files, keeping these symbols as the only symbols in this file
|
||||||
|
* prevents utility programs such as "ompi_info" from having to import
|
||||||
|
* entire components just to query their version and parameters.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "ompi_config.h"
|
||||||
|
#include "coll_tuned.h"
|
||||||
|
|
||||||
|
#include "mpi.h"
|
||||||
|
#include "mca/coll/coll.h"
|
||||||
|
#include "coll_tuned.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Public string showing the coll ompi_tuned component version number
|
||||||
|
*/
|
||||||
|
const char *mca_coll_tuned_component_version_string =
|
||||||
|
"Open MPI tuned collective MCA component version " OMPI_VERSION;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Global variable
|
||||||
|
*/
|
||||||
|
int mca_coll_tuned_priority_param = -1;
|
||||||
|
int mca_coll_tuned_init_tree_fanout_param = -1;
|
||||||
|
int mca_coll_tuned_init_chain_fanout_param = -1;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Local function
|
||||||
|
*/
|
||||||
|
static int tuned_open(void);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Instantiate the public struct with all of our public information
|
||||||
|
* and pointers to our public functions in it
|
||||||
|
*/
|
||||||
|
|
||||||
|
const mca_coll_base_component_1_0_0_t mca_coll_tuned_component = {
|
||||||
|
|
||||||
|
/* First, the mca_component_t struct containing meta information
|
||||||
|
about the component itself */
|
||||||
|
|
||||||
|
{
|
||||||
|
/* Indicate that we are a coll v1.0.0 component (which also implies a
|
||||||
|
specific MCA version) */
|
||||||
|
|
||||||
|
MCA_COLL_BASE_VERSION_1_0_0,
|
||||||
|
|
||||||
|
/* Component name and version */
|
||||||
|
|
||||||
|
"tuned",
|
||||||
|
OMPI_MAJOR_VERSION,
|
||||||
|
OMPI_MINOR_VERSION,
|
||||||
|
OMPI_RELEASE_VERSION,
|
||||||
|
|
||||||
|
/* Component open and close functions */
|
||||||
|
|
||||||
|
tuned_open,
|
||||||
|
NULL
|
||||||
|
},
|
||||||
|
|
||||||
|
/* Next the MCA v1.0.0 component meta data */
|
||||||
|
|
||||||
|
{
|
||||||
|
/* Whether the component is checkpointable or not */
|
||||||
|
|
||||||
|
true
|
||||||
|
},
|
||||||
|
|
||||||
|
/* Initialization / querying functions */
|
||||||
|
|
||||||
|
mca_coll_tuned_init_query,
|
||||||
|
mca_coll_tuned_comm_query,
|
||||||
|
NULL
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
static int tuned_open(void)
|
||||||
|
{
|
||||||
|
/* mca_coll_tuned_component_t *ct = &mca_coll_tuned_component; */
|
||||||
|
|
||||||
|
/* Use a low priority, but allow other components to be lower */
|
||||||
|
|
||||||
|
mca_coll_tuned_priority_param =
|
||||||
|
mca_base_param_register_int("coll", "tuned", "priority", NULL, 30);
|
||||||
|
|
||||||
|
mca_coll_tuned_init_tree_fanout_param =
|
||||||
|
mca_base_param_register_int("coll", "tuned", "init_tree_fanout", NULL, 2);
|
||||||
|
|
||||||
|
mca_coll_tuned_init_chain_fanout_param =
|
||||||
|
mca_base_param_register_int("coll", "tuned", "init_chain_fanout", NULL, 4);
|
||||||
|
|
||||||
|
printf("mca_coll_tuned_init_tree_fanout_param %d\nmca_coll_tuned_init_chain_fanout_param %d\n", mca_coll_tuned_init_tree_fanout_param,
|
||||||
|
mca_coll_tuned_init_chain_fanout_param);
|
||||||
|
|
||||||
|
/* use the newer interface rsn */
|
||||||
|
/* mca_coll_tuned_priority_param = mca_base_param_reg_int(&(ct->super), "priority", "Priority of the tuned coll component", */
|
||||||
|
/* false, false, 30, NULL); */
|
||||||
|
|
||||||
|
/* mca_base_param_reg_int(&(ct->super), "init_tree_fanout", "Fan out used for [balanced] tree topologies in the tuned coll component", */
|
||||||
|
/* false, false, 2, NULL); */
|
||||||
|
|
||||||
|
/* mca_base_param_reg_int(&(ct->super), "init_chain_fanout", */
|
||||||
|
/* "Fan out used for chain [1 fanout followed by pipelines] topology in the tuned coll component", */
|
||||||
|
/* false, false, 2, NULL); */
|
||||||
|
|
||||||
|
|
||||||
|
return OMPI_SUCCESS;
|
||||||
|
}
|
||||||
|
|
306
ompi/mca/coll/tuned/coll_tuned_dynamic_rules.c
Обычный файл
306
ompi/mca/coll/tuned/coll_tuned_dynamic_rules.c
Обычный файл
@ -0,0 +1,306 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2004-2005 The Trustees of Indiana University.
|
||||||
|
* All rights reserved.
|
||||||
|
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
|
||||||
|
* All rights reserved.
|
||||||
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||||
|
* University of Stuttgart. All rights reserved.
|
||||||
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||||
|
* All rights reserved.
|
||||||
|
* $COPYRIGHT$
|
||||||
|
*
|
||||||
|
* Additional copyrights may follow
|
||||||
|
*
|
||||||
|
* $HEADER$
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "ompi_config.h"
|
||||||
|
#include "coll_tuned.h"
|
||||||
|
|
||||||
|
#include "mpi.h"
|
||||||
|
#include "ompi/include/constants.h"
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <math.h>
|
||||||
|
|
||||||
|
#include "coll_tuned_dynamic_rules.h"
|
||||||
|
|
||||||
|
int static rule_count = 0;
|
||||||
|
int static cond_count = 0;
|
||||||
|
|
||||||
|
/* makes a rule */
|
||||||
|
/* this is hanging in space until linked to either other rules or a */
|
||||||
|
/* collective decision function */
|
||||||
|
rule_t* mk_rule ()
|
||||||
|
{
|
||||||
|
rule_t* ptr;
|
||||||
|
|
||||||
|
ptr = (rule_t*) calloc (1, sizeof(rule_t));
|
||||||
|
if (!ptr) {
|
||||||
|
fprintf(stderr,"calloc on mk_rule failed!\n");
|
||||||
|
exit (-1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* set values in the hanging rule */
|
||||||
|
ptr->rule_id = rule_count++;
|
||||||
|
|
||||||
|
return (ptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* adds a condition to a rule */
|
||||||
|
int mk_and_add_condition_to_rule (rule_t* rule, param_index_t param,
|
||||||
|
condition_op_t op, int target)
|
||||||
|
{
|
||||||
|
condition_t* ptr;
|
||||||
|
condition_t* last;
|
||||||
|
|
||||||
|
if (!rule) {
|
||||||
|
fprintf(stderr,"rule given in add_condition_to_rule is NULL?!\n");
|
||||||
|
return (-2);
|
||||||
|
}
|
||||||
|
if (param>=PARAMS) {
|
||||||
|
fprintf(stderr,"param given in add_condition_to_rule is %d?!\n", param);
|
||||||
|
return (-3);
|
||||||
|
}
|
||||||
|
|
||||||
|
ptr = (condition_t*) calloc (1, sizeof(condition_t));
|
||||||
|
if (!ptr) {
|
||||||
|
fprintf(stderr,"calloc on add_condition_to_rule failed!\n");
|
||||||
|
return (-5);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* set values in the condition */
|
||||||
|
ptr->cond_id = cond_count++;
|
||||||
|
ptr->param = param;
|
||||||
|
ptr->op = op;
|
||||||
|
ptr->value = target;
|
||||||
|
|
||||||
|
/* set values in the rule */
|
||||||
|
if (rule->nconditions) { /* if we already have conditions add to the last one */
|
||||||
|
last = rule->last_condition;
|
||||||
|
last->next = ptr;
|
||||||
|
}
|
||||||
|
else { /* its the very first / head condition */
|
||||||
|
rule->first_condition = ptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* common to both all cases */
|
||||||
|
rule->nconditions++;
|
||||||
|
rule->last_condition = ptr;
|
||||||
|
|
||||||
|
return (0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* attaches a rules/collective functions TO a rule (not otherway round) */
|
||||||
|
int set_rule_links (rule_t * rule, ifp true_fptr, int* true_extraargs,
|
||||||
|
ifp false_fptr, int* false_extraargs,
|
||||||
|
rule_t* true_rule, rule_t* false_rule)
|
||||||
|
{
|
||||||
|
|
||||||
|
if (!rule) {
|
||||||
|
fprintf(stderr,"rule given in set_rule_links is NULL?!\n");
|
||||||
|
return (-2);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* check rule results.. we must have one set for true and one for false */
|
||||||
|
if ((true_fptr)&&(true_rule)) {
|
||||||
|
fprintf(stderr,"BAD. Two links for TRUE on rule %d!\n", rule->rule_id);
|
||||||
|
return (-6);
|
||||||
|
}
|
||||||
|
if ((false_fptr)&&(false_rule)) {
|
||||||
|
fprintf(stderr,"BAD. Two links for FALSE on rule %d!\n", rule->rule_id);
|
||||||
|
return (-7);
|
||||||
|
}
|
||||||
|
if ((!true_fptr)&&(!true_rule)) {
|
||||||
|
fprintf(stderr,"BAD. NO links for TRUE on rule %d!\n", rule->rule_id);
|
||||||
|
return (-8);
|
||||||
|
}
|
||||||
|
if ((!false_fptr)&&(!false_rule)) {
|
||||||
|
fprintf(stderr,"BAD. NO links for FALSE on rule %d!\n", rule->rule_id);
|
||||||
|
return (-9);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* can set the links now */
|
||||||
|
rule->true_fptr = true_fptr;
|
||||||
|
rule->true_extraargs = true_extraargs;
|
||||||
|
rule->next_true_rule = (struct rules_s *) true_rule;
|
||||||
|
rule->false_fptr = false_fptr;
|
||||||
|
rule->false_extraargs = false_extraargs;
|
||||||
|
rule->next_false_rule = (struct rules_s *) false_rule;
|
||||||
|
|
||||||
|
return (0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* free rule (and all attached conditions */
|
||||||
|
/* oneday multiple rules might be able to share conditions to save memory */
|
||||||
|
int free_rule (rule_t *rule)
|
||||||
|
{
|
||||||
|
condition_t* ptr;
|
||||||
|
condition_t* next;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
if (!rule) {
|
||||||
|
fprintf(stderr,"rule given in free_rule is NULL?!\n");
|
||||||
|
return (-2);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* free conditions first */
|
||||||
|
if (rule->nconditions) {
|
||||||
|
ptr = rule->first_condition;
|
||||||
|
for (i=0;i<rule->nconditions;i++) {
|
||||||
|
next = ptr->next;
|
||||||
|
free (ptr);
|
||||||
|
ptr = next;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* all conditions freed, free the rule */
|
||||||
|
free (rule);
|
||||||
|
|
||||||
|
return (0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* evaluates a rule and returns the final function pointer that matches */
|
||||||
|
int eval_rule (rule_t *rule, params_t *params, ifp* fptr, int** extraargs)
|
||||||
|
{
|
||||||
|
rule_t* currentrule;
|
||||||
|
condition_t* currentcond;
|
||||||
|
int true=1;
|
||||||
|
|
||||||
|
if (!rule) {
|
||||||
|
fprintf(stderr,"rule given in eval_rule is NULL?!\n");
|
||||||
|
return (-2);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* first special case is a very fast path... sorta not really grr */
|
||||||
|
if (!rule->nconditions) {
|
||||||
|
#ifdef VERBOSE
|
||||||
|
printf("Rule %d has no conditions so forcing first available\n",
|
||||||
|
rule->rule_id);
|
||||||
|
#endif /* VERBOSE */
|
||||||
|
*fptr = rule->true_fptr;
|
||||||
|
return (0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ok we have some conditions so start the evaluation */
|
||||||
|
/* make it as lazy as possible, so a single false condition bumps us */
|
||||||
|
|
||||||
|
currentrule = rule;
|
||||||
|
|
||||||
|
while (currentrule) { /* rules to evaluate */
|
||||||
|
#ifdef RULEVERBOSE
|
||||||
|
printf("Eval Rule %d ", currentrule->rule_id);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* eval each of the current rules conditions */
|
||||||
|
/* we do this until we have either: */
|
||||||
|
/* (a) completed all conditions and are still true */
|
||||||
|
/* (b) found a single false */
|
||||||
|
/* once we have either a true true or false we can then check */
|
||||||
|
/* the next values and see if we are returning a function */
|
||||||
|
/* OR skipping to the next rule and hense iterate */
|
||||||
|
|
||||||
|
true = 1; /* we are ok so far */
|
||||||
|
|
||||||
|
/* first get the first condition to eval */
|
||||||
|
currentcond = currentrule->first_condition;
|
||||||
|
|
||||||
|
while ((currentcond)&&(true)) { /* while conditions to eval */
|
||||||
|
#ifdef RULEVERBOSE
|
||||||
|
printf("Eval Cond %d ", currentcond->cond_id);
|
||||||
|
#endif
|
||||||
|
switch (currentcond->op) {
|
||||||
|
case LT: if (params->values[currentcond->param] < currentcond->value) {true = 1;}
|
||||||
|
else {true = 0;}
|
||||||
|
break;
|
||||||
|
case LTEQ: if (params->values[currentcond->param] <= currentcond->value) {true = 1;}
|
||||||
|
else {true = 0;}
|
||||||
|
break;
|
||||||
|
case GT: if (params->values[currentcond->param] > currentcond->value) {true = 1;}
|
||||||
|
else {true = 0;}
|
||||||
|
break;
|
||||||
|
case GTEQ: if (params->values[currentcond->param] >= currentcond->value) {true = 1;}
|
||||||
|
else {true = 0;}
|
||||||
|
break;
|
||||||
|
case EQ: if (params->values[currentcond->param] == currentcond->value) {true = 1;}
|
||||||
|
else {true = 0;}
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
fprintf(stderr, "Eval: BAD operator of value %d rule %d cond %d\n",
|
||||||
|
currentcond->op, currentrule->rule_id,
|
||||||
|
currentcond->cond_id);
|
||||||
|
true = 0;
|
||||||
|
return (-1); /* ?! what else can I do, should have caught before */
|
||||||
|
} /* switch on condition operator */
|
||||||
|
|
||||||
|
/* if we are still true we go to the next condition if there is one */
|
||||||
|
/* if there is not another then we are truely true */
|
||||||
|
/* else if we are false, immediately fall out */
|
||||||
|
|
||||||
|
if (!true) {
|
||||||
|
#ifdef RULEVERBOSE
|
||||||
|
printf("Eval Cond %d returned FALSE\n", currentcond->cond_id);
|
||||||
|
#endif
|
||||||
|
break; /* if false drop out asap */
|
||||||
|
}
|
||||||
|
if ((true)&&(currentcond->next)) { /* next condition to check */
|
||||||
|
#ifdef RULEVERBOSE
|
||||||
|
printf("Eval Cond %d returned TRUE. Moving to next\n", currentcond->cond_id);
|
||||||
|
#endif
|
||||||
|
currentcond = currentcond->next;
|
||||||
|
}
|
||||||
|
else { /* we are true with no more conditions to check */
|
||||||
|
#ifdef RULEVERBOSE
|
||||||
|
printf("Eval Cond %d (LAST) returned TRUE.\n", currentcond->cond_id);
|
||||||
|
#endif
|
||||||
|
break; /* so return so we can find out what to do next */
|
||||||
|
}
|
||||||
|
|
||||||
|
} /* while conditions to eval or a false */
|
||||||
|
|
||||||
|
/* condition is either fully met or lazy false */
|
||||||
|
/* we do these IFs in the fasted/most important order */
|
||||||
|
if ((true)&&(currentrule->true_fptr)) {
|
||||||
|
#ifdef RULEVERBOSE
|
||||||
|
printf("Eval Rule %d is TRUE returning fptr\n", currentrule->rule_id);
|
||||||
|
#endif
|
||||||
|
*fptr = currentrule->true_fptr;
|
||||||
|
*extraargs = currentrule->true_extraargs;
|
||||||
|
return (0);
|
||||||
|
}
|
||||||
|
if ((!true)&&(currentrule->false_fptr)) {
|
||||||
|
#ifdef RULEVERBOSE
|
||||||
|
printf("Eval Rule %d is FALSE returning fptr\n", currentrule->rule_id);
|
||||||
|
#endif
|
||||||
|
*fptr = currentrule->false_fptr;
|
||||||
|
*extraargs = currentrule->false_extraargs;
|
||||||
|
return (0);
|
||||||
|
}
|
||||||
|
if (true) {
|
||||||
|
#ifdef RULEVERBOSE
|
||||||
|
printf("Eval Rule %d is TRUE jumping to next rule %d\n",
|
||||||
|
currentrule->rule_id,
|
||||||
|
((rule_t*)(currentrule->next_true_rule))->rule_id);
|
||||||
|
#endif
|
||||||
|
currentrule = (rule_t *) currentrule->next_true_rule;
|
||||||
|
}
|
||||||
|
else { /* i.e. not true / lazy eval */
|
||||||
|
#ifdef RULEVERBOSE
|
||||||
|
printf("Eval Rule %d is FALSE jumping to next rule %d\n",
|
||||||
|
currentrule->rule_id,
|
||||||
|
((rule_t*)(currentrule->next_false_rule))->rule_id);
|
||||||
|
#endif
|
||||||
|
currentrule = (rule_t *) currentrule->next_false_rule;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!currentrule) {
|
||||||
|
fprintf(stderr, "eval: disaster, we have gone off into the weeds.. panic!\n");
|
||||||
|
exit (-10);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
119
ompi/mca/coll/tuned/coll_tuned_dynamic_rules.h
Обычный файл
119
ompi/mca/coll/tuned/coll_tuned_dynamic_rules.h
Обычный файл
@ -0,0 +1,119 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2004-2005 The Trustees of Indiana University.
|
||||||
|
* All rights reserved.
|
||||||
|
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
|
||||||
|
* All rights reserved.
|
||||||
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||||
|
* University of Stuttgart. All rights reserved.
|
||||||
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||||
|
* All rights reserved.
|
||||||
|
* $COPYRIGHT$
|
||||||
|
*
|
||||||
|
* Additional copyrights may follow
|
||||||
|
*
|
||||||
|
* $HEADER$
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef MCA_COLL_TUNED_DYNAMIC_RULES_EXPORT_H
|
||||||
|
#define MCA_COLL_TUNED_DYNAMIC_RULES_EXPORT_H
|
||||||
|
|
||||||
|
#include "ompi_config.h"
|
||||||
|
#include "coll_tuned.h"
|
||||||
|
|
||||||
|
#include "mpi.h"
|
||||||
|
#include "ompi/include/constants.h"
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <math.h>
|
||||||
|
|
||||||
|
|
||||||
|
#if defined(c_plusplus) || defined(__cplusplus)
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* type is prob not needed - GEF */
|
||||||
|
typedef enum {
|
||||||
|
RULE,
|
||||||
|
FPTR
|
||||||
|
} type_t;
|
||||||
|
|
||||||
|
|
||||||
|
typedef enum {
|
||||||
|
LT,
|
||||||
|
LTEQ,
|
||||||
|
GT,
|
||||||
|
GTEQ,
|
||||||
|
EQ
|
||||||
|
} condition_op_t;
|
||||||
|
|
||||||
|
|
||||||
|
typedef enum {
|
||||||
|
COMMSIZE,
|
||||||
|
COMMPOWER2,
|
||||||
|
DATASIZE,
|
||||||
|
DATAPOWER2,
|
||||||
|
DATACONTG,
|
||||||
|
NONZEROROOT
|
||||||
|
} param_index_t;
|
||||||
|
|
||||||
|
#define PARAMS 6
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
int values[PARAMS];
|
||||||
|
} params_t;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
typedef struct condition_s {
|
||||||
|
int cond_id;
|
||||||
|
param_index_t param;
|
||||||
|
condition_op_t op;
|
||||||
|
int value;
|
||||||
|
struct condition_s *next;
|
||||||
|
} condition_t;
|
||||||
|
|
||||||
|
|
||||||
|
typedef int (*ifp)();
|
||||||
|
|
||||||
|
|
||||||
|
typedef struct rule_s {
|
||||||
|
int rule_id;
|
||||||
|
/* type_t type; */
|
||||||
|
/* not sure if we need different types of RULEs yet? */
|
||||||
|
/* maybe a faster single condition rule to avoid loops and more ifs */
|
||||||
|
/* current eval thinks that a nconditions value of 0 = true fptr */
|
||||||
|
int nconditions;
|
||||||
|
/* we have a ptr to first and last just to speed up eval and add_to */
|
||||||
|
condition_t *first_condition;
|
||||||
|
condition_t *last_condition;
|
||||||
|
ifp true_fptr;
|
||||||
|
int* true_extraargs;
|
||||||
|
ifp false_fptr;
|
||||||
|
int* false_extraargs;
|
||||||
|
struct rules_s* next_true_rule;
|
||||||
|
struct rules_s* next_false_rule;
|
||||||
|
} rule_t;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
rule_t* mk_rule ();
|
||||||
|
|
||||||
|
int mk_and_add_condition_to_rule (rule_t* rule, param_index_t param,
|
||||||
|
condition_op_t op, int target);
|
||||||
|
|
||||||
|
int set_rule_links (rule_t * rule, ifp true_fptr, int* true_extraargs,
|
||||||
|
ifp false_fptr, int* false_extraargs,
|
||||||
|
rule_t* true_rule, rule_t* false_rule);
|
||||||
|
|
||||||
|
int free_rule (rule_t *rule);
|
||||||
|
|
||||||
|
int eval_rule (rule_t* rule, params_t* params, ifp* fptr, int** extraargs);
|
||||||
|
|
||||||
|
#if defined(c_plusplus) || defined(__cplusplus)
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#endif /* MCA_COLL_TUNED_DYNAMIC_RULES_EXPORT_H */
|
276
ompi/mca/coll/tuned/coll_tuned_module.c
Обычный файл
276
ompi/mca/coll/tuned/coll_tuned_module.c
Обычный файл
@ -0,0 +1,276 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2004-2005 The Trustees of Indiana University.
|
||||||
|
* All rights reserved.
|
||||||
|
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
|
||||||
|
* All rights reserved.
|
||||||
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||||
|
* University of Stuttgart. All rights reserved.
|
||||||
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||||
|
* All rights reserved.
|
||||||
|
* $COPYRIGHT$
|
||||||
|
*
|
||||||
|
* Additional copyrights may follow
|
||||||
|
*
|
||||||
|
* $HEADER$
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "ompi_config.h"
|
||||||
|
#include "coll_tuned.h"
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
#include "mpi.h"
|
||||||
|
#include "communicator/communicator.h"
|
||||||
|
#include "mca/base/mca_base_param.h"
|
||||||
|
#include "mca/coll/coll.h"
|
||||||
|
#include "mca/coll/base/base.h"
|
||||||
|
#include "coll_tuned.h"
|
||||||
|
#include "coll_tuned_topo.h"
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Which set are we using?
|
||||||
|
*/
|
||||||
|
static const mca_coll_base_module_1_0_0_t *to_use = NULL;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Intra communicator decision functions
|
||||||
|
*/
|
||||||
|
static const mca_coll_base_module_1_0_0_t intra = {
|
||||||
|
|
||||||
|
/* Initialization / finalization functions */
|
||||||
|
|
||||||
|
mca_coll_tuned_module_init,
|
||||||
|
mca_coll_tuned_module_finalize,
|
||||||
|
|
||||||
|
/* Collective function pointers */
|
||||||
|
|
||||||
|
/* mca_coll_tuned_allgather_intra_dec, */
|
||||||
|
NULL,
|
||||||
|
/* mca_coll_tuned_allgatherv_intra_dec, */
|
||||||
|
NULL,
|
||||||
|
/* mca_coll_tuned_allreduce_intra_dec, */
|
||||||
|
NULL,
|
||||||
|
/* mca_coll_tuned_alltoall_intra_dec, */
|
||||||
|
NULL,
|
||||||
|
/* mca_coll_tuned_alltoallv_intra_dec, */
|
||||||
|
NULL,
|
||||||
|
/* mca_coll_tuned_alltoallw_intra_dec, */
|
||||||
|
NULL,
|
||||||
|
/* mca_coll_tuned_barrier_intra_dec, */
|
||||||
|
NULL,
|
||||||
|
mca_coll_tuned_bcast_intra_dec,
|
||||||
|
/* NULL, */
|
||||||
|
/* mca_coll_tuned_exscan_intra_dec, */
|
||||||
|
NULL,
|
||||||
|
/* mca_coll_tuned_gather_intra_dec, */
|
||||||
|
NULL,
|
||||||
|
/* mca_coll_tuned_gatherv_intra_dec, */
|
||||||
|
NULL,
|
||||||
|
/* mca_coll_tuned_reduce_intra_dec, */
|
||||||
|
NULL,
|
||||||
|
/* mca_coll_tuned_reduce_scatter_intra_dec, */
|
||||||
|
NULL,
|
||||||
|
/* mca_coll_tuned_scan_intra_dec, */
|
||||||
|
NULL,
|
||||||
|
/* mca_coll_tuned_scatter_intra_dec, */
|
||||||
|
NULL,
|
||||||
|
/* mca_coll_tuned_scatterv_intra_dec */
|
||||||
|
NULL
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* collective decision functions for intercommunicators
|
||||||
|
*/
|
||||||
|
static const mca_coll_base_module_1_0_0_t inter = {
|
||||||
|
|
||||||
|
/* Initialization / finalization functions */
|
||||||
|
|
||||||
|
mca_coll_tuned_module_init,
|
||||||
|
mca_coll_tuned_module_finalize,
|
||||||
|
|
||||||
|
/* Collective function pointers */
|
||||||
|
|
||||||
|
/* mca_coll_tuned_allgather_inter_dec, */
|
||||||
|
NULL,
|
||||||
|
/* mca_coll_tuned_allgatherv_inter_dec, */
|
||||||
|
NULL,
|
||||||
|
/* mca_coll_tuned_allreduce_inter_dec, */
|
||||||
|
NULL,
|
||||||
|
/* mca_coll_tuned_alltoall_inter_dec, */
|
||||||
|
NULL,
|
||||||
|
/* mca_coll_tuned_alltoallv_inter_dec, */
|
||||||
|
NULL,
|
||||||
|
/* mca_coll_tuned_alltoallw_inter_dec, */
|
||||||
|
NULL,
|
||||||
|
/* mca_coll_tuned_barrier_inter_dec, */
|
||||||
|
NULL,
|
||||||
|
/* mca_coll_tuned_bcast_inter_dec, */
|
||||||
|
NULL,
|
||||||
|
/* mca_coll_tuned_exscan_inter_dec, */
|
||||||
|
NULL,
|
||||||
|
/* mca_coll_tuned_gather_inter_dec, */
|
||||||
|
NULL,
|
||||||
|
/* mca_coll_tuned_gatherv_inter_dec, */
|
||||||
|
NULL,
|
||||||
|
/* mca_coll_tuned_reduce_inter_dec, */
|
||||||
|
NULL,
|
||||||
|
/* mca_coll_tuned_reduce_scatter_inter_dec, */
|
||||||
|
NULL,
|
||||||
|
/* mca_coll_tuned_scan_inter_dec, */
|
||||||
|
NULL,
|
||||||
|
/* mca_coll_tuned_scatter_inter_dec, */
|
||||||
|
NULL,
|
||||||
|
/* mca_coll_tuned_scatterv_inter_dec */
|
||||||
|
NULL
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Note I keep the names here as place markers until I have implemented the functions */
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Initial query function that is invoked during MPI_INIT, allowing
|
||||||
|
* this component to disqualify itself if it doesn't support the
|
||||||
|
* required level of thread support.
|
||||||
|
*/
|
||||||
|
int mca_coll_tuned_init_query(bool enable_progress_threads,
|
||||||
|
bool enable_mpi_threads)
|
||||||
|
{
|
||||||
|
/* Nothing to do */
|
||||||
|
|
||||||
|
return OMPI_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Invoked when there's a new communicator that has been created.
|
||||||
|
* Look at the communicator and decide which set of functions and
|
||||||
|
* priority we want to return.
|
||||||
|
*/
|
||||||
|
const mca_coll_base_module_1_0_0_t *
|
||||||
|
mca_coll_tuned_comm_query(struct ompi_communicator_t *comm, int *priority,
|
||||||
|
struct mca_coll_base_comm_t **data)
|
||||||
|
{
|
||||||
|
|
||||||
|
printf("Tuned query called\n");
|
||||||
|
if (OMPI_SUCCESS != mca_base_param_lookup_int(mca_coll_tuned_priority_param,
|
||||||
|
priority)) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Choose whether to use [intra|inter] decision functions */
|
||||||
|
|
||||||
|
if (OMPI_COMM_IS_INTER(comm)) {
|
||||||
|
to_use = &inter;
|
||||||
|
} else {
|
||||||
|
to_use = &intra;
|
||||||
|
}
|
||||||
|
|
||||||
|
return to_use;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Init module on the communicator
|
||||||
|
*/
|
||||||
|
const struct mca_coll_base_module_1_0_0_t *
|
||||||
|
mca_coll_tuned_module_init(struct ompi_communicator_t *comm)
|
||||||
|
{
|
||||||
|
int size;
|
||||||
|
struct mca_coll_base_comm_t *data;
|
||||||
|
/* fanout parameters */
|
||||||
|
int tree_fanout_default = 2;
|
||||||
|
int chain_fanout_default = 4;
|
||||||
|
|
||||||
|
|
||||||
|
printf("Tuned init module called.\n");
|
||||||
|
|
||||||
|
/* This routine will become more complex and might have to be broken into sections */
|
||||||
|
|
||||||
|
/* Order of operations:
|
||||||
|
* alloc memory for nb reqs (in case we fall through)
|
||||||
|
* add decision rules if using dynamic rules
|
||||||
|
* compact rules using communicator size info etc
|
||||||
|
* build first guess cached topologies (might depend on the rules from above)
|
||||||
|
*
|
||||||
|
* then attach all to the communicator and return base module funct ptrs
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* Allocate the data that hangs off the communicator */
|
||||||
|
|
||||||
|
if (OMPI_COMM_IS_INTER(comm)) {
|
||||||
|
size = ompi_comm_remote_size(comm);
|
||||||
|
} else {
|
||||||
|
size = ompi_comm_size(comm);
|
||||||
|
}
|
||||||
|
data = malloc(sizeof(struct mca_coll_base_comm_t) +
|
||||||
|
(sizeof(ompi_request_t *) * size * 2));
|
||||||
|
|
||||||
|
if (NULL == data) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
data->mccb_reqs = (ompi_request_t **) (data + 1);
|
||||||
|
data->mccb_num_reqs = size * 2;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* now for the cached topo functions
|
||||||
|
* guess the initial topologies to use rank 0 as root
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* get default fanouts is made available via the MCA */
|
||||||
|
tree_fanout_default = 2; /* make it binary for now */
|
||||||
|
chain_fanout_default = 4;
|
||||||
|
|
||||||
|
|
||||||
|
data->cached_tree = ompi_coll_tuned_topo_build_tree (tree_fanout_default, comm, 0);
|
||||||
|
data->cached_tree_root = 0;
|
||||||
|
data->cached_tree_fanout = tree_fanout_default;
|
||||||
|
|
||||||
|
data->cached_bmtree = ompi_coll_tuned_topo_build_bmtree (comm, 0);
|
||||||
|
data->cached_tree_root = 0;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* chains (fanout followed by pipelines)
|
||||||
|
* are more difficuilt as the fan out really really depends on message size [sometimes]..
|
||||||
|
* as size gets larger fan-out gets smaller [usually]
|
||||||
|
*
|
||||||
|
* will probably change how we cache this later, for now a midsize
|
||||||
|
* GEF
|
||||||
|
*/
|
||||||
|
data->cached_chain = ompi_coll_tuned_topo_build_chain (chain_fanout_default, comm, 0);
|
||||||
|
data->cached_chain_root = 0;
|
||||||
|
data->cached_chain_fanout = chain_fanout_default;
|
||||||
|
|
||||||
|
/* All done */
|
||||||
|
|
||||||
|
comm->c_coll_selected_data = data;
|
||||||
|
|
||||||
|
printf("Tuned looks like it is in use :)\n");
|
||||||
|
return to_use;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Finalize module on the communicator
|
||||||
|
*/
|
||||||
|
int mca_coll_tuned_module_finalize(struct ompi_communicator_t *comm)
|
||||||
|
{
|
||||||
|
if (NULL == comm->c_coll_selected_module) {
|
||||||
|
return OMPI_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
#if OMPI_ENABLE_DEBUG
|
||||||
|
/* Reset the reqs to NULL/0 -- they'll be freed as part of freeing
|
||||||
|
the generel c_coll_selected_data */
|
||||||
|
|
||||||
|
comm->c_coll_selected_data->mccb_reqs = NULL;
|
||||||
|
comm->c_coll_selected_data->mccb_num_reqs = 0;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* All done */
|
||||||
|
|
||||||
|
free(comm->c_coll_selected_data);
|
||||||
|
comm->c_coll_selected_data = NULL;
|
||||||
|
return OMPI_SUCCESS;
|
||||||
|
}
|
378
ompi/mca/coll/tuned/coll_tuned_topo.c
Обычный файл
378
ompi/mca/coll/tuned/coll_tuned_topo.c
Обычный файл
@ -0,0 +1,378 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2004-2005 The Trustees of Indiana University.
|
||||||
|
* All rights reserved.
|
||||||
|
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
|
||||||
|
* All rights reserved.
|
||||||
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||||
|
* University of Stuttgart. All rights reserved.
|
||||||
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||||
|
* All rights reserved.
|
||||||
|
* $COPYRIGHT$
|
||||||
|
*
|
||||||
|
* Additional copyrights may follow
|
||||||
|
*
|
||||||
|
* $HEADER$
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "ompi_config.h"
|
||||||
|
|
||||||
|
#include "mpi.h"
|
||||||
|
#include "ompi/include/constants.h"
|
||||||
|
#include "datatype/datatype.h"
|
||||||
|
#include "communicator/communicator.h"
|
||||||
|
#include "mca/coll/coll.h"
|
||||||
|
#include "mca/coll/base/coll_tags.h"
|
||||||
|
#include "mca/pml/pml.h"
|
||||||
|
#include "coll_tuned_topo.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Some static helpers.
|
||||||
|
*/
|
||||||
|
static int pown( int fanout, int num )
|
||||||
|
{
|
||||||
|
int j, p = 1;
|
||||||
|
if( num < 0 ) return 0;
|
||||||
|
for( j = 0; j < num; j++ ) p*= fanout;
|
||||||
|
return p;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int calculate_level( int fanout, int rank )
|
||||||
|
{
|
||||||
|
int level, num;
|
||||||
|
if( rank < 0 ) return -1;
|
||||||
|
for( level = 0, num = 0; num <= rank; level++ ) {
|
||||||
|
num += pown(fanout, level);
|
||||||
|
}
|
||||||
|
return level-1;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int calculate_num_nodes_up_to_level( int fanout, int level )
|
||||||
|
{
|
||||||
|
/* just use geometric progression formula for sum:
|
||||||
|
a^0+a^1+...a^(n-1) = (a^n-1)/(a-1) */
|
||||||
|
return ((pown(fanout,level) - 1)/(fanout - 1));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* And now the building functions.
|
||||||
|
*/
|
||||||
|
|
||||||
|
ompi_coll_tree_t*
|
||||||
|
ompi_coll_tuned_topo_build_tree( int fanout,
|
||||||
|
struct ompi_communicator_t* comm,
|
||||||
|
int root )
|
||||||
|
{
|
||||||
|
int rank, size;
|
||||||
|
int schild, sparent;
|
||||||
|
int level; /* location of my rank in the tree structure of size */
|
||||||
|
int delta; /* number of nodes on my level */
|
||||||
|
int slimit; /* total number of nodes on levels above me */
|
||||||
|
int shiftedrank;
|
||||||
|
int i;
|
||||||
|
ompi_coll_tree_t* tree;
|
||||||
|
|
||||||
|
printf("Building tuned topo tree: fo %d rt %d\n", fanout, root);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Get size and rank of the process in this communicator
|
||||||
|
*/
|
||||||
|
size = ompi_comm_size(comm);
|
||||||
|
rank = ompi_comm_rank(comm);
|
||||||
|
|
||||||
|
tree = (ompi_coll_tree_t*)malloc(sizeof(ompi_coll_tree_t));
|
||||||
|
tree->tree_root = MPI_UNDEFINED;
|
||||||
|
tree->tree_nextsize = MPI_UNDEFINED;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Check if we calculated the tree for this root and
|
||||||
|
* fanout combination already (on this communicator)
|
||||||
|
*/
|
||||||
|
if( (root == tree->tree_root) && (fanout == tree->tree_fanout) ) {
|
||||||
|
return tree;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Set root
|
||||||
|
*/
|
||||||
|
tree->tree_root = root;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Initialize tree
|
||||||
|
*/
|
||||||
|
tree->tree_fanout = fanout;
|
||||||
|
tree->tree_root = root;
|
||||||
|
tree->tree_prev = -1;
|
||||||
|
tree->tree_nextsize = 0;
|
||||||
|
for( i = 0; i < fanout; i++ ) {
|
||||||
|
tree->tree_next[i] = -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* return if we have less than 2 processes */
|
||||||
|
if( size < 2 ) {
|
||||||
|
return tree;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Shift all ranks by root, so that the algorithm can be
|
||||||
|
* designed as if root would be always 0
|
||||||
|
* shiftedrank should be used in calculating distances
|
||||||
|
* and position in tree
|
||||||
|
*/
|
||||||
|
shiftedrank = rank - root;
|
||||||
|
if( shiftedrank < 0 ) {
|
||||||
|
shiftedrank += size;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* calculate my level */
|
||||||
|
level = calculate_level( fanout, shiftedrank );
|
||||||
|
delta = pown( fanout, level );
|
||||||
|
|
||||||
|
/* find my children */
|
||||||
|
for( i = 0; i < fanout; i++ ) {
|
||||||
|
schild = shiftedrank + delta * (i+1);
|
||||||
|
if( schild < size ) {
|
||||||
|
tree->tree_next[i] = (schild+root)%size;
|
||||||
|
tree->tree_nextsize = tree->tree_nextsize + 1;
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* find my parent */
|
||||||
|
slimit = calculate_num_nodes_up_to_level( fanout, level );
|
||||||
|
sparent = shiftedrank;
|
||||||
|
if( sparent < fanout ) {
|
||||||
|
sparent = 0;
|
||||||
|
} else {
|
||||||
|
while( sparent >= slimit ) {
|
||||||
|
sparent -= delta/fanout;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
tree->tree_prev = (sparent+root)%size;
|
||||||
|
|
||||||
|
return tree;
|
||||||
|
}
|
||||||
|
|
||||||
|
int ompi_coll_tuned_topo_destroy_tree( ompi_coll_tree_t** tree )
|
||||||
|
{
|
||||||
|
return OMPI_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
ompi_coll_bmtree_t*
|
||||||
|
ompi_coll_tuned_topo_build_bmtree( struct ompi_communicator_t* comm,
|
||||||
|
int root )
|
||||||
|
{
|
||||||
|
int childs = 0;
|
||||||
|
int rank;
|
||||||
|
int size;
|
||||||
|
int mask = 1;
|
||||||
|
int index;
|
||||||
|
int remote;
|
||||||
|
ompi_coll_bmtree_t *bmtree;
|
||||||
|
|
||||||
|
printf("Building tuned topo bmtree: rt %d\n", root);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Get size and rank of the process in this communicator
|
||||||
|
*/
|
||||||
|
size = ompi_comm_size(comm);
|
||||||
|
rank = ompi_comm_rank(comm);
|
||||||
|
|
||||||
|
index = rank -root;
|
||||||
|
|
||||||
|
bmtree = (ompi_coll_bmtree_t*)malloc(sizeof(ompi_coll_bmtree_t));
|
||||||
|
bmtree->bmtree_root = MPI_UNDEFINED;
|
||||||
|
bmtree->bmtree_nextsize = MPI_UNDEFINED;
|
||||||
|
|
||||||
|
if( bmtree->bmtree_root == root ) {
|
||||||
|
/* the bmtree was computed before */
|
||||||
|
return bmtree;
|
||||||
|
}
|
||||||
|
|
||||||
|
if( index < 0 ) index += size;
|
||||||
|
|
||||||
|
while( mask <= index ) mask <<= 1;
|
||||||
|
|
||||||
|
/* Now I can compute my father rank */
|
||||||
|
if( root == rank ) {
|
||||||
|
bmtree->bmtree_prev = root;
|
||||||
|
} else {
|
||||||
|
remote = (index ^ (mask >> 1)) + root;
|
||||||
|
if( remote >= size ) remote -= size;
|
||||||
|
bmtree->bmtree_prev = remote;
|
||||||
|
}
|
||||||
|
/* And now let's fill my childs */
|
||||||
|
while( mask < size ) {
|
||||||
|
remote = (index ^ mask);
|
||||||
|
if( remote >= size ) break;
|
||||||
|
remote += root;
|
||||||
|
if( remote >= size ) remote -= size;
|
||||||
|
bmtree->bmtree_next[childs] = remote;
|
||||||
|
mask <<= 1;
|
||||||
|
childs++;
|
||||||
|
}
|
||||||
|
bmtree->bmtree_nextsize = childs;
|
||||||
|
bmtree->bmtree_root = root;
|
||||||
|
return bmtree;
|
||||||
|
}
|
||||||
|
|
||||||
|
int ompi_coll_tuned_topo_destroy_bmtree( ompi_coll_bmtree_t** bmtree )
|
||||||
|
{
|
||||||
|
return OMPI_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
ompi_coll_chain_t*
|
||||||
|
ompi_coll_tuned_topo_build_chain( int fanout,
|
||||||
|
struct ompi_communicator_t* comm,
|
||||||
|
int root )
|
||||||
|
{
|
||||||
|
int rank, size;
|
||||||
|
int srank; /* shifted rank */
|
||||||
|
int i,maxchainlen;
|
||||||
|
int mark,head,len;
|
||||||
|
ompi_coll_chain_t *chain;
|
||||||
|
|
||||||
|
printf("Building tuned topo chain: fo %d rt %d\n", fanout, root);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Get size and rank of the process in this communicator
|
||||||
|
*/
|
||||||
|
size = ompi_comm_size(comm);
|
||||||
|
rank = ompi_comm_rank(comm);
|
||||||
|
|
||||||
|
if( fanout > MAXTREEFANOUT ) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Allocate space for topology arrays if needed
|
||||||
|
*/
|
||||||
|
chain = (ompi_coll_chain_t*)malloc( sizeof(ompi_coll_chain_t) );
|
||||||
|
chain->chain_root = MPI_UNDEFINED;
|
||||||
|
chain->chain_nextsize = -1;
|
||||||
|
chain->chain_numchain = -1;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Check if we calculated the topology for this root and comm
|
||||||
|
*/
|
||||||
|
if( (root == chain->chain_root) &&
|
||||||
|
(fanout == chain->chain_numchain) ) {
|
||||||
|
return chain;
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
* Set root & numchain
|
||||||
|
*/
|
||||||
|
chain->chain_root = root;
|
||||||
|
if( (size - 1) < fanout ) {
|
||||||
|
chain->chain_numchain = size-1;
|
||||||
|
chain->chain_nextsize = size-1;
|
||||||
|
fanout = size-1;
|
||||||
|
} else {
|
||||||
|
chain->chain_numchain = fanout;
|
||||||
|
chain->chain_nextsize = fanout;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Shift ranks
|
||||||
|
*/
|
||||||
|
srank = rank - root;
|
||||||
|
if (srank < 0) srank += size;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Special case - fanout == 1
|
||||||
|
*/
|
||||||
|
if( fanout == 1 ) {
|
||||||
|
if( srank == 0 ) chain->chain_prev = -1;
|
||||||
|
else chain->chain_prev = (srank-1+root)%size;
|
||||||
|
|
||||||
|
if( (srank + 1) >= size) {
|
||||||
|
chain->chain_next[0] = -1;
|
||||||
|
chain->chain_nextsize = 0;
|
||||||
|
} else {
|
||||||
|
chain->chain_next[0] = (srank+1+root)%size;
|
||||||
|
chain->chain_nextsize = 1;
|
||||||
|
}
|
||||||
|
return chain;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Let's handle the case where there is just one node in the communicator */
|
||||||
|
if( size == 1 ) {
|
||||||
|
chain->chain_next[0] = -1;
|
||||||
|
chain->chain_nextsize = 0;
|
||||||
|
chain->chain_prev = -1;
|
||||||
|
chain->chain_numchain = 0;
|
||||||
|
return chain;
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
* Calculate maximum chain length
|
||||||
|
*/
|
||||||
|
maxchainlen = (size-1) / fanout;
|
||||||
|
if( (size-1) % fanout != 0 ) {
|
||||||
|
maxchainlen++;
|
||||||
|
mark = (size-1)%fanout;
|
||||||
|
} else {
|
||||||
|
mark = fanout+1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Find your own place in the list of shifted ranks
|
||||||
|
*/
|
||||||
|
if( srank != 0 ) {
|
||||||
|
int column;
|
||||||
|
if( srank-1 < (mark * maxchainlen) ) {
|
||||||
|
column = (srank-1)/maxchainlen;
|
||||||
|
head = 1+column*maxchainlen;
|
||||||
|
len = maxchainlen;
|
||||||
|
} else {
|
||||||
|
column = mark + (srank-1-mark*maxchainlen)/(maxchainlen-1);
|
||||||
|
head = mark*maxchainlen+1+(column-mark)*(maxchainlen-1);
|
||||||
|
len = maxchainlen-1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if( srank == head ) {
|
||||||
|
chain->chain_prev = 0; /*root*/
|
||||||
|
} else {
|
||||||
|
chain->chain_prev = srank-1; /* rank -1 */
|
||||||
|
}
|
||||||
|
if( srank == (head + len - 1) ) {
|
||||||
|
chain->chain_next[0] = -1;
|
||||||
|
chain->chain_nextsize = 0;
|
||||||
|
} else {
|
||||||
|
if( (srank + 1) < size ) {
|
||||||
|
chain->chain_next[0] = srank+1;
|
||||||
|
chain->chain_nextsize = 1;
|
||||||
|
} else {
|
||||||
|
chain->chain_next[0] = -1;
|
||||||
|
chain->chain_nextsize = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Unshift values
|
||||||
|
*/
|
||||||
|
if( rank == root ) {
|
||||||
|
chain->chain_prev = -1;
|
||||||
|
chain->chain_next[0] = (root+1)%size;
|
||||||
|
for( i = 1; i < fanout; i++ ) {
|
||||||
|
chain->chain_next[i] = chain->chain_next[i-1] + maxchainlen;
|
||||||
|
if( i > mark ) {
|
||||||
|
chain->chain_next[i]--;
|
||||||
|
}
|
||||||
|
chain->chain_next[i] %= size;
|
||||||
|
}
|
||||||
|
chain->chain_nextsize = fanout;
|
||||||
|
} else {
|
||||||
|
chain->chain_prev = (chain->chain_prev+root)%size;
|
||||||
|
if( chain->chain_next[0] != -1 ) {
|
||||||
|
chain->chain_next[0] = (chain->chain_next[0]+root)%size;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return chain;
|
||||||
|
}
|
||||||
|
|
||||||
|
int ompi_coll_tuned_topo_destroy_chain( ompi_coll_chain_t** chain )
|
||||||
|
{
|
||||||
|
return OMPI_SUCCESS;
|
||||||
|
}
|
74
ompi/mca/coll/tuned/coll_tuned_topo.h
Обычный файл
74
ompi/mca/coll/tuned/coll_tuned_topo.h
Обычный файл
@ -0,0 +1,74 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2004-2005 The Trustees of Indiana University.
|
||||||
|
* All rights reserved.
|
||||||
|
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
|
||||||
|
* All rights reserved.
|
||||||
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||||
|
* University of Stuttgart. All rights reserved.
|
||||||
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||||
|
* All rights reserved.
|
||||||
|
* $COPYRIGHT$
|
||||||
|
*
|
||||||
|
* Additional copyrights may follow
|
||||||
|
*
|
||||||
|
* $HEADER$
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef MCA_COLL_TUNED_TOPO_H_HAS_BEEN_INCLUDED
|
||||||
|
#define MCA_COLL_TUNED_TOPO_H_HAS_BEEN_INCLUDED
|
||||||
|
|
||||||
|
#include "ompi_config.h"
|
||||||
|
|
||||||
|
#if defined(c_plusplus) || defined(__cplusplus)
|
||||||
|
extern "C"
|
||||||
|
{
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define MAXTREEFANOUT 32
|
||||||
|
|
||||||
|
typedef struct ompi_coll_tree_t {
|
||||||
|
int32_t tree_root;
|
||||||
|
int32_t tree_fanout;
|
||||||
|
int32_t tree_prev;
|
||||||
|
int32_t tree_next[MAXTREEFANOUT];
|
||||||
|
int32_t tree_nextsize;
|
||||||
|
} ompi_coll_tree_t;
|
||||||
|
|
||||||
|
typedef struct ompi_coll_bmtree_t {
|
||||||
|
int32_t bmtree_root;
|
||||||
|
int32_t bmtree_prev;
|
||||||
|
int32_t bmtree_next[MAXTREEFANOUT];
|
||||||
|
int32_t bmtree_nextsize;
|
||||||
|
} ompi_coll_bmtree_t;
|
||||||
|
|
||||||
|
typedef struct ompi_coll_chain_t {
|
||||||
|
int32_t chain_root;
|
||||||
|
int32_t chain_prev;
|
||||||
|
int32_t chain_next[MAXTREEFANOUT];
|
||||||
|
int32_t chain_nextsize;
|
||||||
|
int32_t chain_numchain;
|
||||||
|
} ompi_coll_chain_t;
|
||||||
|
|
||||||
|
ompi_coll_tree_t*
|
||||||
|
ompi_coll_tuned_topo_build_tree( int fanout,
|
||||||
|
struct ompi_communicator_t* com,
|
||||||
|
int root );
|
||||||
|
int ompi_coll_tuned_topo_destroy_tree( ompi_coll_tree_t** tree );
|
||||||
|
|
||||||
|
ompi_coll_bmtree_t*
|
||||||
|
ompi_coll_tuned_topo_build_bmtree( struct ompi_communicator_t* comm,
|
||||||
|
int root );
|
||||||
|
int ompi_coll_tuned_topo_destroy_bmtree( ompi_coll_bmtree_t** bmtree );
|
||||||
|
|
||||||
|
ompi_coll_chain_t*
|
||||||
|
ompi_coll_tuned_topo_build_chain( int fanout,
|
||||||
|
struct ompi_communicator_t* com,
|
||||||
|
int root );
|
||||||
|
int ompi_coll_tuned_topo_destroy_chain( ompi_coll_chain_t** chain );
|
||||||
|
|
||||||
|
#if defined(c_plusplus) || defined(__cplusplus)
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif /* MCA_COLL_TUNED_TOPO_H_HAS_BEEN_INCLUDED */
|
||||||
|
|
21
ompi/mca/coll/tuned/configure.params
Обычный файл
21
ompi/mca/coll/tuned/configure.params
Обычный файл
@ -0,0 +1,21 @@
|
|||||||
|
# -*- shell-script -*-
|
||||||
|
#
|
||||||
|
# Copyright (c) 2004-2005 The Trustees of Indiana University.
|
||||||
|
# All rights reserved.
|
||||||
|
# Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
|
||||||
|
# All rights reserved.
|
||||||
|
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||||
|
# University of Stuttgart. All rights reserved.
|
||||||
|
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||||
|
# All rights reserved.
|
||||||
|
# $COPYRIGHT$
|
||||||
|
#
|
||||||
|
# Additional copyrights may follow
|
||||||
|
#
|
||||||
|
# $HEADER$
|
||||||
|
#
|
||||||
|
|
||||||
|
# Specific to this module
|
||||||
|
|
||||||
|
PARAM_INIT_FILE=coll_tuned.c
|
||||||
|
PARAM_CONFIG_FILES=Makefile
|
Загрузка…
x
Ссылка в новой задаче
Block a user