1
1

Complete the dismantling of the tuned module.

Этот коммит содержится в:
George Bosilca 2015-02-15 14:48:08 -05:00
родитель aa019e239e
Коммит 211f05fb09
9 изменённых файлов: 418 добавлений и 491 удалений

Просмотреть файл

@ -22,7 +22,8 @@ headers += \
base/base.h \
base/coll_tags.h \
base/coll_base_topo.h \
base/coll_base_util.h
base/coll_base_util.h \
base/coll_base_functions.h
libmca_coll_la_SOURCES += \
base/coll_base_comm_select.c \

Просмотреть файл

@ -2,7 +2,7 @@
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2009 The University of Tennessee and The University
# Copyright (c) 2004-2015 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -19,29 +19,25 @@
sources = \
coll_tuned.h \
coll_tuned_topo.h \
coll_tuned_util.h \
coll_tuned_dynamic_file.h \
coll_tuned_dynamic_rules.h \
coll_tuned_topo.c \
coll_tuned_util.c \
coll_tuned_decision_fixed.c \
coll_tuned_decision_dynamic.c \
coll_tuned_dynamic_file.c \
coll_tuned_dynamic_rules.c \
coll_tuned_allreduce.c \
coll_tuned_alltoall.c \
coll_tuned_alltoallv.c \
coll_tuned_allgather.c \
coll_tuned_allgatherv.c \
coll_tuned_barrier.c \
coll_tuned_bcast.c \
coll_tuned_reduce.c \
coll_tuned_reduce_scatter.c \
coll_tuned_gather.c \
coll_tuned_scatter.c \
coll_tuned_component.c \
coll_tuned_module.c
coll_tuned_module.c \
coll_tuned_allgather_decision.c \
coll_tuned_allgatherv_decision.c \
coll_tuned_allreduce_decision.c \
coll_tuned_alltoall_decision.c \
coll_tuned_gather_decision.c \
coll_tuned_alltoallv_decision.c \
coll_tuned_barrier_decision.c \
coll_tuned_reduce_decision.c \
coll_tuned_bcast_decision.c \
coll_tuned_reduce_scatter_decision.c \
coll_tuned_scatter_decision.c
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la

Просмотреть файл

@ -18,7 +18,7 @@
#include "mpi.h"
#include "opal/mca/mca.h"
#include "ompi/request/request.h"
#include "ompi/mca/coll/base/coll_base.h"
#include "ompi/mca/coll/base/coll_base_functions.h"
/* also need the dynamic rule structures */
#include "coll_tuned_dynamic_rules.h"

Просмотреть файл

@ -272,56 +272,13 @@ static int tuned_close(void)
static void
mca_coll_tuned_module_construct(mca_coll_tuned_module_t *module)
{
module->tuned_data = NULL;
}
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
static void
mca_coll_tuned_module_destruct(mca_coll_tuned_module_t *module)
{
mca_coll_tuned_comm_t *data;
/* Free the space in the data mpool and the data hanging off the
communicator */
data = module->tuned_data;
if (NULL != data) {
#if OPAL_ENABLE_DEBUG
/* Reset the reqs to NULL/0 -- they'll be freed as part of freeing
the general c_coll_selected_data */
data->mcct_reqs = NULL;
data->mcct_num_reqs = 0;
#endif
/* free any cached information that has been allocated */
if (data->cached_ntree) { /* destroy general tree if defined */
ompi_coll_tuned_topo_destroy_tree (&data->cached_ntree);
}
if (data->cached_bintree) { /* destroy bintree if defined */
ompi_coll_tuned_topo_destroy_tree (&data->cached_bintree);
}
if (data->cached_bmtree) { /* destroy bmtree if defined */
ompi_coll_tuned_topo_destroy_tree (&data->cached_bmtree);
}
if (data->cached_in_order_bmtree) { /* destroy bmtree if defined */
ompi_coll_tuned_topo_destroy_tree (&data->cached_in_order_bmtree);
}
if (data->cached_chain) { /* destroy general chain if defined */
ompi_coll_tuned_topo_destroy_tree (&data->cached_chain);
}
if (data->cached_pipeline) { /* destroy pipeline if defined */
ompi_coll_tuned_topo_destroy_tree (&data->cached_pipeline);
}
if (data->cached_in_order_bintree) { /* destroy in order bintree if defined */
ompi_coll_tuned_topo_destroy_tree (&data->cached_in_order_bintree);
}
free(data);
for( int i = 0; i < COLLCOUNT; i++ ) {
tuned_module->user_forced[i].algorithm = 0;
tuned_module->com_rules[i] = NULL;
}
}
OBJ_CLASS_INSTANCE(mca_coll_tuned_module_t,
mca_coll_base_module_t,
mca_coll_tuned_module_construct,
mca_coll_tuned_module_destruct);
OBJ_CLASS_INSTANCE(mca_coll_tuned_module_t, mca_coll_base_module_t,
mca_coll_tuned_module_construct, NULL);

Просмотреть файл

@ -2,7 +2,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2012 The University of Tennessee and The University
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -28,9 +28,6 @@
#include "ompi/mca/coll/base/coll_tags.h"
#include "coll_tuned.h"
#include "coll_tuned.h"
/*
* Notes on evaluation rules and ordering
*
@ -58,12 +55,11 @@ ompi_coll_tuned_allreduce_intra_dec_dynamic (void *sbuf, void *rbuf, int count,
mca_coll_base_module_t *module)
{
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
OPAL_OUTPUT((ompi_coll_tuned_stream, "ompi_coll_tuned_allreduce_intra_dec_dynamic"));
/* check to see if we have some filebased rules */
if (data->com_rules[ALLREDUCE]) {
if (tuned_module->com_rules[ALLREDUCE]) {
/* we do, so calc the message size or whatever we need and use this for the evaluation */
int alg, faninout, segsize, ignoreme;
size_t dsize;
@ -71,7 +67,7 @@ ompi_coll_tuned_allreduce_intra_dec_dynamic (void *sbuf, void *rbuf, int count,
ompi_datatype_type_size (dtype, &dsize);
dsize *= count;
alg = ompi_coll_tuned_get_target_method_params (data->com_rules[ALLREDUCE],
alg = ompi_coll_tuned_get_target_method_params (tuned_module->com_rules[ALLREDUCE],
dsize, &faninout, &segsize, &ignoreme);
if (alg) {
@ -82,7 +78,7 @@ ompi_coll_tuned_allreduce_intra_dec_dynamic (void *sbuf, void *rbuf, int count,
} /* found a method */
} /*end if any com rules to check */
if (data->user_forced[ALLREDUCE].algorithm) {
if (tuned_module->user_forced[ALLREDUCE].algorithm) {
return ompi_coll_tuned_allreduce_intra_do_forced (sbuf, rbuf, count, dtype, op,
comm, module);
}
@ -106,12 +102,11 @@ int ompi_coll_tuned_alltoall_intra_dec_dynamic(void *sbuf, int scount,
mca_coll_base_module_t *module)
{
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
OPAL_OUTPUT((ompi_coll_tuned_stream, "ompi_coll_tuned_alltoall_intra_dec_dynamic"));
/* check to see if we have some filebased rules */
if (data->com_rules[ALLTOALL]) {
if (tuned_module->com_rules[ALLTOALL]) {
/* we do, so calc the message size or whatever we need and use this for the evaluation */
int comsize;
int alg, faninout, segsize, max_requests;
@ -121,7 +116,7 @@ int ompi_coll_tuned_alltoall_intra_dec_dynamic(void *sbuf, int scount,
comsize = ompi_comm_size(comm);
dsize *= (ptrdiff_t)comsize * (ptrdiff_t)scount;
alg = ompi_coll_tuned_get_target_method_params (data->com_rules[ALLTOALL],
alg = ompi_coll_tuned_get_target_method_params (tuned_module->com_rules[ALLTOALL],
dsize, &faninout, &segsize, &max_requests);
if (alg) {
@ -133,7 +128,7 @@ int ompi_coll_tuned_alltoall_intra_dec_dynamic(void *sbuf, int scount,
} /* found a method */
} /*end if any com rules to check */
if (data->user_forced[ALLTOALL].algorithm) {
if (tuned_module->user_forced[ALLTOALL].algorithm) {
return ompi_coll_tuned_alltoall_intra_do_forced (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
@ -157,7 +152,6 @@ int ompi_coll_tuned_alltoallv_intra_dec_dynamic(void *sbuf, int *scounts, int *s
mca_coll_base_module_t *module)
{
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
OPAL_OUTPUT((ompi_coll_tuned_stream, "ompi_coll_tuned_alltoallv_intra_dec_dynamic"));
@ -167,10 +161,10 @@ int ompi_coll_tuned_alltoallv_intra_dec_dynamic(void *sbuf, int *scounts, int *s
* This allows the users to specify the alltoallv algorithm to be used only
* based on the communicator size.
*/
if (data->com_rules[ALLTOALLV]) {
if (tuned_module->com_rules[ALLTOALLV]) {
int alg, faninout, segsize, max_requests;
alg = ompi_coll_tuned_get_target_method_params (data->com_rules[ALLTOALLV],
alg = ompi_coll_tuned_get_target_method_params (tuned_module->com_rules[ALLTOALLV],
0, &faninout, &segsize, &max_requests);
if (alg) {
@ -182,7 +176,7 @@ int ompi_coll_tuned_alltoallv_intra_dec_dynamic(void *sbuf, int *scounts, int *s
} /* found a method */
} /*end if any com rules to check */
if (data->user_forced[ALLTOALLV].algorithm) {
if (tuned_module->user_forced[ALLTOALLV].algorithm) {
return ompi_coll_tuned_alltoallv_intra_do_forced(sbuf, scounts, sdisps, sdtype,
rbuf, rcounts, rdisps, rdtype,
comm, module);
@ -203,16 +197,15 @@ int ompi_coll_tuned_barrier_intra_dec_dynamic(struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
OPAL_OUTPUT((ompi_coll_tuned_stream,"ompi_coll_tuned_barrier_intra_dec_dynamic"));
/* check to see if we have some filebased rules */
if (data->com_rules[BARRIER]) {
if (tuned_module->com_rules[BARRIER]) {
/* we do, so calc the message size or whatever we need and use this for the evaluation */
int alg, faninout, segsize, ignoreme;
alg = ompi_coll_tuned_get_target_method_params (data->com_rules[BARRIER],
alg = ompi_coll_tuned_get_target_method_params (tuned_module->com_rules[BARRIER],
0, &faninout, &segsize, &ignoreme);
if (alg) {
@ -222,7 +215,7 @@ int ompi_coll_tuned_barrier_intra_dec_dynamic(struct ompi_communicator_t *comm,
} /* found a method */
} /*end if any com rules to check */
if (data->user_forced[BARRIER].algorithm) {
if (tuned_module->user_forced[BARRIER].algorithm) {
return ompi_coll_tuned_barrier_intra_do_forced (comm, module);
}
return ompi_coll_tuned_barrier_intra_dec_fixed (comm, module);
@ -241,12 +234,11 @@ int ompi_coll_tuned_bcast_intra_dec_dynamic(void *buff, int count,
mca_coll_base_module_t *module)
{
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:bcast_intra_dec_dynamic"));
/* check to see if we have some filebased rules */
if (data->com_rules[BCAST]) {
if (tuned_module->com_rules[BCAST]) {
/* we do, so calc the message size or whatever we need and use this for the evaluation */
int alg, faninout, segsize, ignoreme;
size_t dsize;
@ -254,7 +246,7 @@ int ompi_coll_tuned_bcast_intra_dec_dynamic(void *buff, int count,
ompi_datatype_type_size (datatype, &dsize);
dsize *= count;
alg = ompi_coll_tuned_get_target_method_params (data->com_rules[BCAST],
alg = ompi_coll_tuned_get_target_method_params (tuned_module->com_rules[BCAST],
dsize, &faninout, &segsize, &ignoreme);
if (alg) {
@ -266,7 +258,7 @@ int ompi_coll_tuned_bcast_intra_dec_dynamic(void *buff, int count,
} /*end if any com rules to check */
if (data->user_forced[BCAST].algorithm) {
if (tuned_module->user_forced[BCAST].algorithm) {
return ompi_coll_tuned_bcast_intra_do_forced (buff, count, datatype, root,
comm, module);
}
@ -289,12 +281,11 @@ int ompi_coll_tuned_reduce_intra_dec_dynamic( void *sendbuf, void *recvbuf,
mca_coll_base_module_t *module)
{
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:reduce_intra_dec_dynamic"));
/* check to see if we have some filebased rules */
if (data->com_rules[REDUCE]) {
if (tuned_module->com_rules[REDUCE]) {
/* we do, so calc the message size or whatever we need and use this for the evaluation */
int alg, faninout, segsize, max_requests;
@ -303,7 +294,7 @@ int ompi_coll_tuned_reduce_intra_dec_dynamic( void *sendbuf, void *recvbuf,
ompi_datatype_type_size (datatype, &dsize);
dsize *= count;
alg = ompi_coll_tuned_get_target_method_params (data->com_rules[REDUCE],
alg = ompi_coll_tuned_get_target_method_params (tuned_module->com_rules[REDUCE],
dsize, &faninout, &segsize, &max_requests);
if (alg) {
@ -317,7 +308,7 @@ int ompi_coll_tuned_reduce_intra_dec_dynamic( void *sendbuf, void *recvbuf,
} /* found a method */
} /*end if any com rules to check */
if (data->user_forced[REDUCE].algorithm) {
if (tuned_module->user_forced[REDUCE].algorithm) {
return ompi_coll_tuned_reduce_intra_do_forced (sendbuf, recvbuf, count, datatype,
op, root,
comm, module);
@ -344,12 +335,11 @@ int ompi_coll_tuned_reduce_scatter_intra_dec_dynamic(void *sbuf, void *rbuf,
mca_coll_base_module_t *module)
{
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:reduce_scatter_intra_dec_dynamic"));
/* check to see if we have some filebased rules */
if (data->com_rules[REDUCESCATTER]) {
if (tuned_module->com_rules[REDUCESCATTER]) {
/* we do, so calc the message size or whatever we need and use
this for the evaluation */
int alg, faninout, segsize, ignoreme, i, count, size;
@ -359,7 +349,7 @@ int ompi_coll_tuned_reduce_scatter_intra_dec_dynamic(void *sbuf, void *rbuf,
ompi_datatype_type_size (dtype, &dsize);
dsize *= count;
alg = ompi_coll_tuned_get_target_method_params (data->com_rules[REDUCESCATTER],
alg = ompi_coll_tuned_get_target_method_params (tuned_module->com_rules[REDUCESCATTER],
dsize, &faninout,
&segsize, &ignoreme);
if (alg) {
@ -372,7 +362,7 @@ int ompi_coll_tuned_reduce_scatter_intra_dec_dynamic(void *sbuf, void *rbuf,
} /* found a method */
} /*end if any com rules to check */
if (data->user_forced[REDUCESCATTER].algorithm) {
if (tuned_module->user_forced[REDUCESCATTER].algorithm) {
return ompi_coll_tuned_reduce_scatter_intra_do_forced (sbuf, rbuf, rcounts,
dtype, op,
comm, module);
@ -399,12 +389,11 @@ int ompi_coll_tuned_allgather_intra_dec_dynamic(void *sbuf, int scount,
mca_coll_base_module_t *module)
{
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
OPAL_OUTPUT((ompi_coll_tuned_stream,
"ompi_coll_tuned_allgather_intra_dec_dynamic"));
if (data->com_rules[ALLGATHER]) {
if (tuned_module->com_rules[ALLGATHER]) {
/* We have file based rules:
- calculate message size and other necessary information */
int comsize;
@ -415,7 +404,7 @@ int ompi_coll_tuned_allgather_intra_dec_dynamic(void *sbuf, int scount,
comsize = ompi_comm_size(comm);
dsize *= (ptrdiff_t)comsize * (ptrdiff_t)scount;
alg = ompi_coll_tuned_get_target_method_params (data->com_rules[ALLGATHER],
alg = ompi_coll_tuned_get_target_method_params (tuned_module->com_rules[ALLGATHER],
dsize, &faninout, &segsize, &ignoreme);
if (alg) {
/* we have found a valid choice from the file based rules for
@ -428,7 +417,7 @@ int ompi_coll_tuned_allgather_intra_dec_dynamic(void *sbuf, int scount,
}
/* We do not have file based rules */
if (data->user_forced[ALLGATHER].algorithm) {
if (tuned_module->user_forced[ALLGATHER].algorithm) {
/* User-forced algorithm */
return ompi_coll_tuned_allgather_intra_do_forced (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
@ -459,12 +448,11 @@ int ompi_coll_tuned_allgatherv_intra_dec_dynamic(void *sbuf, int scount,
mca_coll_base_module_t *module)
{
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
OPAL_OUTPUT((ompi_coll_tuned_stream,
"ompi_coll_tuned_allgatherv_intra_dec_dynamic"));
if (data->com_rules[ALLGATHERV]) {
if (tuned_module->com_rules[ALLGATHERV]) {
/* We have file based rules:
- calculate message size and other necessary information */
int comsize, i;
@ -476,7 +464,7 @@ int ompi_coll_tuned_allgatherv_intra_dec_dynamic(void *sbuf, int scount,
total_size = 0;
for (i = 0; i < comsize; i++) { total_size += dsize * rcounts[i]; }
alg = ompi_coll_tuned_get_target_method_params (data->com_rules[ALLGATHERV],
alg = ompi_coll_tuned_get_target_method_params (tuned_module->com_rules[ALLGATHERV],
total_size, &faninout, &segsize, &ignoreme);
if (alg) {
/* we have found a valid choice from the file based rules for
@ -490,7 +478,7 @@ int ompi_coll_tuned_allgatherv_intra_dec_dynamic(void *sbuf, int scount,
}
/* We do not have file based rules */
if (data->user_forced[ALLGATHERV].algorithm) {
if (tuned_module->user_forced[ALLGATHERV].algorithm) {
/* User-forced algorithm */
return ompi_coll_tuned_allgatherv_intra_do_forced (sbuf, scount, sdtype,
rbuf, rcounts,
@ -514,7 +502,6 @@ int ompi_coll_tuned_gather_intra_dec_dynamic(void *sbuf, int scount,
mca_coll_base_module_t *module)
{
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
OPAL_OUTPUT((ompi_coll_tuned_stream,
"ompi_coll_tuned_gather_intra_dec_dynamic"));
@ -522,7 +509,7 @@ int ompi_coll_tuned_gather_intra_dec_dynamic(void *sbuf, int scount,
/**
* check to see if we have some filebased rules.
*/
if (data->com_rules[GATHER]) {
if (tuned_module->com_rules[GATHER]) {
int comsize, alg, faninout, segsize, max_requests;
size_t dsize;
@ -530,7 +517,7 @@ int ompi_coll_tuned_gather_intra_dec_dynamic(void *sbuf, int scount,
ompi_datatype_type_size (sdtype, &dsize);
dsize *= comsize;
alg = ompi_coll_tuned_get_target_method_params (data->com_rules[GATHER],
alg = ompi_coll_tuned_get_target_method_params (tuned_module->com_rules[GATHER],
dsize, &faninout, &segsize, &max_requests);
if (alg) {
@ -542,7 +529,7 @@ int ompi_coll_tuned_gather_intra_dec_dynamic(void *sbuf, int scount,
} /* found a method */
} /*end if any com rules to check */
if (data->user_forced[GATHER].algorithm) {
if (tuned_module->user_forced[GATHER].algorithm) {
return ompi_coll_tuned_gather_intra_do_forced (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module);
@ -561,7 +548,6 @@ int ompi_coll_tuned_scatter_intra_dec_dynamic(void *sbuf, int scount,
mca_coll_base_module_t *module)
{
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
OPAL_OUTPUT((ompi_coll_tuned_stream,
"ompi_coll_tuned_scatter_intra_dec_dynamic"));
@ -569,7 +555,7 @@ int ompi_coll_tuned_scatter_intra_dec_dynamic(void *sbuf, int scount,
/**
* check to see if we have some filebased rules.
*/
if (data->com_rules[SCATTER]) {
if (tuned_module->com_rules[SCATTER]) {
int comsize, alg, faninout, segsize, max_requests;
size_t dsize;
@ -577,7 +563,7 @@ int ompi_coll_tuned_scatter_intra_dec_dynamic(void *sbuf, int scount,
ompi_datatype_type_size (sdtype, &dsize);
dsize *= comsize;
alg = ompi_coll_tuned_get_target_method_params (data->com_rules[SCATTER],
alg = ompi_coll_tuned_get_target_method_params (tuned_module->com_rules[SCATTER],
dsize, &faninout, &segsize, &max_requests);
if (alg) {
@ -589,7 +575,7 @@ int ompi_coll_tuned_scatter_intra_dec_dynamic(void *sbuf, int scount,
} /* found a method */
} /*end if any com rules to check */
if (data->user_forced[SCATTER].algorithm) {
if (tuned_module->user_forced[SCATTER].algorithm) {
return ompi_coll_tuned_scatter_intra_do_forced (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module);

Просмотреть файл

@ -3,7 +3,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2012 The University of Tennessee and The University
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -31,7 +31,6 @@
#include "ompi/op/op.h"
#include "coll_tuned.h"
/*
* allreduce_intra
*
@ -40,7 +39,7 @@
* Returns: - MPI_SUCCESS or error code
*/
int
ompi_coll_tuned_allreduce_intra_dec_fixed (void *sbuf, void *rbuf, int count,
ompi_coll_tuned_allreduce_intra_dec_fixed(void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
struct ompi_communicator_t *comm,
@ -62,7 +61,7 @@ ompi_coll_tuned_allreduce_intra_dec_fixed (void *sbuf, void *rbuf, int count,
block_dsize = dsize * (ptrdiff_t)count;
if (block_dsize < intermediate_message) {
return (ompi_coll_tuned_allreduce_intra_recursivedoubling (sbuf, rbuf,
return (ompi_coll_base_allreduce_intra_recursivedoubling(sbuf, rbuf,
count, dtype,
op, comm, module));
}
@ -70,17 +69,17 @@ ompi_coll_tuned_allreduce_intra_dec_fixed (void *sbuf, void *rbuf, int count,
if( ompi_op_is_commute(op) && (count > comm_size) ) {
const size_t segment_size = 1 << 20; /* 1 MB */
if (((size_t)comm_size * (size_t)segment_size >= block_dsize)) {
return (ompi_coll_tuned_allreduce_intra_ring (sbuf, rbuf, count, dtype,
return (ompi_coll_base_allreduce_intra_ring(sbuf, rbuf, count, dtype,
op, comm, module));
} else {
return (ompi_coll_tuned_allreduce_intra_ring_segmented (sbuf, rbuf,
return (ompi_coll_base_allreduce_intra_ring_segmented(sbuf, rbuf,
count, dtype,
op, comm, module,
segment_size));
}
}
return (ompi_coll_tuned_allreduce_intra_nonoverlapping (sbuf, rbuf, count,
return (ompi_coll_base_allreduce_intra_nonoverlapping(sbuf, rbuf, count,
dtype, op, comm, module));
}
@ -109,7 +108,7 @@ int ompi_coll_tuned_alltoall_intra_dec_fixed(void *sbuf, int scount,
/* special case */
if (communicator_size==2) {
return ompi_coll_tuned_alltoall_intra_two_procs(sbuf, scount, sdtype,
return ompi_coll_base_alltoall_intra_two_procs(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
}
@ -123,17 +122,17 @@ int ompi_coll_tuned_alltoall_intra_dec_fixed(void *sbuf, int scount,
if ((block_dsize < (size_t) ompi_coll_tuned_alltoall_small_msg)
&& (communicator_size > 12)) {
return ompi_coll_tuned_alltoall_intra_bruck(sbuf, scount, sdtype,
return ompi_coll_base_alltoall_intra_bruck(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
} else if (block_dsize < (size_t) ompi_coll_tuned_alltoall_intermediate_msg) {
return ompi_coll_tuned_alltoall_intra_basic_linear(sbuf, scount, sdtype,
return ompi_coll_base_alltoall_intra_basic_linear(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
}
return ompi_coll_tuned_alltoall_intra_pairwise (sbuf, scount, sdtype,
return ompi_coll_base_alltoall_intra_pairwise(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
@ -148,12 +147,12 @@ int ompi_coll_tuned_alltoall_intra_dec_fixed(void *sbuf, int scount,
ompi_comm_rank(comm), communicator_size, total_dsize));
if (communicator_size >= 12 && total_dsize <= 768) {
return ompi_coll_tuned_alltoall_intra_bruck (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module);
return ompi_coll_base_alltoall_intra_bruck(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module);
}
if (total_dsize <= 131072) {
return ompi_coll_tuned_alltoall_intra_basic_linear (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module);
return ompi_coll_base_alltoall_intra_basic_linear(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module);
}
return ompi_coll_tuned_alltoall_intra_pairwise (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module);
return ompi_coll_base_alltoall_intra_pairwise(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module);
#endif
}
@ -170,7 +169,7 @@ int ompi_coll_tuned_alltoallv_intra_dec_fixed(void *sbuf, int *scounts, int *sdi
mca_coll_base_module_t *module)
{
/* For starters, just keep the original algorithm. */
return ompi_coll_tuned_alltoallv_intra_pairwise(sbuf, scounts, sdisps, sdtype,
return ompi_coll_base_alltoallv_intra_pairwise(sbuf, scounts, sdisps, sdtype,
rbuf, rcounts, rdisps,rdtype,
comm, module);
}
@ -192,7 +191,7 @@ int ompi_coll_tuned_barrier_intra_dec_fixed(struct ompi_communicator_t *comm,
communicator_size));
if( 2 == communicator_size )
return ompi_coll_tuned_barrier_intra_two_procs(comm, module);
return ompi_coll_base_barrier_intra_two_procs(comm, module);
/**
* Basic optimisation. If we have a power of 2 number of nodes
* then use the recursive doubling algorithm, otherwise
@ -203,14 +202,12 @@ int ompi_coll_tuned_barrier_intra_dec_fixed(struct ompi_communicator_t *comm,
for( ; communicator_size > 0; communicator_size >>= 1 ) {
if( communicator_size & 0x1 ) {
if( has_one )
return ompi_coll_tuned_barrier_intra_bruck(comm, module);
return ompi_coll_base_barrier_intra_bruck(comm, module);
has_one = true;
}
}
}
return ompi_coll_tuned_barrier_intra_recursivedoubling(comm, module);
/* return ompi_coll_tuned_barrier_intra_linear(comm); */
/* return ompi_coll_tuned_barrier_intra_doublering(comm); */
return ompi_coll_base_barrier_intra_recursivedoubling(comm, module);
}
@ -256,14 +253,14 @@ int ompi_coll_tuned_bcast_intra_dec_fixed(void *buff, int count,
if ((message_size < small_message_size) || (count <= 1)) {
/* Binomial without segmentation */
segsize = 0;
return ompi_coll_tuned_bcast_intra_binomial (buff, count, datatype,
return ompi_coll_base_bcast_intra_binomial(buff, count, datatype,
root, comm, module,
segsize);
} else if (message_size < intermediate_message_size) {
/* Split Binary with 1KB segments */
segsize = 1024;
return ompi_coll_tuned_bcast_intra_split_bintree(buff, count, datatype,
return ompi_coll_base_bcast_intra_split_bintree(buff, count, datatype,
root, comm, module,
segsize);
@ -272,28 +269,28 @@ int ompi_coll_tuned_bcast_intra_dec_fixed(void *buff, int count,
else if (communicator_size < (a_p128 * message_size + b_p128)) {
/* Pipeline with 128KB segments */
segsize = 1024 << 7;
return ompi_coll_tuned_bcast_intra_pipeline (buff, count, datatype,
return ompi_coll_base_bcast_intra_pipeline(buff, count, datatype,
root, comm, module,
segsize);
} else if (communicator_size < 13) {
/* Split Binary with 8KB segments */
segsize = 1024 << 3;
return ompi_coll_tuned_bcast_intra_split_bintree(buff, count, datatype,
return ompi_coll_base_bcast_intra_split_bintree(buff, count, datatype,
root, comm, module,
segsize);
} else if (communicator_size < (a_p64 * message_size + b_p64)) {
/* Pipeline with 64KB segments */
segsize = 1024 << 6;
return ompi_coll_tuned_bcast_intra_pipeline (buff, count, datatype,
return ompi_coll_base_bcast_intra_pipeline(buff, count, datatype,
root, comm, module,
segsize);
} else if (communicator_size < (a_p16 * message_size + b_p16)) {
/* Pipeline with 16KB segments */
segsize = 1024 << 4;
return ompi_coll_tuned_bcast_intra_pipeline (buff, count, datatype,
return ompi_coll_base_bcast_intra_pipeline(buff, count, datatype,
root, comm, module,
segsize);
@ -301,35 +298,35 @@ int ompi_coll_tuned_bcast_intra_dec_fixed(void *buff, int count,
/* Pipeline with 8KB segments */
segsize = 1024 << 3;
return ompi_coll_tuned_bcast_intra_pipeline (buff, count, datatype,
return ompi_coll_base_bcast_intra_pipeline(buff, count, datatype,
root, comm, module,
segsize);
#if 0
/* this is based on gige measurements */
if (communicator_size < 4) {
return ompi_coll_tuned_bcast_intra_basic_linear (buff, count, datatype, root, comm, module);
return ompi_coll_base_bcast_intra_basic_linear(buff, count, datatype, root, comm, module);
}
if (communicator_size == 4) {
if (message_size < 524288) segsize = 0;
else segsize = 16384;
return ompi_coll_tuned_bcast_intra_bintree (buff, count, datatype, root, comm, module, segsize);
return ompi_coll_base_bcast_intra_bintree(buff, count, datatype, root, comm, module, segsize);
}
if (communicator_size <= 8 && message_size < 4096) {
return ompi_coll_tuned_bcast_intra_basic_linear (buff, count, datatype, root, comm, module);
return ompi_coll_base_bcast_intra_basic_linear(buff, count, datatype, root, comm, module);
}
if (communicator_size > 8 && message_size >= 32768 && message_size < 524288) {
segsize = 16384;
return ompi_coll_tuned_bcast_intra_bintree (buff, count, datatype, root, comm, module, segsize);
return ompi_coll_base_bcast_intra_bintree(buff, count, datatype, root, comm, module, segsize);
}
if (message_size >= 524288) {
segsize = 16384;
return ompi_coll_tuned_bcast_intra_pipeline (buff, count, datatype, root, comm, module, segsize);
return ompi_coll_base_bcast_intra_pipeline(buff, count, datatype, root, comm, module, segsize);
}
segsize = 0;
/* once tested can swap this back in */
/* return ompi_coll_tuned_bcast_intra_bmtree (buff, count, datatype, root, comm, segsize); */
return ompi_coll_tuned_bcast_intra_bintree (buff, count, datatype, root, comm, module, segsize);
/* return ompi_coll_base_bcast_intra_bmtree(buff, count, datatype, root, comm, segsize); */
return ompi_coll_base_bcast_intra_bintree(buff, count, datatype, root, comm, module, segsize);
#endif /* 0 */
}
@ -372,9 +369,9 @@ int ompi_coll_tuned_reduce_intra_dec_fixed( void *sendbuf, void *recvbuf,
*/
if( !ompi_op_is_commute(op) ) {
if ((communicator_size < 12) && (message_size < 2048)) {
return ompi_coll_tuned_reduce_intra_basic_linear (sendbuf, recvbuf, count, datatype, op, root, comm, module);
return ompi_coll_base_reduce_intra_basic_linear (sendbuf, recvbuf, count, datatype, op, root, comm, module);
}
return ompi_coll_tuned_reduce_intra_in_order_binary (sendbuf, recvbuf, count, datatype, op, root, comm, module,
return ompi_coll_base_reduce_intra_in_order_binary (sendbuf, recvbuf, count, datatype, op, root, comm, module,
0, max_requests);
}
@ -384,27 +381,27 @@ int ompi_coll_tuned_reduce_intra_dec_fixed( void *sendbuf, void *recvbuf,
if ((communicator_size < 8) && (message_size < 512)){
/* Linear_0K */
return ompi_coll_tuned_reduce_intra_basic_linear (sendbuf, recvbuf, count, datatype, op, root, comm, module);
return ompi_coll_base_reduce_intra_basic_linear(sendbuf, recvbuf, count, datatype, op, root, comm, module);
} else if (((communicator_size < 8) && (message_size < 20480)) ||
(message_size < 2048) || (count <= 1)) {
/* Binomial_0K */
segsize = 0;
return ompi_coll_tuned_reduce_intra_binomial(sendbuf, recvbuf, count, datatype, op, root, comm, module,
return ompi_coll_base_reduce_intra_binomial(sendbuf, recvbuf, count, datatype, op, root, comm, module,
segsize, max_requests);
} else if (communicator_size > (a1 * message_size + b1)) {
/* Binomial_1K */
segsize = 1024;
return ompi_coll_tuned_reduce_intra_binomial(sendbuf, recvbuf, count, datatype, op, root, comm, module,
return ompi_coll_base_reduce_intra_binomial(sendbuf, recvbuf, count, datatype, op, root, comm, module,
segsize, max_requests);
} else if (communicator_size > (a2 * message_size + b2)) {
/* Pipeline_1K */
segsize = 1024;
return ompi_coll_tuned_reduce_intra_pipeline (sendbuf, recvbuf, count, datatype, op, root, comm, module,
return ompi_coll_base_reduce_intra_pipeline(sendbuf, recvbuf, count, datatype, op, root, comm, module,
segsize, max_requests);
} else if (communicator_size > (a3 * message_size + b3)) {
/* Binary_32K */
segsize = 32*1024;
return ompi_coll_tuned_reduce_intra_binary( sendbuf, recvbuf, count, datatype, op, root,
return ompi_coll_base_reduce_intra_binary( sendbuf, recvbuf, count, datatype, op, root,
comm, module, segsize, max_requests);
}
if (communicator_size > (a4 * message_size + b4)) {
@ -414,7 +411,7 @@ int ompi_coll_tuned_reduce_intra_dec_fixed( void *sendbuf, void *recvbuf,
/* Pipeline_64K */
segsize = 64*1024;
}
return ompi_coll_tuned_reduce_intra_pipeline (sendbuf, recvbuf, count, datatype, op, root, comm, module,
return ompi_coll_base_reduce_intra_pipeline(sendbuf, recvbuf, count, datatype, op, root, comm, module,
segsize, max_requests);
#if 0
@ -424,8 +421,7 @@ int ompi_coll_tuned_reduce_intra_dec_fixed( void *sendbuf, void *recvbuf,
fanout = communicator_size - 1;
/* when linear implemented or taken from basic put here, right now using chain as a linear system */
/* it is implemented and I shouldn't be calling a chain with a fanout bigger than MAXTREEFANOUT from topo.h! */
return ompi_coll_tuned_reduce_intra_basic_linear (sendbuf, recvbuf, count, datatype, op, root, comm, module);
/* return ompi_coll_tuned_reduce_intra_chain (sendbuf, recvbuf, count, datatype, op, root, comm, segsize, fanout); */
return ompi_coll_base_reduce_intra_basic_linear(sendbuf, recvbuf, count, datatype, op, root, comm, module);
}
if (message_size < 524288) {
if (message_size <= 65536 ) {
@ -437,11 +433,11 @@ int ompi_coll_tuned_reduce_intra_dec_fixed( void *sendbuf, void *recvbuf,
}
/* later swap this for a binary tree */
/* fanout = 2; */
return ompi_coll_tuned_reduce_intra_chain (sendbuf, recvbuf, count, datatype, op, root, comm, module,
return ompi_coll_base_reduce_intra_chain(sendbuf, recvbuf, count, datatype, op, root, comm, module,
segsize, fanout, max_requests);
}
segsize = 1024;
return ompi_coll_tuned_reduce_intra_pipeline (sendbuf, recvbuf, count, datatype, op, root, comm, module,
return ompi_coll_base_reduce_intra_pipeline(sendbuf, recvbuf, count, datatype, op, root, comm, module,
segsize, max_requests);
#endif /* 0 */
}
@ -479,7 +475,7 @@ int ompi_coll_tuned_reduce_scatter_intra_dec_fixed( void *sbuf, void *rbuf,
}
if( !ompi_op_is_commute(op) ) {
return ompi_coll_tuned_reduce_scatter_intra_nonoverlapping (sbuf, rbuf, rcounts,
return ompi_coll_base_reduce_scatter_intra_nonoverlapping(sbuf, rbuf, rcounts,
dtype, op,
comm, module);
}
@ -493,11 +489,11 @@ int ompi_coll_tuned_reduce_scatter_intra_dec_fixed( void *sbuf, void *rbuf,
((total_message_size <= large_message_size) && (pow2 == comm_size)) ||
(comm_size >= a * total_message_size + b)) {
return
ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(sbuf, rbuf, rcounts,
ompi_coll_base_reduce_scatter_intra_basic_recursivehalving(sbuf, rbuf, rcounts,
dtype, op,
comm, module);
}
return ompi_coll_tuned_reduce_scatter_intra_ring(sbuf, rbuf, rcounts,
return ompi_coll_base_reduce_scatter_intra_ring(sbuf, rbuf, rcounts,
dtype, op,
comm, module);
}
@ -525,7 +521,7 @@ int ompi_coll_tuned_allgather_intra_dec_fixed(void *sbuf, int scount,
/* Special case for 2 processes */
if (communicator_size == 2) {
return ompi_coll_tuned_allgather_intra_two_procs (sbuf, scount, sdtype,
return ompi_coll_base_allgather_intra_two_procs(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
}
@ -550,21 +546,21 @@ int ompi_coll_tuned_allgather_intra_dec_fixed(void *sbuf, int scount,
*/
if (total_dsize < 50000) {
if (pow2_size == communicator_size) {
return ompi_coll_tuned_allgather_intra_recursivedoubling(sbuf, scount, sdtype,
return ompi_coll_base_allgather_intra_recursivedoubling(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
} else {
return ompi_coll_tuned_allgather_intra_bruck(sbuf, scount, sdtype,
return ompi_coll_base_allgather_intra_bruck(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
}
} else {
if (communicator_size % 2) {
return ompi_coll_tuned_allgather_intra_ring(sbuf, scount, sdtype,
return ompi_coll_base_allgather_intra_ring(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
} else {
return ompi_coll_tuned_allgather_intra_neighborexchange(sbuf, scount, sdtype,
return ompi_coll_base_allgather_intra_neighborexchange(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
}
@ -581,15 +577,15 @@ int ompi_coll_tuned_allgather_intra_dec_fixed(void *sbuf, int scount,
- for everything else use ring.
*/
if ((pow2_size == communicator_size) && (total_dsize < 524288)) {
return ompi_coll_tuned_allgather_intra_recursivedoubling(sbuf, scount, sdtype,
return ompi_coll_base_allgather_intra_recursivedoubling(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
} else if (total_dsize <= 81920) {
return ompi_coll_tuned_allgather_intra_bruck(sbuf, scount, sdtype,
return ompi_coll_base_allgather_intra_bruck(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
}
return ompi_coll_tuned_allgather_intra_ring(sbuf, scount, sdtype,
return ompi_coll_base_allgather_intra_ring(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
#endif /* defined(USE_MPICH2_DECISION) */
@ -620,7 +616,7 @@ int ompi_coll_tuned_allgatherv_intra_dec_fixed(void *sbuf, int scount,
/* Special case for 2 processes */
if (communicator_size == 2) {
return ompi_coll_tuned_allgatherv_intra_two_procs (sbuf, scount, sdtype,
return ompi_coll_base_allgatherv_intra_two_procs(sbuf, scount, sdtype,
rbuf, rcounts, rdispls, rdtype,
comm, module);
}
@ -639,16 +635,16 @@ int ompi_coll_tuned_allgatherv_intra_dec_fixed(void *sbuf, int scount,
/* Decision based on allgather decision. */
if (total_dsize < 50000) {
return ompi_coll_tuned_allgatherv_intra_bruck(sbuf, scount, sdtype,
return ompi_coll_base_allgatherv_intra_bruck(sbuf, scount, sdtype,
rbuf, rcounts, rdispls, rdtype,
comm, module);
} else {
if (communicator_size % 2) {
return ompi_coll_tuned_allgatherv_intra_ring(sbuf, scount, sdtype,
return ompi_coll_base_allgatherv_intra_ring(sbuf, scount, sdtype,
rbuf, rcounts, rdispls, rdtype,
comm, module);
} else {
return ompi_coll_tuned_allgatherv_intra_neighborexchange(sbuf, scount, sdtype,
return ompi_coll_base_allgatherv_intra_neighborexchange(sbuf, scount, sdtype,
rbuf, rcounts, rdispls, rdtype,
comm, module);
}
@ -701,13 +697,13 @@ int ompi_coll_tuned_gather_intra_dec_fixed(void *sbuf, int scount,
}
if (block_size > large_block_size) {
return ompi_coll_tuned_gather_intra_linear_sync (sbuf, scount, sdtype,
return ompi_coll_base_gather_intra_linear_sync(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module,
large_segment_size);
} else if (block_size > intermediate_block_size) {
return ompi_coll_tuned_gather_intra_linear_sync (sbuf, scount, sdtype,
return ompi_coll_base_gather_intra_linear_sync(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module,
small_segment_size);
@ -715,13 +711,12 @@ int ompi_coll_tuned_gather_intra_dec_fixed(void *sbuf, int scount,
} else if ((communicator_size > large_communicator_size) ||
((communicator_size > small_communicator_size) &&
(block_size < small_block_size))) {
return ompi_coll_tuned_gather_intra_binomial (sbuf, scount, sdtype,
return ompi_coll_base_gather_intra_binomial(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module);
}
/* Otherwise, use basic linear */
return ompi_coll_tuned_gather_intra_basic_linear (sbuf, scount, sdtype,
return ompi_coll_base_gather_intra_basic_linear(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module);
}
@ -763,11 +758,11 @@ int ompi_coll_tuned_scatter_intra_dec_fixed(void *sbuf, int scount,
if ((communicator_size > small_comm_size) &&
(block_size < small_block_size)) {
return ompi_coll_tuned_scatter_intra_binomial (sbuf, scount, sdtype,
return ompi_coll_base_scatter_intra_binomial(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module);
}
return ompi_coll_tuned_scatter_intra_basic_linear (sbuf, scount, sdtype,
return ompi_coll_base_scatter_intra_basic_linear(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module);
}

Просмотреть файл

@ -1,9 +1,8 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -28,7 +27,7 @@
#include "coll_tuned.h"
/* need to include our own topo prototypes so we can malloc data on the comm correctly */
#include "coll_tuned_topo.h"
#include "ompi/mca/coll/base/coll_base_topo.h"
/* also need the dynamic rule structures */
#include "coll_tuned_dynamic_rules.h"
@ -291,4 +290,3 @@ static long getnext (FILE *fptr)
if ('#' == trash) skiptonewline (fptr);
} while (1);
}

Просмотреть файл

@ -2,7 +2,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2009 The University of Tennessee and The University
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -25,7 +25,7 @@
#include "coll_tuned.h"
/* need to include our own topo prototypes so we can malloc data on the comm correctly */
#include "coll_tuned_topo.h"
#include "ompi/mca/coll/base/coll_base_topo.h"
/* also need the dynamic rule structures */
#include "coll_tuned_dynamic_rules.h"
@ -33,7 +33,7 @@
#include <stdlib.h>
#include <stdio.h>
#include "coll_tuned_util.h"
#include "ompi/mca/coll/base/coll_base_util.h"
ompi_coll_alg_rule_t* ompi_coll_tuned_mk_alg_rules (int n_alg)
@ -389,4 +389,3 @@ int ompi_coll_tuned_get_target_method_params (ompi_coll_com_rule_t* base_com_rul
/* return the algorithm/method to use */
return (best_msg_p->result_alg);
}

Просмотреть файл

@ -2,7 +2,7 @@
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2009 The University of Tennessee and The University
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -26,8 +26,8 @@
#include "ompi/communicator/communicator.h"
#include "ompi/mca/coll/coll.h"
#include "ompi/mca/coll/base/base.h"
#include "ompi/mca/coll/base/coll_base_topo.h"
#include "coll_tuned.h"
#include "coll_tuned_topo.h"
#include "coll_tuned_dynamic_rules.h"
#include "coll_tuned_dynamic_file.h"
@ -145,20 +145,20 @@ ompi_coll_tuned_forced_getvalues( enum COLLTYPE type,
return (MPI_SUCCESS);
}
#define COLL_TUNED_EXECUTE_IF_DYNAMIC(DATA, TYPE, EXECUTE) \
#define COLL_TUNED_EXECUTE_IF_DYNAMIC(TMOD, TYPE, EXECUTE) \
{ \
int need_dynamic_decision = 0; \
ompi_coll_tuned_forced_getvalues( (TYPE), &((DATA)->user_forced[(TYPE)]) ); \
(DATA)->com_rules[(TYPE)] = NULL; \
if( 0 != (DATA)->user_forced[(TYPE)].algorithm ) { \
ompi_coll_tuned_forced_getvalues( (TYPE), &((TMOD)->user_forced[(TYPE)]) ); \
(TMOD)->com_rules[(TYPE)] = NULL; \
if( 0 != (TMOD)->user_forced[(TYPE)].algorithm ) { \
need_dynamic_decision = 1; \
EXECUTE; \
} \
if( NULL != mca_coll_tuned_component.all_base_rules ) { \
(DATA)->com_rules[(TYPE)] \
(TMOD)->com_rules[(TYPE)] \
= ompi_coll_tuned_get_com_rule_ptr( mca_coll_tuned_component.all_base_rules, \
(TYPE), size ); \
if( NULL != (DATA)->com_rules[(TYPE)] ) { \
if( NULL != (TMOD)->com_rules[(TYPE)] ) { \
need_dynamic_decision = 1; \
} \
} \
@ -178,7 +178,7 @@ tuned_module_enable( mca_coll_base_module_t *module,
{
int size;
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t *) module;
mca_coll_tuned_comm_t *data = NULL;
mca_coll_base_comm_t *data = NULL;
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_init called."));
@ -198,25 +198,20 @@ tuned_module_enable( mca_coll_base_module_t *module,
* we do check an MCA parameter to see if we should allocate this memory
*
* The default is set very high
*
*/
/* if we are within the memory/size limit, allow preallocated data */
data = OBJ_NEW(mca_coll_base_comm_t);
if (NULL == data) {
return OMPI_ERROR;
}
if( size <= ompi_coll_tuned_preallocate_memory_comm_size_limit ) {
data = (mca_coll_tuned_comm_t*)malloc(sizeof(struct mca_coll_tuned_comm_t) +
(sizeof(ompi_request_t *) * size * 2));
if (NULL == data) {
return OMPI_ERROR;
}
data->mcct_reqs = (ompi_request_t **) (data + 1);
data->mcct_num_reqs = size * 2;
} else {
data = (mca_coll_tuned_comm_t*)malloc(sizeof(struct mca_coll_tuned_comm_t));
if (NULL == data) {
data->mcct_reqs = (ompi_request_t**)malloc(sizeof(ompi_request_t*) * data->mcct_num_reqs);
if (NULL == data->mcct_reqs) {
OBJ_RELEASE(data);
return OMPI_ERROR;
}
data->mcct_reqs = (ompi_request_t **) NULL;
data->mcct_num_reqs = 0;
}
if (ompi_coll_tuned_use_dynamic_rules) {
@ -230,37 +225,37 @@ tuned_module_enable( mca_coll_base_module_t *module,
* next dynamic state, recheck all forced rules as well
* warning, we should check to make sure this is really an INTRA comm here...
*/
COLL_TUNED_EXECUTE_IF_DYNAMIC(data, ALLGATHER,
COLL_TUNED_EXECUTE_IF_DYNAMIC(tuned_module, ALLGATHER,
tuned_module->super.coll_allgather = ompi_coll_tuned_allgather_intra_dec_dynamic);
COLL_TUNED_EXECUTE_IF_DYNAMIC(data, ALLGATHERV,
COLL_TUNED_EXECUTE_IF_DYNAMIC(tuned_module, ALLGATHERV,
tuned_module->super.coll_allgatherv = ompi_coll_tuned_allgatherv_intra_dec_dynamic);
COLL_TUNED_EXECUTE_IF_DYNAMIC(data, ALLREDUCE,
COLL_TUNED_EXECUTE_IF_DYNAMIC(tuned_module, ALLREDUCE,
tuned_module->super.coll_allreduce = ompi_coll_tuned_allreduce_intra_dec_dynamic);
COLL_TUNED_EXECUTE_IF_DYNAMIC(data, ALLTOALL,
COLL_TUNED_EXECUTE_IF_DYNAMIC(tuned_module, ALLTOALL,
tuned_module->super.coll_alltoall = ompi_coll_tuned_alltoall_intra_dec_dynamic);
COLL_TUNED_EXECUTE_IF_DYNAMIC(data, ALLTOALLV,
COLL_TUNED_EXECUTE_IF_DYNAMIC(tuned_module, ALLTOALLV,
tuned_module->super.coll_alltoallv = ompi_coll_tuned_alltoallv_intra_dec_dynamic);
COLL_TUNED_EXECUTE_IF_DYNAMIC(data, ALLTOALLW,
COLL_TUNED_EXECUTE_IF_DYNAMIC(tuned_module, ALLTOALLW,
tuned_module->super.coll_alltoallw = NULL);
COLL_TUNED_EXECUTE_IF_DYNAMIC(data, BARRIER,
COLL_TUNED_EXECUTE_IF_DYNAMIC(tuned_module, BARRIER,
tuned_module->super.coll_barrier = ompi_coll_tuned_barrier_intra_dec_dynamic);
COLL_TUNED_EXECUTE_IF_DYNAMIC(data, BCAST,
COLL_TUNED_EXECUTE_IF_DYNAMIC(tuned_module, BCAST,
tuned_module->super.coll_bcast = ompi_coll_tuned_bcast_intra_dec_dynamic);
COLL_TUNED_EXECUTE_IF_DYNAMIC(data, EXSCAN,
COLL_TUNED_EXECUTE_IF_DYNAMIC(tuned_module, EXSCAN,
tuned_module->super.coll_exscan = NULL);
COLL_TUNED_EXECUTE_IF_DYNAMIC(data, GATHER,
COLL_TUNED_EXECUTE_IF_DYNAMIC(tuned_module, GATHER,
tuned_module->super.coll_gather = ompi_coll_tuned_gather_intra_dec_dynamic);
COLL_TUNED_EXECUTE_IF_DYNAMIC(data, GATHERV,
COLL_TUNED_EXECUTE_IF_DYNAMIC(tuned_module, GATHERV,
tuned_module->super.coll_gatherv = NULL);
COLL_TUNED_EXECUTE_IF_DYNAMIC(data, REDUCE,
COLL_TUNED_EXECUTE_IF_DYNAMIC(tuned_module, REDUCE,
tuned_module->super.coll_reduce = ompi_coll_tuned_reduce_intra_dec_dynamic);
COLL_TUNED_EXECUTE_IF_DYNAMIC(data, REDUCESCATTER,
COLL_TUNED_EXECUTE_IF_DYNAMIC(tuned_module, REDUCESCATTER,
tuned_module->super.coll_reduce_scatter = ompi_coll_tuned_reduce_scatter_intra_dec_dynamic);
COLL_TUNED_EXECUTE_IF_DYNAMIC(data, SCAN,
COLL_TUNED_EXECUTE_IF_DYNAMIC(tuned_module, SCAN,
tuned_module->super.coll_scan = NULL);
COLL_TUNED_EXECUTE_IF_DYNAMIC(data, SCATTER,
COLL_TUNED_EXECUTE_IF_DYNAMIC(tuned_module, SCATTER,
tuned_module->super.coll_scatter = ompi_coll_tuned_scatter_intra_dec_dynamic);
COLL_TUNED_EXECUTE_IF_DYNAMIC(data, SCATTERV,
COLL_TUNED_EXECUTE_IF_DYNAMIC(tuned_module, SCATTERV,
tuned_module->super.coll_scatterv = NULL);
if( false == ompi_coll_tuned_use_dynamic_rules ) {
@ -286,7 +281,7 @@ tuned_module_enable( mca_coll_base_module_t *module,
data->cached_in_order_bintree = NULL;
/* All done */
tuned_module->tuned_data = data;
tuned_module->super.base_data = data;
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_init Tuned is in use"));
return OMPI_SUCCESS;