Rework the selection logic for the tuned collectives. All supported collectives
can now use the dynamic rules. Moreover, these rules are loaded only once and stored at the component level, so all communicators can use them (not only MPI_COMM_WORLD, as was the case until now). This commit also includes many minor corrections, fixes for memory management issues, and a reduction in the amount of memory used by the tuned collectives. This commit was SVN r21825.
Этот коммит содержится в:
родитель
c3afac1d50
Коммит
23e8ce91ba
@ -2,7 +2,7 @@
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# Copyright (c) 2004-2009 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
@ -22,14 +22,12 @@ sources = \
|
||||
coll_tuned_util.h \
|
||||
coll_tuned_dynamic_file.h \
|
||||
coll_tuned_dynamic_rules.h \
|
||||
coll_tuned_forced.h \
|
||||
coll_tuned_topo.c \
|
||||
coll_tuned_util.c \
|
||||
coll_tuned_decision_fixed.c \
|
||||
coll_tuned_decision_dynamic.c \
|
||||
coll_tuned_dynamic_file.c \
|
||||
coll_tuned_dynamic_rules.c \
|
||||
coll_tuned_forced.c \
|
||||
coll_tuned_allreduce.c \
|
||||
coll_tuned_alltoall.c \
|
||||
coll_tuned_alltoallv.c \
|
||||
|
@ -2,7 +2,7 @@
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2009 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
@ -34,12 +34,26 @@
|
||||
/* also need the dynamic rule structures */
|
||||
#include "coll_tuned_dynamic_rules.h"
|
||||
|
||||
/* need the forced user choice structures */
|
||||
#include "coll_tuned_forced.h"
|
||||
|
||||
/* some fixed value index vars to simplify certain operations */
|
||||
typedef enum COLLTYPE {ALLGATHER, ALLGATHERV, ALLREDUCE, ALLTOALL, ALLTOALLV, ALLTOALLW, BARRIER, BCAST,
|
||||
EXSCAN, GATHER, GATHERV, REDUCE, REDUCESCATTER, SCAN, SCATTER, SCATTERV, COLLCOUNT} COLLTYPE_T;
|
||||
typedef enum COLLTYPE {
|
||||
ALLGATHER = 0, /* 0 */
|
||||
ALLGATHERV, /* 1 */
|
||||
ALLREDUCE, /* 2 */
|
||||
ALLTOALL, /* 3 */
|
||||
ALLTOALLV, /* 4 */
|
||||
ALLTOALLW, /* 5 */
|
||||
BARRIER, /* 6 */
|
||||
BCAST, /* 7 */
|
||||
EXSCAN, /* 8 */
|
||||
GATHER, /* 9 */
|
||||
GATHERV, /* 10 */
|
||||
REDUCE, /* 11 */
|
||||
REDUCESCATTER, /* 12 */
|
||||
SCAN, /* 13 */
|
||||
SCATTER, /* 14 */
|
||||
SCATTERV, /* 15 */
|
||||
COLLCOUNT /* 16 end counter keep it as last element */
|
||||
} COLLTYPE_T;
|
||||
|
||||
/* defined arg lists to simplify auto inclusion of user overriding decision functions */
|
||||
#define ALLGATHER_ARGS void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
|
||||
@ -60,9 +74,7 @@ EXSCAN, GATHER, GATHERV, REDUCE, REDUCESCATTER, SCAN, SCATTER, SCATTERV, COLLCOU
|
||||
#define SCATTERV_ARGS void *sbuf, int *scounts, int *disps, struct ompi_datatype_t *sdtype, void* rbuf, int rcount, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
|
||||
/* end defined arg lists to simplify auto inclusion of user overriding decision functions */
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
BEGIN_C_DECLS
|
||||
|
||||
/* these are the same across all modules and are loaded at component query time */
|
||||
extern int ompi_coll_tuned_stream;
|
||||
@ -75,6 +87,30 @@ extern "C" {
|
||||
extern int ompi_coll_tuned_init_max_requests;
|
||||
|
||||
/* forced algorithm choices */
|
||||
/* this structure is for storing the indexes to the forced algorithm mca params... */
|
||||
/* we get these at component query (so that registered values appear in ompi_info) */
|
||||
struct coll_tuned_force_algorithm_mca_param_indices_t {
|
||||
int algorithm_param_index; /* which algorithm you want to force */
|
||||
int segsize_param_index; /* segsize to use (if supported), 0 = no segmentation */
|
||||
int tree_fanout_param_index; /* tree fanout/in to use */
|
||||
int chain_fanout_param_index; /* K-chain fanout/in to use */
|
||||
int max_requests_param_index; /* Maximum number of outstanding send or recv requests */
|
||||
};
|
||||
typedef struct coll_tuned_force_algorithm_mca_param_indices_t coll_tuned_force_algorithm_mca_param_indices_t;
|
||||
|
||||
|
||||
/* the following type is for storing actual value obtained from the MCA on each tuned module */
|
||||
/* via their mca param indices lookup in the component */
|
||||
/* this structure is stored once per collective type per communicator... */
|
||||
struct coll_tuned_force_algorithm_params_t {
|
||||
int algorithm; /* which algorithm you want to force */
|
||||
int segsize; /* segsize to use (if supported), 0 = no segmentation */
|
||||
int tree_fanout; /* tree fanout/in to use */
|
||||
int chain_fanout; /* K-chain fanout/in to use */
|
||||
int max_requests; /* Maximum number of outstanding send or recv requests */
|
||||
};
|
||||
typedef struct coll_tuned_force_algorithm_params_t coll_tuned_force_algorithm_params_t;
|
||||
|
||||
/* the indices to the MCA params so that modules can look them up at open / comm create time */
|
||||
extern coll_tuned_force_algorithm_mca_param_indices_t ompi_coll_tuned_forced_params[COLLCOUNT];
|
||||
/* the actual max algorithm values (readonly), loaded at component open */
|
||||
@ -340,7 +376,6 @@ extern "C" {
|
||||
* for each communicator we cache the topo information so we can
|
||||
* reuse without regenerating if we change the root, [or fanout]
|
||||
* then regenerate and recache this information
|
||||
*
|
||||
*/
|
||||
|
||||
/* general tree with n fan out */
|
||||
@ -372,8 +407,6 @@ extern "C" {
|
||||
/* in-order binary tree (root of the in-order binary tree is rank 0) */
|
||||
ompi_coll_tree_t *cached_in_order_bintree;
|
||||
|
||||
/* extra data required by the decision functions */
|
||||
ompi_coll_alg_rule_t *all_base_rules; /* stored only on MCW, all other coms ref it */
|
||||
/* moving to the component */
|
||||
ompi_coll_com_rule_t *com_rules[COLLCOUNT]; /* the communicator rules for each MPI collective for ONLY my comsize */
|
||||
|
||||
@ -381,9 +414,6 @@ extern "C" {
|
||||
/* previously we only had one shared copy, oops, it really is per comm/module */
|
||||
coll_tuned_force_algorithm_params_t user_forced[COLLCOUNT];
|
||||
};
|
||||
/**
|
||||
* Convenience typedef
|
||||
*/
|
||||
typedef struct mca_coll_tuned_comm_t mca_coll_tuned_comm_t;
|
||||
|
||||
struct mca_coll_tuned_module_t {
|
||||
@ -394,9 +424,7 @@ extern "C" {
|
||||
typedef struct mca_coll_tuned_module_t mca_coll_tuned_module_t;
|
||||
OBJ_CLASS_DECLARATION(mca_coll_tuned_module_t);
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
END_C_DECLS
|
||||
|
||||
#define COLL_TUNED_UPDATE_BINTREE( OMPI_COMM, TUNED_MODULE, ROOT ) \
|
||||
do { \
|
||||
|
@ -2,7 +2,7 @@
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2009 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
@ -31,8 +31,7 @@
|
||||
#include "mpi.h"
|
||||
#include "ompi/mca/coll/coll.h"
|
||||
#include "coll_tuned.h"
|
||||
|
||||
|
||||
#include "coll_tuned_dynamic_file.h"
|
||||
|
||||
/*
|
||||
* Public string showing the coll ompi_tuned component version number
|
||||
@ -58,7 +57,6 @@ coll_tuned_force_algorithm_mca_param_indices_t ompi_coll_tuned_forced_params[COL
|
||||
/* max algorithm values */
|
||||
int ompi_coll_tuned_forced_max_algorithms[COLLCOUNT];
|
||||
|
||||
|
||||
/*
|
||||
* Local function
|
||||
*/
|
||||
@ -71,14 +69,10 @@ static int tuned_close(void);
|
||||
*/
|
||||
|
||||
mca_coll_tuned_component_t mca_coll_tuned_component = {
|
||||
|
||||
/* First, fill in the super */
|
||||
|
||||
{
|
||||
|
||||
/* First, the mca_component_t struct containing meta information
|
||||
about the component itself */
|
||||
|
||||
{
|
||||
MCA_COLL_BASE_VERSION_2_0_0,
|
||||
|
||||
@ -107,13 +101,14 @@ mca_coll_tuned_component_t mca_coll_tuned_component = {
|
||||
0,
|
||||
|
||||
/* Tuned component specific information */
|
||||
/* Note some of this WAS in the module */
|
||||
NULL /* ompi_coll_alg_rule_t ptr */
|
||||
};
|
||||
|
||||
|
||||
static int tuned_open(void)
|
||||
{
|
||||
int rc;
|
||||
|
||||
#if OPAL_ENABLE_DEBUG
|
||||
{
|
||||
int param;
|
||||
@ -177,6 +172,18 @@ static int tuned_open(void)
|
||||
"Filename of configuration file that contains the dynamic (@runtime) decision function rules",
|
||||
false, false, ompi_coll_tuned_dynamic_rules_filename,
|
||||
&ompi_coll_tuned_dynamic_rules_filename);
|
||||
if( ompi_coll_tuned_dynamic_rules_filename ) {
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:component_open Reading collective rules file [%s]",
|
||||
ompi_coll_tuned_dynamic_rules_filename));
|
||||
rc = ompi_coll_tuned_read_rules_config_file( ompi_coll_tuned_dynamic_rules_filename,
|
||||
&(mca_coll_tuned_component.all_base_rules), COLLCOUNT);
|
||||
if( rc >= 0 ) {
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_open Read %d valid rules\n", rc));
|
||||
} else {
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_open Reading collective rules file failed\n"));
|
||||
mca_coll_tuned_component.all_base_rules = NULL;
|
||||
}
|
||||
}
|
||||
ompi_coll_tuned_allreduce_intra_check_forced_init(&ompi_coll_tuned_forced_params[ALLREDUCE]);
|
||||
ompi_coll_tuned_alltoall_intra_check_forced_init(&ompi_coll_tuned_forced_params[ALLTOALL]);
|
||||
ompi_coll_tuned_allgather_intra_check_forced_init(&ompi_coll_tuned_forced_params[ALLGATHER]);
|
||||
@ -206,6 +213,11 @@ static int tuned_close(void)
|
||||
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:component_close: done!"));
|
||||
|
||||
if( NULL != mca_coll_tuned_component.all_base_rules ) {
|
||||
ompi_coll_tuned_free_all_rules(mca_coll_tuned_component.all_base_rules, COLLCOUNT);
|
||||
mca_coll_tuned_component.all_base_rules = NULL;
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
@ -256,15 +268,6 @@ mca_coll_tuned_module_destruct(mca_coll_tuned_module_t *module)
|
||||
ompi_coll_tuned_topo_destroy_tree (&data->cached_in_order_bintree);
|
||||
}
|
||||
|
||||
#if 0 /* FIXME: */
|
||||
/* if any algorithm rules are cached on the communicator, only free them if its MCW */
|
||||
/* as this is the only place they are allocated by reading the decision configure file */
|
||||
if ((ompi_coll_tuned_use_dynamic_rules)&&(&ompi_mpi_comm_world==comm)) {
|
||||
if (comm->data->all_base_rules) {
|
||||
ompi_coll_tuned_free_all_rules (comm->data->all_base_rules, COLLCOUNT);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
free(data);
|
||||
}
|
||||
}
|
||||
|
@ -2,7 +2,7 @@
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2009 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
@ -134,7 +134,6 @@ int ompi_coll_tuned_alltoall_intra_dec_dynamic(void *sbuf, int scount,
|
||||
} /* found a method */
|
||||
} /*end if any com rules to check */
|
||||
|
||||
|
||||
if (data->user_forced[ALLTOALL].algorithm) {
|
||||
return ompi_coll_tuned_alltoall_intra_do_forced (sbuf, scount, sdtype,
|
||||
rbuf, rcount, rdtype,
|
||||
@ -163,16 +162,27 @@ int ompi_coll_tuned_alltoallv_intra_dec_dynamic(void *sbuf, int *scounts, int *s
|
||||
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream, "ompi_coll_tuned_alltoallv_intra_dec_dynamic"));
|
||||
|
||||
/*
|
||||
* BEGIN - File Based Rules
|
||||
*
|
||||
* Here is where we would check to see if we have some file based
|
||||
* rules. Currently, we do not, so move on to seeing if the user
|
||||
* specified a specific algorithm. If not, then use the fixed
|
||||
* decision code to decide.
|
||||
*
|
||||
* END - File Based Rules
|
||||
/**
|
||||
* check to see if we have some filebased rules. As we don't have global
|
||||
* knowledge about the total amount of data, use the first available rule.
|
||||
* This allows the users to specify the alltoallv algorithm to be used only
|
||||
* based on the communicator size.
|
||||
*/
|
||||
if (data->com_rules[ALLTOALLV]) {
|
||||
int alg, faninout, segsize, max_requests;
|
||||
|
||||
alg = ompi_coll_tuned_get_target_method_params (data->com_rules[ALLTOALLV],
|
||||
0, &faninout, &segsize, &max_requests);
|
||||
|
||||
if (alg) {
|
||||
/* we have found a valid choice from the file based rules for this message size */
|
||||
return ompi_coll_tuned_alltoallv_intra_do_this (sbuf, scounts, sdisps, sdtype,
|
||||
rbuf, rcounts, rdisps, rdtype,
|
||||
comm, module,
|
||||
alg);
|
||||
} /* found a method */
|
||||
} /*end if any com rules to check */
|
||||
|
||||
if (data->user_forced[ALLTOALLV].algorithm) {
|
||||
return ompi_coll_tuned_alltoallv_intra_do_forced(sbuf, scounts, sdisps, sdtype,
|
||||
rbuf, rcounts, rdisps, rdtype,
|
||||
@ -510,6 +520,29 @@ int ompi_coll_tuned_gather_intra_dec_dynamic(void *sbuf, int scount,
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream,
|
||||
"ompi_coll_tuned_gather_intra_dec_dynamic"));
|
||||
|
||||
/**
|
||||
* check to see if we have some filebased rules.
|
||||
*/
|
||||
if (data->com_rules[GATHER]) {
|
||||
int comsize, alg, faninout, segsize, max_requests;
|
||||
size_t dsize;
|
||||
|
||||
comsize = ompi_comm_size(comm);
|
||||
ompi_datatype_type_size (sdtype, &dsize);
|
||||
dsize *= comsize;
|
||||
|
||||
alg = ompi_coll_tuned_get_target_method_params (data->com_rules[GATHER],
|
||||
dsize, &faninout, &segsize, &max_requests);
|
||||
|
||||
if (alg) {
|
||||
/* we have found a valid choice from the file based rules for this message size */
|
||||
return ompi_coll_tuned_gather_intra_do_this (sbuf, scount, sdtype,
|
||||
rbuf, rcount, rdtype,
|
||||
root, comm, module,
|
||||
alg, faninout, segsize);
|
||||
} /* found a method */
|
||||
} /*end if any com rules to check */
|
||||
|
||||
if (data->user_forced[GATHER].algorithm) {
|
||||
return ompi_coll_tuned_gather_intra_do_forced (sbuf, scount, sdtype,
|
||||
rbuf, rcount, rdtype,
|
||||
@ -534,6 +567,29 @@ int ompi_coll_tuned_scatter_intra_dec_dynamic(void *sbuf, int scount,
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream,
|
||||
"ompi_coll_tuned_scatter_intra_dec_dynamic"));
|
||||
|
||||
/**
|
||||
* check to see if we have some filebased rules.
|
||||
*/
|
||||
if (data->com_rules[SCATTER]) {
|
||||
int comsize, alg, faninout, segsize, max_requests;
|
||||
size_t dsize;
|
||||
|
||||
comsize = ompi_comm_size(comm);
|
||||
ompi_datatype_type_size (sdtype, &dsize);
|
||||
dsize *= comsize;
|
||||
|
||||
alg = ompi_coll_tuned_get_target_method_params (data->com_rules[SCATTER],
|
||||
dsize, &faninout, &segsize, &max_requests);
|
||||
|
||||
if (alg) {
|
||||
/* we have found a valid choice from the file based rules for this message size */
|
||||
return ompi_coll_tuned_scatter_intra_do_this (sbuf, scount, sdtype,
|
||||
rbuf, rcount, rdtype,
|
||||
root, comm, module,
|
||||
alg, faninout, segsize);
|
||||
} /* found a method */
|
||||
} /*end if any com rules to check */
|
||||
|
||||
if (data->user_forced[SCATTER].algorithm) {
|
||||
return ompi_coll_tuned_scatter_intra_do_forced (sbuf, scount, sdtype,
|
||||
rbuf, rcount, rdtype,
|
||||
|
@ -2,7 +2,7 @@
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2009 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
|
@ -2,7 +2,7 @@
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2009 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
@ -315,17 +315,11 @@ ompi_coll_com_rule_t* ompi_coll_tuned_get_com_rule_ptr (ompi_coll_alg_rule_t* ru
|
||||
i = best = 0;
|
||||
|
||||
while( i < alg_p->n_com_sizes ) {
|
||||
/* OPAL_OUTPUT((ompi_coll_tuned_stream,"checking comsize %d against alg_id %d com_id %d index %d com_size %d", */
|
||||
/* mpi_comsize, com_p->alg_rule_id, com_p->com_rule_id, i, com_p->mpi_comsize)); */
|
||||
if (com_p->mpi_comsize <= mpi_comsize) {
|
||||
best = i;
|
||||
best_com_p = com_p;
|
||||
/* OPAL_OUTPUT((ompi_coll_tuned_stream(":ok\n")); */
|
||||
}
|
||||
else {
|
||||
/* OPAL_OUTPUT((ompi_coll_tuned_stream(":nop\n")); */
|
||||
if (com_p->mpi_comsize > mpi_comsize) {
|
||||
break;
|
||||
}
|
||||
best = i;
|
||||
best_com_p = com_p;
|
||||
/* go to the next entry */
|
||||
com_p++;
|
||||
i++;
|
||||
@ -359,26 +353,11 @@ int ompi_coll_tuned_get_target_method_params (ompi_coll_com_rule_t* base_com_rul
|
||||
ompi_coll_msg_rule_t* best_msg_p = (ompi_coll_msg_rule_t*) NULL;
|
||||
int i, best;
|
||||
|
||||
if (!base_com_rule) {
|
||||
/* No rule or zero rules */
|
||||
if( (NULL == base_com_rule) || (0 == base_com_rule->n_msg_sizes)) {
|
||||
return (0);
|
||||
}
|
||||
|
||||
if (!result_topo_faninout) {
|
||||
return (0);
|
||||
}
|
||||
|
||||
if (!result_segsize) {
|
||||
return (0);
|
||||
}
|
||||
|
||||
if (!max_requests) {
|
||||
return (0);
|
||||
}
|
||||
|
||||
if (!base_com_rule->n_msg_sizes) { /* check for count of message sizes */
|
||||
return (0); /* no msg sizes so no rule */
|
||||
}
|
||||
|
||||
/* ok have some msg sizes, now to find the one closest to my mpi_msgsize */
|
||||
|
||||
/* make a copy of the first msg rule */
|
||||
|
@ -1,62 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include "mpi.h"
|
||||
#include "opal/mca/mca.h"
|
||||
#include "ompi/constants.h"
|
||||
#include "coll_tuned.h"
|
||||
|
||||
/* need to include our own topo prototypes so we can malloc data on the comm correctly */
|
||||
#include "coll_tuned_topo.h"
|
||||
|
||||
/* also need the dynamic rule structures */
|
||||
#include "coll_tuned_forced.h"
|
||||
|
||||
#include "coll_tuned_util.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
/* We put all routines that handle the MCA user forced algorithm and parameter choices here */
|
||||
/* recheck the setting of forced, called on module create (i.e. for each new comm) */
|
||||
|
||||
int ompi_coll_tuned_forced_getvalues (coll_tuned_force_algorithm_mca_param_indices_t mca_params,
|
||||
coll_tuned_force_algorithm_params_t *forced_values)
|
||||
{
|
||||
mca_base_param_lookup_int (mca_params.algorithm_param_index, &(forced_values->algorithm));
|
||||
mca_base_param_lookup_int (mca_params.segsize_param_index, &(forced_values->segsize));
|
||||
mca_base_param_lookup_int (mca_params.tree_fanout_param_index, &(forced_values->tree_fanout));
|
||||
mca_base_param_lookup_int (mca_params.chain_fanout_param_index, &(forced_values->chain_fanout));
|
||||
mca_base_param_lookup_int (mca_params.max_requests_param_index, &(forced_values->max_requests));
|
||||
|
||||
return (MPI_SUCCESS);
|
||||
}
|
||||
|
||||
|
||||
/* special version of above just for barrier which only has one option available (at the moment...) */
|
||||
int ompi_coll_tuned_forced_getvalues_barrier (coll_tuned_force_algorithm_mca_param_indices_t mca_params,
|
||||
coll_tuned_force_algorithm_params_t *forced_values)
|
||||
{
|
||||
mca_base_param_lookup_int (mca_params.algorithm_param_index, &(forced_values->algorithm));
|
||||
|
||||
return (MPI_SUCCESS);
|
||||
}
|
||||
|
||||
|
@ -1,71 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef MCA_COLL_TUNED_FORCED_H_HAS_BEEN_INCLUDED
|
||||
#define MCA_COLL_TUNED_FORCED_H_HAS_BEEN_INCLUDED
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
/* this structure is for storing the indexes to the forced algorithm mca params... */
|
||||
/* we get these at component query (so that registered values appear in ompi_info) */
|
||||
|
||||
struct coll_tuned_force_algorithm_mca_param_indices_t {
|
||||
int algorithm_param_index; /* which algorithm you want to force */
|
||||
int segsize_param_index; /* segsize to use (if supported), 0 = no segmentation */
|
||||
int tree_fanout_param_index; /* tree fanout/in to use */
|
||||
int chain_fanout_param_index; /* K-chain fanout/in to use */
|
||||
int max_requests_param_index; /* Maximum number of outstanding send or recv requests */
|
||||
};
|
||||
|
||||
typedef struct coll_tuned_force_algorithm_mca_param_indices_t coll_tuned_force_algorithm_mca_param_indices_t;
|
||||
|
||||
|
||||
/* the following type is for storing actual value obtained from the MCA on each tuned module */
|
||||
/* via their mca param indices lookup in the component */
|
||||
/* this structure is stored once per collective type per communicator... */
|
||||
struct coll_tuned_force_algorithm_params_t {
|
||||
int algorithm; /* which algorithm you want to force */
|
||||
int segsize; /* segsize to use (if supported), 0 = no segmentation */
|
||||
int tree_fanout; /* tree fanout/in to use */
|
||||
int chain_fanout; /* K-chain fanout/in to use */
|
||||
int max_requests; /* Maximum number of outstanding send or recv requests */
|
||||
};
|
||||
|
||||
typedef struct coll_tuned_force_algorithm_params_t coll_tuned_force_algorithm_params_t;
|
||||
|
||||
|
||||
/* prototypes */
|
||||
|
||||
int ompi_coll_tuned_forced_getvalues (coll_tuned_force_algorithm_mca_param_indices_t mca_params,
|
||||
coll_tuned_force_algorithm_params_t *forced_values);
|
||||
|
||||
/* barrier has less options than any other collective so it gets its own special function */
|
||||
int ompi_coll_tuned_forced_getvalues_barrier (coll_tuned_force_algorithm_mca_param_indices_t mca_params,
|
||||
coll_tuned_force_algorithm_params_t *forced_values);
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
#endif /* MCA_COLL_TUNED_FORCED_H_HAS_BEEN_INCLUDED */
|
||||
|
||||
|
@ -2,7 +2,7 @@
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2009 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
@ -456,7 +456,7 @@ ompi_coll_tuned_gather_intra_check_forced_init(coll_tuned_force_algorithm_mca_pa
|
||||
&(requested_alg));
|
||||
if( 0 > requested_alg || requested_alg > max_alg ) {
|
||||
if( 0 == ompi_comm_rank( MPI_COMM_WORLD ) ) {
|
||||
opal_output( 0, "Gather algorithm #%d is not available (range [0..%d]). Switching back to ignore(0)\n",
|
||||
opal_output( 0, "Gather algorithm #%d is not available (range [0..%d]). Switching back to default(0)\n",
|
||||
requested_alg, max_alg );
|
||||
}
|
||||
mca_base_param_set_int( mca_param_indices->algorithm_param_index, 0);
|
||||
|
@ -2,7 +2,7 @@
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2009 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
@ -31,7 +31,6 @@
|
||||
#include "coll_tuned_topo.h"
|
||||
#include "coll_tuned_dynamic_rules.h"
|
||||
#include "coll_tuned_dynamic_file.h"
|
||||
#include "coll_tuned_forced.h"
|
||||
|
||||
static int tuned_module_enable(mca_coll_base_module_t *module,
|
||||
struct ompi_communicator_t *comm);
|
||||
@ -62,24 +61,18 @@ ompi_coll_tuned_comm_query(struct ompi_communicator_t *comm, int *priority)
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:module_tuned query called"));
|
||||
|
||||
/**
|
||||
* If it is an intra-communicator and its size is less than 2, we have specialized modules
|
||||
* to handle the intra collective communications.
|
||||
* No support for inter-communicator yet.
|
||||
*/
|
||||
if (OMPI_COMM_IS_INTRA(comm) && ompi_comm_size(comm) < 2) {
|
||||
if (OMPI_COMM_IS_INTER(comm)) {
|
||||
*priority = 0;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (OMPI_COMM_IS_INTER(comm)) {
|
||||
#if 0
|
||||
if (ompi_coll_tuned_use_dynamic_rules) {
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_query using inter_dynamic"));
|
||||
} else {
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_query using inter_fixed"));
|
||||
|
||||
}
|
||||
#endif
|
||||
/* tuned does not support inter-communicator yet */
|
||||
/**
|
||||
* If it is an intra-communicator and its size is less than 2, we have specialized modules
|
||||
* to handle the intra collective communications.
|
||||
*/
|
||||
if (OMPI_COMM_IS_INTRA(comm) && ompi_comm_size(comm) < 2) {
|
||||
*priority = 0;
|
||||
return NULL;
|
||||
}
|
||||
@ -117,7 +110,6 @@ ompi_coll_tuned_comm_query(struct ompi_communicator_t *comm, int *priority)
|
||||
tuned_module->super.coll_scan = NULL;
|
||||
tuned_module->super.coll_scatter = ompi_coll_tuned_scatter_intra_dec_dynamic;
|
||||
tuned_module->super.coll_scatterv = NULL;
|
||||
|
||||
} else {
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_query using intra_fixed"));
|
||||
|
||||
@ -137,12 +129,31 @@ ompi_coll_tuned_comm_query(struct ompi_communicator_t *comm, int *priority)
|
||||
tuned_module->super.coll_scan = NULL;
|
||||
tuned_module->super.coll_scatter = ompi_coll_tuned_scatter_intra_dec_fixed;
|
||||
tuned_module->super.coll_scatterv = NULL;
|
||||
|
||||
}
|
||||
|
||||
return &(tuned_module->super);
|
||||
}
|
||||
|
||||
/* We put all routines that handle the MCA user forced algorithm and parameter choices here */
|
||||
/* recheck the setting of forced, called on module create (i.e. for each new comm) */
|
||||
|
||||
static int
|
||||
ompi_coll_tuned_forced_getvalues( enum COLLTYPE type,
|
||||
coll_tuned_force_algorithm_params_t *forced_values )
|
||||
{
|
||||
coll_tuned_force_algorithm_mca_param_indices_t* mca_params;
|
||||
|
||||
mca_params = &(ompi_coll_tuned_forced_params[type]);
|
||||
|
||||
mca_base_param_lookup_int (mca_params->algorithm_param_index, &(forced_values->algorithm));
|
||||
if( BARRIER != type ) {
|
||||
mca_base_param_lookup_int (mca_params->segsize_param_index, &(forced_values->segsize));
|
||||
mca_base_param_lookup_int (mca_params->tree_fanout_param_index, &(forced_values->tree_fanout));
|
||||
mca_base_param_lookup_int (mca_params->chain_fanout_param_index, &(forced_values->chain_fanout));
|
||||
mca_base_param_lookup_int (mca_params->max_requests_param_index, &(forced_values->max_requests));
|
||||
}
|
||||
return (MPI_SUCCESS);
|
||||
}
|
||||
|
||||
/*
|
||||
* Init module on the communicator
|
||||
@ -151,20 +162,16 @@ static int
|
||||
tuned_module_enable( mca_coll_base_module_t *module,
|
||||
struct ompi_communicator_t *comm )
|
||||
{
|
||||
int size;
|
||||
int size, i;
|
||||
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t *) module;
|
||||
mca_coll_tuned_comm_t *data = NULL;
|
||||
/* fanout parameters */
|
||||
int rc=0;
|
||||
int i;
|
||||
|
||||
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_init called."));
|
||||
|
||||
/* This routine will become more complex and might have to be */
|
||||
/* broken into more sections/function calls */
|
||||
|
||||
/* Order of operations:
|
||||
/* This routine will become more complex and might have to be
|
||||
* broken into more sections/function calls
|
||||
*
|
||||
* Order of operations:
|
||||
* alloc memory for nb reqs (in case we fall through)
|
||||
* add decision rules if using dynamic rules
|
||||
* compact rules using communicator size info etc
|
||||
@ -174,15 +181,13 @@ tuned_module_enable(mca_coll_base_module_t *module,
|
||||
*/
|
||||
|
||||
/* Allocate the data that hangs off the communicator */
|
||||
|
||||
if (OMPI_COMM_IS_INTER(comm)) {
|
||||
size = ompi_comm_remote_size(comm);
|
||||
} else {
|
||||
size = ompi_comm_size(comm);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
/**
|
||||
* we still malloc data as it is used by the TUNED modules
|
||||
* if we don't allocate it and fall back to a BASIC module routine then it confuses debuggers
|
||||
* we place any special info after the default data
|
||||
@ -195,12 +200,9 @@ tuned_module_enable(mca_coll_base_module_t *module,
|
||||
*/
|
||||
|
||||
/* if we are within the memory/size limit, allow preallocated data */
|
||||
|
||||
|
||||
if( size <= ompi_coll_tuned_preallocate_memory_comm_size_limit ) {
|
||||
data = (mca_coll_tuned_comm_t*)malloc(sizeof(struct mca_coll_tuned_comm_t) +
|
||||
(sizeof(ompi_request_t *) * size * 2));
|
||||
|
||||
if (NULL == data) {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
@ -208,7 +210,6 @@ tuned_module_enable(mca_coll_base_module_t *module,
|
||||
data->mcct_num_reqs = size * 2;
|
||||
} else {
|
||||
data = (mca_coll_tuned_comm_t*)malloc(sizeof(struct mca_coll_tuned_comm_t));
|
||||
|
||||
if (NULL == data) {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
@ -216,138 +217,62 @@ tuned_module_enable(mca_coll_base_module_t *module,
|
||||
data->mcct_num_reqs = 0;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
/**
|
||||
* If using dynamic and you are MPI_COMM_WORLD and you want to use a parameter file..
|
||||
* then this affects how much storage space you need
|
||||
* (This is a basic version of what will go into V2)
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
size = ompi_comm_size(comm); /* find size so we can (A) decide if to access the file directly */
|
||||
/* (B) so we can get our very own customised ompi_coll_com_rule_t ptr */
|
||||
/* which only has rules in it for our com size */
|
||||
|
||||
/* actually if they are below a threadhold, they all open it */
|
||||
/* have to build a collective in here.. but just for MCW.. */
|
||||
/* but we have to make sure we have the same rules everywhere :( */
|
||||
|
||||
/* if using dynamic rules make sure all overrides are NULL before we start override anything accidently */
|
||||
if (ompi_coll_tuned_use_dynamic_rules) {
|
||||
/* base rules */
|
||||
data->all_base_rules = (ompi_coll_alg_rule_t*) NULL;
|
||||
|
||||
/* each collective rule for my com size */
|
||||
for (i=0;i<COLLCOUNT;i++) {
|
||||
data->com_rules[i] = (ompi_coll_com_rule_t*) NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/* next dynamic state, recheck all forced rules as well */
|
||||
/* warning, we should check to make sure this is really an INTRA comm here... */
|
||||
if (ompi_coll_tuned_use_dynamic_rules) {
|
||||
ompi_coll_tuned_forced_getvalues (ompi_coll_tuned_forced_params[ALLREDUCE], &(data->user_forced[ALLREDUCE]));
|
||||
ompi_coll_tuned_forced_getvalues (ompi_coll_tuned_forced_params[ALLTOALL], &(data->user_forced[ALLTOALL]));
|
||||
ompi_coll_tuned_forced_getvalues (ompi_coll_tuned_forced_params[ALLGATHER], &(data->user_forced[ALLGATHER]));
|
||||
ompi_coll_tuned_forced_getvalues (ompi_coll_tuned_forced_params[ALLGATHERV], &(data->user_forced[ALLGATHERV]));
|
||||
ompi_coll_tuned_forced_getvalues (ompi_coll_tuned_forced_params[ALLTOALLV], &(data->user_forced[ALLTOALLV]));
|
||||
ompi_coll_tuned_forced_getvalues_barrier (ompi_coll_tuned_forced_params[BARRIER], &(data->user_forced[BARRIER]));
|
||||
ompi_coll_tuned_forced_getvalues (ompi_coll_tuned_forced_params[BCAST], &(data->user_forced[BCAST]));
|
||||
ompi_coll_tuned_forced_getvalues (ompi_coll_tuned_forced_params[REDUCE], &(data->user_forced[REDUCE]));
|
||||
ompi_coll_tuned_forced_getvalues (ompi_coll_tuned_forced_params[REDUCESCATTER], &(data->user_forced[REDUCESCATTER]));
|
||||
ompi_coll_tuned_forced_getvalues (ompi_coll_tuned_forced_params[GATHER], &(data->user_forced[GATHER]));
|
||||
ompi_coll_tuned_forced_getvalues (ompi_coll_tuned_forced_params[SCATTER], &(data->user_forced[SCATTER]));
|
||||
}
|
||||
|
||||
|
||||
if (&ompi_mpi_comm_world.comm==comm) {
|
||||
if (ompi_coll_tuned_use_dynamic_rules) {
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_init MCW & Dynamic"));
|
||||
if (ompi_coll_tuned_dynamic_rules_filename) {
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_init Opening [%s]",
|
||||
ompi_coll_tuned_dynamic_rules_filename));
|
||||
rc = ompi_coll_tuned_read_rules_config_file (ompi_coll_tuned_dynamic_rules_filename,
|
||||
&(data->all_base_rules), COLLCOUNT);
|
||||
if (rc>=0) {
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_init Read %d valid rules\n", rc));
|
||||
/* at this point we all have a base set of rules */
|
||||
/* now we can get our customized communicator sized rule set, for each collective */
|
||||
for (i=0;i<COLLCOUNT;i++) {
|
||||
data->com_rules[i] = ompi_coll_tuned_get_com_rule_ptr (data->all_base_rules, i, size);
|
||||
}
|
||||
}
|
||||
else { /* failed to read config file, thus make sure its a NULL... */
|
||||
data->all_base_rules = (ompi_coll_alg_rule_t*) NULL;
|
||||
}
|
||||
} /* end if a config filename exists */
|
||||
} /* end if dynamic_rules */
|
||||
} /* end if MCW */
|
||||
|
||||
/* ok, if using dynamic rules, not MCW and we are just any rank and a base set of rules exist.. ref them */
|
||||
/* order of eval is important here, if we are MCW ompi_mpi_comm_world.c_coll_selected_data is NULL still.. */
|
||||
|
||||
#if 0 /* FIXME: don't know how to deal with this */
|
||||
if ((ompi_coll_tuned_use_dynamic_rules)&&(!(&ompi_mpi_comm_world==comm))&&
|
||||
((ompi_mpi_comm_world.c_coll_selected_data)->all_base_rules)) {
|
||||
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_init NOT MCW & Dynamic"));
|
||||
|
||||
/* this will, erm fail if MCW doesn't exist which it should! */
|
||||
data->all_base_rules = (ompi_mpi_comm_world.c_coll_selected_data)->all_base_rules;
|
||||
|
||||
/* at this point we all have a base set of rules if they exist atall */
|
||||
/* now we can get our customized communicator sized rule set, for each collective */
|
||||
for (i=0;i<COLLCOUNT;i++) {
|
||||
data->com_rules[i] = ompi_coll_tuned_get_com_rule_ptr (data->all_base_rules, i, size);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* now for the cached topo functions
|
||||
* guess the initial topologies to use rank 0 as root
|
||||
/**
|
||||
* next dynamic state, recheck all forced rules as well
|
||||
* warning, we should check to make sure this is really an INTRA comm here...
|
||||
*/
|
||||
ompi_coll_tuned_forced_getvalues( ALLGATHER, &(data->user_forced[ALLGATHER]));
|
||||
ompi_coll_tuned_forced_getvalues( ALLGATHERV, &(data->user_forced[ALLGATHERV]));
|
||||
ompi_coll_tuned_forced_getvalues( ALLREDUCE, &(data->user_forced[ALLREDUCE]));
|
||||
ompi_coll_tuned_forced_getvalues( ALLTOALL, &(data->user_forced[ALLTOALL]));
|
||||
ompi_coll_tuned_forced_getvalues( ALLTOALLV, &(data->user_forced[ALLTOALLV]));
|
||||
ompi_coll_tuned_forced_getvalues( ALLTOALLW, &(data->user_forced[ALLTOALLW]));
|
||||
ompi_coll_tuned_forced_getvalues( BARRIER, &(data->user_forced[BARRIER]));
|
||||
ompi_coll_tuned_forced_getvalues( BCAST, &(data->user_forced[BCAST]));
|
||||
ompi_coll_tuned_forced_getvalues( EXSCAN, &(data->user_forced[EXSCAN]));
|
||||
ompi_coll_tuned_forced_getvalues( GATHER, &(data->user_forced[GATHER]));
|
||||
ompi_coll_tuned_forced_getvalues( GATHERV, &(data->user_forced[GATHERV]));
|
||||
ompi_coll_tuned_forced_getvalues( REDUCE, &(data->user_forced[REDUCE]));
|
||||
ompi_coll_tuned_forced_getvalues( REDUCESCATTER, &(data->user_forced[REDUCESCATTER]));
|
||||
ompi_coll_tuned_forced_getvalues( SCAN, &(data->user_forced[SCAN]));
|
||||
ompi_coll_tuned_forced_getvalues( SCATTER, &(data->user_forced[SCATTER]));
|
||||
ompi_coll_tuned_forced_getvalues( SCATTERV, &(data->user_forced[SCATTERV]));
|
||||
|
||||
if( NULL != mca_coll_tuned_component.all_base_rules ) {
|
||||
/* extract our customized communicator sized rule set, for each collective */
|
||||
for( i = 0; i < COLLCOUNT; i++ ) {
|
||||
data->com_rules[i] = ompi_coll_tuned_get_com_rule_ptr( mca_coll_tuned_component.all_base_rules,
|
||||
i, size );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* general n fan out tree */
|
||||
data->cached_ntree = ompi_coll_tuned_topo_build_tree (ompi_coll_tuned_init_tree_fanout,
|
||||
comm, 0);
|
||||
data->cached_ntree_root = 0;
|
||||
data->cached_ntree_fanout = ompi_coll_tuned_init_tree_fanout;
|
||||
|
||||
data->cached_ntree = NULL;
|
||||
/* binary tree */
|
||||
data->cached_bintree = ompi_coll_tuned_topo_build_tree (2, comm, 0);
|
||||
data->cached_bintree_root = 0;
|
||||
|
||||
data->cached_bintree = NULL;
|
||||
/* binomial tree */
|
||||
data->cached_bmtree = ompi_coll_tuned_topo_build_bmtree (comm, 0);
|
||||
data->cached_bmtree_root = 0;
|
||||
|
||||
data->cached_bmtree = NULL;
|
||||
/* binomial tree */
|
||||
data->cached_in_order_bmtree = ompi_coll_tuned_topo_build_in_order_bmtree (comm, 0);
|
||||
data->cached_in_order_bmtree_root = 0;
|
||||
/*
|
||||
* chains (fanout followed by pipelines)
|
||||
* are more difficuilt as the fan out really really depends on message size [sometimes]..
|
||||
* as size gets larger fan-out gets smaller [usually]
|
||||
*
|
||||
* will probably change how we cache this later, for now a midsize
|
||||
* GEF
|
||||
*/
|
||||
data->cached_chain = ompi_coll_tuned_topo_build_chain (ompi_coll_tuned_init_chain_fanout,
|
||||
comm, 0);
|
||||
data->cached_chain_root = 0;
|
||||
data->cached_chain_fanout = ompi_coll_tuned_init_chain_fanout;
|
||||
|
||||
data->cached_in_order_bmtree = NULL;
|
||||
/* chains (fanout followed by pipelines) */
|
||||
data->cached_chain = NULL;
|
||||
/* standard pipeline */
|
||||
data->cached_pipeline = ompi_coll_tuned_topo_build_chain (1, comm, 0);
|
||||
data->cached_pipeline_root = 0;
|
||||
|
||||
data->cached_pipeline = NULL;
|
||||
/* in-order binary tree */
|
||||
data->cached_in_order_bintree = ompi_coll_tuned_topo_build_in_order_bintree(comm);
|
||||
data->cached_in_order_bintree = NULL;
|
||||
|
||||
/* All done */
|
||||
|
||||
tuned_module->tuned_data = data;
|
||||
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_init Tuned is in use"));
|
||||
|
@ -2,7 +2,7 @@
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2009 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user