1
1
This commit was SVN r7168.
Этот коммит содержится в:
Graham Fagg 2005-09-03 01:41:13 +00:00
родитель 12daecb826
Коммит 36eddb6609
7 изменённых файлов: 164 добавлений и 85 удалений

Просмотреть файл

@ -444,13 +444,13 @@ typedef struct rule_s {
struct mca_coll_base_comm_t {
/* standard data for requests and PML usage */
/* we need to keep this here for now in case we fall through to the
* basic functions that expect these fields and memory to be
* available (GEF something for JS?)
/* Precreate space for requests
* Note this does not affect basic,
* but if in wrong context can confuse a debugger
*/
ompi_request_t **mccb_reqs;
int mccb_num_reqs;
ompi_request_t **mcct_reqs;
int mcct_num_reqs;
/*
* tuned topo information caching per communicator
@ -461,17 +461,28 @@ struct mca_coll_base_comm_t {
*
*/
ompi_coll_tree_t *cached_tree;
int cached_tree_root;
int cached_tree_fanout;
/* general tree with n fan out */
ompi_coll_tree_t *cached_ntree;
int cached_ntree_root;
int cached_ntree_fanout;
ompi_coll_bmtree_t *cached_bmtree;
/* binary tree */
ompi_coll_tree_t *cached_bintree;
int cached_bintree_root;
/* binomial tree */
ompi_coll_tree_t *cached_bmtree;
int cached_bmtree_root;
/* chained tree (fanout followed by pipelines) */
ompi_coll_chain_t *cached_chain;
int cached_chain_root;
int cached_chain_fanout;
/* pipeline */
ompi_coll_chain_t *cached_pipeline;
int cached_pipeline_root;
/* extra data required by the decision functions */
rule_t* decision_table;
};

Просмотреть файл

@ -171,6 +171,7 @@ mca_coll_tuned_bcast_intra_chain ( void *buff, int count,
chain->chain_next[i],
MCA_COLL_BASE_TAG_BCAST,
MCA_PML_BASE_SEND_STANDARD, comm));
if (err != MPI_SUCCESS) printf("sendcount %d i %d chain_next %d \n", sendcount, i, chain->chain_next[i]);
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
} /* end of for each child */
}
@ -256,17 +257,15 @@ mca_coll_tuned_bcast_intra_split_bintree ( void* buffer,
* otherwise recreate it.
*/
if ((comm->c_coll_selected_data->cached_tree) && (comm->c_coll_selected_data->cached_tree_root == root)
&& (comm->c_coll_selected_data->cached_tree_fanout == 2)) {
tree = comm->c_coll_selected_data->cached_tree;
if ((comm->c_coll_selected_data->cached_bintree) && (comm->c_coll_selected_data->cached_bintree_root == root)) {
tree = comm->c_coll_selected_data->cached_bintree;
}
else {
if (comm->c_coll_selected_data->cached_tree) { /* destroy previous tree if defined */
ompi_coll_tuned_topo_destroy_tree (&comm->c_coll_selected_data->cached_tree);
if (comm->c_coll_selected_data->cached_bintree) { /* destroy previous tree if defined */
ompi_coll_tuned_topo_destroy_tree (&comm->c_coll_selected_data->cached_bintree);
}
comm->c_coll_selected_data->cached_tree = tree = ompi_coll_tuned_topo_build_tree( 2, comm, root );
comm->c_coll_selected_data->cached_tree_root = root;
comm->c_coll_selected_data->cached_tree_fanout = 2;
comm->c_coll_selected_data->cached_bintree = tree = ompi_coll_tuned_topo_build_tree( 2, comm, root );
comm->c_coll_selected_data->cached_bintree_root = root;
}
@ -513,17 +512,15 @@ mca_coll_tuned_bcast_intra_bintree ( void* buffer,
* otherwise recreate it.
*/
if ((comm->c_coll_selected_data->cached_tree) && (comm->c_coll_selected_data->cached_tree_root == root)
&& (comm->c_coll_selected_data->cached_tree_fanout == 2)) {
tree = comm->c_coll_selected_data->cached_tree;
if ((comm->c_coll_selected_data->cached_bintree) && (comm->c_coll_selected_data->cached_bintree_root == root)) {
tree = comm->c_coll_selected_data->cached_bintree;
}
else {
if (comm->c_coll_selected_data->cached_tree) { /* destroy previous tree if defined */
ompi_coll_tuned_topo_destroy_tree (&comm->c_coll_selected_data->cached_tree);
if (comm->c_coll_selected_data->cached_bintree) { /* destroy previous bintree if defined */
ompi_coll_tuned_topo_destroy_tree (&comm->c_coll_selected_data->cached_bintree);
}
comm->c_coll_selected_data->cached_tree = tree = ompi_coll_tuned_topo_build_tree( 2, comm, root );
comm->c_coll_selected_data->cached_tree_root = root;
comm->c_coll_selected_data->cached_tree_fanout = 2;
comm->c_coll_selected_data->cached_bintree = tree = ompi_coll_tuned_topo_build_tree( 2, comm, root );
comm->c_coll_selected_data->cached_bintree_root = root;
}

Просмотреть файл

@ -53,9 +53,11 @@ int mca_coll_tuned_bcast_intra_dec_fixed(void *buff, int count,
rank = ompi_comm_rank(comm);
/* err = mca_coll_tuned_bcast_intra_linear (buff, count, datatype, root, comm); */
/* err = mca_coll_tuned_bcast_intra_pipeline (buff, count, datatype, root, comm, (8192)); */
/* err = mca_coll_tuned_bcast_intra_pipeline (buff, count, datatype, root, comm, (0)); */
/* err = mca_coll_tuned_bcast_intra_chain (buff, count, datatype, root, comm, (0), 1); */
/* err = mca_coll_tuned_bcast_intra_bmtree (buff, count, datatype, root, comm, (8192)); */
err = mca_coll_tuned_bcast_intra_bintree (buff, count, datatype, root, comm, (1024));
/* err = mca_coll_tuned_bcast_intra_split_bintree (buff, count, datatype, root, comm, (100)); */
err = mca_coll_tuned_bcast_intra_bintree (buff, count, datatype, root, comm, (100));
return err;
}

Просмотреть файл

@ -108,7 +108,7 @@ static int tuned_open(void)
/* some initial guesses at topology parameters */
mca_coll_tuned_init_tree_fanout_param =
mca_base_param_register_int("coll", "tuned", "init_tree_fanout",
NULL, 2);
NULL, 4);
mca_coll_tuned_init_chain_fanout_param =
mca_base_param_register_int("coll", "tuned", "init_chain_fanout",

Просмотреть файл

@ -306,7 +306,7 @@ mca_coll_tuned_module_init(struct ompi_communicator_t *comm)
int size;
struct mca_coll_base_comm_t *data;
/* fanout parameters */
int tree_fanout_default = 2;
int tree_fanout_default = 4;
int chain_fanout_default = 4;
@ -331,14 +331,22 @@ mca_coll_tuned_module_init(struct ompi_communicator_t *comm)
} else {
size = ompi_comm_size(comm);
}
/*
* we still malloc data as it is used by BOTH the TUNED and the BASIC modules
* if we don't allocate it and fall back to a BASIC module routine then it confuses debuggers
* we place any special info after the default data
*
*/
data = malloc(sizeof(struct mca_coll_base_comm_t) +
(sizeof(ompi_request_t *) * size * 2));
if (NULL == data) {
return NULL;
}
data->mccb_reqs = (ompi_request_t **) (data + 1);
data->mccb_num_reqs = size * 2;
data->mcct_reqs = (ompi_request_t **) (data + 1);
data->mcct_num_reqs = size * 2;
/*
* now for the cached topo functions
@ -350,23 +358,26 @@ mca_coll_tuned_module_init(struct ompi_communicator_t *comm)
mca_base_param_lookup_int(mca_coll_tuned_init_tree_fanout_param,
&tree_fanout_default)) {
printf("warning: no mca_coll_tuned_init_tree_fanout_param found?\n");
tree_fanout_default = 2; /* make it binary if failed lookup. */
}
if (OMPI_SUCCESS !=
mca_base_param_lookup_int(mca_coll_tuned_init_chain_fanout_param,
&chain_fanout_default)) {
printf("warning: no mca_coll_tuned_init_chain_fanout_param found?\n");
chain_fanout_default = 4;
}
data->cached_tree = ompi_coll_tuned_topo_build_tree (tree_fanout_default,
comm, 0);
data->cached_tree_root = 0;
data->cached_tree_fanout = tree_fanout_default;
/* general n fan out tree */
data->cached_ntree = ompi_coll_tuned_topo_build_tree (tree_fanout_default, comm, 0);
data->cached_ntree_root = 0;
data->cached_ntree_fanout = tree_fanout_default;
/* binary tree */
data->cached_bintree = ompi_coll_tuned_topo_build_tree (2, comm, 0);
data->cached_bintree_root = 0;
/* binomial tree */
data->cached_bmtree = ompi_coll_tuned_topo_build_bmtree (comm, 0);
data->cached_tree_root = 0;
data->cached_bmtree_root = 0;
/*
* chains (fanout followed by pipelines)
@ -376,11 +387,14 @@ mca_coll_tuned_module_init(struct ompi_communicator_t *comm)
* will probably change how we cache this later, for now a midsize
* GEF
*/
data->cached_chain = ompi_coll_tuned_topo_build_chain (chain_fanout_default,
comm, 0);
data->cached_chain = ompi_coll_tuned_topo_build_chain (chain_fanout_default, comm, 0);
data->cached_chain_root = 0;
data->cached_chain_fanout = chain_fanout_default;
/* standard pipeline */
data->cached_pipeline = ompi_coll_tuned_topo_build_chain (1, comm, 0);
data->cached_pipeline_root = 0;
/* All done */
comm->c_coll_selected_data = data;
@ -403,10 +417,28 @@ int mca_coll_tuned_module_finalize(struct ompi_communicator_t *comm)
/* Reset the reqs to NULL/0 -- they'll be freed as part of freeing
the general c_coll_selected_data */
comm->c_coll_selected_data->mccb_reqs = NULL;
comm->c_coll_selected_data->mccb_num_reqs = 0;
comm->c_coll_selected_data->mcct_reqs = NULL;
comm->c_coll_selected_data->mcct_num_reqs = 0;
#endif
/* free any cached information that has been allocated */
if (comm->c_coll_selected_data->cached_ntree) { /* destroy general tree if defined */
ompi_coll_tuned_topo_destroy_tree (&comm->c_coll_selected_data->cached_ntree);
}
if (comm->c_coll_selected_data->cached_bintree) { /* destroy bintree if defined */
ompi_coll_tuned_topo_destroy_tree (&comm->c_coll_selected_data->cached_bintree);
}
if (comm->c_coll_selected_data->cached_bmtree) { /* destroy bmtree if defined */
ompi_coll_tuned_topo_destroy_tree (&comm->c_coll_selected_data->cached_bmtree);
}
if (comm->c_coll_selected_data->cached_chain) { /* destroy general chain if defined */
ompi_coll_tuned_topo_destroy_chain (&comm->c_coll_selected_data->cached_chain);
}
if (comm->c_coll_selected_data->cached_pipeline) { /* destroy pipeline if defined */
ompi_coll_tuned_topo_destroy_chain (&comm->c_coll_selected_data->cached_pipeline);
}
/* All done */
free(comm->c_coll_selected_data);

Просмотреть файл

@ -73,6 +73,15 @@ ompi_coll_tuned_topo_build_tree( int fanout,
printf("Building tuned topo tree: fo %d rt %d\n", fanout, root);
if (fanout<1) {
printf("ompi_coll_tuned_topo_build_tree: invalid fanout %d\n", fanout);
return NULL;
}
if (fanout>MAXTREEFANOUT) {
printf("ompi_coll_tuned_topo_build_tree: invalid fanout %d bigger than max %d\n", fanout, MAXTREEFANOUT);
return NULL;
}
/*
* Get size and rank of the process in this communicator
*/
@ -80,17 +89,15 @@ ompi_coll_tuned_topo_build_tree( int fanout,
rank = ompi_comm_rank(comm);
tree = (ompi_coll_tree_t*)malloc(sizeof(ompi_coll_tree_t));
if (!tree) {
printf("PANIC:ompi_coll_tuned_topo_build_tree:out of memory\n");
fflush(stdout);
return NULL;
}
tree->tree_root = MPI_UNDEFINED;
tree->tree_nextsize = MPI_UNDEFINED;
/*
* Check if we calculated the tree for this root and
* fanout combination already (on this communicator)
*/
if( (root == tree->tree_root) && (fanout == tree->tree_fanout) ) {
return tree;
}
/*
* Set root
*/
@ -100,6 +107,7 @@ ompi_coll_tuned_topo_build_tree( int fanout,
* Initialize tree
*/
tree->tree_fanout = fanout;
tree->tree_bmtree = 0;
tree->tree_root = root;
tree->tree_prev = -1;
tree->tree_nextsize = 0;
@ -155,10 +163,23 @@ ompi_coll_tuned_topo_build_tree( int fanout,
/*
 * Free a tree topology and reset the caller's pointer to NULL.
 *
 * tree: address of the pointer to release. A NULL address, or an address
 *       holding a NULL pointer, is accepted and reported as OMPI_SUCCESS
 *       without freeing anything.
 *
 * NOTE(review): this listing is a commit diff rendered without +/- markers.
 * The bare `return 0;` and the dangling `ompi_coll_bmtree_t*` line below
 * look like lines the commit REMOVES (the old stub body and the old return
 * type of the function that follows) -- confirm against the real file.
 * Read literally, everything after `return 0;` would be unreachable.
 */
int ompi_coll_tuned_topo_destroy_tree( ompi_coll_tree_t** tree )
{
ompi_coll_tree_t *ptr;
return 0;
/* nothing to release: treat as success */
if ((!tree)||(!*tree)) {
return OMPI_SUCCESS;
}
ompi_coll_bmtree_t*
ptr = *tree;
free (ptr);
*tree = NULL; /* mark tree as gone */
return OMPI_SUCCESS;
}
ompi_coll_tree_t*
ompi_coll_tuned_topo_build_bmtree( struct ompi_communicator_t* comm,
int root )
{
@ -168,7 +189,8 @@ ompi_coll_tuned_topo_build_bmtree( struct ompi_communicator_t* comm,
int mask = 1;
int index;
int remote;
ompi_coll_bmtree_t *bmtree;
ompi_coll_tree_t *bmtree;
int i;
printf("Building tuned topo bmtree: rt %d\n", root);
@ -180,13 +202,19 @@ ompi_coll_tuned_topo_build_bmtree( struct ompi_communicator_t* comm,
index = rank -root;
bmtree = (ompi_coll_bmtree_t*)malloc(sizeof(ompi_coll_bmtree_t));
bmtree->bmtree_root = MPI_UNDEFINED;
bmtree->bmtree_nextsize = MPI_UNDEFINED;
bmtree = (ompi_coll_tree_t*)malloc(sizeof(ompi_coll_tree_t));
if (!bmtree) {
printf("PANIC:ompi_coll_tuned_topo_build_bmtree:out of memory\n");
fflush(stdout);
return NULL;
}
if( bmtree->bmtree_root == root ) {
/* the bmtree was computed before */
return bmtree;
bmtree->tree_bmtree = 1;
bmtree->tree_root = MPI_UNDEFINED;
bmtree->tree_nextsize = MPI_UNDEFINED;
for(i=0;i<MAXTREEFANOUT;i++) {
bmtree->tree_next[i] = -1;
}
if( index < 0 ) index += size;
@ -195,11 +223,11 @@ ompi_coll_tuned_topo_build_bmtree( struct ompi_communicator_t* comm,
/* Now I can compute my father rank */
if( root == rank ) {
bmtree->bmtree_prev = root;
bmtree->tree_prev = root;
} else {
remote = (index ^ (mask >> 1)) + root;
if( remote >= size ) remote -= size;
bmtree->bmtree_prev = remote;
bmtree->tree_prev = remote;
}
/* And now let's fill in my children */
while( mask < size ) {
@ -207,19 +235,19 @@ ompi_coll_tuned_topo_build_bmtree( struct ompi_communicator_t* comm,
if( remote >= size ) break;
remote += root;
if( remote >= size ) remote -= size;
bmtree->bmtree_next[childs] = remote;
if (childs==MAXTREEFANOUT) {
printf("ompi_coll_tuned_topo_build_bmtree: max fanout incorrect %d needed %d\n", MAXTREEFANOUT, childs);
return NULL;
}
bmtree->tree_next[childs] = remote;
mask <<= 1;
childs++;
}
bmtree->bmtree_nextsize = childs;
bmtree->bmtree_root = root;
bmtree->tree_nextsize = childs;
bmtree->tree_root = root;
return bmtree;
}
int ompi_coll_tuned_topo_destroy_bmtree( ompi_coll_bmtree_t** bmtree )
{
return OMPI_SUCCESS;
}
ompi_coll_chain_t*
ompi_coll_tuned_topo_build_chain( int fanout,
@ -240,7 +268,11 @@ ompi_coll_tuned_topo_build_chain( int fanout,
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
if( fanout < 1 ) {
return NULL;
}
if (fanout>MAXTREEFANOUT) {
printf("ompi_coll_tuned_topo_build_chain: invalid fanout %d bigger than max %d\n", fanout, MAXTREEFANOUT);
return NULL;
}
@ -248,17 +280,16 @@ ompi_coll_tuned_topo_build_chain( int fanout,
* Allocate space for topology arrays if needed
*/
chain = (ompi_coll_chain_t*)malloc( sizeof(ompi_coll_chain_t) );
if (!chain) {
printf("PANIC:ompi_coll_tuned_topo_build_chain:out of memory\n");
fflush(stdout);
return NULL;
}
chain->chain_root = MPI_UNDEFINED;
chain->chain_nextsize = -1;
chain->chain_numchain = -1;
for(i=0;i<fanout;i++) chain->chain_next[i] = -1;
/*
* Check if we calculated the topology for this root and comm
*/
if( (root == chain->chain_root) &&
(fanout == chain->chain_numchain) ) {
return chain;
}
/*
* Set root & numchain
*/
@ -374,5 +405,18 @@ ompi_coll_tuned_topo_build_chain( int fanout,
/*
 * Free a chain topology and reset the caller's pointer to NULL.
 *
 * chain: address of the pointer to release. A NULL address, or an address
 *        holding a NULL pointer, is accepted and reported as OMPI_SUCCESS
 *        without freeing anything.
 *
 * NOTE(review): this listing is a commit diff rendered without +/- markers.
 * The bare `return 0;` below looks like the old stub body that the commit
 * REMOVES -- confirm against the real file. Read literally, everything
 * after it would be unreachable.
 */
int ompi_coll_tuned_topo_destroy_chain( ompi_coll_chain_t** chain )
{
ompi_coll_chain_t *ptr;
return 0;
/* nothing to release: treat as success */
if ((!chain)||(!*chain)) {
return OMPI_SUCCESS;
}
ptr = *chain;
free (ptr);
*chain = NULL; /* mark chain as gone */
return OMPI_SUCCESS;
}

Просмотреть файл

@ -29,18 +29,12 @@ extern "C"
/*
 * Tree topology record as filled in by ompi_coll_tuned_topo_build_tree and
 * ompi_coll_tuned_topo_build_bmtree. The builders compute the parent and
 * child entries relative to the calling process's rank in the communicator.
 */
typedef struct ompi_coll_tree_t {
/* root rank the tree was built for; MPI_UNDEFINED until the build completes */
int32_t tree_root;
/* fanout requested from ompi_coll_tuned_topo_build_tree */
int32_t tree_fanout;
/* 1 when built by ompi_coll_tuned_topo_build_bmtree, 0 when built by build_tree */
int32_t tree_bmtree;
/* parent rank; build_tree starts it at -1, build_bmtree stores root itself on the root */
int32_t tree_prev;
/* child ranks; build_bmtree presets every slot to -1 before filling them */
int32_t tree_next[MAXTREEFANOUT];
/* number of child entries in tree_next that were filled in */
int32_t tree_nextsize;
} ompi_coll_tree_t;
typedef struct ompi_coll_bmtree_t {
int32_t bmtree_root;
int32_t bmtree_prev;
int32_t bmtree_next[MAXTREEFANOUT];
int32_t bmtree_nextsize;
} ompi_coll_bmtree_t;
typedef struct ompi_coll_chain_t {
int32_t chain_root;
int32_t chain_prev;
@ -55,10 +49,9 @@ ompi_coll_tuned_topo_build_tree( int fanout,
int root );
int ompi_coll_tuned_topo_destroy_tree( ompi_coll_tree_t** tree );
ompi_coll_bmtree_t*
ompi_coll_tree_t*
ompi_coll_tuned_topo_build_bmtree( struct ompi_communicator_t* comm,
int root );
int ompi_coll_tuned_topo_destroy_bmtree( ompi_coll_bmtree_t** bmtree );
ompi_coll_chain_t*
ompi_coll_tuned_topo_build_chain( int fanout,