1
1
This commit was SVN r7168.
Этот коммит содержится в:
Graham Fagg 2005-09-03 01:41:13 +00:00
родитель 12daecb826
Коммит 36eddb6609
7 изменённых файлов: 164 добавлений и 85 удалений

Просмотреть файл

@ -444,13 +444,13 @@ typedef struct rule_s {
struct mca_coll_base_comm_t { struct mca_coll_base_comm_t {
/* standard data for requests and PML usage */ /* standard data for requests and PML usage */
/* we need to keep this here for now incase we fall through to the /* Precreate space for requests
* basic functions that expect these fields/and memory to be * Note this does not effect basic,
* avaliable (GEF something for JS?) * but if in wrong context can confuse a debugger
*/ */
ompi_request_t **mccb_reqs;
int mccb_num_reqs;
ompi_request_t **mcct_reqs;
int mcct_num_reqs;
/* /*
* tuned topo information caching per communicator * tuned topo information caching per communicator
@ -461,17 +461,28 @@ struct mca_coll_base_comm_t {
* *
*/ */
ompi_coll_tree_t *cached_tree; /* general tree with n fan out */
int cached_tree_root; ompi_coll_tree_t *cached_ntree;
int cached_tree_fanout; int cached_ntree_root;
int cached_ntree_fanout;
ompi_coll_bmtree_t *cached_bmtree; /* binary tree */
ompi_coll_tree_t *cached_bintree;
int cached_bintree_root;
/* binomial tree */
ompi_coll_tree_t *cached_bmtree;
int cached_bmtree_root; int cached_bmtree_root;
/* chained tree (fanout followed by pipelines) */
ompi_coll_chain_t *cached_chain; ompi_coll_chain_t *cached_chain;
int cached_chain_root; int cached_chain_root;
int cached_chain_fanout; int cached_chain_fanout;
/* pipeline */
ompi_coll_chain_t *cached_pipeline;
int cached_pipeline_root;
/* extra data required by the decision functions */ /* extra data required by the decision functions */
rule_t* decision_table; rule_t* decision_table;
}; };

Просмотреть файл

@ -171,6 +171,7 @@ mca_coll_tuned_bcast_intra_chain ( void *buff, int count,
chain->chain_next[i], chain->chain_next[i],
MCA_COLL_BASE_TAG_BCAST, MCA_COLL_BASE_TAG_BCAST,
MCA_PML_BASE_SEND_STANDARD, comm)); MCA_PML_BASE_SEND_STANDARD, comm));
if (err != MPI_SUCCESS) printf("sendcount %d i %d chain_next %d \n", sendcount, i, chain->chain_next[i]);
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
} /* end of for each child */ } /* end of for each child */
} }
@ -256,17 +257,15 @@ mca_coll_tuned_bcast_intra_split_bintree ( void* buffer,
* other wise recreate it. * other wise recreate it.
*/ */
if ((comm->c_coll_selected_data->cached_tree) && (comm->c_coll_selected_data->cached_tree_root == root) if ((comm->c_coll_selected_data->cached_bintree) && (comm->c_coll_selected_data->cached_bintree_root == root)) {
&& (comm->c_coll_selected_data->cached_tree_fanout == 2)) { tree = comm->c_coll_selected_data->cached_bintree;
tree = comm->c_coll_selected_data->cached_tree;
} }
else { else {
if (comm->c_coll_selected_data->cached_tree) { /* destroy previous tree if defined */ if (comm->c_coll_selected_data->cached_bintree) { /* destroy previous tree if defined */
ompi_coll_tuned_topo_destroy_tree (&comm->c_coll_selected_data->cached_tree); ompi_coll_tuned_topo_destroy_tree (&comm->c_coll_selected_data->cached_bintree);
} }
comm->c_coll_selected_data->cached_tree = tree = ompi_coll_tuned_topo_build_tree( 2, comm, root ); comm->c_coll_selected_data->cached_bintree = tree = ompi_coll_tuned_topo_build_tree( 2, comm, root );
comm->c_coll_selected_data->cached_tree_root = root; comm->c_coll_selected_data->cached_bintree_root = root;
comm->c_coll_selected_data->cached_tree_fanout = 2;
} }
@ -513,17 +512,15 @@ mca_coll_tuned_bcast_intra_bintree ( void* buffer,
* other wise recreate it. * other wise recreate it.
*/ */
if ((comm->c_coll_selected_data->cached_tree) && (comm->c_coll_selected_data->cached_tree_root == root) if ((comm->c_coll_selected_data->cached_bintree) && (comm->c_coll_selected_data->cached_bintree_root == root)) {
&& (comm->c_coll_selected_data->cached_tree_fanout == 2)) { tree = comm->c_coll_selected_data->cached_bintree;
tree = comm->c_coll_selected_data->cached_tree;
} }
else { else {
if (comm->c_coll_selected_data->cached_tree) { /* destroy previous tree if defined */ if (comm->c_coll_selected_data->cached_bintree) { /* destroy previous bintree if defined */
ompi_coll_tuned_topo_destroy_tree (&comm->c_coll_selected_data->cached_tree); ompi_coll_tuned_topo_destroy_tree (&comm->c_coll_selected_data->cached_bintree);
} }
comm->c_coll_selected_data->cached_tree = tree = ompi_coll_tuned_topo_build_tree( 2, comm, root ); comm->c_coll_selected_data->cached_bintree = tree = ompi_coll_tuned_topo_build_tree( 2, comm, root );
comm->c_coll_selected_data->cached_tree_root = root; comm->c_coll_selected_data->cached_bintree_root = root;
comm->c_coll_selected_data->cached_tree_fanout = 2;
} }

Просмотреть файл

@ -53,9 +53,11 @@ int mca_coll_tuned_bcast_intra_dec_fixed(void *buff, int count,
rank = ompi_comm_rank(comm); rank = ompi_comm_rank(comm);
/* err = mca_coll_tuned_bcast_intra_linear (buff, count, datatype, root, comm); */ /* err = mca_coll_tuned_bcast_intra_linear (buff, count, datatype, root, comm); */
/* err = mca_coll_tuned_bcast_intra_pipeline (buff, count, datatype, root, comm, (8192)); */ /* err = mca_coll_tuned_bcast_intra_pipeline (buff, count, datatype, root, comm, (0)); */
/* err = mca_coll_tuned_bcast_intra_chain (buff, count, datatype, root, comm, (0), 1); */
/* err = mca_coll_tuned_bcast_intra_bmtree (buff, count, datatype, root, comm, (8192)); */ /* err = mca_coll_tuned_bcast_intra_bmtree (buff, count, datatype, root, comm, (8192)); */
err = mca_coll_tuned_bcast_intra_bintree (buff, count, datatype, root, comm, (1024)); /* err = mca_coll_tuned_bcast_intra_split_bintree (buff, count, datatype, root, comm, (100)); */
err = mca_coll_tuned_bcast_intra_bintree (buff, count, datatype, root, comm, (100));
return err; return err;
} }

Просмотреть файл

@ -108,7 +108,7 @@ static int tuned_open(void)
/* some initial guesses at topology parameters */ /* some initial guesses at topology parameters */
mca_coll_tuned_init_tree_fanout_param = mca_coll_tuned_init_tree_fanout_param =
mca_base_param_register_int("coll", "tuned", "init_tree_fanout", mca_base_param_register_int("coll", "tuned", "init_tree_fanout",
NULL, 2); NULL, 4);
mca_coll_tuned_init_chain_fanout_param = mca_coll_tuned_init_chain_fanout_param =
mca_base_param_register_int("coll", "tuned", "init_chain_fanout", mca_base_param_register_int("coll", "tuned", "init_chain_fanout",

Просмотреть файл

@ -306,7 +306,7 @@ mca_coll_tuned_module_init(struct ompi_communicator_t *comm)
int size; int size;
struct mca_coll_base_comm_t *data; struct mca_coll_base_comm_t *data;
/* fanout parameters */ /* fanout parameters */
int tree_fanout_default = 2; int tree_fanout_default = 4;
int chain_fanout_default = 4; int chain_fanout_default = 4;
@ -331,14 +331,22 @@ mca_coll_tuned_module_init(struct ompi_communicator_t *comm)
} else { } else {
size = ompi_comm_size(comm); size = ompi_comm_size(comm);
} }
/*
* we still malloc data as it is used by BOTH the TUNED and the BASIC modules
* if we don't allocate it and fall back to a BASIC module routine then confuses debuggers
* we place any special info after the default data
*
*/
data = malloc(sizeof(struct mca_coll_base_comm_t) + data = malloc(sizeof(struct mca_coll_base_comm_t) +
(sizeof(ompi_request_t *) * size * 2)); (sizeof(ompi_request_t *) * size * 2));
if (NULL == data) { if (NULL == data) {
return NULL; return NULL;
} }
data->mccb_reqs = (ompi_request_t **) (data + 1); data->mcct_reqs = (ompi_request_t **) (data + 1);
data->mccb_num_reqs = size * 2; data->mcct_num_reqs = size * 2;
/* /*
* now for the cached topo functions * now for the cached topo functions
@ -350,23 +358,26 @@ mca_coll_tuned_module_init(struct ompi_communicator_t *comm)
mca_base_param_lookup_int(mca_coll_tuned_init_tree_fanout_param, mca_base_param_lookup_int(mca_coll_tuned_init_tree_fanout_param,
&tree_fanout_default)) { &tree_fanout_default)) {
printf("warning: no mca_coll_tuned_init_tree_fanout_param found?\n"); printf("warning: no mca_coll_tuned_init_tree_fanout_param found?\n");
tree_fanout_default = 2; /* make it binary if failed lookup. */
} }
if (OMPI_SUCCESS != if (OMPI_SUCCESS !=
mca_base_param_lookup_int(mca_coll_tuned_init_chain_fanout_param, mca_base_param_lookup_int(mca_coll_tuned_init_chain_fanout_param,
&chain_fanout_default)) { &chain_fanout_default)) {
printf("warning: no mca_coll_tuned_init_chain_fanout_param found?\n"); printf("warning: no mca_coll_tuned_init_chain_fanout_param found?\n");
chain_fanout_default = 4;
} }
data->cached_tree = ompi_coll_tuned_topo_build_tree (tree_fanout_default, /* general n fan out tree */
comm, 0); data->cached_ntree = ompi_coll_tuned_topo_build_tree (tree_fanout_default, comm, 0);
data->cached_tree_root = 0; data->cached_ntree_root = 0;
data->cached_tree_fanout = tree_fanout_default; data->cached_ntree_fanout = tree_fanout_default;
/* binary tree */
data->cached_bintree = ompi_coll_tuned_topo_build_tree (2, comm, 0);
data->cached_bintree_root = 0;
/* binomial tree */
data->cached_bmtree = ompi_coll_tuned_topo_build_bmtree (comm, 0); data->cached_bmtree = ompi_coll_tuned_topo_build_bmtree (comm, 0);
data->cached_tree_root = 0; data->cached_bmtree_root = 0;
/* /*
* chains (fanout followed by pipelines) * chains (fanout followed by pipelines)
@ -376,11 +387,14 @@ mca_coll_tuned_module_init(struct ompi_communicator_t *comm)
* will probably change how we cache this later, for now a midsize * will probably change how we cache this later, for now a midsize
* GEF * GEF
*/ */
data->cached_chain = ompi_coll_tuned_topo_build_chain (chain_fanout_default, data->cached_chain = ompi_coll_tuned_topo_build_chain (chain_fanout_default, comm, 0);
comm, 0);
data->cached_chain_root = 0; data->cached_chain_root = 0;
data->cached_chain_fanout = chain_fanout_default; data->cached_chain_fanout = chain_fanout_default;
/* standard pipeline */
data->cached_pipeline = ompi_coll_tuned_topo_build_chain (1, comm, 0);
data->cached_pipeline_root = 0;
/* All done */ /* All done */
comm->c_coll_selected_data = data; comm->c_coll_selected_data = data;
@ -403,10 +417,28 @@ int mca_coll_tuned_module_finalize(struct ompi_communicator_t *comm)
/* Reset the reqs to NULL/0 -- they'll be freed as part of freeing /* Reset the reqs to NULL/0 -- they'll be freed as part of freeing
the generel c_coll_selected_data */ the generel c_coll_selected_data */
comm->c_coll_selected_data->mccb_reqs = NULL; comm->c_coll_selected_data->mcct_reqs = NULL;
comm->c_coll_selected_data->mccb_num_reqs = 0; comm->c_coll_selected_data->mcct_num_reqs = 0;
#endif #endif
/* free any cached information that has been allocated */
if (comm->c_coll_selected_data->cached_ntree) { /* destroy general tree if defined */
ompi_coll_tuned_topo_destroy_tree (&comm->c_coll_selected_data->cached_ntree);
}
if (comm->c_coll_selected_data->cached_bintree) { /* destroy bintree if defined */
ompi_coll_tuned_topo_destroy_tree (&comm->c_coll_selected_data->cached_bintree);
}
if (comm->c_coll_selected_data->cached_bmtree) { /* destroy bmtree if defined */
ompi_coll_tuned_topo_destroy_tree (&comm->c_coll_selected_data->cached_bmtree);
}
if (comm->c_coll_selected_data->cached_chain) { /* destroy general chain if defined */
ompi_coll_tuned_topo_destroy_chain (&comm->c_coll_selected_data->cached_chain);
}
if (comm->c_coll_selected_data->cached_pipeline) { /* destroy pipeline if defined */
ompi_coll_tuned_topo_destroy_chain (&comm->c_coll_selected_data->cached_pipeline);
}
/* All done */ /* All done */
free(comm->c_coll_selected_data); free(comm->c_coll_selected_data);

Просмотреть файл

@ -73,6 +73,15 @@ ompi_coll_tuned_topo_build_tree( int fanout,
printf("Building tuned topo tree: fo %d rt %d\n", fanout, root); printf("Building tuned topo tree: fo %d rt %d\n", fanout, root);
if (fanout<1) {
printf("ompi_coll_tuned_topo_build_tree: invalid fanout %d\n", fanout);
return NULL;
}
if (fanout>MAXTREEFANOUT) {
printf("ompi_coll_tuned_topo_build_tree: invalid fanout %d bigger than max %d\n", fanout, MAXTREEFANOUT);
return NULL;
}
/* /*
* Get size and rank of the process in this communicator * Get size and rank of the process in this communicator
*/ */
@ -80,17 +89,15 @@ ompi_coll_tuned_topo_build_tree( int fanout,
rank = ompi_comm_rank(comm); rank = ompi_comm_rank(comm);
tree = (ompi_coll_tree_t*)malloc(sizeof(ompi_coll_tree_t)); tree = (ompi_coll_tree_t*)malloc(sizeof(ompi_coll_tree_t));
if (!tree) {
printf("PANIC:ompi_coll_tuned_topo_build_tree:out of memory\n");
fflush(stdout);
return NULL;
}
tree->tree_root = MPI_UNDEFINED; tree->tree_root = MPI_UNDEFINED;
tree->tree_nextsize = MPI_UNDEFINED; tree->tree_nextsize = MPI_UNDEFINED;
/*
* Check if we calculated the tree for this root and
* fanout combination already (on this communicator)
*/
if( (root == tree->tree_root) && (fanout == tree->tree_fanout) ) {
return tree;
}
/* /*
* Set root * Set root
*/ */
@ -100,6 +107,7 @@ ompi_coll_tuned_topo_build_tree( int fanout,
* Initialize tree * Initialize tree
*/ */
tree->tree_fanout = fanout; tree->tree_fanout = fanout;
tree->tree_bmtree = 0;
tree->tree_root = root; tree->tree_root = root;
tree->tree_prev = -1; tree->tree_prev = -1;
tree->tree_nextsize = 0; tree->tree_nextsize = 0;
@ -155,10 +163,23 @@ ompi_coll_tuned_topo_build_tree( int fanout,
int ompi_coll_tuned_topo_destroy_tree( ompi_coll_tree_t** tree ) int ompi_coll_tuned_topo_destroy_tree( ompi_coll_tree_t** tree )
{ {
ompi_coll_tree_t *ptr;
return 0;
if ((!tree)||(!*tree)) {
return OMPI_SUCCESS; return OMPI_SUCCESS;
} }
ompi_coll_bmtree_t* ptr = *tree;
free (ptr);
*tree = NULL; /* mark tree as gone */
return OMPI_SUCCESS;
}
ompi_coll_tree_t*
ompi_coll_tuned_topo_build_bmtree( struct ompi_communicator_t* comm, ompi_coll_tuned_topo_build_bmtree( struct ompi_communicator_t* comm,
int root ) int root )
{ {
@ -168,7 +189,8 @@ ompi_coll_tuned_topo_build_bmtree( struct ompi_communicator_t* comm,
int mask = 1; int mask = 1;
int index; int index;
int remote; int remote;
ompi_coll_bmtree_t *bmtree; ompi_coll_tree_t *bmtree;
int i;
printf("Building tuned topo bmtree: rt %d\n", root); printf("Building tuned topo bmtree: rt %d\n", root);
@ -180,13 +202,19 @@ ompi_coll_tuned_topo_build_bmtree( struct ompi_communicator_t* comm,
index = rank -root; index = rank -root;
bmtree = (ompi_coll_bmtree_t*)malloc(sizeof(ompi_coll_bmtree_t)); bmtree = (ompi_coll_tree_t*)malloc(sizeof(ompi_coll_tree_t));
bmtree->bmtree_root = MPI_UNDEFINED; if (!bmtree) {
bmtree->bmtree_nextsize = MPI_UNDEFINED; printf("PANIC:ompi_coll_tuned_topo_build_bmtree:out of memory\n");
fflush(stdout);
return NULL;
}
if( bmtree->bmtree_root == root ) { bmtree->tree_bmtree = 1;
/* the bmtree was computed before */
return bmtree; bmtree->tree_root = MPI_UNDEFINED;
bmtree->tree_nextsize = MPI_UNDEFINED;
for(i=0;i<MAXTREEFANOUT;i++) {
bmtree->tree_next[i] = -1;
} }
if( index < 0 ) index += size; if( index < 0 ) index += size;
@ -195,11 +223,11 @@ ompi_coll_tuned_topo_build_bmtree( struct ompi_communicator_t* comm,
/* Now I can compute my father rank */ /* Now I can compute my father rank */
if( root == rank ) { if( root == rank ) {
bmtree->bmtree_prev = root; bmtree->tree_prev = root;
} else { } else {
remote = (index ^ (mask >> 1)) + root; remote = (index ^ (mask >> 1)) + root;
if( remote >= size ) remote -= size; if( remote >= size ) remote -= size;
bmtree->bmtree_prev = remote; bmtree->tree_prev = remote;
} }
/* And now let's fill my childs */ /* And now let's fill my childs */
while( mask < size ) { while( mask < size ) {
@ -207,19 +235,19 @@ ompi_coll_tuned_topo_build_bmtree( struct ompi_communicator_t* comm,
if( remote >= size ) break; if( remote >= size ) break;
remote += root; remote += root;
if( remote >= size ) remote -= size; if( remote >= size ) remote -= size;
bmtree->bmtree_next[childs] = remote; if (childs==MAXTREEFANOUT) {
printf("ompi_coll_tuned_topo_build_bmtree: max fanout incorrect %d needed %d\n", MAXTREEFANOUT, childs);
return NULL;
}
bmtree->tree_next[childs] = remote;
mask <<= 1; mask <<= 1;
childs++; childs++;
} }
bmtree->bmtree_nextsize = childs; bmtree->tree_nextsize = childs;
bmtree->bmtree_root = root; bmtree->tree_root = root;
return bmtree; return bmtree;
} }
int ompi_coll_tuned_topo_destroy_bmtree( ompi_coll_bmtree_t** bmtree )
{
return OMPI_SUCCESS;
}
ompi_coll_chain_t* ompi_coll_chain_t*
ompi_coll_tuned_topo_build_chain( int fanout, ompi_coll_tuned_topo_build_chain( int fanout,
@ -240,7 +268,11 @@ ompi_coll_tuned_topo_build_chain( int fanout,
size = ompi_comm_size(comm); size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm); rank = ompi_comm_rank(comm);
if( fanout < 1 ) {
return NULL;
}
if (fanout>MAXTREEFANOUT) { if (fanout>MAXTREEFANOUT) {
printf("ompi_coll_tuned_topo_build_chain: invalid fanout %d bigger than max %d\n", fanout, MAXTREEFANOUT);
return NULL; return NULL;
} }
@ -248,17 +280,16 @@ ompi_coll_tuned_topo_build_chain( int fanout,
* Allocate space for topology arrays if needed * Allocate space for topology arrays if needed
*/ */
chain = (ompi_coll_chain_t*)malloc( sizeof(ompi_coll_chain_t) ); chain = (ompi_coll_chain_t*)malloc( sizeof(ompi_coll_chain_t) );
if (!chain) {
printf("PANIC:ompi_coll_tuned_topo_build_chain:out of memory\n");
fflush(stdout);
return NULL;
}
chain->chain_root = MPI_UNDEFINED; chain->chain_root = MPI_UNDEFINED;
chain->chain_nextsize = -1; chain->chain_nextsize = -1;
chain->chain_numchain = -1; chain->chain_numchain = -1;
for(i=0;i<fanout;i++) chain->chain_next[i] = -1;
/*
* Check if we calculated the topology for this root and comm
*/
if( (root == chain->chain_root) &&
(fanout == chain->chain_numchain) ) {
return chain;
}
/* /*
* Set root & numchain * Set root & numchain
*/ */
@ -374,5 +405,18 @@ ompi_coll_tuned_topo_build_chain( int fanout,
int ompi_coll_tuned_topo_destroy_chain( ompi_coll_chain_t** chain ) int ompi_coll_tuned_topo_destroy_chain( ompi_coll_chain_t** chain )
{ {
ompi_coll_chain_t *ptr;
return 0;
if ((!chain)||(!*chain)) {
return OMPI_SUCCESS;
}
ptr = *chain;
free (ptr);
*chain = NULL; /* mark chain as gone */
return OMPI_SUCCESS; return OMPI_SUCCESS;
} }

Просмотреть файл

@ -29,18 +29,12 @@ extern "C"
typedef struct ompi_coll_tree_t { typedef struct ompi_coll_tree_t {
int32_t tree_root; int32_t tree_root;
int32_t tree_fanout; int32_t tree_fanout;
int32_t tree_bmtree;
int32_t tree_prev; int32_t tree_prev;
int32_t tree_next[MAXTREEFANOUT]; int32_t tree_next[MAXTREEFANOUT];
int32_t tree_nextsize; int32_t tree_nextsize;
} ompi_coll_tree_t; } ompi_coll_tree_t;
typedef struct ompi_coll_bmtree_t {
int32_t bmtree_root;
int32_t bmtree_prev;
int32_t bmtree_next[MAXTREEFANOUT];
int32_t bmtree_nextsize;
} ompi_coll_bmtree_t;
typedef struct ompi_coll_chain_t { typedef struct ompi_coll_chain_t {
int32_t chain_root; int32_t chain_root;
int32_t chain_prev; int32_t chain_prev;
@ -55,10 +49,9 @@ ompi_coll_tuned_topo_build_tree( int fanout,
int root ); int root );
int ompi_coll_tuned_topo_destroy_tree( ompi_coll_tree_t** tree ); int ompi_coll_tuned_topo_destroy_tree( ompi_coll_tree_t** tree );
ompi_coll_bmtree_t* ompi_coll_tree_t*
ompi_coll_tuned_topo_build_bmtree( struct ompi_communicator_t* comm, ompi_coll_tuned_topo_build_bmtree( struct ompi_communicator_t* comm,
int root ); int root );
int ompi_coll_tuned_topo_destroy_bmtree( ompi_coll_bmtree_t** bmtree );
ompi_coll_chain_t* ompi_coll_chain_t*
ompi_coll_tuned_topo_build_chain( int fanout, ompi_coll_tuned_topo_build_chain( int fanout,