checkpoint
This commit was SVN r7168.
Этот коммит содержится в:
родитель
12daecb826
Коммит
36eddb6609
@ -444,13 +444,13 @@ typedef struct rule_s {
|
|||||||
struct mca_coll_base_comm_t {
|
struct mca_coll_base_comm_t {
|
||||||
/* standard data for requests and PML usage */
|
/* standard data for requests and PML usage */
|
||||||
|
|
||||||
/* we need to keep this here for now incase we fall through to the
|
/* Precreate space for requests
|
||||||
* basic functions that expect these fields/and memory to be
|
* Note this does not effect basic,
|
||||||
* avaliable (GEF something for JS?)
|
* but if in wrong context can confuse a debugger
|
||||||
*/
|
*/
|
||||||
ompi_request_t **mccb_reqs;
|
|
||||||
int mccb_num_reqs;
|
|
||||||
|
|
||||||
|
ompi_request_t **mcct_reqs;
|
||||||
|
int mcct_num_reqs;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* tuned topo information caching per communicator
|
* tuned topo information caching per communicator
|
||||||
@ -461,17 +461,28 @@ struct mca_coll_base_comm_t {
|
|||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
|
||||||
ompi_coll_tree_t *cached_tree;
|
/* general tree with n fan out */
|
||||||
int cached_tree_root;
|
ompi_coll_tree_t *cached_ntree;
|
||||||
int cached_tree_fanout;
|
int cached_ntree_root;
|
||||||
|
int cached_ntree_fanout;
|
||||||
|
|
||||||
ompi_coll_bmtree_t *cached_bmtree;
|
/* binary tree */
|
||||||
|
ompi_coll_tree_t *cached_bintree;
|
||||||
|
int cached_bintree_root;
|
||||||
|
|
||||||
|
/* binomial tree */
|
||||||
|
ompi_coll_tree_t *cached_bmtree;
|
||||||
int cached_bmtree_root;
|
int cached_bmtree_root;
|
||||||
|
|
||||||
|
/* chained tree (fanout followed by pipelines) */
|
||||||
ompi_coll_chain_t *cached_chain;
|
ompi_coll_chain_t *cached_chain;
|
||||||
int cached_chain_root;
|
int cached_chain_root;
|
||||||
int cached_chain_fanout;
|
int cached_chain_fanout;
|
||||||
|
|
||||||
|
/* pipeline */
|
||||||
|
ompi_coll_chain_t *cached_pipeline;
|
||||||
|
int cached_pipeline_root;
|
||||||
|
|
||||||
/* extra data required by the decision functions */
|
/* extra data required by the decision functions */
|
||||||
rule_t* decision_table;
|
rule_t* decision_table;
|
||||||
};
|
};
|
||||||
|
@ -171,6 +171,7 @@ mca_coll_tuned_bcast_intra_chain ( void *buff, int count,
|
|||||||
chain->chain_next[i],
|
chain->chain_next[i],
|
||||||
MCA_COLL_BASE_TAG_BCAST,
|
MCA_COLL_BASE_TAG_BCAST,
|
||||||
MCA_PML_BASE_SEND_STANDARD, comm));
|
MCA_PML_BASE_SEND_STANDARD, comm));
|
||||||
|
if (err != MPI_SUCCESS) printf("sendcount %d i %d chain_next %d \n", sendcount, i, chain->chain_next[i]);
|
||||||
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
|
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
|
||||||
} /* end of for each child */
|
} /* end of for each child */
|
||||||
}
|
}
|
||||||
@ -256,17 +257,15 @@ mca_coll_tuned_bcast_intra_split_bintree ( void* buffer,
|
|||||||
* other wise recreate it.
|
* other wise recreate it.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
if ((comm->c_coll_selected_data->cached_tree) && (comm->c_coll_selected_data->cached_tree_root == root)
|
if ((comm->c_coll_selected_data->cached_bintree) && (comm->c_coll_selected_data->cached_bintree_root == root)) {
|
||||||
&& (comm->c_coll_selected_data->cached_tree_fanout == 2)) {
|
tree = comm->c_coll_selected_data->cached_bintree;
|
||||||
tree = comm->c_coll_selected_data->cached_tree;
|
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
if (comm->c_coll_selected_data->cached_tree) { /* destroy previous tree if defined */
|
if (comm->c_coll_selected_data->cached_bintree) { /* destroy previous tree if defined */
|
||||||
ompi_coll_tuned_topo_destroy_tree (&comm->c_coll_selected_data->cached_tree);
|
ompi_coll_tuned_topo_destroy_tree (&comm->c_coll_selected_data->cached_bintree);
|
||||||
}
|
}
|
||||||
comm->c_coll_selected_data->cached_tree = tree = ompi_coll_tuned_topo_build_tree( 2, comm, root );
|
comm->c_coll_selected_data->cached_bintree = tree = ompi_coll_tuned_topo_build_tree( 2, comm, root );
|
||||||
comm->c_coll_selected_data->cached_tree_root = root;
|
comm->c_coll_selected_data->cached_bintree_root = root;
|
||||||
comm->c_coll_selected_data->cached_tree_fanout = 2;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -513,17 +512,15 @@ mca_coll_tuned_bcast_intra_bintree ( void* buffer,
|
|||||||
* other wise recreate it.
|
* other wise recreate it.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
if ((comm->c_coll_selected_data->cached_tree) && (comm->c_coll_selected_data->cached_tree_root == root)
|
if ((comm->c_coll_selected_data->cached_bintree) && (comm->c_coll_selected_data->cached_bintree_root == root)) {
|
||||||
&& (comm->c_coll_selected_data->cached_tree_fanout == 2)) {
|
tree = comm->c_coll_selected_data->cached_bintree;
|
||||||
tree = comm->c_coll_selected_data->cached_tree;
|
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
if (comm->c_coll_selected_data->cached_tree) { /* destroy previous tree if defined */
|
if (comm->c_coll_selected_data->cached_bintree) { /* destroy previous bintree if defined */
|
||||||
ompi_coll_tuned_topo_destroy_tree (&comm->c_coll_selected_data->cached_tree);
|
ompi_coll_tuned_topo_destroy_tree (&comm->c_coll_selected_data->cached_bintree);
|
||||||
}
|
}
|
||||||
comm->c_coll_selected_data->cached_tree = tree = ompi_coll_tuned_topo_build_tree( 2, comm, root );
|
comm->c_coll_selected_data->cached_bintree = tree = ompi_coll_tuned_topo_build_tree( 2, comm, root );
|
||||||
comm->c_coll_selected_data->cached_tree_root = root;
|
comm->c_coll_selected_data->cached_bintree_root = root;
|
||||||
comm->c_coll_selected_data->cached_tree_fanout = 2;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -53,9 +53,11 @@ int mca_coll_tuned_bcast_intra_dec_fixed(void *buff, int count,
|
|||||||
rank = ompi_comm_rank(comm);
|
rank = ompi_comm_rank(comm);
|
||||||
|
|
||||||
/* err = mca_coll_tuned_bcast_intra_linear (buff, count, datatype, root, comm); */
|
/* err = mca_coll_tuned_bcast_intra_linear (buff, count, datatype, root, comm); */
|
||||||
/* err = mca_coll_tuned_bcast_intra_pipeline (buff, count, datatype, root, comm, (8192)); */
|
/* err = mca_coll_tuned_bcast_intra_pipeline (buff, count, datatype, root, comm, (0)); */
|
||||||
|
/* err = mca_coll_tuned_bcast_intra_chain (buff, count, datatype, root, comm, (0), 1); */
|
||||||
/* err = mca_coll_tuned_bcast_intra_bmtree (buff, count, datatype, root, comm, (8192)); */
|
/* err = mca_coll_tuned_bcast_intra_bmtree (buff, count, datatype, root, comm, (8192)); */
|
||||||
err = mca_coll_tuned_bcast_intra_bintree (buff, count, datatype, root, comm, (1024));
|
/* err = mca_coll_tuned_bcast_intra_split_bintree (buff, count, datatype, root, comm, (100)); */
|
||||||
|
err = mca_coll_tuned_bcast_intra_bintree (buff, count, datatype, root, comm, (100));
|
||||||
|
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
@ -108,7 +108,7 @@ static int tuned_open(void)
|
|||||||
/* some initial guesses at topology parameters */
|
/* some initial guesses at topology parameters */
|
||||||
mca_coll_tuned_init_tree_fanout_param =
|
mca_coll_tuned_init_tree_fanout_param =
|
||||||
mca_base_param_register_int("coll", "tuned", "init_tree_fanout",
|
mca_base_param_register_int("coll", "tuned", "init_tree_fanout",
|
||||||
NULL, 2);
|
NULL, 4);
|
||||||
|
|
||||||
mca_coll_tuned_init_chain_fanout_param =
|
mca_coll_tuned_init_chain_fanout_param =
|
||||||
mca_base_param_register_int("coll", "tuned", "init_chain_fanout",
|
mca_base_param_register_int("coll", "tuned", "init_chain_fanout",
|
||||||
|
@ -306,7 +306,7 @@ mca_coll_tuned_module_init(struct ompi_communicator_t *comm)
|
|||||||
int size;
|
int size;
|
||||||
struct mca_coll_base_comm_t *data;
|
struct mca_coll_base_comm_t *data;
|
||||||
/* fanout parameters */
|
/* fanout parameters */
|
||||||
int tree_fanout_default = 2;
|
int tree_fanout_default = 4;
|
||||||
int chain_fanout_default = 4;
|
int chain_fanout_default = 4;
|
||||||
|
|
||||||
|
|
||||||
@ -331,14 +331,22 @@ mca_coll_tuned_module_init(struct ompi_communicator_t *comm)
|
|||||||
} else {
|
} else {
|
||||||
size = ompi_comm_size(comm);
|
size = ompi_comm_size(comm);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* we still malloc data as it is used by BOTH the TUNED and the BASIC modules
|
||||||
|
* if we don't allocate it and fall back to a BASIC module routine then confuses debuggers
|
||||||
|
* we place any special info after the default data
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
data = malloc(sizeof(struct mca_coll_base_comm_t) +
|
data = malloc(sizeof(struct mca_coll_base_comm_t) +
|
||||||
(sizeof(ompi_request_t *) * size * 2));
|
(sizeof(ompi_request_t *) * size * 2));
|
||||||
|
|
||||||
if (NULL == data) {
|
if (NULL == data) {
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
data->mccb_reqs = (ompi_request_t **) (data + 1);
|
data->mcct_reqs = (ompi_request_t **) (data + 1);
|
||||||
data->mccb_num_reqs = size * 2;
|
data->mcct_num_reqs = size * 2;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* now for the cached topo functions
|
* now for the cached topo functions
|
||||||
@ -350,23 +358,26 @@ mca_coll_tuned_module_init(struct ompi_communicator_t *comm)
|
|||||||
mca_base_param_lookup_int(mca_coll_tuned_init_tree_fanout_param,
|
mca_base_param_lookup_int(mca_coll_tuned_init_tree_fanout_param,
|
||||||
&tree_fanout_default)) {
|
&tree_fanout_default)) {
|
||||||
printf("warning: no mca_coll_tuned_init_tree_fanout_param found?\n");
|
printf("warning: no mca_coll_tuned_init_tree_fanout_param found?\n");
|
||||||
tree_fanout_default = 2; /* make it binary if failed lookup. */
|
|
||||||
}
|
}
|
||||||
if (OMPI_SUCCESS !=
|
if (OMPI_SUCCESS !=
|
||||||
mca_base_param_lookup_int(mca_coll_tuned_init_chain_fanout_param,
|
mca_base_param_lookup_int(mca_coll_tuned_init_chain_fanout_param,
|
||||||
&chain_fanout_default)) {
|
&chain_fanout_default)) {
|
||||||
printf("warning: no mca_coll_tuned_init_chain_fanout_param found?\n");
|
printf("warning: no mca_coll_tuned_init_chain_fanout_param found?\n");
|
||||||
chain_fanout_default = 4;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
data->cached_tree = ompi_coll_tuned_topo_build_tree (tree_fanout_default,
|
/* general n fan out tree */
|
||||||
comm, 0);
|
data->cached_ntree = ompi_coll_tuned_topo_build_tree (tree_fanout_default, comm, 0);
|
||||||
data->cached_tree_root = 0;
|
data->cached_ntree_root = 0;
|
||||||
data->cached_tree_fanout = tree_fanout_default;
|
data->cached_ntree_fanout = tree_fanout_default;
|
||||||
|
|
||||||
|
/* binary tree */
|
||||||
|
data->cached_bintree = ompi_coll_tuned_topo_build_tree (2, comm, 0);
|
||||||
|
data->cached_bintree_root = 0;
|
||||||
|
|
||||||
|
/* binomial tree */
|
||||||
data->cached_bmtree = ompi_coll_tuned_topo_build_bmtree (comm, 0);
|
data->cached_bmtree = ompi_coll_tuned_topo_build_bmtree (comm, 0);
|
||||||
data->cached_tree_root = 0;
|
data->cached_bmtree_root = 0;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* chains (fanout followed by pipelines)
|
* chains (fanout followed by pipelines)
|
||||||
@ -376,11 +387,14 @@ mca_coll_tuned_module_init(struct ompi_communicator_t *comm)
|
|||||||
* will probably change how we cache this later, for now a midsize
|
* will probably change how we cache this later, for now a midsize
|
||||||
* GEF
|
* GEF
|
||||||
*/
|
*/
|
||||||
data->cached_chain = ompi_coll_tuned_topo_build_chain (chain_fanout_default,
|
data->cached_chain = ompi_coll_tuned_topo_build_chain (chain_fanout_default, comm, 0);
|
||||||
comm, 0);
|
|
||||||
data->cached_chain_root = 0;
|
data->cached_chain_root = 0;
|
||||||
data->cached_chain_fanout = chain_fanout_default;
|
data->cached_chain_fanout = chain_fanout_default;
|
||||||
|
|
||||||
|
/* standard pipeline */
|
||||||
|
data->cached_pipeline = ompi_coll_tuned_topo_build_chain (1, comm, 0);
|
||||||
|
data->cached_pipeline_root = 0;
|
||||||
|
|
||||||
/* All done */
|
/* All done */
|
||||||
|
|
||||||
comm->c_coll_selected_data = data;
|
comm->c_coll_selected_data = data;
|
||||||
@ -403,10 +417,28 @@ int mca_coll_tuned_module_finalize(struct ompi_communicator_t *comm)
|
|||||||
/* Reset the reqs to NULL/0 -- they'll be freed as part of freeing
|
/* Reset the reqs to NULL/0 -- they'll be freed as part of freeing
|
||||||
the generel c_coll_selected_data */
|
the generel c_coll_selected_data */
|
||||||
|
|
||||||
comm->c_coll_selected_data->mccb_reqs = NULL;
|
comm->c_coll_selected_data->mcct_reqs = NULL;
|
||||||
comm->c_coll_selected_data->mccb_num_reqs = 0;
|
comm->c_coll_selected_data->mcct_num_reqs = 0;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/* free any cached information that has been allocated */
|
||||||
|
if (comm->c_coll_selected_data->cached_ntree) { /* destroy general tree if defined */
|
||||||
|
ompi_coll_tuned_topo_destroy_tree (&comm->c_coll_selected_data->cached_ntree);
|
||||||
|
}
|
||||||
|
if (comm->c_coll_selected_data->cached_bintree) { /* destroy bintree if defined */
|
||||||
|
ompi_coll_tuned_topo_destroy_tree (&comm->c_coll_selected_data->cached_bintree);
|
||||||
|
}
|
||||||
|
if (comm->c_coll_selected_data->cached_bmtree) { /* destroy bmtree if defined */
|
||||||
|
ompi_coll_tuned_topo_destroy_tree (&comm->c_coll_selected_data->cached_bmtree);
|
||||||
|
}
|
||||||
|
if (comm->c_coll_selected_data->cached_chain) { /* destroy general chain if defined */
|
||||||
|
ompi_coll_tuned_topo_destroy_chain (&comm->c_coll_selected_data->cached_chain);
|
||||||
|
}
|
||||||
|
if (comm->c_coll_selected_data->cached_pipeline) { /* destroy pipeline if defined */
|
||||||
|
ompi_coll_tuned_topo_destroy_chain (&comm->c_coll_selected_data->cached_pipeline);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/* All done */
|
/* All done */
|
||||||
|
|
||||||
free(comm->c_coll_selected_data);
|
free(comm->c_coll_selected_data);
|
||||||
|
@ -73,6 +73,15 @@ ompi_coll_tuned_topo_build_tree( int fanout,
|
|||||||
|
|
||||||
printf("Building tuned topo tree: fo %d rt %d\n", fanout, root);
|
printf("Building tuned topo tree: fo %d rt %d\n", fanout, root);
|
||||||
|
|
||||||
|
if (fanout<1) {
|
||||||
|
printf("ompi_coll_tuned_topo_build_tree: invalid fanout %d\n", fanout);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
if (fanout>MAXTREEFANOUT) {
|
||||||
|
printf("ompi_coll_tuned_topo_build_tree: invalid fanout %d bigger than max %d\n", fanout, MAXTREEFANOUT);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Get size and rank of the process in this communicator
|
* Get size and rank of the process in this communicator
|
||||||
*/
|
*/
|
||||||
@ -80,17 +89,15 @@ ompi_coll_tuned_topo_build_tree( int fanout,
|
|||||||
rank = ompi_comm_rank(comm);
|
rank = ompi_comm_rank(comm);
|
||||||
|
|
||||||
tree = (ompi_coll_tree_t*)malloc(sizeof(ompi_coll_tree_t));
|
tree = (ompi_coll_tree_t*)malloc(sizeof(ompi_coll_tree_t));
|
||||||
|
if (!tree) {
|
||||||
|
printf("PANIC:ompi_coll_tuned_topo_build_tree:out of memory\n");
|
||||||
|
fflush(stdout);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
tree->tree_root = MPI_UNDEFINED;
|
tree->tree_root = MPI_UNDEFINED;
|
||||||
tree->tree_nextsize = MPI_UNDEFINED;
|
tree->tree_nextsize = MPI_UNDEFINED;
|
||||||
|
|
||||||
/*
|
|
||||||
* Check if we calculated the tree for this root and
|
|
||||||
* fanout combination already (on this communicator)
|
|
||||||
*/
|
|
||||||
if( (root == tree->tree_root) && (fanout == tree->tree_fanout) ) {
|
|
||||||
return tree;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Set root
|
* Set root
|
||||||
*/
|
*/
|
||||||
@ -100,6 +107,7 @@ ompi_coll_tuned_topo_build_tree( int fanout,
|
|||||||
* Initialize tree
|
* Initialize tree
|
||||||
*/
|
*/
|
||||||
tree->tree_fanout = fanout;
|
tree->tree_fanout = fanout;
|
||||||
|
tree->tree_bmtree = 0;
|
||||||
tree->tree_root = root;
|
tree->tree_root = root;
|
||||||
tree->tree_prev = -1;
|
tree->tree_prev = -1;
|
||||||
tree->tree_nextsize = 0;
|
tree->tree_nextsize = 0;
|
||||||
@ -155,10 +163,23 @@ ompi_coll_tuned_topo_build_tree( int fanout,
|
|||||||
|
|
||||||
int ompi_coll_tuned_topo_destroy_tree( ompi_coll_tree_t** tree )
|
int ompi_coll_tuned_topo_destroy_tree( ompi_coll_tree_t** tree )
|
||||||
{
|
{
|
||||||
|
ompi_coll_tree_t *ptr;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
if ((!tree)||(!*tree)) {
|
||||||
|
return OMPI_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
ptr = *tree;
|
||||||
|
|
||||||
|
free (ptr);
|
||||||
|
*tree = NULL; /* mark tree as gone */
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
ompi_coll_bmtree_t*
|
ompi_coll_tree_t*
|
||||||
ompi_coll_tuned_topo_build_bmtree( struct ompi_communicator_t* comm,
|
ompi_coll_tuned_topo_build_bmtree( struct ompi_communicator_t* comm,
|
||||||
int root )
|
int root )
|
||||||
{
|
{
|
||||||
@ -168,7 +189,8 @@ ompi_coll_tuned_topo_build_bmtree( struct ompi_communicator_t* comm,
|
|||||||
int mask = 1;
|
int mask = 1;
|
||||||
int index;
|
int index;
|
||||||
int remote;
|
int remote;
|
||||||
ompi_coll_bmtree_t *bmtree;
|
ompi_coll_tree_t *bmtree;
|
||||||
|
int i;
|
||||||
|
|
||||||
printf("Building tuned topo bmtree: rt %d\n", root);
|
printf("Building tuned topo bmtree: rt %d\n", root);
|
||||||
|
|
||||||
@ -180,13 +202,19 @@ ompi_coll_tuned_topo_build_bmtree( struct ompi_communicator_t* comm,
|
|||||||
|
|
||||||
index = rank -root;
|
index = rank -root;
|
||||||
|
|
||||||
bmtree = (ompi_coll_bmtree_t*)malloc(sizeof(ompi_coll_bmtree_t));
|
bmtree = (ompi_coll_tree_t*)malloc(sizeof(ompi_coll_tree_t));
|
||||||
bmtree->bmtree_root = MPI_UNDEFINED;
|
if (!bmtree) {
|
||||||
bmtree->bmtree_nextsize = MPI_UNDEFINED;
|
printf("PANIC:ompi_coll_tuned_topo_build_bmtree:out of memory\n");
|
||||||
|
fflush(stdout);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
if( bmtree->bmtree_root == root ) {
|
bmtree->tree_bmtree = 1;
|
||||||
/* the bmtree was computed before */
|
|
||||||
return bmtree;
|
bmtree->tree_root = MPI_UNDEFINED;
|
||||||
|
bmtree->tree_nextsize = MPI_UNDEFINED;
|
||||||
|
for(i=0;i<MAXTREEFANOUT;i++) {
|
||||||
|
bmtree->tree_next[i] = -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if( index < 0 ) index += size;
|
if( index < 0 ) index += size;
|
||||||
@ -195,11 +223,11 @@ ompi_coll_tuned_topo_build_bmtree( struct ompi_communicator_t* comm,
|
|||||||
|
|
||||||
/* Now I can compute my father rank */
|
/* Now I can compute my father rank */
|
||||||
if( root == rank ) {
|
if( root == rank ) {
|
||||||
bmtree->bmtree_prev = root;
|
bmtree->tree_prev = root;
|
||||||
} else {
|
} else {
|
||||||
remote = (index ^ (mask >> 1)) + root;
|
remote = (index ^ (mask >> 1)) + root;
|
||||||
if( remote >= size ) remote -= size;
|
if( remote >= size ) remote -= size;
|
||||||
bmtree->bmtree_prev = remote;
|
bmtree->tree_prev = remote;
|
||||||
}
|
}
|
||||||
/* And now let's fill my childs */
|
/* And now let's fill my childs */
|
||||||
while( mask < size ) {
|
while( mask < size ) {
|
||||||
@ -207,19 +235,19 @@ ompi_coll_tuned_topo_build_bmtree( struct ompi_communicator_t* comm,
|
|||||||
if( remote >= size ) break;
|
if( remote >= size ) break;
|
||||||
remote += root;
|
remote += root;
|
||||||
if( remote >= size ) remote -= size;
|
if( remote >= size ) remote -= size;
|
||||||
bmtree->bmtree_next[childs] = remote;
|
if (childs==MAXTREEFANOUT) {
|
||||||
|
printf("ompi_coll_tuned_topo_build_bmtree: max fanout incorrect %d needed %d\n", MAXTREEFANOUT, childs);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
bmtree->tree_next[childs] = remote;
|
||||||
mask <<= 1;
|
mask <<= 1;
|
||||||
childs++;
|
childs++;
|
||||||
}
|
}
|
||||||
bmtree->bmtree_nextsize = childs;
|
bmtree->tree_nextsize = childs;
|
||||||
bmtree->bmtree_root = root;
|
bmtree->tree_root = root;
|
||||||
return bmtree;
|
return bmtree;
|
||||||
}
|
}
|
||||||
|
|
||||||
int ompi_coll_tuned_topo_destroy_bmtree( ompi_coll_bmtree_t** bmtree )
|
|
||||||
{
|
|
||||||
return OMPI_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
ompi_coll_chain_t*
|
ompi_coll_chain_t*
|
||||||
ompi_coll_tuned_topo_build_chain( int fanout,
|
ompi_coll_tuned_topo_build_chain( int fanout,
|
||||||
@ -240,7 +268,11 @@ ompi_coll_tuned_topo_build_chain( int fanout,
|
|||||||
size = ompi_comm_size(comm);
|
size = ompi_comm_size(comm);
|
||||||
rank = ompi_comm_rank(comm);
|
rank = ompi_comm_rank(comm);
|
||||||
|
|
||||||
if( fanout > MAXTREEFANOUT ) {
|
if( fanout < 1 ) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
if (fanout>MAXTREEFANOUT) {
|
||||||
|
printf("ompi_coll_tuned_topo_build_chain: invalid fanout %d bigger than max %d\n", fanout, MAXTREEFANOUT);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -248,17 +280,16 @@ ompi_coll_tuned_topo_build_chain( int fanout,
|
|||||||
* Allocate space for topology arrays if needed
|
* Allocate space for topology arrays if needed
|
||||||
*/
|
*/
|
||||||
chain = (ompi_coll_chain_t*)malloc( sizeof(ompi_coll_chain_t) );
|
chain = (ompi_coll_chain_t*)malloc( sizeof(ompi_coll_chain_t) );
|
||||||
|
if (!chain) {
|
||||||
|
printf("PANIC:ompi_coll_tuned_topo_build_chain:out of memory\n");
|
||||||
|
fflush(stdout);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
chain->chain_root = MPI_UNDEFINED;
|
chain->chain_root = MPI_UNDEFINED;
|
||||||
chain->chain_nextsize = -1;
|
chain->chain_nextsize = -1;
|
||||||
chain->chain_numchain = -1;
|
chain->chain_numchain = -1;
|
||||||
|
for(i=0;i<fanout;i++) chain->chain_next[i] = -1;
|
||||||
|
|
||||||
/*
|
|
||||||
* Check if we calculated the topology for this root and comm
|
|
||||||
*/
|
|
||||||
if( (root == chain->chain_root) &&
|
|
||||||
(fanout == chain->chain_numchain) ) {
|
|
||||||
return chain;
|
|
||||||
}
|
|
||||||
/*
|
/*
|
||||||
* Set root & numchain
|
* Set root & numchain
|
||||||
*/
|
*/
|
||||||
@ -374,5 +405,18 @@ ompi_coll_tuned_topo_build_chain( int fanout,
|
|||||||
|
|
||||||
int ompi_coll_tuned_topo_destroy_chain( ompi_coll_chain_t** chain )
|
int ompi_coll_tuned_topo_destroy_chain( ompi_coll_chain_t** chain )
|
||||||
{
|
{
|
||||||
|
ompi_coll_chain_t *ptr;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
if ((!chain)||(!*chain)) {
|
||||||
|
return OMPI_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
ptr = *chain;
|
||||||
|
|
||||||
|
free (ptr);
|
||||||
|
*chain = NULL; /* mark chain as gone */
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
|
@ -29,18 +29,12 @@ extern "C"
|
|||||||
typedef struct ompi_coll_tree_t {
|
typedef struct ompi_coll_tree_t {
|
||||||
int32_t tree_root;
|
int32_t tree_root;
|
||||||
int32_t tree_fanout;
|
int32_t tree_fanout;
|
||||||
|
int32_t tree_bmtree;
|
||||||
int32_t tree_prev;
|
int32_t tree_prev;
|
||||||
int32_t tree_next[MAXTREEFANOUT];
|
int32_t tree_next[MAXTREEFANOUT];
|
||||||
int32_t tree_nextsize;
|
int32_t tree_nextsize;
|
||||||
} ompi_coll_tree_t;
|
} ompi_coll_tree_t;
|
||||||
|
|
||||||
typedef struct ompi_coll_bmtree_t {
|
|
||||||
int32_t bmtree_root;
|
|
||||||
int32_t bmtree_prev;
|
|
||||||
int32_t bmtree_next[MAXTREEFANOUT];
|
|
||||||
int32_t bmtree_nextsize;
|
|
||||||
} ompi_coll_bmtree_t;
|
|
||||||
|
|
||||||
typedef struct ompi_coll_chain_t {
|
typedef struct ompi_coll_chain_t {
|
||||||
int32_t chain_root;
|
int32_t chain_root;
|
||||||
int32_t chain_prev;
|
int32_t chain_prev;
|
||||||
@ -55,10 +49,9 @@ ompi_coll_tuned_topo_build_tree( int fanout,
|
|||||||
int root );
|
int root );
|
||||||
int ompi_coll_tuned_topo_destroy_tree( ompi_coll_tree_t** tree );
|
int ompi_coll_tuned_topo_destroy_tree( ompi_coll_tree_t** tree );
|
||||||
|
|
||||||
ompi_coll_bmtree_t*
|
ompi_coll_tree_t*
|
||||||
ompi_coll_tuned_topo_build_bmtree( struct ompi_communicator_t* comm,
|
ompi_coll_tuned_topo_build_bmtree( struct ompi_communicator_t* comm,
|
||||||
int root );
|
int root );
|
||||||
int ompi_coll_tuned_topo_destroy_bmtree( ompi_coll_bmtree_t** bmtree );
|
|
||||||
|
|
||||||
ompi_coll_chain_t*
|
ompi_coll_chain_t*
|
||||||
ompi_coll_tuned_topo_build_chain( int fanout,
|
ompi_coll_tuned_topo_build_chain( int fanout,
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user