diff --git a/ompi/mca/coll/tuned/coll_tuned.h b/ompi/mca/coll/tuned/coll_tuned.h index 1a49bc3d6a..2eda6bb52c 100644 --- a/ompi/mca/coll/tuned/coll_tuned.h +++ b/ompi/mca/coll/tuned/coll_tuned.h @@ -444,13 +444,13 @@ typedef struct rule_s { struct mca_coll_base_comm_t { /* standard data for requests and PML usage */ - /* we need to keep this here for now incase we fall through to the - * basic functions that expect these fields/and memory to be - * avaliable (GEF something for JS?) + /* Precreate space for requests + * Note this does not affect basic, + * but if in wrong context can confuse a debugger */ - ompi_request_t **mccb_reqs; - int mccb_num_reqs; + ompi_request_t **mcct_reqs; + int mcct_num_reqs; /* * tuned topo information caching per communicator @@ -461,17 +461,28 @@ struct mca_coll_base_comm_t { * */ - ompi_coll_tree_t *cached_tree; - int cached_tree_root; - int cached_tree_fanout; + /* general tree with n fan out */ + ompi_coll_tree_t *cached_ntree; + int cached_ntree_root; + int cached_ntree_fanout; - ompi_coll_bmtree_t *cached_bmtree; + /* binary tree */ + ompi_coll_tree_t *cached_bintree; + int cached_bintree_root; + + /* binomial tree */ + ompi_coll_tree_t *cached_bmtree; int cached_bmtree_root; + /* chained tree (fanout followed by pipelines) */ ompi_coll_chain_t *cached_chain; int cached_chain_root; int cached_chain_fanout; + /* pipeline */ + ompi_coll_chain_t *cached_pipeline; + int cached_pipeline_root; + /* extra data required by the decision functions */ rule_t* decision_table; }; diff --git a/ompi/mca/coll/tuned/coll_tuned_bcast.c b/ompi/mca/coll/tuned/coll_tuned_bcast.c index 36dfeea935..8264bf0fea 100644 --- a/ompi/mca/coll/tuned/coll_tuned_bcast.c +++ b/ompi/mca/coll/tuned/coll_tuned_bcast.c @@ -171,6 +171,7 @@ mca_coll_tuned_bcast_intra_chain ( void *buff, int count, chain->chain_next[i], MCA_COLL_BASE_TAG_BCAST, MCA_PML_BASE_SEND_STANDARD, comm)); + if (err != MPI_SUCCESS) printf("sendcount %d i %d chain_next %d \n", sendcount, i, 
chain->chain_next[i]); if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } } /* end of for each child */ } @@ -256,17 +257,15 @@ mca_coll_tuned_bcast_intra_split_bintree ( void* buffer, * other wise recreate it. */ - if ((comm->c_coll_selected_data->cached_tree) && (comm->c_coll_selected_data->cached_tree_root == root) - && (comm->c_coll_selected_data->cached_tree_fanout == 2)) { - tree = comm->c_coll_selected_data->cached_tree; + if ((comm->c_coll_selected_data->cached_bintree) && (comm->c_coll_selected_data->cached_bintree_root == root)) { + tree = comm->c_coll_selected_data->cached_bintree; } else { - if (comm->c_coll_selected_data->cached_tree) { /* destroy previous tree if defined */ - ompi_coll_tuned_topo_destroy_tree (&comm->c_coll_selected_data->cached_tree); + if (comm->c_coll_selected_data->cached_bintree) { /* destroy previous tree if defined */ + ompi_coll_tuned_topo_destroy_tree (&comm->c_coll_selected_data->cached_bintree); } - comm->c_coll_selected_data->cached_tree = tree = ompi_coll_tuned_topo_build_tree( 2, comm, root ); - comm->c_coll_selected_data->cached_tree_root = root; - comm->c_coll_selected_data->cached_tree_fanout = 2; + comm->c_coll_selected_data->cached_bintree = tree = ompi_coll_tuned_topo_build_tree( 2, comm, root ); + comm->c_coll_selected_data->cached_bintree_root = root; } @@ -513,17 +512,15 @@ mca_coll_tuned_bcast_intra_bintree ( void* buffer, * other wise recreate it. 
*/ - if ((comm->c_coll_selected_data->cached_tree) && (comm->c_coll_selected_data->cached_tree_root == root) - && (comm->c_coll_selected_data->cached_tree_fanout == 2)) { - tree = comm->c_coll_selected_data->cached_tree; + if ((comm->c_coll_selected_data->cached_bintree) && (comm->c_coll_selected_data->cached_bintree_root == root)) { + tree = comm->c_coll_selected_data->cached_bintree; } else { - if (comm->c_coll_selected_data->cached_tree) { /* destroy previous tree if defined */ - ompi_coll_tuned_topo_destroy_tree (&comm->c_coll_selected_data->cached_tree); + if (comm->c_coll_selected_data->cached_bintree) { /* destroy previous bintree if defined */ + ompi_coll_tuned_topo_destroy_tree (&comm->c_coll_selected_data->cached_bintree); } - comm->c_coll_selected_data->cached_tree = tree = ompi_coll_tuned_topo_build_tree( 2, comm, root ); - comm->c_coll_selected_data->cached_tree_root = root; - comm->c_coll_selected_data->cached_tree_fanout = 2; + comm->c_coll_selected_data->cached_bintree = tree = ompi_coll_tuned_topo_build_tree( 2, comm, root ); + comm->c_coll_selected_data->cached_bintree_root = root; } diff --git a/ompi/mca/coll/tuned/coll_tuned_bcast_decision_fixed.c b/ompi/mca/coll/tuned/coll_tuned_bcast_decision_fixed.c index 3e4e403740..e6b094e2cb 100644 --- a/ompi/mca/coll/tuned/coll_tuned_bcast_decision_fixed.c +++ b/ompi/mca/coll/tuned/coll_tuned_bcast_decision_fixed.c @@ -53,9 +53,11 @@ int mca_coll_tuned_bcast_intra_dec_fixed(void *buff, int count, rank = ompi_comm_rank(comm); /* err = mca_coll_tuned_bcast_intra_linear (buff, count, datatype, root, comm); */ -/* err = mca_coll_tuned_bcast_intra_pipeline (buff, count, datatype, root, comm, (8192)); */ +/* err = mca_coll_tuned_bcast_intra_pipeline (buff, count, datatype, root, comm, (0)); */ +/* err = mca_coll_tuned_bcast_intra_chain (buff, count, datatype, root, comm, (0), 1); */ /* err = mca_coll_tuned_bcast_intra_bmtree (buff, count, datatype, root, comm, (8192)); */ - err = 
mca_coll_tuned_bcast_intra_bintree (buff, count, datatype, root, comm, (1024)); +/* err = mca_coll_tuned_bcast_intra_split_bintree (buff, count, datatype, root, comm, (100)); */ + err = mca_coll_tuned_bcast_intra_bintree (buff, count, datatype, root, comm, (100)); return err; } diff --git a/ompi/mca/coll/tuned/coll_tuned_component.c b/ompi/mca/coll/tuned/coll_tuned_component.c index 18964ee437..dea44fdc51 100644 --- a/ompi/mca/coll/tuned/coll_tuned_component.c +++ b/ompi/mca/coll/tuned/coll_tuned_component.c @@ -108,7 +108,7 @@ static int tuned_open(void) /* some initial guesses at topology parameters */ mca_coll_tuned_init_tree_fanout_param = mca_base_param_register_int("coll", "tuned", "init_tree_fanout", - NULL, 2); + NULL, 4); mca_coll_tuned_init_chain_fanout_param = mca_base_param_register_int("coll", "tuned", "init_chain_fanout", diff --git a/ompi/mca/coll/tuned/coll_tuned_module.c b/ompi/mca/coll/tuned/coll_tuned_module.c index dfb672de22..662b9505e8 100644 --- a/ompi/mca/coll/tuned/coll_tuned_module.c +++ b/ompi/mca/coll/tuned/coll_tuned_module.c @@ -306,7 +306,7 @@ mca_coll_tuned_module_init(struct ompi_communicator_t *comm) int size; struct mca_coll_base_comm_t *data; /* fanout parameters */ - int tree_fanout_default = 2; + int tree_fanout_default = 4; int chain_fanout_default = 4; @@ -331,14 +331,22 @@ mca_coll_tuned_module_init(struct ompi_communicator_t *comm) } else { size = ompi_comm_size(comm); } + + /* + * we still malloc data as it is used by BOTH the TUNED and the BASIC modules + * if we don't allocate it and fall back to a BASIC module routine then confuses debuggers + * we place any special info after the default data + * + */ + data = malloc(sizeof(struct mca_coll_base_comm_t) + (sizeof(ompi_request_t *) * size * 2)); if (NULL == data) { return NULL; } - data->mccb_reqs = (ompi_request_t **) (data + 1); - data->mccb_num_reqs = size * 2; + data->mcct_reqs = (ompi_request_t **) (data + 1); + data->mcct_num_reqs = size * 2; /* * now for the 
cached topo functions @@ -350,23 +358,26 @@ mca_coll_tuned_module_init(struct ompi_communicator_t *comm) mca_base_param_lookup_int(mca_coll_tuned_init_tree_fanout_param, &tree_fanout_default)) { printf("warning: no mca_coll_tuned_init_tree_fanout_param found?\n"); - tree_fanout_default = 2; /* make it binary if failed lookup. */ } if (OMPI_SUCCESS != mca_base_param_lookup_int(mca_coll_tuned_init_chain_fanout_param, &chain_fanout_default)) { printf("warning: no mca_coll_tuned_init_chain_fanout_param found?\n"); - chain_fanout_default = 4; } - - data->cached_tree = ompi_coll_tuned_topo_build_tree (tree_fanout_default, - comm, 0); - data->cached_tree_root = 0; - data->cached_tree_fanout = tree_fanout_default; + + /* general n fan out tree */ + data->cached_ntree = ompi_coll_tuned_topo_build_tree (tree_fanout_default, comm, 0); + data->cached_ntree_root = 0; + data->cached_ntree_fanout = tree_fanout_default; + /* binary tree */ + data->cached_bintree = ompi_coll_tuned_topo_build_tree (2, comm, 0); + data->cached_bintree_root = 0; + + /* binomial tree */ data->cached_bmtree = ompi_coll_tuned_topo_build_bmtree (comm, 0); - data->cached_tree_root = 0; + data->cached_bmtree_root = 0; /* * chains (fanout followed by pipelines) @@ -376,11 +387,14 @@ mca_coll_tuned_module_init(struct ompi_communicator_t *comm) * will probably change how we cache this later, for now a midsize * GEF */ - data->cached_chain = ompi_coll_tuned_topo_build_chain (chain_fanout_default, - comm, 0); + data->cached_chain = ompi_coll_tuned_topo_build_chain (chain_fanout_default, comm, 0); data->cached_chain_root = 0; data->cached_chain_fanout = chain_fanout_default; + /* standard pipeline */ + data->cached_pipeline = ompi_coll_tuned_topo_build_chain (1, comm, 0); + data->cached_pipeline_root = 0; + /* All done */ comm->c_coll_selected_data = data; @@ -403,10 +417,28 @@ int mca_coll_tuned_module_finalize(struct ompi_communicator_t *comm) /* Reset the reqs to NULL/0 -- they'll be freed as part of freeing 
the generel c_coll_selected_data */ - comm->c_coll_selected_data->mccb_reqs = NULL; - comm->c_coll_selected_data->mccb_num_reqs = 0; + comm->c_coll_selected_data->mcct_reqs = NULL; + comm->c_coll_selected_data->mcct_num_reqs = 0; #endif + /* free any cached information that has been allocated */ + if (comm->c_coll_selected_data->cached_ntree) { /* destroy general tree if defined */ + ompi_coll_tuned_topo_destroy_tree (&comm->c_coll_selected_data->cached_ntree); + } + if (comm->c_coll_selected_data->cached_bintree) { /* destroy bintree if defined */ + ompi_coll_tuned_topo_destroy_tree (&comm->c_coll_selected_data->cached_bintree); + } + if (comm->c_coll_selected_data->cached_bmtree) { /* destroy bmtree if defined */ + ompi_coll_tuned_topo_destroy_tree (&comm->c_coll_selected_data->cached_bmtree); + } + if (comm->c_coll_selected_data->cached_chain) { /* destroy general chain if defined */ + ompi_coll_tuned_topo_destroy_chain (&comm->c_coll_selected_data->cached_chain); + } + if (comm->c_coll_selected_data->cached_pipeline) { /* destroy pipeline if defined */ + ompi_coll_tuned_topo_destroy_chain (&comm->c_coll_selected_data->cached_pipeline); + } + + /* All done */ free(comm->c_coll_selected_data); diff --git a/ompi/mca/coll/tuned/coll_tuned_topo.c b/ompi/mca/coll/tuned/coll_tuned_topo.c index 9a6f372dbd..573f2a56b0 100644 --- a/ompi/mca/coll/tuned/coll_tuned_topo.c +++ b/ompi/mca/coll/tuned/coll_tuned_topo.c @@ -73,6 +73,15 @@ ompi_coll_tuned_topo_build_tree( int fanout, printf("Building tuned topo tree: fo %d rt %d\n", fanout, root); + if (fanout<1) { + printf("ompi_coll_tuned_topo_build_tree: invalid fanout %d\n", fanout); + return NULL; + } + if (fanout>MAXTREEFANOUT) { + printf("ompi_coll_tuned_topo_build_tree: invalid fanout %d bigger than max %d\n", fanout, MAXTREEFANOUT); + return NULL; + } + /* * Get size and rank of the process in this communicator */ @@ -80,17 +89,15 @@ ompi_coll_tuned_topo_build_tree( int fanout, rank = ompi_comm_rank(comm); tree = 
(ompi_coll_tree_t*)malloc(sizeof(ompi_coll_tree_t)); + if (!tree) { + printf("PANIC:ompi_coll_tuned_topo_build_tree:out of memory\n"); + fflush(stdout); + return NULL; + } + tree->tree_root = MPI_UNDEFINED; tree->tree_nextsize = MPI_UNDEFINED; - /* - * Check if we calculated the tree for this root and - * fanout combination already (on this communicator) - */ - if( (root == tree->tree_root) && (fanout == tree->tree_fanout) ) { - return tree; - } - /* * Set root */ @@ -100,6 +107,7 @@ ompi_coll_tuned_topo_build_tree( int fanout, * Initialize tree */ tree->tree_fanout = fanout; + tree->tree_bmtree = 0; tree->tree_root = root; tree->tree_prev = -1; tree->tree_nextsize = 0; @@ -155,10 +163,23 @@ ompi_coll_tuned_topo_build_tree( int fanout, int ompi_coll_tuned_topo_destroy_tree( ompi_coll_tree_t** tree ) { + ompi_coll_tree_t *ptr; + + return 0; + + if ((!tree)||(!*tree)) { + return OMPI_SUCCESS; + } + + ptr = *tree; + + free (ptr); + *tree = NULL; /* mark tree as gone */ + return OMPI_SUCCESS; } -ompi_coll_bmtree_t* +ompi_coll_tree_t* ompi_coll_tuned_topo_build_bmtree( struct ompi_communicator_t* comm, int root ) { @@ -168,7 +189,8 @@ ompi_coll_tuned_topo_build_bmtree( struct ompi_communicator_t* comm, int mask = 1; int index; int remote; - ompi_coll_bmtree_t *bmtree; + ompi_coll_tree_t *bmtree; + int i; printf("Building tuned topo bmtree: rt %d\n", root); @@ -180,13 +202,19 @@ ompi_coll_tuned_topo_build_bmtree( struct ompi_communicator_t* comm, index = rank -root; - bmtree = (ompi_coll_bmtree_t*)malloc(sizeof(ompi_coll_bmtree_t)); - bmtree->bmtree_root = MPI_UNDEFINED; - bmtree->bmtree_nextsize = MPI_UNDEFINED; + bmtree = (ompi_coll_tree_t*)malloc(sizeof(ompi_coll_tree_t)); + if (!bmtree) { + printf("PANIC:ompi_coll_tuned_topo_build_bmtree:out of memory\n"); + fflush(stdout); + return NULL; + } - if( bmtree->bmtree_root == root ) { - /* the bmtree was computed before */ - return bmtree; + bmtree->tree_bmtree = 1; + + bmtree->tree_root = MPI_UNDEFINED; + 
bmtree->tree_nextsize = MPI_UNDEFINED; + for(i=0;i<MAXTREEFANOUT;i++) { + bmtree->tree_next[i] = -1; } if( index < 0 ) index += size; @@ -195,11 +223,11 @@ ompi_coll_tuned_topo_build_bmtree( struct ompi_communicator_t* comm, /* Now I can compute my father rank */ if( root == rank ) { - bmtree->bmtree_prev = root; + bmtree->tree_prev = root; } else { remote = (index ^ (mask >> 1)) + root; if( remote >= size ) remote -= size; - bmtree->bmtree_prev = remote; + bmtree->tree_prev = remote; } /* And now let's fill my childs */ while( mask < size ) { @@ -207,19 +235,19 @@ ompi_coll_tuned_topo_build_bmtree( struct ompi_communicator_t* comm, if( remote >= size ) break; remote += root; if( remote >= size ) remote -= size; - bmtree->bmtree_next[childs] = remote; + if (childs==MAXTREEFANOUT) { + printf("ompi_coll_tuned_topo_build_bmtree: max fanout incorrect %d needed %d\n", MAXTREEFANOUT, childs); + return NULL; + } + bmtree->tree_next[childs] = remote; mask <<= 1; childs++; } - bmtree->bmtree_nextsize = childs; - bmtree->bmtree_root = root; + bmtree->tree_nextsize = childs; + bmtree->tree_root = root; return bmtree; } -int ompi_coll_tuned_topo_destroy_bmtree( ompi_coll_bmtree_t** bmtree ) -{ - return OMPI_SUCCESS; -} ompi_coll_chain_t* ompi_coll_tuned_topo_build_chain( int fanout, @@ -240,7 +268,11 @@ ompi_coll_tuned_topo_build_chain( int fanout, size = ompi_comm_size(comm); rank = ompi_comm_rank(comm); - if( fanout > MAXTREEFANOUT ) { + if( fanout < 1 ) { + return NULL; + } + if (fanout>MAXTREEFANOUT) { + printf("ompi_coll_tuned_topo_build_chain: invalid fanout %d bigger than max %d\n", fanout, MAXTREEFANOUT); return NULL; } @@ -248,17 +280,16 @@ ompi_coll_tuned_topo_build_chain( int fanout, * Allocate space for topology arrays if needed */ chain = (ompi_coll_chain_t*)malloc( sizeof(ompi_coll_chain_t) ); + if (!chain) { + printf("PANIC:ompi_coll_tuned_topo_build_chain:out of memory\n"); + fflush(stdout); + return NULL; + } chain->chain_root = MPI_UNDEFINED; chain->chain_nextsize = -1; 
chain->chain_numchain = -1; + for(i=0;i<fanout;i++) chain->chain_next[i] = -1; - /* - * Check if we calculated the topology for this root and comm - */ - if( (root == chain->chain_root) && - (fanout == chain->chain_numchain) ) { - return chain; - } /* * Set root & numchain */ @@ -374,5 +405,18 @@ ompi_coll_tuned_topo_build_chain( int fanout, int ompi_coll_tuned_topo_destroy_chain( ompi_coll_chain_t** chain ) { + ompi_coll_chain_t *ptr; + + return 0; + + if ((!chain)||(!*chain)) { + return OMPI_SUCCESS; + } + + ptr = *chain; + + free (ptr); + *chain = NULL; /* mark chain as gone */ + return OMPI_SUCCESS; } diff --git a/ompi/mca/coll/tuned/coll_tuned_topo.h b/ompi/mca/coll/tuned/coll_tuned_topo.h index 582f6a40fb..c6fdd4253c 100644 --- a/ompi/mca/coll/tuned/coll_tuned_topo.h +++ b/ompi/mca/coll/tuned/coll_tuned_topo.h @@ -29,18 +29,12 @@ extern "C" typedef struct ompi_coll_tree_t { int32_t tree_root; int32_t tree_fanout; + int32_t tree_bmtree; int32_t tree_prev; int32_t tree_next[MAXTREEFANOUT]; int32_t tree_nextsize; } ompi_coll_tree_t; -typedef struct ompi_coll_bmtree_t { - int32_t bmtree_root; - int32_t bmtree_prev; - int32_t bmtree_next[MAXTREEFANOUT]; - int32_t bmtree_nextsize; -} ompi_coll_bmtree_t; - typedef struct ompi_coll_chain_t { int32_t chain_root; int32_t chain_prev; @@ -55,10 +49,9 @@ ompi_coll_tuned_topo_build_tree( int fanout, int root ); int ompi_coll_tuned_topo_destroy_tree( ompi_coll_tree_t** tree ); -ompi_coll_bmtree_t* +ompi_coll_tree_t* ompi_coll_tuned_topo_build_bmtree( struct ompi_communicator_t* comm, int root ); -int ompi_coll_tuned_topo_destroy_bmtree( ompi_coll_bmtree_t** bmtree ); ompi_coll_chain_t* ompi_coll_tuned_topo_build_chain( int fanout,