From 607bdf51b6f1e406cc35e7c51324dee171209ebd Mon Sep 17 00:00:00 2001 From: Graham Fagg Date: Tue, 11 Oct 2005 18:51:03 +0000 Subject: [PATCH] Last Cleanup BEFORE adding last two methods and final cross over points. - new mca param calls - move printfs to OPAL_OUTPUT This commit was SVN r7692. --- ompi/mca/coll/tuned/coll_tuned.h | 9 ++- ompi/mca/coll/tuned/coll_tuned_alltoall.c | 50 ++----------- .../coll_tuned_alltoall_decision_dynamic.c | 2 +- .../coll_tuned_alltoall_decision_fixed.c | 2 +- ompi/mca/coll/tuned/coll_tuned_barrier.c | 14 ++-- .../coll_tuned_barrier_decision_dynamic.c | 2 +- .../tuned/coll_tuned_barrier_decision_fixed.c | 2 +- ompi/mca/coll/tuned/coll_tuned_bcast.c | 19 ++--- .../tuned/coll_tuned_bcast_decision_dynamic.c | 2 +- .../tuned/coll_tuned_bcast_decision_fixed.c | 2 +- ompi/mca/coll/tuned/coll_tuned_component.c | 74 +++++++++++-------- .../mca/coll/tuned/coll_tuned_dynamic_rules.c | 59 ++++++++------- ompi/mca/coll/tuned/coll_tuned_module.c | 65 ++++------------ ompi/mca/coll/tuned/coll_tuned_reduce.c | 5 +- .../coll_tuned_reduce_decision_dynamic.c | 2 +- .../tuned/coll_tuned_reduce_decision_fixed.c | 2 +- ompi/mca/coll/tuned/coll_tuned_topo.c | 37 +++++----- ompi/mca/coll/tuned/coll_tuned_util.c | 2 +- 18 files changed, 148 insertions(+), 202 deletions(-) diff --git a/ompi/mca/coll/tuned/coll_tuned.h b/ompi/mca/coll/tuned/coll_tuned.h index 0bd0f80797..2250b59c8d 100644 --- a/ompi/mca/coll/tuned/coll_tuned.h +++ b/ompi/mca/coll/tuned/coll_tuned.h @@ -37,7 +37,14 @@ extern "C" { */ OMPI_COMP_EXPORT extern const mca_coll_base_component_1_0_0_t mca_coll_tuned_component; -OMPI_COMP_EXPORT extern int mca_coll_tuned_priority_param; + +OMPI_COMP_EXPORT extern int mca_coll_tuned_stream; +OMPI_COMP_EXPORT extern int mca_coll_tuned_priority; +OMPI_COMP_EXPORT extern int mca_coll_tuned_preallocate_memory_comm_size_limit; +OMPI_COMP_EXPORT extern int mca_coll_tuned_use_dynamic_rules; +OMPI_COMP_EXPORT extern int mca_coll_tuned_init_tree_fanout; 
+OMPI_COMP_EXPORT extern int mca_coll_tuned_init_chain_fanout; + /* diff --git a/ompi/mca/coll/tuned/coll_tuned_alltoall.c b/ompi/mca/coll/tuned/coll_tuned_alltoall.c index ec1b35126b..78ff48cfee 100644 --- a/ompi/mca/coll/tuned/coll_tuned_alltoall.c +++ b/ompi/mca/coll/tuned/coll_tuned_alltoall.c @@ -31,36 +31,6 @@ #include #include -/* temp debug routines */ -static int dump_buf_int (char* ptr, int count, char *comment, int rank); - -static int dump_buf_int (char* ptr, int count, char *comment, int rank) { -int i=0; -int *tptr; -int c=0; -tptr=(int*)ptr; -printf("%1d ", rank); -if (comment) printf("%s ", comment); -if (count <0) { - printf("cnt %d?\n", count); - return (0); -} - -if (count>5) c = 5; -else c = count; -printf("Cnt %1d ", count); -for(i=0;ichain_next[i], MCA_COLL_BASE_TAG_BCAST, MCA_PML_BASE_SEND_STANDARD, comm)); - if (err != MPI_SUCCESS) printf("sendcount %d i %d chain_next %d \n", sendcount, i, chain->chain_next[i]); + if (err != MPI_SUCCESS) OPAL_OUTPUT((mca_coll_tuned_stream,"sendcount %d i %d chain_next %d", sendcount, i, chain->chain_next[i])); if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } } /* end of for each child */ } @@ -197,7 +197,7 @@ mca_coll_tuned_bcast_intra_chain ( void *buff, int count, return (MPI_SUCCESS); error_hndl: - fprintf(stderr,"%s:%d: Error %d occurred\n",__FILE__,line,err); + OPAL_OUTPUT((mca_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", __FILE__,line,err,rank)); return (err); } @@ -213,7 +213,7 @@ mca_coll_tuned_bcast_intra_pipeline ( void *buffer, { int rank; /* remove when removing print statement */ rank = ompi_comm_rank(comm); /* remove when removing print statement */ - printf("mca_coll_tuned_bcast_intra_pipeline rank %d root %d\n", rank, root); + OPAL_OUTPUT((mca_coll_tuned_stream,"mca_coll_tuned_bcast_intra_pipeline rank %d root %d", rank, root)); return mca_coll_tuned_bcast_intra_chain ( buffer, count, datatype, root, comm, segsize, 1 ); @@ -247,7 +247,7 @@ 
mca_coll_tuned_bcast_intra_split_bintree ( void* buffer, size = ompi_comm_size(comm); rank = ompi_comm_rank(comm); - printf("mca_coll_tuned_bcast_intra_split_bintree rank %d root %d\n", rank, root); + OPAL_OUTPUT((mca_coll_tuned_stream,"mca_coll_tuned_bcast_intra_split_bintree rank %d root %d", rank, root)); if (size == 1) { return MPI_SUCCESS; @@ -470,7 +470,7 @@ mca_coll_tuned_bcast_intra_split_bintree ( void* buffer, return (MPI_SUCCESS); error_hndl: - fprintf(stderr,"[%d]%s:%d: Error %d occurred\n",rank,__FILE__,line,err); + OPAL_OUTPUT((mca_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", __FILE__,line,err,rank)); return (err); } @@ -502,7 +502,7 @@ mca_coll_tuned_bcast_intra_bintree ( void* buffer, size = ompi_comm_size(comm); rank = ompi_comm_rank(comm); - printf("mca_coll_tuned_bcast_intra_bintree rank %d root %d\n", rank, root); + OPAL_OUTPUT((mca_coll_tuned_stream,"mca_coll_tuned_bcast_intra_bintree rank %d root %d", rank, root)); if (size == 1) { return MPI_SUCCESS; @@ -677,7 +677,8 @@ mca_coll_tuned_bcast_intra_bintree ( void* buffer, return (MPI_SUCCESS); error_hndl: - fprintf(stderr,"[%d]%s:%d: Error %d occurred\n",rank,__FILE__,line,err); + OPAL_OUTPUT((mca_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", __FILE__,line,err,rank)); return (err); } + diff --git a/ompi/mca/coll/tuned/coll_tuned_bcast_decision_dynamic.c b/ompi/mca/coll/tuned/coll_tuned_bcast_decision_dynamic.c index cb0fb600f1..b391bafb2d 100644 --- a/ompi/mca/coll/tuned/coll_tuned_bcast_decision_dynamic.c +++ b/ompi/mca/coll/tuned/coll_tuned_bcast_decision_dynamic.c @@ -46,7 +46,7 @@ int mca_coll_tuned_bcast_intra_dec_dynamic(void *buff, int count, int contig; int dsize; - printf("mca_coll_tuned_bcast_intra_dec_dynamic\n"); + OPAL_OUTPUT((mca_coll_tuned_stream, "coll:tuned:bcast_intra_dec_dynamic")); size = ompi_comm_size(comm); rank = ompi_comm_rank(comm); diff --git a/ompi/mca/coll/tuned/coll_tuned_bcast_decision_fixed.c 
b/ompi/mca/coll/tuned/coll_tuned_bcast_decision_fixed.c index 1dd4ccd767..8b8a7d3c10 100644 --- a/ompi/mca/coll/tuned/coll_tuned_bcast_decision_fixed.c +++ b/ompi/mca/coll/tuned/coll_tuned_bcast_decision_fixed.c @@ -47,7 +47,7 @@ int mca_coll_tuned_bcast_intra_dec_fixed(void *buff, int count, int contig; int dsize; - printf("mca_coll_tuned_bcast_intra_dec_fixed\n"); + OPAL_OUTPUT((mca_coll_tuned_stream,"mca_coll_tuned_bcast_intra_dec_fixed")); size = ompi_comm_size(comm); rank = ompi_comm_rank(comm); diff --git a/ompi/mca/coll/tuned/coll_tuned_component.c b/ompi/mca/coll/tuned/coll_tuned_component.c index ea4b736fae..36dea05ea3 100644 --- a/ompi/mca/coll/tuned/coll_tuned_component.c +++ b/ompi/mca/coll/tuned/coll_tuned_component.c @@ -36,11 +36,12 @@ const char *mca_coll_tuned_component_version_string = /* * Global variable */ -int mca_coll_tuned_priority_param = -1; -int mca_coll_tuned_preallocate_memory_comm_size_limit_param = -1; -int mca_coll_tuned_use_dynamic_rules_param = -1; -int mca_coll_tuned_init_tree_fanout_param = -1; -int mca_coll_tuned_init_chain_fanout_param = -1; +int mca_coll_tuned_stream = -1; +int mca_coll_tuned_priority = 30; +int mca_coll_tuned_preallocate_memory_comm_size_limit = (32*1024); +int mca_coll_tuned_use_dynamic_rules = 0; +int mca_coll_tuned_init_tree_fanout = 4; +int mca_coll_tuned_init_chain_fanout = 4; /* * Local function */ @@ -93,43 +94,54 @@ const mca_coll_base_component_1_0_0_t mca_coll_tuned_component = { static int tuned_open(void) { - printf("Tuned_open called\n"); + int param; + /* mca_coll_tuned_component_t *ct = &mca_coll_tuned_component; */ /* Use a low priority, but allow other components to be lower */ - mca_coll_tuned_priority_param = - mca_base_param_register_int("coll", "tuned", "priority", NULL, 30); + mca_base_param_reg_int(&mca_coll_tuned_component.collm_version, + "priority", + "Priority of the tuned coll component", + false, false, mca_coll_tuned_priority, + &mca_coll_tuned_priority); - /* check the parameter 
for pre-allocated memory requests etc */ - mca_coll_tuned_preallocate_memory_comm_size_limit_param = - mca_base_param_register_int("coll", "tuned", "pre_allocate_memory", NULL, (32*1024)+1); + /* parameter for pre-allocated memory requests etc */ + mca_base_param_reg_int(&mca_coll_tuned_component.collm_version, + "pre_allocate_memory_comm_size_limit", + "Size of communicator were we stop pre-allocating memory for the fixed internal buffer used for message requests etc that is hung off the communicator data segment. I.e. if you have a 100'000 nodes you might not want to pre-allocate 200'000 request handle slots per communicator instance!", + false, false, mca_coll_tuned_preallocate_memory_comm_size_limit, + &mca_coll_tuned_preallocate_memory_comm_size_limit); /* by default DISABLE dynamic rules and force the use of fixed [if] rules */ - mca_coll_tuned_use_dynamic_rules_param = - mca_base_param_register_int("coll", "tuned", "use_dynamic_rules", - NULL, 0); + mca_base_param_reg_int(&mca_coll_tuned_component.collm_version, + "use_dynamic_rules", + "Switch used to decide if we use static (if statements) or dynamic (built at runtime) decision function rules", + false, false, mca_coll_tuned_use_dynamic_rules, + &mca_coll_tuned_use_dynamic_rules); /* some initial guesses at topology parameters */ - mca_coll_tuned_init_tree_fanout_param = - mca_base_param_register_int("coll", "tuned", "init_tree_fanout", - NULL, 4); + mca_base_param_reg_int(&mca_coll_tuned_component.collm_version, + "init_tree_fanout", + "Inital fanout used in the tree topologies for each communicator. This is only an initial guess, if a tuned collective needs a different fanout for an operation, it build it dynamically. 
This parameter is only for the first guess and might save a little time", + false, false, mca_coll_tuned_init_tree_fanout, + &mca_coll_tuned_init_tree_fanout); - mca_coll_tuned_init_chain_fanout_param = - mca_base_param_register_int("coll", "tuned", "init_chain_fanout", - NULL, 4); - -/* use the newer interface rsn */ -/* mca_coll_tuned_priority_param = mca_base_param_reg_int(&(ct->super), "priority", "Priority of the tuned coll component", */ -/* false, false, 30, NULL); */ - -/* mca_base_param_reg_int(&(ct->super), "init_tree_fanout", "Fan out used for [balanced] tree topologies in the tuned coll component", */ -/* false, false, 2, NULL); */ - -/* mca_base_param_reg_int(&(ct->super), "init_chain_fanout", */ -/* "Fan out used for chain [1 fanout followed by pipelines] topology in the tuned coll component", */ -/* false, false, 2, NULL); */ + mca_base_param_reg_int(&mca_coll_tuned_component.collm_version, + "init_chain_fanout", + "Inital fanout used in the chain (fanout followed by pipeline) topologies for each communicator. This is only an initial guess, if a tuned collective needs a different fanout for an operation, it build it dynamically. 
This parameter is only for the first guess and might save a little time", + false, false, mca_coll_tuned_init_chain_fanout, + &mca_coll_tuned_init_chain_fanout); + param = mca_base_param_find("coll", NULL, "base_verbose"); + if (param >= 0) { + int verbose; + mca_base_param_lookup_int(param, &verbose); + if (verbose > 0) { + mca_coll_tuned_stream = opal_output_open(NULL); + } + } + OPAL_OUTPUT((mca_coll_tuned_stream, "coll:tuned:component_open: done!")); return OMPI_SUCCESS; } diff --git a/ompi/mca/coll/tuned/coll_tuned_dynamic_rules.c b/ompi/mca/coll/tuned/coll_tuned_dynamic_rules.c index 8fe1c97ca6..d87f1e9030 100644 --- a/ompi/mca/coll/tuned/coll_tuned_dynamic_rules.c +++ b/ompi/mca/coll/tuned/coll_tuned_dynamic_rules.c @@ -38,7 +38,7 @@ rule_t* ptr; ptr = (rule_t*) calloc (1, sizeof(rule_t)); if (!ptr) { - fprintf(stderr,"calloc on mk_rule failed!\n"); + OPAL_OUTPUT((mca_coll_tuned_stream, "coll:tuned:dynamic_rules:mk_rule calloc on mk_rule failed!")); exit (-1); } @@ -56,17 +56,17 @@ condition_t* ptr; condition_t* last; if (!rule) { - fprintf(stderr,"rule given in add_condition_to_rule is NULL?!\n"); + OPAL_OUTPUT((mca_coll_tuned_stream, "coll:tuned:dynamic_rules:mk_and_add_condition_to_rule rule given is NULL?!\n")); return (-2); } if (param>=PARAMS) { - fprintf(stderr,"param given in add_condition_to_rule is %d?!\n", param); + OPAL_OUTPUT((mca_coll_tuned_stream, "coll:tuned:dynamic_rules:mk_and_add_condition_to_rule param given is %d?!\n", param)); return (-3); } ptr = (condition_t*) calloc (1, sizeof(condition_t)); -if (!ptr) { - fprintf(stderr,"calloc on add_condition_to_rule failed!\n"); +if (!ptr) { + OPAL_OUTPUT((mca_coll_tuned_stream, "coll:tuned:dynamic_rules:mk_and_add_condition_to_rule calloc failed!\n")); return (-5); } @@ -99,25 +99,25 @@ int set_rule_links (rule_t * rule, ifp true_fptr, int* true_extraargs, { if (!rule) { - fprintf(stderr,"rule given in set_rule_links is NULL?!\n"); + OPAL_OUTPUT((mca_coll_tuned_stream, 
"coll:tuned:dynamic_rules:set_rule_links rule is NULL?")); return (-2); } /* check rule results.. we must have one set for true and one for false */ if ((true_fptr)&&(true_rule)) { - fprintf(stderr,"BAD. Two links for TRUE on rule %d!\n", rule->rule_id); + OPAL_OUTPUT((mca_coll_tuned_stream, "coll:tuned:dynamic_rules:set_rule_links BAD. Two links for TRUE on rule %d!", rule->rule_id)); return (-6); } if ((false_fptr)&&(false_rule)) { - fprintf(stderr,"BAD. Two links for FALSE on rule %d!\n", rule->rule_id); + OPAL_OUTPUT((mca_coll_tuned_stream, "coll:tuned:dynamic_rules:set_rule_links BAD. Two links for FALSE on rule %d!", rule->rule_id)); return (-7); } if ((!true_fptr)&&(!true_rule)) { - fprintf(stderr,"BAD. NO links for TRUE on rule %d!\n", rule->rule_id); + OPAL_OUTPUT((mca_coll_tuned_stream, "coll:tuned:dynamic_rules:set_rule_links BAD. NO links for TRUE on rule %d!", rule->rule_id)); return (-8); } if ((!false_fptr)&&(!false_rule)) { - fprintf(stderr,"BAD. NO links for FALSE on rule %d!\n", rule->rule_id); + OPAL_OUTPUT((mca_coll_tuned_stream, "coll:tuned:dynamic_rules:set_rule_links BAD. NO links for FALSE on rule %d!", rule->rule_id)); return (-9); } @@ -141,7 +141,7 @@ condition_t* next; int i; if (!rule) { - fprintf(stderr,"rule given in free_rule is NULL?!\n"); + OPAL_OUTPUT((mca_coll_tuned_stream, "coll:tuned:dynamic_rules:free_rule rule is NULL?")); return (-2); } @@ -169,15 +169,15 @@ condition_t* currentcond; int true=1; if (!rule) { - fprintf(stderr,"rule given in eval_rule is NULL?!\n"); + OPAL_OUTPUT((mca_coll_tuned_stream, "coll:tuned:dynamic_rules:eval_rule rule given is NULL?!")); return (-2); } /* first special case is a very fast path... 
sorta not really grr */ if (!rule->nconditions) { #ifdef VERBOSE - printf("Rule %d has no conditions so forcing first available\n", - rule->rule_id); + OPAL_OUTPUT((mca_coll_tuned_stream, "coll:tuned:dynamic_rules:eval_rule Rule %d has no conditions so forcing first available", + rule->rule_id)); #endif /* VERBOSE */ *fptr = rule->true_fptr; return (0); @@ -190,7 +190,7 @@ currentrule = rule; while (currentrule) { /* rules to evaluate */ #ifdef RULEVERBOSE - printf("Eval Rule %d ", currentrule->rule_id); + OPAL_OUTPUT((mca_coll_tuned_stream, "coll:tuned:dynamic_rules:eval_rule evaluating rule %d ", currentrule->rule_id)); #endif /* eval each of the current rules conditions */ @@ -208,7 +208,7 @@ while (currentrule) { /* rules to evaluate */ while ((currentcond)&&(true)) { /* while conditions to eval */ #ifdef RULEVERBOSE - printf("Eval Cond %d ", currentcond->cond_id); + OPAL_OUTPUT((mca_coll_tuned_stream, "coll:tuned:dynamic_rules:eval_rule eval cond %d ", currentcond->cond_id)); #endif switch (currentcond->op) { case LT: if (params->values[currentcond->param] < currentcond->value) {true = 1;} @@ -227,9 +227,9 @@ while (currentrule) { /* rules to evaluate */ else {true = 0;} break; default: - fprintf(stderr, "Eval: BAD operator of value %d rule %d cond %d\n", + OPAL_OUTPUT((mca_coll_tuned_stream, "coll:tuned:dynamic_rules:eval_rule BAD operator of value %d rule %d cond %d", currentcond->op, currentrule->rule_id, - currentcond->cond_id); + currentcond->cond_id)); true = 0; return (-1); /* ?! 
what else can I do, should have caught before */ } /* switch on condition operator */ @@ -240,19 +240,20 @@ while (currentrule) { /* rules to evaluate */ if (!true) { #ifdef RULEVERBOSE - printf("Eval Cond %d returned FALSE\n", currentcond->cond_id); + OPAL_OUTPUT((mca_coll_tuned_stream, "coll:tuned:dynamic_rules:eval_rule eval cond %d returned FALSE", currentcond->cond_id)); #endif break; /* if false drop out asap */ } if ((true)&&(currentcond->next)) { /* next condition to check */ #ifdef RULEVERBOSE - printf("Eval Cond %d returned TRUE. Moving to next\n", currentcond->cond_id); + OPAL_OUTPUT((mca_coll_tuned_stream, "coll:tuned:dynamic_rules:eval_rule eval cond %d returned TRUE. Moving to next", + currentcond->cond_id)); #endif currentcond = currentcond->next; } else { /* we are true with no more conditions to check */ #ifdef RULEVERBOSE - printf("Eval Cond %d (LAST) returned TRUE.\n", currentcond->cond_id); + OPAL_OUTPUT((mca_coll_tuned_stream, "coll:tuned:dynamic_rules:eval_rule eval cond %d (LAST) returned TRUE.", currentcond->cond_id)); #endif break; /* so return so we can find out what to do next */ } @@ -263,7 +264,7 @@ while (currentrule) { /* rules to evaluate */ /* we do these IFs in the fasted/most important order */ if ((true)&&(currentrule->true_fptr)) { #ifdef RULEVERBOSE - printf("Eval Rule %d is TRUE returning fptr\n", currentrule->rule_id); + OPAL_OUTPUT((mca_coll_tuned_stream, "coll:tuned:dynamic_rules:eval_rule rule %d is TRUE returning fptr", currentrule->rule_id)); #endif *fptr = currentrule->true_fptr; *extraargs = currentrule->true_extraargs; @@ -271,7 +272,7 @@ while (currentrule) { /* rules to evaluate */ } if ((!true)&&(currentrule->false_fptr)) { #ifdef RULEVERBOSE - printf("Eval Rule %d is FALSE returning fptr\n", currentrule->rule_id); + OPAL_OUTPUT((mca_coll_tuned_stream, "coll:tuned:dynamic_rules:eval_rule rule %d is FALSE returning fptr", currentrule->rule_id)); #endif *fptr = currentrule->false_fptr; *extraargs = 
currentrule->false_extraargs; @@ -279,23 +280,21 @@ while (currentrule) { /* rules to evaluate */ } if (true) { #ifdef RULEVERBOSE - printf("Eval Rule %d is TRUE jumping to next rule %d\n", - currentrule->rule_id, - ((rule_t*)(currentrule->next_true_rule))->rule_id); + OPAL_OUTPUT((mca_coll_tuned_stream, "coll:tuned:dynamic_rules:eval_rule rule %d is TRUE jumping to next rule %d", + currentrule->rule_id, ((rule_t*)(currentrule->next_true_rule))->rule_id)); #endif currentrule = (rule_t *) currentrule->next_true_rule; } else { /* i.e. not true / lazy eval */ #ifdef RULEVERBOSE - printf("Eval Rule %d is FALSE jumping to next rule %d\n", - currentrule->rule_id, - ((rule_t*)(currentrule->next_false_rule))->rule_id); + OPAL_OUTPUT((mca_coll_tuned_stream, "Eval Rule %d is FALSE jumping to next rule %d", + currentrule->rule_id, ((rule_t*)(currentrule->next_false_rule))->rule_id)); #endif currentrule = (rule_t *) currentrule->next_false_rule; } if (!currentrule) { - fprintf(stderr, "eval: disaster, we have gone off into the weeds.. panic!\n"); + OPAL_OUTPUT((mca_coll_tuned_stream, "coll:tuned:dynamic_rules:eval_rule Disaster, we have gone off into the weeds.. 
panic!")); exit (-10); } } diff --git a/ompi/mca/coll/tuned/coll_tuned_module.c b/ompi/mca/coll/tuned/coll_tuned_module.c index ee69ea9850..dd1b640218 100644 --- a/ompi/mca/coll/tuned/coll_tuned_module.c +++ b/ompi/mca/coll/tuned/coll_tuned_module.c @@ -253,14 +253,9 @@ const mca_coll_base_module_1_0_0_t * mca_coll_tuned_comm_query(struct ompi_communicator_t *comm, int *priority, struct mca_coll_base_comm_t **data) { - int use_dynamic = -1; - - printf("Tuned query called\n"); - if (OMPI_SUCCESS != mca_base_param_lookup_int(mca_coll_tuned_priority_param, - priority)) { - return NULL; - } + OPAL_OUTPUT((mca_coll_tuned_stream, "coll:tuned:module_tuned query called")); + *priority = mca_coll_tuned_priority; /* * Choose whether to use [intra|inter] decision functions @@ -270,27 +265,20 @@ mca_coll_tuned_comm_query(struct ompi_communicator_t *comm, int *priority, * */ - if (OMPI_SUCCESS != - mca_base_param_lookup_int(mca_coll_tuned_use_dynamic_rules_param, - &use_dynamic)) { - printf("No use_dynamic param found!\n"); - return NULL; - } - if (OMPI_COMM_IS_INTER(comm)) { - if (use_dynamic) { -printf("using inter_dynamic\n"); + if (mca_coll_tuned_use_dynamic_rules) { + OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:module_query using inter_dynamic")); to_use = &inter_dynamic; } else { -printf("using inter_fixed\n"); + OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:module_query using inter_fixed")); to_use = &inter_fixed; } } else { /* is an intra comm */ - if (use_dynamic) { -printf("using intra_dynamic\n"); + if (mca_coll_tuned_use_dynamic_rules) { + OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:module_query using intra_dynamic")); to_use = &intra_dynamic; } else { -printf("using intra_fixed\n"); + OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:module_query using intra_fixed")); to_use = &intra_fixed; } } @@ -307,13 +295,10 @@ mca_coll_tuned_module_init(struct ompi_communicator_t *comm) int size; struct mca_coll_base_comm_t *data; /* fanout parameters */ - int 
tree_fanout_default = 0; - int chain_fanout_default = 0; - int pre_allocate_limit = -1; int pre_allocate = 1; - printf("Tuned init module called.\n"); + OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:module_init called.")); /* This routine will become more complex and might have to be */ /* broken into more sections/function calls */ @@ -347,15 +332,8 @@ mca_coll_tuned_module_init(struct ompi_communicator_t *comm) * */ - if (OMPI_SUCCESS != - mca_base_param_lookup_int(mca_coll_tuned_preallocate_memory_comm_size_limit_param, - &pre_allocate_limit)) { - printf("No pre_allocate param found!\n"); - return NULL; - } - /* if we within the memory/size limit, allow preallocated data */ - if (size<=pre_allocate_limit) { + if (size<=mca_coll_tuned_preallocate_memory_comm_size_limit) { data = malloc(sizeof(struct mca_coll_base_comm_t) + (sizeof(ompi_request_t *) * size * 2)); @@ -379,23 +357,10 @@ mca_coll_tuned_module_init(struct ompi_communicator_t *comm) * guess the initial topologies to use rank 0 as root */ - /* get default fanouts is made available via the MCA */ - if (OMPI_SUCCESS != - mca_base_param_lookup_int(mca_coll_tuned_init_tree_fanout_param, - &tree_fanout_default)) { - printf("warning: no mca_coll_tuned_init_tree_fanout_param found?\n"); - } - if (OMPI_SUCCESS != - mca_base_param_lookup_int(mca_coll_tuned_init_chain_fanout_param, - &chain_fanout_default)) { - printf("warning: no mca_coll_tuned_init_chain_fanout_param found?\n"); - } - - /* general n fan out tree */ - data->cached_ntree = ompi_coll_tuned_topo_build_tree (tree_fanout_default, comm, 0); + data->cached_ntree = ompi_coll_tuned_topo_build_tree (mca_coll_tuned_init_tree_fanout, comm, 0); data->cached_ntree_root = 0; - data->cached_ntree_fanout = tree_fanout_default; + data->cached_ntree_fanout = mca_coll_tuned_init_tree_fanout; /* binary tree */ data->cached_bintree = ompi_coll_tuned_topo_build_tree (2, comm, 0); @@ -413,9 +378,9 @@ mca_coll_tuned_module_init(struct ompi_communicator_t *comm) * will 
probably change how we cache this later, for now a midsize * GEF */ - data->cached_chain = ompi_coll_tuned_topo_build_chain (chain_fanout_default, comm, 0); + data->cached_chain = ompi_coll_tuned_topo_build_chain (mca_coll_tuned_init_chain_fanout, comm, 0); data->cached_chain_root = 0; - data->cached_chain_fanout = chain_fanout_default; + data->cached_chain_fanout = mca_coll_tuned_init_chain_fanout; /* standard pipeline */ data->cached_pipeline = ompi_coll_tuned_topo_build_chain (1, comm, 0); @@ -425,7 +390,7 @@ mca_coll_tuned_module_init(struct ompi_communicator_t *comm) comm->c_coll_selected_data = data; - printf("Tuned looks like it is in use :)\n"); + OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:module_init Tuned is in use")); return to_use; } diff --git a/ompi/mca/coll/tuned/coll_tuned_reduce.c b/ompi/mca/coll/tuned/coll_tuned_reduce.c index 0fd681721a..285ab7927c 100644 --- a/ompi/mca/coll/tuned/coll_tuned_reduce.c +++ b/ompi/mca/coll/tuned/coll_tuned_reduce.c @@ -277,7 +277,7 @@ int mca_coll_tuned_reduce_intra_chain( void *sendbuf, void *recvbuf, int count, for (segindex = 0; segindex < num_segments; segindex++) { if (segindex < num_segments-1) sendcount = segcount; else sendcount = count - segindex*segcount; - ret = MCA_PML_CALL( send(sendbuf+segindex*realsegsize, sendcount, + ret = MCA_PML_CALL( send((char*)sendbuf+segindex*realsegsize, sendcount, datatype, chain->chain_prev, MCA_COLL_BASE_TAG_REDUCE, MCA_PML_BASE_SEND_STANDARD, comm) ); if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } @@ -288,8 +288,7 @@ int mca_coll_tuned_reduce_intra_chain( void *sendbuf, void *recvbuf, int count, /* error handler */ error_hndl: - opal_output( 0, "ERROR_HNDL: node %d file %s line %d error %d\n", - rank, __FILE__, line, ret ); + OPAL_OUTPUT (( mca_coll_tuned_stream, "ERROR_HNDL: node %d file %s line %d error %d\n", rank, __FILE__, line, ret )); if( inbuf != NULL ) { if( inbuf[0] != NULL ) free(inbuf[0]); if( inbuf[1] != NULL ) free(inbuf[1]); diff --git 
a/ompi/mca/coll/tuned/coll_tuned_reduce_decision_dynamic.c b/ompi/mca/coll/tuned/coll_tuned_reduce_decision_dynamic.c index 5affb162d8..becd2e93a2 100644 --- a/ompi/mca/coll/tuned/coll_tuned_reduce_decision_dynamic.c +++ b/ompi/mca/coll/tuned/coll_tuned_reduce_decision_dynamic.c @@ -49,7 +49,7 @@ int mca_coll_tuned_reduce_intra_dec_dynamic( void *sendbuf, void *recvbuf, int contig; int dsize; - printf("mca_coll_tuned_reduce_intra_dec_dynamic\n"); + OPAL_OUTPUT((mca_coll_tuned_stream, "coll:tuned:reduce_intra_dec_dynamic")); size = ompi_comm_size(comm); rank = ompi_comm_rank(comm); diff --git a/ompi/mca/coll/tuned/coll_tuned_reduce_decision_fixed.c b/ompi/mca/coll/tuned/coll_tuned_reduce_decision_fixed.c index 6c511f0b18..2727204ca6 100644 --- a/ompi/mca/coll/tuned/coll_tuned_reduce_decision_fixed.c +++ b/ompi/mca/coll/tuned/coll_tuned_reduce_decision_fixed.c @@ -48,7 +48,7 @@ int mca_coll_tuned_reduce_intra_dec_fixed( void *sendbuf, void *recvbuf, int contig; int dsize; - printf("mca_coll_tuned_reduce_intra_dec_fixed\n"); + OPAL_OUTPUT((mca_coll_tuned_stream, "mca_coll_tuned_reduce_intra_dec_fixed")); size = ompi_comm_size(comm); rank = ompi_comm_rank(comm); diff --git a/ompi/mca/coll/tuned/coll_tuned_topo.c b/ompi/mca/coll/tuned/coll_tuned_topo.c index b5023b82cf..e8d830e596 100644 --- a/ompi/mca/coll/tuned/coll_tuned_topo.c +++ b/ompi/mca/coll/tuned/coll_tuned_topo.c @@ -23,6 +23,7 @@ #include "mca/coll/coll.h" #include "mca/coll/base/coll_tags.h" #include "mca/pml/pml.h" +#include "coll_tuned.h" #include "coll_tuned_topo.h" /* @@ -71,14 +72,14 @@ ompi_coll_tuned_topo_build_tree( int fanout, int i; ompi_coll_tree_t* tree; - printf("Building tuned topo tree: fo %d rt %d\n", fanout, root); + OPAL_OUTPUT((mca_coll_tuned_stream, "coll:tuned:topo_build_tree Building fo %d rt %d", fanout, root)); if (fanout<1) { - printf("ompi_coll_tuned_topo_build_tree: invalid fanout %d\n", fanout); + OPAL_OUTPUT((mca_coll_tuned_stream, "coll:tuned:topo_build_tree invalid fanout %d", 
fanout)); return NULL; } if (fanout>MAXTREEFANOUT) { - printf("ompi_coll_tuned_topo_build_tree: invalid fanout %d bigger than max %d\n", fanout, MAXTREEFANOUT); + OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:topo_build_tree invalid fanout %d bigger than max %d", fanout, MAXTREEFANOUT)); return NULL; } @@ -90,8 +91,7 @@ ompi_coll_tuned_topo_build_tree( int fanout, tree = (ompi_coll_tree_t*)malloc(sizeof(ompi_coll_tree_t)); if (!tree) { - printf("PANIC:ompi_coll_tuned_topo_build_tree:out of memory\n"); - fflush(stdout); + OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:topo_build_tree PANIC::out of memory")); return NULL; } @@ -190,7 +190,7 @@ ompi_coll_tuned_topo_build_bmtree( struct ompi_communicator_t* comm, ompi_coll_tree_t *bmtree; int i; - printf("Building tuned topo bmtree: rt %d\n", root); + OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:topo:build_bmtree rt %d", root)); /* * Get size and rank of the process in this communicator @@ -202,8 +202,7 @@ ompi_coll_tuned_topo_build_bmtree( struct ompi_communicator_t* comm, bmtree = (ompi_coll_tree_t*)malloc(sizeof(ompi_coll_tree_t)); if (!bmtree) { - printf("PANIC:ompi_coll_tuned_topo_build_bmtree:out of memory\n"); - fflush(stdout); + OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:topo:build_bmtree PANIC out of memory")); return NULL; } @@ -234,7 +233,7 @@ ompi_coll_tuned_topo_build_bmtree( struct ompi_communicator_t* comm, remote += root; if( remote >= size ) remote -= size; if (childs==MAXTREEFANOUT) { - printf("ompi_coll_tuned_topo_build_bmtree: max fanout incorrect %d needed %d\n", MAXTREEFANOUT, childs); + OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:topo:build_bmtree max fanout incorrect %d needed %d", MAXTREEFANOUT, childs)); return NULL; } bmtree->tree_next[childs] = remote; @@ -258,7 +257,7 @@ ompi_coll_tuned_topo_build_chain( int fanout, int mark,head,len; ompi_coll_chain_t *chain; - printf("Building tuned topo chain: fo %d rt %d\n", fanout, root); + 
OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:topo:build_chain fo %d rt %d", fanout, root)); /* * Get size and rank of the process in this communicator @@ -270,7 +269,7 @@ ompi_coll_tuned_topo_build_chain( int fanout, return NULL; } if (fanout>MAXTREEFANOUT) { - printf("ompi_coll_tuned_topo_build_chain: invalid fanout %d bigger than max %d\n", fanout, MAXTREEFANOUT); + OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:topo:build_chain invalid fanout %d bigger than max %d", fanout, MAXTREEFANOUT)); return NULL; } @@ -279,7 +278,7 @@ ompi_coll_tuned_topo_build_chain( int fanout, */ chain = (ompi_coll_chain_t*)malloc( sizeof(ompi_coll_chain_t) ); if (!chain) { - printf("PANIC:ompi_coll_tuned_topo_build_chain:out of memory\n"); + OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:topo:build_chain PANIC out of memory")); fflush(stdout); return NULL; } @@ -421,24 +420,22 @@ int ompi_coll_tuned_topo_destroy_chain( ompi_coll_chain_t** chain ) int ompi_coll_tuned_topo_dump_tree (ompi_coll_tree_t* tree, int rank) { int i; -printf("%1d tree root %d fanout %d BM %1d nextsize %d prev %d\n", rank, - tree->tree_root, tree->tree_bmtree, tree->tree_fanout, tree->tree_nextsize, tree->tree_prev ); +OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:topo:topo_dump_tree %1d tree root %d fanout %d BM %1d nextsize %d prev %d", rank, + tree->tree_root, tree->tree_fanout, tree->tree_bmtree, tree->tree_nextsize, tree->tree_prev)); if (tree->tree_nextsize) { - for (i=0;i<tree->tree_nextsize;i++) printf("[%1d] %d ", i, tree->tree_next[i]); + for (i=0;i<tree->tree_nextsize;i++) OPAL_OUTPUT((mca_coll_tuned_stream,"[%1d] %d", i, tree->tree_next[i])); } -printf("\n"); return (0); } int ompi_coll_tuned_topo_dump_chain (ompi_coll_chain_t* chain, int rank) { int i; -printf("%1d chain root %d fanout %d nextsize %d prev %d\n", rank, - chain->chain_root, chain->chain_numchain, chain->chain_nextsize, chain->chain_prev ); +OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:topo:topo_dump_chain %1d chain root %d fanout %d nextsize 
%d prev %d\n", rank, + chain->chain_root, chain->chain_numchain, chain->chain_nextsize, chain->chain_prev)); if (chain->chain_nextsize) { - for (i=0;i<chain->chain_nextsize;i++) printf("[%1d] %d ", i, chain->chain_next[i]); + for (i=0;i<chain->chain_nextsize;i++) OPAL_OUTPUT((mca_coll_tuned_stream,"[%1d] %d ", i, chain->chain_next[i])); } -printf("\n"); return (0); } diff --git a/ompi/mca/coll/tuned/coll_tuned_util.c b/ompi/mca/coll/tuned/coll_tuned_util.c index 9b0bbb22b2..4b0fc5037d 100644 --- a/ompi/mca/coll/tuned/coll_tuned_util.c +++ b/ompi/mca/coll/tuned/coll_tuned_util.c @@ -55,7 +55,7 @@ ompi_status_public_t statuses[2]; return (MPI_SUCCESS); error_handler: - fprintf(stderr,"%s:%d: Error %d occurred\n",__FILE__,line,err); + OPAL_OUTPUT ((mca_coll_tuned_stream, "%s:%d: Error %d occurred\n",__FILE__,line,err)); return (err); }