1
1

Last cleanup before adding the last two methods and the final crossover points.

- new mca param calls
- move printfs to OPAL_OUTPUT

This commit was SVN r7692.
Этот коммит содержится в:
Graham Fagg 2005-10-11 18:51:03 +00:00
родитель b42d4ac780
Коммит 607bdf51b6
18 изменённых файлов: 148 добавлений и 202 удалений

Просмотреть файл

@ -37,7 +37,14 @@ extern "C" {
*/
/* The tuned collective component descriptor. */
OMPI_COMP_EXPORT extern const mca_coll_base_component_1_0_0_t mca_coll_tuned_component;
/* Handle of the "priority" MCA parameter. */
OMPI_COMP_EXPORT extern int mca_coll_tuned_priority_param;
/* Output stream id passed to the OPAL_OUTPUT calls of this component. */
OMPI_COMP_EXPORT extern int mca_coll_tuned_stream;
/* Values of the component's MCA parameters. */
OMPI_COMP_EXPORT extern int mca_coll_tuned_priority;
OMPI_COMP_EXPORT extern int mca_coll_tuned_preallocate_memory_comm_size_limit;
OMPI_COMP_EXPORT extern int mca_coll_tuned_use_dynamic_rules;
OMPI_COMP_EXPORT extern int mca_coll_tuned_init_tree_fanout;
OMPI_COMP_EXPORT extern int mca_coll_tuned_init_chain_fanout;
/*

Просмотреть файл

@ -31,36 +31,6 @@
#include <sys/types.h>
#include <unistd.h>
/* temp debug routines */
/*
 * Debug helper: prints a one-line summary of a buffer interpreted as an
 * array of int to stdout.
 *
 * ptr      start of the buffer (read as int values)
 * count    number of int elements in the buffer
 * comment  optional label printed after the rank, may be NULL
 * rank     rank printed at the start of the line
 *
 * At most the first five elements are printed; when the buffer is longer
 * the last element is appended after " ... ".  A negative count only
 * produces a "cnt <count>?" line.  Always returns 0.
 */
static int dump_buf_int (char* ptr, int count, char *comment, int rank);

static int dump_buf_int (char* ptr, int count, char *comment, int rank)
{
    const int *values = (const int *) ptr;
    int shown;
    int idx;

    printf("%1d ", rank);
    if (NULL != comment) {
        printf("%s ", comment);
    }

    /* a negative count is reported and nothing else is printed */
    if (count < 0) {
        printf("cnt %d?\n", count);
        return 0;
    }

    /* cap the number of leading elements that get printed */
    shown = (count > 5) ? 5 : count;

    printf("Cnt %1d ", count);
    for (idx = 0; idx < shown; ++idx) {
        printf("%1d [%1d] ", idx, values[idx]);
    }

    /* the buffer was truncated: also show its final element */
    if (shown != count) {
        printf(" ... %1d [%1d]", count - 1, values[count - 1]);
    }

    printf("\n");
    return 0;
}
int mca_coll_tuned_alltoall_intra_pairwise(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void* rbuf, int rcount,
@ -78,7 +48,7 @@ int mca_coll_tuned_alltoall_intra_pairwise(void *sbuf, int scount,
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
printf("mca_coll_tuned_alltoall_intra_pairwise rank %d\n", rank);
OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:alltoall_intra_pairwise rank %d", rank));
err = ompi_ddt_get_extent (sdtype, &lb, &sext);
@ -109,8 +79,7 @@ int mca_coll_tuned_alltoall_intra_pairwise(void *sbuf, int scount,
return MPI_SUCCESS;
err_hndl:
fprintf(stderr,"%s:%4d\tError occurred %d, rank %2d\n",
__FILE__,line,err,rank);
OPAL_OUTPUT((mca_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", __FILE__,line,err,rank));
return err;
}
@ -137,7 +106,7 @@ int mca_coll_tuned_alltoall_intra_bruck(void *sbuf, int scount,
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
printf("mca_coll_tuned_alltoall_intra_bruck rank %d\n", rank);
OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:alltoall_intra_bruck rank %d", rank));
err = ompi_ddt_get_extent (sdtype, &lb, &sext);
@ -268,8 +237,7 @@ int mca_coll_tuned_alltoall_intra_bruck(void *sbuf, int scount,
return OMPI_SUCCESS;
err_hndl:
fprintf(stderr,"%s:%4d\tError occurred %d, rank %2d\n",
__FILE__,line,err,rank);
OPAL_OUTPUT((mca_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", __FILE__,line,err,rank));
if (tmpbuf != NULL) free(tmpbuf);
if (packbuf != NULL) free(packbuf);
if (weallocated) {
@ -296,7 +264,7 @@ int mca_coll_tuned_alltoall_intra_two_procs(void *sbuf, int scount,
rank = ompi_comm_rank(comm);
printf("mca_coll_tuned_alltoall_intra_two_procs rank %d\n", rank);
OPAL_OUTPUT((mca_coll_tuned_stream,"mca_coll_tuned_alltoall_intra_two_procs rank %d", rank));
err = ompi_ddt_get_extent (sdtype, &lb, &sext);
if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; }
@ -327,8 +295,7 @@ int mca_coll_tuned_alltoall_intra_two_procs(void *sbuf, int scount,
return MPI_SUCCESS;
err_hndl:
fprintf(stderr,"%s:%4d\tError occurred %d, rank %2d\n",
__FILE__,line,err,rank);
OPAL_OUTPUT((mca_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", __FILE__,line,err,rank));
return err;
}
@ -350,7 +317,7 @@ int mca_coll_tuned_alltoall_intra_linear(void *sbuf, int scount,
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
printf("mca_coll_tuned_alltoall_intra_linear rank %d\n", rank);
OPAL_OUTPUT((mca_coll_tuned_stream,"mca_coll_tuned_alltoall_intra_linear rank %d", rank));
err = ompi_ddt_get_extent (sdtype, &lb, &sext);
@ -360,8 +327,7 @@ int mca_coll_tuned_alltoall_intra_linear(void *sbuf, int scount,
if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; }
err_hndl:
fprintf(stderr,"%s:%4d\tError occurred %d, rank %2d\n",
__FILE__,line,err,rank);
OPAL_OUTPUT((mca_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", __FILE__,line,err,rank));
return err;
}

Просмотреть файл

@ -49,7 +49,7 @@ int mca_coll_tuned_alltoall_intra_dec_dynamic(void *sbuf, int scount,
int contig;
int dsize;
printf("mca_coll_tuned_alltoall_intra_dec_dynamic\n");
OPAL_OUTPUT((mca_coll_tuned_stream, "mca_coll_tuned_alltoall_intra_dec_dynamic"));
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);

Просмотреть файл

@ -49,7 +49,7 @@ int mca_coll_tuned_alltoall_intra_dec_fixed(void *sbuf, int scount,
int contig;
int dsize;
printf("mca_coll_tuned_alltoall_intra_dec_fixed\n");
OPAL_OUTPUT((mca_coll_tuned_stream, "mca_coll_tuned_alltoall_intra_dec_fixed"));
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);

Просмотреть файл

@ -42,7 +42,7 @@ int mca_coll_tuned_barrier_intra_doublering(struct ompi_communicator_t *comm)
rank = ompi_comm_rank(comm);
size = ompi_comm_size(comm);
printf("mca_coll_tuned_bcast_intra_doublering rank %d\n", rank);
OPAL_OUTPUT((mca_coll_tuned_stream,"mca_coll_tuned_bcast_intra_doublering rank %d", rank));
left = ((rank-1)%size);
right = ((rank+1)%size);
@ -87,7 +87,7 @@ int mca_coll_tuned_barrier_intra_doublering(struct ompi_communicator_t *comm)
return MPI_SUCCESS;
err_hndl:
fprintf(stderr,"%s:%4d\tError occurred %d\n",__FILE__,line,err);
OPAL_OUTPUT((mca_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", __FILE__,line,err,rank));
return err;
}
@ -100,7 +100,7 @@ int mca_coll_tuned_barrier_intra_recursivedoubling(struct ompi_communicator_t *c
rank = ompi_comm_rank(comm);
size = ompi_comm_size(comm);
printf("mca_coll_tuned_bcast_intra_recursivedoubling rank %d\n", rank);
OPAL_OUTPUT((mca_coll_tuned_stream,"mca_coll_tuned_bcast_intra_recursivedoubling rank %d", rank));
/* do nearest power of 2 less than size calc */
adjsize = 1;
@ -162,7 +162,7 @@ int mca_coll_tuned_barrier_intra_recursivedoubling(struct ompi_communicator_t *c
return MPI_SUCCESS;
err_hndl:
fprintf(stderr,"%s:%4d\tError occurred %d\n",__FILE__,line,err);
OPAL_OUTPUT((mca_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", __FILE__,line,err,rank));
return err;
}
@ -175,7 +175,7 @@ int mca_coll_tuned_barrier_intra_bruck(struct ompi_communicator_t *comm)
rank = ompi_comm_rank(comm);
size = ompi_comm_size(comm);
printf("mca_coll_tuned_bcast_intra_bruck rank %d\n", rank);
OPAL_OUTPUT((mca_coll_tuned_stream,"mca_coll_tuned_bcast_intra_bruck rank %d", rank));
/* exchange data with rank-2^k and rank+2^k */
for (distance = 1; distance < size; distance <<= 1) {
@ -190,7 +190,7 @@ int mca_coll_tuned_barrier_intra_bruck(struct ompi_communicator_t *comm)
return MPI_SUCCESS;
err_hndl:
fprintf(stderr,"%s:%4d\tError occurred %d\n",__FILE__,line,err);
OPAL_OUTPUT((mca_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", __FILE__,line,err,rank));
return err;
}
@ -202,7 +202,7 @@ int mca_coll_tuned_barrier_intra_two_procs(struct ompi_communicator_t *comm)
int err=0;
rank = ompi_comm_rank(comm);
printf("mca_coll_tuned_bcast_intra_two_procs rank %d\n", rank);
OPAL_OUTPUT((mca_coll_tuned_stream,"mca_coll_tuned_bcast_intra_two_procs rank %d", rank));
if (0==rank) {
err = coll_tuned_sendrecv (NULL, 0, MPI_BYTE, 1, MCA_COLL_BASE_TAG_BARRIER,

Просмотреть файл

@ -44,7 +44,7 @@ int mca_coll_tuned_barrier_intra_dec_dynamic(struct ompi_communicator_t *comm)
int contig;
int dsize;
printf("mca_coll_tuned_barrier_intra_dec_dynamic\n");
OPAL_OUTPUT((mca_coll_tuned_stream,"mca_coll_tuned_barrier_intra_dec_dynamic"));
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);

Просмотреть файл

@ -44,7 +44,7 @@ int mca_coll_tuned_barrier_intra_dec_fixed(struct ompi_communicator_t *comm)
int contig;
int dsize;
printf("mca_coll_tuned_barrier_intra_dec_fixed\n");
OPAL_OUTPUT((mca_coll_tuned_stream, "mca_coll_tuned_barrier_intra_dec_fixed"));
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);

Просмотреть файл

@ -49,7 +49,7 @@ mca_coll_tuned_bcast_intra_chain ( void *buff, int count,
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
printf("mca_coll_tuned_bcast_intra_chain rank %d root %d\n", rank, root);
OPAL_OUTPUT((mca_coll_tuned_stream,"mca_coll_tuned_bcast_intra_chain rank %d root %d", rank, root));
if( size == 1 ) {
return MPI_SUCCESS;
@ -104,7 +104,7 @@ mca_coll_tuned_bcast_intra_chain ( void *buff, int count,
/* set the buffer pointer */
tmpbuf = (char *)buff;
/* printf("%1d chain root %d num_segments %d\n", rank, root, num_segments); */
/* OPAL_OUTPUT((mca_coll_tuned_stream,("%1d chain root %d num_segments %d\n", rank, root, num_segments); */
/* root code */
if( rank == root ) {
@ -173,7 +173,7 @@ mca_coll_tuned_bcast_intra_chain ( void *buff, int count,
chain->chain_next[i],
MCA_COLL_BASE_TAG_BCAST,
MCA_PML_BASE_SEND_STANDARD, comm));
if (err != MPI_SUCCESS) printf("sendcount %d i %d chain_next %d \n", sendcount, i, chain->chain_next[i]);
if (err != MPI_SUCCESS) OPAL_OUTPUT((mca_coll_tuned_stream,"sendcount %d i %d chain_next %d", sendcount, i, chain->chain_next[i]));
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
} /* end of for each child */
}
@ -197,7 +197,7 @@ mca_coll_tuned_bcast_intra_chain ( void *buff, int count,
return (MPI_SUCCESS);
error_hndl:
fprintf(stderr,"%s:%d: Error %d occurred\n",__FILE__,line,err);
OPAL_OUTPUT((mca_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", __FILE__,line,err,rank));
return (err);
}
@ -213,7 +213,7 @@ mca_coll_tuned_bcast_intra_pipeline ( void *buffer,
{
int rank; /* remove when removing print statement */
rank = ompi_comm_rank(comm); /* remove when removing print statement */
printf("mca_coll_tuned_bcast_intra_pipeline rank %d root %d\n", rank, root);
OPAL_OUTPUT((mca_coll_tuned_stream,"mca_coll_tuned_bcast_intra_pipeline rank %d root %d", rank, root));
return mca_coll_tuned_bcast_intra_chain ( buffer, count, datatype, root, comm,
segsize, 1 );
@ -247,7 +247,7 @@ mca_coll_tuned_bcast_intra_split_bintree ( void* buffer,
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
printf("mca_coll_tuned_bcast_intra_split_bintree rank %d root %d\n", rank, root);
OPAL_OUTPUT((mca_coll_tuned_stream,"mca_coll_tuned_bcast_intra_split_bintree rank %d root %d", rank, root));
if (size == 1) {
return MPI_SUCCESS;
@ -470,7 +470,7 @@ mca_coll_tuned_bcast_intra_split_bintree ( void* buffer,
return (MPI_SUCCESS);
error_hndl:
fprintf(stderr,"[%d]%s:%d: Error %d occurred\n",rank,__FILE__,line,err);
OPAL_OUTPUT((mca_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", __FILE__,line,err,rank));
return (err);
}
@ -502,7 +502,7 @@ mca_coll_tuned_bcast_intra_bintree ( void* buffer,
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
printf("mca_coll_tuned_bcast_intra_bintree rank %d root %d\n", rank, root);
OPAL_OUTPUT((mca_coll_tuned_stream,"mca_coll_tuned_bcast_intra_bintree rank %d root %d", rank, root));
if (size == 1) {
return MPI_SUCCESS;
@ -677,7 +677,8 @@ mca_coll_tuned_bcast_intra_bintree ( void* buffer,
return (MPI_SUCCESS);
error_hndl:
fprintf(stderr,"[%d]%s:%d: Error %d occurred\n",rank,__FILE__,line,err);
OPAL_OUTPUT((mca_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", __FILE__,line,err,rank));
return (err);
}

Просмотреть файл

@ -46,7 +46,7 @@ int mca_coll_tuned_bcast_intra_dec_dynamic(void *buff, int count,
int contig;
int dsize;
printf("mca_coll_tuned_bcast_intra_dec_dynamic\n");
OPAL_OUTPUT((mca_coll_tuned_stream, "coll:tuned:bcast_intra_dec_dynamic"));
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);

Просмотреть файл

@ -47,7 +47,7 @@ int mca_coll_tuned_bcast_intra_dec_fixed(void *buff, int count,
int contig;
int dsize;
printf("mca_coll_tuned_bcast_intra_dec_fixed\n");
OPAL_OUTPUT((mca_coll_tuned_stream,"mca_coll_tuned_bcast_intra_dec_fixed"));
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);

Просмотреть файл

@ -36,11 +36,12 @@ const char *mca_coll_tuned_component_version_string =
/*
* Global variable
*/
/* Handles returned by mca_base_param_register_int() in tuned_open();
 * each stays -1 until tuned_open() has assigned it. */
int mca_coll_tuned_priority_param = -1;
int mca_coll_tuned_preallocate_memory_comm_size_limit_param = -1;
int mca_coll_tuned_use_dynamic_rules_param = -1;
int mca_coll_tuned_init_tree_fanout_param = -1;
int mca_coll_tuned_init_chain_fanout_param = -1;
/* Output stream id used by the OPAL_OUTPUT calls; tuned_open() replaces the
 * -1 with the result of opal_output_open() when base_verbose is positive. */
int mca_coll_tuned_stream = -1;
/* Initial values below are passed to mca_base_param_reg_int() in tuned_open()
 * as the defaults of the corresponding MCA parameters. */
/* Copied into *priority by mca_coll_tuned_comm_query(). */
int mca_coll_tuned_priority = 30;
/* Communicator sizes up to this limit get the preallocated request buffer in
 * mca_coll_tuned_module_init(). */
int mca_coll_tuned_preallocate_memory_comm_size_limit = (32*1024);
/* Non-zero selects the dynamic decision functions, zero the fixed ones. */
int mca_coll_tuned_use_dynamic_rules = 0;
/* Fanout of the tree topology cached at module init. */
int mca_coll_tuned_init_tree_fanout = 4;
/* Fanout of the chain topology cached at module init. */
int mca_coll_tuned_init_chain_fanout = 4;
/*
* Local function
*/
@ -93,43 +94,54 @@ const mca_coll_base_component_1_0_0_t mca_coll_tuned_component = {
/*
 * Component open hook.
 *
 * Registers the MCA parameters of the tuned collective component and, when
 * the "coll" framework's "base_verbose" parameter is found and positive,
 * opens the output stream used by the OPAL_OUTPUT calls.
 *
 * Every parameter is registered twice: through
 * mca_base_param_register_int(), whose returned handle is kept in the
 * matching *_param global, and through the newer mca_base_param_reg_int()
 * interface, which is given the matching global both as default value and
 * as storage address.
 *
 * Returns OMPI_SUCCESS.
 */
static int tuned_open(void)
{
    int param;

    printf("Tuned_open called\n");

    /* Use a low priority, but allow other components to be lower */
    mca_coll_tuned_priority_param =
        mca_base_param_register_int("coll", "tuned", "priority", NULL, 30);
    mca_base_param_reg_int(&mca_coll_tuned_component.collm_version,
                           "priority",
                           "Priority of the tuned coll component",
                           false, false, mca_coll_tuned_priority,
                           &mca_coll_tuned_priority);

    /* parameter for pre-allocated memory requests etc */
    mca_coll_tuned_preallocate_memory_comm_size_limit_param =
        mca_base_param_register_int("coll", "tuned", "pre_allocate_memory", NULL, (32*1024)+1);
    mca_base_param_reg_int(&mca_coll_tuned_component.collm_version,
                           "pre_allocate_memory_comm_size_limit",
                           "Size of communicator were we stop pre-allocating memory for the fixed internal buffer used for message requests etc that is hung off the communicator data segment. I.e. if you have a 100'000 nodes you might not want to pre-allocate 200'000 request handle slots per communicator instance!",
                           false, false, mca_coll_tuned_preallocate_memory_comm_size_limit,
                           &mca_coll_tuned_preallocate_memory_comm_size_limit);

    /* by default DISABLE dynamic rules and force the use of fixed [if] rules */
    mca_coll_tuned_use_dynamic_rules_param =
        mca_base_param_register_int("coll", "tuned", "use_dynamic_rules",
                                    NULL, 0);
    /* default and storage must be the use_dynamic_rules variable itself;
       passing the preallocation limit here would overwrite that limit and
       leave mca_coll_tuned_use_dynamic_rules untouched */
    mca_base_param_reg_int(&mca_coll_tuned_component.collm_version,
                           "use_dynamic_rules",
                           "Switch used to decide if we use static (if statements) or dynamic (built at runtime) decision function rules",
                           false, false, mca_coll_tuned_use_dynamic_rules,
                           &mca_coll_tuned_use_dynamic_rules);

    /* some initial guesses at topology parameters */
    mca_coll_tuned_init_tree_fanout_param =
        mca_base_param_register_int("coll", "tuned", "init_tree_fanout",
                                    NULL, 4);
    mca_base_param_reg_int(&mca_coll_tuned_component.collm_version,
                           "init_tree_fanout",
                           "Inital fanout used in the tree topologies for each communicator. This is only an initial guess, if a tuned collective needs a different fanout for an operation, it build it dynamically. This parameter is only for the first guess and might save a little time",
                           false, false, mca_coll_tuned_init_tree_fanout,
                           &mca_coll_tuned_init_tree_fanout);

    mca_coll_tuned_init_chain_fanout_param =
        mca_base_param_register_int("coll", "tuned", "init_chain_fanout",
                                    NULL, 4);
    mca_base_param_reg_int(&mca_coll_tuned_component.collm_version,
                           "init_chain_fanout",
                           "Inital fanout used in the chain (fanout followed by pipeline) topologies for each communicator. This is only an initial guess, if a tuned collective needs a different fanout for an operation, it build it dynamically. This parameter is only for the first guess and might save a little time",
                           false, false, mca_coll_tuned_init_chain_fanout,
                           &mca_coll_tuned_init_chain_fanout);

    /* only open an output stream when the coll framework is verbose */
    param = mca_base_param_find("coll", NULL, "base_verbose");
    if (param >= 0) {
        /* start from 0 so a failed lookup cannot leave the value undefined */
        int verbose = 0;

        mca_base_param_lookup_int(param, &verbose);
        if (verbose > 0) {
            mca_coll_tuned_stream = opal_output_open(NULL);
        }
    }

    OPAL_OUTPUT((mca_coll_tuned_stream, "coll:tuned:component_open: done!"));
    return OMPI_SUCCESS;
}

Просмотреть файл

@ -38,7 +38,7 @@ rule_t* ptr;
ptr = (rule_t*) calloc (1, sizeof(rule_t));
if (!ptr) {
fprintf(stderr,"calloc on mk_rule failed!\n");
OPAL_OUTPUT((mca_colL_tuned_stream, "coll:tuned:dynamic_rules:mk_rule calloc on mk_rule failed!"));
exit (-1);
}
@ -56,17 +56,17 @@ condition_t* ptr;
condition_t* last;
if (!rule) {
fprintf(stderr,"rule given in add_condition_to_rule is NULL?!\n");
OPAL_OUTPUT((mca_colL_tuned_stream, "coll:tuned:dynamic_rules:mk_and_add_condition_to_rule rule given is NULL?!\n"));
return (-2);
}
if (param>=PARAMS) {
fprintf(stderr,"param given in add_condition_to_rule is %d?!\n", param);
OPAL_OUTPUT((mca_colL_tuned_stream, "coll:tuned:dynamic_rules:mk_and_add_condition_to_rule param given is %d?!\n", param));
return (-3);
}
ptr = (condition_t*) calloc (1, sizeof(condition_t));
if (!ptr) {
fprintf(stderr,"calloc on add_condition_to_rule failed!\n");
if (!ptr) {
OPAL_OUTPUT((mca_colL_tuned_stream, "coll:tuned:dynamic_rules:mk_and_add_condition_to_rule calloc failed!\n"));
return (-5);
}
@ -99,25 +99,25 @@ int set_rule_links (rule_t * rule, ifp true_fptr, int* true_extraargs,
{
if (!rule) {
fprintf(stderr,"rule given in set_rule_links is NULL?!\n");
OPAL_OUTPUT((mca_colL_tuned_stream, "coll:tuned:dynamic_rules:set_rule_links rule is NULL?"));
return (-2);
}
/* check rule results.. we must have one set for true and one for false */
if ((true_fptr)&&(true_rule)) {
fprintf(stderr,"BAD. Two links for TRUE on rule %d!\n", rule->rule_id);
OPAL_OUTPUT((mca_colL_tuned_stream, "coll:tuned:dynamic_rules:set_rule_links BAD. Two links for TRUE on rule %d!", rule->rule_id));
return (-6);
}
if ((false_fptr)&&(false_rule)) {
fprintf(stderr,"BAD. Two links for FALSE on rule %d!\n", rule->rule_id);
OPAL_OUTPUT((mca_colL_tuned_stream, "coll:tuned:dynamic_rules:set_rule_links BAD. Two links for FALSE on rule %d!", rule->rule_id));
return (-7);
}
if ((!true_fptr)&&(!true_rule)) {
fprintf(stderr,"BAD. NO links for TRUE on rule %d!\n", rule->rule_id);
OPAL_OUTPUT((mca_colL_tuned_stream, "coll:tuned:dynamic_rules:set_rule_links BAD. NO links for TRUE on rule %d!", rule->rule_id));
return (-8);
}
if ((!false_fptr)&&(!false_rule)) {
fprintf(stderr,"BAD. NO links for FALSE on rule %d!\n", rule->rule_id);
OPAL_OUTPUT((mca_colL_tuned_stream, "coll:tuned:dynamic_rules:set_rule_links BAD. NO links for FALSE on rule %d!", rule->rule_id));
return (-9);
}
@ -141,7 +141,7 @@ condition_t* next;
int i;
if (!rule) {
fprintf(stderr,"rule given in free_rule is NULL?!\n");
OPAL_OUTPUT((mca_colL_tuned_stream, "coll:tuned:dynamic_rules:free_rule rule is NULL?"));
return (-2);
}
@ -169,15 +169,15 @@ condition_t* currentcond;
int true=1;
if (!rule) {
fprintf(stderr,"rule given in eval_rule is NULL?!\n");
OPAL_OUTPUT((mca_colL_tuned_stream, "coll:tuned:dynamic_rules:eval_rule rule given is NULL?!"));
return (-2);
}
/* first special case is a very fast path... sorta not really grr */
if (!rule->nconditions) {
#ifdef VERBOSE
printf("Rule %d has no conditions so forcing first available\n",
rule->rule_id);
OPAL_OUTPUT((mca_colL_tuned_stream, "coll:tuned:dynamic_rules:eval_rule Rule %d has no conditions so forcing first available",
rule->rule_id));
#endif /* VERBOSE */
*fptr = rule->true_fptr;
return (0);
@ -190,7 +190,7 @@ currentrule = rule;
while (currentrule) { /* rules to evaluate */
#ifdef RULEVERBOSE
printf("Eval Rule %d ", currentrule->rule_id);
OPAL_OUTPUT((mca_colL_tuned_stream, "coll:tuned:dynamic_rules:eval_rule evaluating rule %d ", currentrule->rule_id));
#endif
/* eval each of the current rules conditions */
@ -208,7 +208,7 @@ while (currentrule) { /* rules to evaluate */
while ((currentcond)&&(true)) { /* while conditions to eval */
#ifdef RULEVERBOSE
printf("Eval Cond %d ", currentcond->cond_id);
OPAL_OUTPUT((mca_colL_tuned_stream, "coll:tuned:dynamic_rules:eval_rule eval cond %d ", currentcond->cond_id));
#endif
switch (currentcond->op) {
case LT: if (params->values[currentcond->param] < currentcond->value) {true = 1;}
@ -227,9 +227,9 @@ while (currentrule) { /* rules to evaluate */
else {true = 0;}
break;
default:
fprintf(stderr, "Eval: BAD operator of value %d rule %d cond %d\n",
OPAL_OUTPUT((mca_colL_tuned_stream, "coll:tuned:dynamic_rules:eval_rule BAD operator of value %d rule %d cond %d",
currentcond->op, currentrule->rule_id,
currentcond->cond_id);
currentcond->cond_id));
true = 0;
return (-1); /* ?! what else can I do, should have caught before */
} /* switch on condition operator */
@ -240,19 +240,20 @@ while (currentrule) { /* rules to evaluate */
if (!true) {
#ifdef RULEVERBOSE
printf("Eval Cond %d returned FALSE\n", currentcond->cond_id);
OPAL_OUTPUT((mca_colL_tuned_stream, "coll:tuned:dynamic_rules:eval_rule eval cond %d returned FALSE", currentcond->cond_id));
#endif
break; /* if false drop out asap */
}
if ((true)&&(currentcond->next)) { /* next condition to check */
#ifdef RULEVERBOSE
printf("Eval Cond %d returned TRUE. Moving to next\n", currentcond->cond_id);
OPAL_OUTPUT((mca_colL_tuned_stream, "coll:tuned:dynamic_rules:eval_rule eval cond %d returned TRUE. Moving to next",
currentcond->cond_id));
#endif
currentcond = currentcond->next;
}
else { /* we are true with no more conditions to check */
#ifdef RULEVERBOSE
printf("Eval Cond %d (LAST) returned TRUE.\n", currentcond->cond_id);
OPAL_OUTPUT((mca_colL_tuned_stream, "coll:tuned:dynamic_rules:eval_rule eval cond %d (LAST) returned TRUE.", currentcond->cond_id));
#endif
break; /* so return so we can find out what to do next */
}
@ -263,7 +264,7 @@ while (currentrule) { /* rules to evaluate */
/* we do these IFs in the fasted/most important order */
if ((true)&&(currentrule->true_fptr)) {
#ifdef RULEVERBOSE
printf("Eval Rule %d is TRUE returning fptr\n", currentrule->rule_id);
OPAL_OUTPUT((mca_colL_tuned_stream, "coll:tuned:dynamic_rules:eval_rule rule %d is TRUE returning fptr", currentrule->rule_id));
#endif
*fptr = currentrule->true_fptr;
*extraargs = currentrule->true_extraargs;
@ -271,7 +272,7 @@ while (currentrule) { /* rules to evaluate */
}
if ((!true)&&(currentrule->false_fptr)) {
#ifdef RULEVERBOSE
printf("Eval Rule %d is FALSE returning fptr\n", currentrule->rule_id);
OPAL_OUTPUT((mca_colL_tuned_stream, "coll:tuned:dynamic_rules:eval_rule rule %d is FALSE returning fptr", currentrule->rule_id));
#endif
*fptr = currentrule->false_fptr;
*extraargs = currentrule->false_extraargs;
@ -279,23 +280,21 @@ while (currentrule) { /* rules to evaluate */
}
if (true) {
#ifdef RULEVERBOSE
printf("Eval Rule %d is TRUE jumping to next rule %d\n",
currentrule->rule_id,
((rule_t*)(currentrule->next_true_rule))->rule_id);
OPAL_OUTPUT((mca_colL_tuned_stream, "coll:tuned:dynamic_rules:eval_rule rule %d is TRUE jumping to next rule %d",
currentrule->rule_id, ((rule_t*)(currentrule->next_true_rule))->rule_id));
#endif
currentrule = (rule_t *) currentrule->next_true_rule;
}
else { /* i.e. not true / lazy eval */
#ifdef RULEVERBOSE
printf("Eval Rule %d is FALSE jumping to next rule %d\n",
currentrule->rule_id,
((rule_t*)(currentrule->next_false_rule))->rule_id);
OPAL_OUTPUT((mca_coll_tuned_stream, "Eval Rule %d is FALSE jumping to next rule %d",
currentrule->rule_id, ((rule_t*)(currentrule->next_false_rule))->rule_id));
#endif
currentrule = (rule_t *) currentrule->next_false_rule;
}
if (!currentrule) {
fprintf(stderr, "eval: disaster, we have gone off into the weeds.. panic!\n");
OPAL_OUTPUT((mca_colL_tuned_stream, "coll:tuned:dynamic_rules:eval_rule Disaster, we have gone off into the weeds.. panic!"));
exit (-10);
}
}

Просмотреть файл

@ -253,14 +253,9 @@ const mca_coll_base_module_1_0_0_t *
mca_coll_tuned_comm_query(struct ompi_communicator_t *comm, int *priority,
struct mca_coll_base_comm_t **data)
{
int use_dynamic = -1;
printf("Tuned query called\n");
if (OMPI_SUCCESS != mca_base_param_lookup_int(mca_coll_tuned_priority_param,
priority)) {
return NULL;
}
OPAL_OUTPUT((mca_coll_tuned_stream, "coll:tuned:module_tuned query called"));
*priority = mca_coll_tuned_priority;
/*
* Choose whether to use [intra|inter] decision functions
@ -270,27 +265,20 @@ mca_coll_tuned_comm_query(struct ompi_communicator_t *comm, int *priority,
*
*/
if (OMPI_SUCCESS !=
mca_base_param_lookup_int(mca_coll_tuned_use_dynamic_rules_param,
&use_dynamic)) {
printf("No use_dynamic param found!\n");
return NULL;
}
if (OMPI_COMM_IS_INTER(comm)) {
if (use_dynamic) {
printf("using inter_dynamic\n");
if (mca_coll_tuned_use_dynamic_rules) {
OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:module_query using inter_dynamic"));
to_use = &inter_dynamic;
} else {
printf("using inter_fixed\n");
OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:module_query using inter_fixed"));
to_use = &inter_fixed;
}
} else { /* is an intra comm */
if (use_dynamic) {
printf("using intra_dynamic\n");
if (mca_coll_tuned_use_dynamic_rules) {
OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:module_query using intra_dynamic"));
to_use = &intra_dynamic;
} else {
printf("using intra_fixed\n");
OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:module_query using intra_fixed"));
to_use = &intra_fixed;
}
}
@ -307,13 +295,10 @@ mca_coll_tuned_module_init(struct ompi_communicator_t *comm)
int size;
struct mca_coll_base_comm_t *data;
/* fanout parameters */
int tree_fanout_default = 0;
int chain_fanout_default = 0;
int pre_allocate_limit = -1;
int pre_allocate = 1;
printf("Tuned init module called.\n");
OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:module_init called."));
/* This routine will become more complex and might have to be */
/* broken into more sections/function calls */
@ -347,15 +332,8 @@ mca_coll_tuned_module_init(struct ompi_communicator_t *comm)
*
*/
if (OMPI_SUCCESS !=
mca_base_param_lookup_int(mca_coll_tuned_preallocate_memory_comm_size_limit_param,
&pre_allocate_limit)) {
printf("No pre_allocate param found!\n");
return NULL;
}
/* if we within the memory/size limit, allow preallocated data */
if (size<=pre_allocate_limit) {
if (size<=mca_coll_tuned_preallocate_memory_comm_size_limit) {
data = malloc(sizeof(struct mca_coll_base_comm_t) +
(sizeof(ompi_request_t *) * size * 2));
@ -379,23 +357,10 @@ mca_coll_tuned_module_init(struct ompi_communicator_t *comm)
* guess the initial topologies to use rank 0 as root
*/
/* get default fanouts is made available via the MCA */
if (OMPI_SUCCESS !=
mca_base_param_lookup_int(mca_coll_tuned_init_tree_fanout_param,
&tree_fanout_default)) {
printf("warning: no mca_coll_tuned_init_tree_fanout_param found?\n");
}
if (OMPI_SUCCESS !=
mca_base_param_lookup_int(mca_coll_tuned_init_chain_fanout_param,
&chain_fanout_default)) {
printf("warning: no mca_coll_tuned_init_chain_fanout_param found?\n");
}
/* general n fan out tree */
data->cached_ntree = ompi_coll_tuned_topo_build_tree (tree_fanout_default, comm, 0);
data->cached_ntree = ompi_coll_tuned_topo_build_tree (mca_coll_tuned_init_tree_fanout, comm, 0);
data->cached_ntree_root = 0;
data->cached_ntree_fanout = tree_fanout_default;
data->cached_ntree_fanout = mca_coll_tuned_init_tree_fanout;
/* binary tree */
data->cached_bintree = ompi_coll_tuned_topo_build_tree (2, comm, 0);
@ -413,9 +378,9 @@ mca_coll_tuned_module_init(struct ompi_communicator_t *comm)
* will probably change how we cache this later, for now a midsize
* GEF
*/
data->cached_chain = ompi_coll_tuned_topo_build_chain (chain_fanout_default, comm, 0);
data->cached_chain = ompi_coll_tuned_topo_build_chain (mca_coll_tuned_init_chain_fanout, comm, 0);
data->cached_chain_root = 0;
data->cached_chain_fanout = chain_fanout_default;
data->cached_chain_fanout = mca_coll_tuned_init_chain_fanout;
/* standard pipeline */
data->cached_pipeline = ompi_coll_tuned_topo_build_chain (1, comm, 0);
@ -425,7 +390,7 @@ mca_coll_tuned_module_init(struct ompi_communicator_t *comm)
comm->c_coll_selected_data = data;
printf("Tuned looks like it is in use :)\n");
OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:module_init Tuned is in use"));
return to_use;
}

Просмотреть файл

@ -277,7 +277,7 @@ int mca_coll_tuned_reduce_intra_chain( void *sendbuf, void *recvbuf, int count,
for (segindex = 0; segindex < num_segments; segindex++) {
if (segindex < num_segments-1) sendcount = segcount;
else sendcount = count - segindex*segcount;
ret = MCA_PML_CALL( send(sendbuf+segindex*realsegsize, sendcount,
ret = MCA_PML_CALL( send((char*)sendbuf+segindex*realsegsize, sendcount,
datatype, chain->chain_prev,
MCA_COLL_BASE_TAG_REDUCE, MCA_PML_BASE_SEND_STANDARD, comm) );
if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
@ -288,8 +288,7 @@ int mca_coll_tuned_reduce_intra_chain( void *sendbuf, void *recvbuf, int count,
/* error handler */
error_hndl:
opal_output( 0, "ERROR_HNDL: node %d file %s line %d error %d\n",
rank, __FILE__, line, ret );
OPAL_OUTPUT (( mca_coll_tuned_stream, "ERROR_HNDL: node %d file %s line %d error %d\n", rank, __FILE__, line, ret ));
if( inbuf != NULL ) {
if( inbuf[0] != NULL ) free(inbuf[0]);
if( inbuf[1] != NULL ) free(inbuf[1]);

Просмотреть файл

@ -49,7 +49,7 @@ int mca_coll_tuned_reduce_intra_dec_dynamic( void *sendbuf, void *recvbuf,
int contig;
int dsize;
printf("mca_coll_tuned_reduce_intra_dec_dynamic\n");
OPAL_OUTPUT((mca_coll_tuned_stream, "coll:tuned:reduce_intra_dec_dynamic"));
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);

Просмотреть файл

@ -48,7 +48,7 @@ int mca_coll_tuned_reduce_intra_dec_fixed( void *sendbuf, void *recvbuf,
int contig;
int dsize;
printf("mca_coll_tuned_reduce_intra_dec_fixed\n");
OPAL_OUTPUT((mca_coll_tuned_stream, "mca_coll_tuned_reduce_intra_dec_fixed"));
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);

Просмотреть файл

@ -23,6 +23,7 @@
#include "mca/coll/coll.h"
#include "mca/coll/base/coll_tags.h"
#include "mca/pml/pml.h"
#include "coll_tuned.h"
#include "coll_tuned_topo.h"
/*
@ -71,14 +72,14 @@ ompi_coll_tuned_topo_build_tree( int fanout,
int i;
ompi_coll_tree_t* tree;
printf("Building tuned topo tree: fo %d rt %d\n", fanout, root);
OPAL_OUTPUT((mca_coll_tuned_stream, "coll:tuned:topo_build_tree Building fo %d rt %d", fanout, root));
if (fanout<1) {
printf("ompi_coll_tuned_topo_build_tree: invalid fanout %d\n", fanout);
OPAL_OUTPUT((mca_coll_tuned_stream, "coll:tuned:topo_build_tree invalid fanout %d", fanout));
return NULL;
}
if (fanout>MAXTREEFANOUT) {
printf("ompi_coll_tuned_topo_build_tree: invalid fanout %d bigger than max %d\n", fanout, MAXTREEFANOUT);
OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:topo_build_tree invalid fanout %d bigger than max %d", fanout, MAXTREEFANOUT));
return NULL;
}
@ -90,8 +91,7 @@ ompi_coll_tuned_topo_build_tree( int fanout,
tree = (ompi_coll_tree_t*)malloc(sizeof(ompi_coll_tree_t));
if (!tree) {
printf("PANIC:ompi_coll_tuned_topo_build_tree:out of memory\n");
fflush(stdout);
OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:topo_build_tree PANIC::out of memory"));
return NULL;
}
@ -190,7 +190,7 @@ ompi_coll_tuned_topo_build_bmtree( struct ompi_communicator_t* comm,
ompi_coll_tree_t *bmtree;
int i;
printf("Building tuned topo bmtree: rt %d\n", root);
OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:topo:build_bmtree rt %d", root));
/*
* Get size and rank of the process in this communicator
@ -202,8 +202,7 @@ ompi_coll_tuned_topo_build_bmtree( struct ompi_communicator_t* comm,
bmtree = (ompi_coll_tree_t*)malloc(sizeof(ompi_coll_tree_t));
if (!bmtree) {
printf("PANIC:ompi_coll_tuned_topo_build_bmtree:out of memory\n");
fflush(stdout);
OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:topo:build_bmtree PANIC out of memory"));
return NULL;
}
@ -234,7 +233,7 @@ ompi_coll_tuned_topo_build_bmtree( struct ompi_communicator_t* comm,
remote += root;
if( remote >= size ) remote -= size;
if (childs==MAXTREEFANOUT) {
printf("ompi_coll_tuned_topo_build_bmtree: max fanout incorrect %d needed %d\n", MAXTREEFANOUT, childs);
OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:topo:build_bmtree max fanout incorrect %d needed %d", MAXTREEFANOUT, childs));
return NULL;
}
bmtree->tree_next[childs] = remote;
@ -258,7 +257,7 @@ ompi_coll_tuned_topo_build_chain( int fanout,
int mark,head,len;
ompi_coll_chain_t *chain;
printf("Building tuned topo chain: fo %d rt %d\n", fanout, root);
OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:topo:build_chain fo %d rt %d", fanout, root));
/*
* Get size and rank of the process in this communicator
@ -270,7 +269,7 @@ ompi_coll_tuned_topo_build_chain( int fanout,
return NULL;
}
if (fanout>MAXTREEFANOUT) {
printf("ompi_coll_tuned_topo_build_chain: invalid fanout %d bigger than max %d\n", fanout, MAXTREEFANOUT);
OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:topo:build_chain invalid fanout %d bigger than max %d", fanout, MAXTREEFANOUT));
return NULL;
}
@ -279,7 +278,7 @@ ompi_coll_tuned_topo_build_chain( int fanout,
*/
chain = (ompi_coll_chain_t*)malloc( sizeof(ompi_coll_chain_t) );
if (!chain) {
printf("PANIC:ompi_coll_tuned_topo_build_chain:out of memory\n");
OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:topo:build_chain PANIC out of memory"));
fflush(stdout);
return NULL;
}
@ -421,24 +420,22 @@ int ompi_coll_tuned_topo_destroy_chain( ompi_coll_chain_t** chain )
/*
 * Debug helper: prints the fields of a tree topology, followed by its
 * tree_next entries, both to stdout and through OPAL_OUTPUT on the
 * component's output stream.
 *
 * tree  topology to dump (dereferenced without a NULL check)
 * rank  rank printed at the start of the summary line
 *
 * Always returns 0.
 */
int ompi_coll_tuned_topo_dump_tree (ompi_coll_tree_t* tree, int rank)
{
    int i;

    /* arguments follow the label order of the format string:
       root, fanout, BM, nextsize, prev */
    printf("%1d tree root %d fanout %d BM %1d nextsize %d prev %d\n", rank,
           tree->tree_root, tree->tree_fanout, tree->tree_bmtree,
           tree->tree_nextsize, tree->tree_prev );
    OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:topo:topo_dump_tree %1d tree root %d fanout %d BM %1d nextsize %d prev %d", rank,
                 tree->tree_root, tree->tree_fanout, tree->tree_bmtree,
                 tree->tree_nextsize, tree->tree_prev));

    if (tree->tree_nextsize) {
        for (i = 0; i < tree->tree_nextsize; i++) {
            printf("[%1d] %d ", i, tree->tree_next[i]);
        }
        for (i = 0; i < tree->tree_nextsize; i++) {
            OPAL_OUTPUT((mca_coll_tuned_stream,"[%1d] %d", i, tree->tree_next[i]));
        }
    }
    printf("\n");
    return (0);
}
/*
 * Debug helper: prints the fields of a chain topology, followed by its
 * chain_next entries, both to stdout and through OPAL_OUTPUT on the
 * component's output stream.
 *
 * chain  topology to dump (dereferenced without a NULL check)
 * rank   rank printed at the start of the summary line
 *
 * Always returns 0.
 */
int ompi_coll_tuned_topo_dump_chain (ompi_coll_chain_t* chain, int rank)
{
    int child;

    printf("%1d chain root %d fanout %d nextsize %d prev %d\n",
           rank,
           chain->chain_root,
           chain->chain_numchain,
           chain->chain_nextsize,
           chain->chain_prev );
    OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:topo:topo_dump_chain %1d chain root %d fanout %d nextsize %d prev %d\n",
                 rank,
                 chain->chain_root,
                 chain->chain_numchain,
                 chain->chain_nextsize,
                 chain->chain_prev));

    if (0 != chain->chain_nextsize) {
        /* stdout first, then the same entries on the output stream */
        child = 0;
        while (child < chain->chain_nextsize) {
            printf("[%1d] %d ", child, chain->chain_next[child]);
            child++;
        }
        child = 0;
        while (child < chain->chain_nextsize) {
            OPAL_OUTPUT((mca_coll_tuned_stream,"[%1d] %d ", child, chain->chain_next[child]));
            child++;
        }
    }

    printf("\n");
    return 0;
}

Просмотреть файл

@ -55,7 +55,7 @@ ompi_status_public_t statuses[2];
return (MPI_SUCCESS);
error_handler:
fprintf(stderr,"%s:%d: Error %d occurred\n",__FILE__,line,err);
OPAL_OUTPUT ((mca_coll_tuned_stream, "%s:%d: Error %d occurred\n",__FILE__,line,err));
return (err);
}