Last cleanup before adding the last two methods and the final crossover points.
- new MCA param calls
- move printfs to OPAL_OUTPUT
This commit was SVN r7692.
Этот коммит содержится в:
родитель
b42d4ac780
Коммит
607bdf51b6
@ -37,7 +37,14 @@ extern "C" {
|
||||
*/
|
||||
|
||||
OMPI_COMP_EXPORT extern const mca_coll_base_component_1_0_0_t mca_coll_tuned_component;
|
||||
OMPI_COMP_EXPORT extern int mca_coll_tuned_priority_param;
|
||||
|
||||
OMPI_COMP_EXPORT extern int mca_coll_tuned_stream;
|
||||
OMPI_COMP_EXPORT extern int mca_coll_tuned_priority;
|
||||
OMPI_COMP_EXPORT extern int mca_coll_tuned_preallocate_memory_comm_size_limit;
|
||||
OMPI_COMP_EXPORT extern int mca_coll_tuned_use_dynamic_rules;
|
||||
OMPI_COMP_EXPORT extern int mca_coll_tuned_init_tree_fanout;
|
||||
OMPI_COMP_EXPORT extern int mca_coll_tuned_init_chain_fanout;
|
||||
|
||||
|
||||
|
||||
/*
|
||||
|
@ -31,36 +31,6 @@
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
|
||||
/* temp debug routines */
|
||||
static int dump_buf_int (char* ptr, int count, char *comment, int rank);

/*
 * Temporary debug helper: print a short preview of a buffer, read as ints,
 * to stdout on a single line.
 *
 * ptr     - buffer to preview; read as an array of int. May be NULL, in
 *           which case no element is read.
 * count   - number of ints in the buffer. A negative count is reported and
 *           nothing else is printed.
 * comment - optional label printed after the rank; skipped when NULL.
 * rank    - rank of the caller, printed first on the line.
 *
 * At most the first DUMP_BUF_PREVIEW elements are printed; when the buffer
 * is longer, the last element is printed as well after " ... ".
 *
 * Always returns 0.
 */
static int dump_buf_int (char* ptr, int count, char *comment, int rank) {
    enum { DUMP_BUF_PREVIEW = 5 };
    int *values = (int*)ptr;
    int shown;
    int i;

    printf("%1d ", rank);
    if (comment) {
        printf("%s ", comment);
    }

    if (count < 0) {
        printf("cnt %d?\n", count);
        return (0);
    }

    /* A NULL buffer with a positive count would be dereferenced below. */
    if (NULL == ptr && count > 0) {
        printf("Cnt %1d (NULL buffer)\n", count);
        return (0);
    }

    shown = (count > DUMP_BUF_PREVIEW) ? DUMP_BUF_PREVIEW : count;

    printf("Cnt %1d ", count);
    for (i = 0; i < shown; i++) {
        printf("%1d [%1d] ", i, values[i]);
    }

    /* The preview was truncated: also show the final element. */
    if (shown != count) {
        printf(" ... %1d [%1d]", count-1, values[count-1]);
    }

    printf("\n");
    return (0);
}
|
||||
|
||||
|
||||
int mca_coll_tuned_alltoall_intra_pairwise(void *sbuf, int scount,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
void* rbuf, int rcount,
|
||||
@ -78,7 +48,7 @@ int mca_coll_tuned_alltoall_intra_pairwise(void *sbuf, int scount,
|
||||
size = ompi_comm_size(comm);
|
||||
rank = ompi_comm_rank(comm);
|
||||
|
||||
printf("mca_coll_tuned_alltoall_intra_pairwise rank %d\n", rank);
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:alltoall_intra_pairwise rank %d", rank));
|
||||
|
||||
|
||||
err = ompi_ddt_get_extent (sdtype, &lb, &sext);
|
||||
@ -109,8 +79,7 @@ int mca_coll_tuned_alltoall_intra_pairwise(void *sbuf, int scount,
|
||||
return MPI_SUCCESS;
|
||||
|
||||
err_hndl:
|
||||
fprintf(stderr,"%s:%4d\tError occurred %d, rank %2d\n",
|
||||
__FILE__,line,err,rank);
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", __FILE__,line,err,rank));
|
||||
return err;
|
||||
}
|
||||
|
||||
@ -137,7 +106,7 @@ int mca_coll_tuned_alltoall_intra_bruck(void *sbuf, int scount,
|
||||
size = ompi_comm_size(comm);
|
||||
rank = ompi_comm_rank(comm);
|
||||
|
||||
printf("mca_coll_tuned_alltoall_intra_bruck rank %d\n", rank);
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:alltoall_intra_bruck rank %d", rank));
|
||||
|
||||
|
||||
err = ompi_ddt_get_extent (sdtype, &lb, &sext);
|
||||
@ -268,8 +237,7 @@ int mca_coll_tuned_alltoall_intra_bruck(void *sbuf, int scount,
|
||||
return OMPI_SUCCESS;
|
||||
|
||||
err_hndl:
|
||||
fprintf(stderr,"%s:%4d\tError occurred %d, rank %2d\n",
|
||||
__FILE__,line,err,rank);
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", __FILE__,line,err,rank));
|
||||
if (tmpbuf != NULL) free(tmpbuf);
|
||||
if (packbuf != NULL) free(packbuf);
|
||||
if (weallocated) {
|
||||
@ -296,7 +264,7 @@ int mca_coll_tuned_alltoall_intra_two_procs(void *sbuf, int scount,
|
||||
|
||||
rank = ompi_comm_rank(comm);
|
||||
|
||||
printf("mca_coll_tuned_alltoall_intra_two_procs rank %d\n", rank);
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"mca_coll_tuned_alltoall_intra_two_procs rank %d", rank));
|
||||
|
||||
err = ompi_ddt_get_extent (sdtype, &lb, &sext);
|
||||
if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; }
|
||||
@ -327,8 +295,7 @@ int mca_coll_tuned_alltoall_intra_two_procs(void *sbuf, int scount,
|
||||
return MPI_SUCCESS;
|
||||
|
||||
err_hndl:
|
||||
fprintf(stderr,"%s:%4d\tError occurred %d, rank %2d\n",
|
||||
__FILE__,line,err,rank);
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", __FILE__,line,err,rank));
|
||||
return err;
|
||||
}
|
||||
|
||||
@ -350,7 +317,7 @@ int mca_coll_tuned_alltoall_intra_linear(void *sbuf, int scount,
|
||||
size = ompi_comm_size(comm);
|
||||
rank = ompi_comm_rank(comm);
|
||||
|
||||
printf("mca_coll_tuned_alltoall_intra_linear rank %d\n", rank);
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"mca_coll_tuned_alltoall_intra_linear rank %d", rank));
|
||||
|
||||
|
||||
err = ompi_ddt_get_extent (sdtype, &lb, &sext);
|
||||
@ -360,8 +327,7 @@ int mca_coll_tuned_alltoall_intra_linear(void *sbuf, int scount,
|
||||
if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; }
|
||||
|
||||
err_hndl:
|
||||
fprintf(stderr,"%s:%4d\tError occurred %d, rank %2d\n",
|
||||
__FILE__,line,err,rank);
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", __FILE__,line,err,rank));
|
||||
return err;
|
||||
}
|
||||
|
||||
|
@ -49,7 +49,7 @@ int mca_coll_tuned_alltoall_intra_dec_dynamic(void *sbuf, int scount,
|
||||
int contig;
|
||||
int dsize;
|
||||
|
||||
printf("mca_coll_tuned_alltoall_intra_dec_dynamic\n");
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream, "mca_coll_tuned_alltoall_intra_dec_dynamic"));
|
||||
|
||||
size = ompi_comm_size(comm);
|
||||
rank = ompi_comm_rank(comm);
|
||||
|
@ -49,7 +49,7 @@ int mca_coll_tuned_alltoall_intra_dec_fixed(void *sbuf, int scount,
|
||||
int contig;
|
||||
int dsize;
|
||||
|
||||
printf("mca_coll_tuned_alltoall_intra_dec_fixed\n");
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream, "mca_coll_tuned_alltoall_intra_dec_fixed"));
|
||||
|
||||
size = ompi_comm_size(comm);
|
||||
rank = ompi_comm_rank(comm);
|
||||
|
@ -42,7 +42,7 @@ int mca_coll_tuned_barrier_intra_doublering(struct ompi_communicator_t *comm)
|
||||
rank = ompi_comm_rank(comm);
|
||||
size = ompi_comm_size(comm);
|
||||
|
||||
printf("mca_coll_tuned_bcast_intra_doublering rank %d\n", rank);
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"mca_coll_tuned_bcast_intra_doublering rank %d", rank));
|
||||
|
||||
left = ((rank-1)%size);
|
||||
right = ((rank+1)%size);
|
||||
@ -87,7 +87,7 @@ int mca_coll_tuned_barrier_intra_doublering(struct ompi_communicator_t *comm)
|
||||
return MPI_SUCCESS;
|
||||
|
||||
err_hndl:
|
||||
fprintf(stderr,"%s:%4d\tError occurred %d\n",__FILE__,line,err);
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", __FILE__,line,err,rank));
|
||||
return err;
|
||||
}
|
||||
|
||||
@ -100,7 +100,7 @@ int mca_coll_tuned_barrier_intra_recursivedoubling(struct ompi_communicator_t *c
|
||||
|
||||
rank = ompi_comm_rank(comm);
|
||||
size = ompi_comm_size(comm);
|
||||
printf("mca_coll_tuned_bcast_intra_recursivedoubling rank %d\n", rank);
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"mca_coll_tuned_bcast_intra_recursivedoubling rank %d", rank));
|
||||
|
||||
/* do nearest power of 2 less than size calc */
|
||||
adjsize = 1;
|
||||
@ -162,7 +162,7 @@ int mca_coll_tuned_barrier_intra_recursivedoubling(struct ompi_communicator_t *c
|
||||
return MPI_SUCCESS;
|
||||
|
||||
err_hndl:
|
||||
fprintf(stderr,"%s:%4d\tError occurred %d\n",__FILE__,line,err);
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", __FILE__,line,err,rank));
|
||||
return err;
|
||||
}
|
||||
|
||||
@ -175,7 +175,7 @@ int mca_coll_tuned_barrier_intra_bruck(struct ompi_communicator_t *comm)
|
||||
|
||||
rank = ompi_comm_rank(comm);
|
||||
size = ompi_comm_size(comm);
|
||||
printf("mca_coll_tuned_bcast_intra_bruck rank %d\n", rank);
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"mca_coll_tuned_bcast_intra_bruck rank %d", rank));
|
||||
|
||||
/* exchange data with rank-2^k and rank+2^k */
|
||||
for (distance = 1; distance < size; distance <<= 1) {
|
||||
@ -190,7 +190,7 @@ int mca_coll_tuned_barrier_intra_bruck(struct ompi_communicator_t *comm)
|
||||
return MPI_SUCCESS;
|
||||
|
||||
err_hndl:
|
||||
fprintf(stderr,"%s:%4d\tError occurred %d\n",__FILE__,line,err);
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", __FILE__,line,err,rank));
|
||||
return err;
|
||||
}
|
||||
|
||||
@ -202,7 +202,7 @@ int mca_coll_tuned_barrier_intra_two_procs(struct ompi_communicator_t *comm)
|
||||
int err=0;
|
||||
|
||||
rank = ompi_comm_rank(comm);
|
||||
printf("mca_coll_tuned_bcast_intra_two_procs rank %d\n", rank);
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"mca_coll_tuned_bcast_intra_two_procs rank %d", rank));
|
||||
|
||||
if (0==rank) {
|
||||
err = coll_tuned_sendrecv (NULL, 0, MPI_BYTE, 1, MCA_COLL_BASE_TAG_BARRIER,
|
||||
|
@ -44,7 +44,7 @@ int mca_coll_tuned_barrier_intra_dec_dynamic(struct ompi_communicator_t *comm)
|
||||
int contig;
|
||||
int dsize;
|
||||
|
||||
printf("mca_coll_tuned_barrier_intra_dec_dynamic\n");
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"mca_coll_tuned_barrier_intra_dec_dynamic"));
|
||||
|
||||
size = ompi_comm_size(comm);
|
||||
rank = ompi_comm_rank(comm);
|
||||
|
@ -44,7 +44,7 @@ int mca_coll_tuned_barrier_intra_dec_fixed(struct ompi_communicator_t *comm)
|
||||
int contig;
|
||||
int dsize;
|
||||
|
||||
printf("mca_coll_tuned_barrier_intra_dec_fixed\n");
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream, "mca_coll_tuned_barrier_intra_dec_fixed"));
|
||||
|
||||
size = ompi_comm_size(comm);
|
||||
rank = ompi_comm_rank(comm);
|
||||
|
@ -49,7 +49,7 @@ mca_coll_tuned_bcast_intra_chain ( void *buff, int count,
|
||||
size = ompi_comm_size(comm);
|
||||
rank = ompi_comm_rank(comm);
|
||||
|
||||
printf("mca_coll_tuned_bcast_intra_chain rank %d root %d\n", rank, root);
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"mca_coll_tuned_bcast_intra_chain rank %d root %d", rank, root));
|
||||
|
||||
if( size == 1 ) {
|
||||
return MPI_SUCCESS;
|
||||
@ -104,7 +104,7 @@ mca_coll_tuned_bcast_intra_chain ( void *buff, int count,
|
||||
/* set the buffer pointer */
|
||||
tmpbuf = (char *)buff;
|
||||
|
||||
/* printf("%1d chain root %d num_segments %d\n", rank, root, num_segments); */
|
||||
/* OPAL_OUTPUT((mca_coll_tuned_stream,("%1d chain root %d num_segments %d\n", rank, root, num_segments); */
|
||||
|
||||
/* root code */
|
||||
if( rank == root ) {
|
||||
@ -173,7 +173,7 @@ mca_coll_tuned_bcast_intra_chain ( void *buff, int count,
|
||||
chain->chain_next[i],
|
||||
MCA_COLL_BASE_TAG_BCAST,
|
||||
MCA_PML_BASE_SEND_STANDARD, comm));
|
||||
if (err != MPI_SUCCESS) printf("sendcount %d i %d chain_next %d \n", sendcount, i, chain->chain_next[i]);
|
||||
if (err != MPI_SUCCESS) OPAL_OUTPUT((mca_coll_tuned_stream,"sendcount %d i %d chain_next %d", sendcount, i, chain->chain_next[i]));
|
||||
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
|
||||
} /* end of for each child */
|
||||
}
|
||||
@ -197,7 +197,7 @@ mca_coll_tuned_bcast_intra_chain ( void *buff, int count,
|
||||
|
||||
return (MPI_SUCCESS);
|
||||
error_hndl:
|
||||
fprintf(stderr,"%s:%d: Error %d occurred\n",__FILE__,line,err);
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", __FILE__,line,err,rank));
|
||||
return (err);
|
||||
}
|
||||
|
||||
@ -213,7 +213,7 @@ mca_coll_tuned_bcast_intra_pipeline ( void *buffer,
|
||||
{
|
||||
int rank; /* remove when removing print statement */
|
||||
rank = ompi_comm_rank(comm); /* remove when removing print statement */
|
||||
printf("mca_coll_tuned_bcast_intra_pipeline rank %d root %d\n", rank, root);
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"mca_coll_tuned_bcast_intra_pipeline rank %d root %d", rank, root));
|
||||
|
||||
return mca_coll_tuned_bcast_intra_chain ( buffer, count, datatype, root, comm,
|
||||
segsize, 1 );
|
||||
@ -247,7 +247,7 @@ mca_coll_tuned_bcast_intra_split_bintree ( void* buffer,
|
||||
size = ompi_comm_size(comm);
|
||||
rank = ompi_comm_rank(comm);
|
||||
|
||||
printf("mca_coll_tuned_bcast_intra_split_bintree rank %d root %d\n", rank, root);
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"mca_coll_tuned_bcast_intra_split_bintree rank %d root %d", rank, root));
|
||||
|
||||
if (size == 1) {
|
||||
return MPI_SUCCESS;
|
||||
@ -470,7 +470,7 @@ mca_coll_tuned_bcast_intra_split_bintree ( void* buffer,
|
||||
return (MPI_SUCCESS);
|
||||
|
||||
error_hndl:
|
||||
fprintf(stderr,"[%d]%s:%d: Error %d occurred\n",rank,__FILE__,line,err);
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", __FILE__,line,err,rank));
|
||||
return (err);
|
||||
}
|
||||
|
||||
@ -502,7 +502,7 @@ mca_coll_tuned_bcast_intra_bintree ( void* buffer,
|
||||
size = ompi_comm_size(comm);
|
||||
rank = ompi_comm_rank(comm);
|
||||
|
||||
printf("mca_coll_tuned_bcast_intra_bintree rank %d root %d\n", rank, root);
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"mca_coll_tuned_bcast_intra_bintree rank %d root %d", rank, root));
|
||||
|
||||
if (size == 1) {
|
||||
return MPI_SUCCESS;
|
||||
@ -677,7 +677,8 @@ mca_coll_tuned_bcast_intra_bintree ( void* buffer,
|
||||
return (MPI_SUCCESS);
|
||||
|
||||
error_hndl:
|
||||
fprintf(stderr,"[%d]%s:%d: Error %d occurred\n",rank,__FILE__,line,err);
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", __FILE__,line,err,rank));
|
||||
return (err);
|
||||
}
|
||||
|
||||
|
||||
|
@ -46,7 +46,7 @@ int mca_coll_tuned_bcast_intra_dec_dynamic(void *buff, int count,
|
||||
int contig;
|
||||
int dsize;
|
||||
|
||||
printf("mca_coll_tuned_bcast_intra_dec_dynamic\n");
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream, "coll:tuned:bcast_intra_dec_dynamic"));
|
||||
|
||||
size = ompi_comm_size(comm);
|
||||
rank = ompi_comm_rank(comm);
|
||||
|
@ -47,7 +47,7 @@ int mca_coll_tuned_bcast_intra_dec_fixed(void *buff, int count,
|
||||
int contig;
|
||||
int dsize;
|
||||
|
||||
printf("mca_coll_tuned_bcast_intra_dec_fixed\n");
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"mca_coll_tuned_bcast_intra_dec_fixed"));
|
||||
|
||||
size = ompi_comm_size(comm);
|
||||
rank = ompi_comm_rank(comm);
|
||||
|
@ -36,11 +36,12 @@ const char *mca_coll_tuned_component_version_string =
|
||||
/*
|
||||
* Global variable
|
||||
*/
|
||||
int mca_coll_tuned_priority_param = -1;
|
||||
int mca_coll_tuned_preallocate_memory_comm_size_limit_param = -1;
|
||||
int mca_coll_tuned_use_dynamic_rules_param = -1;
|
||||
int mca_coll_tuned_init_tree_fanout_param = -1;
|
||||
int mca_coll_tuned_init_chain_fanout_param = -1;
|
||||
int mca_coll_tuned_stream = -1;
|
||||
int mca_coll_tuned_priority = 30;
|
||||
int mca_coll_tuned_preallocate_memory_comm_size_limit = (32*1024);
|
||||
int mca_coll_tuned_use_dynamic_rules = 0;
|
||||
int mca_coll_tuned_init_tree_fanout = 4;
|
||||
int mca_coll_tuned_init_chain_fanout = 4;
|
||||
/*
|
||||
* Local function
|
||||
*/
|
||||
@ -93,43 +94,54 @@ const mca_coll_base_component_1_0_0_t mca_coll_tuned_component = {
|
||||
|
||||
static int tuned_open(void)
|
||||
{
|
||||
printf("Tuned_open called\n");
|
||||
int param;
|
||||
|
||||
/* mca_coll_tuned_component_t *ct = &mca_coll_tuned_component; */
|
||||
|
||||
/* Use a low priority, but allow other components to be lower */
|
||||
|
||||
mca_coll_tuned_priority_param =
|
||||
mca_base_param_register_int("coll", "tuned", "priority", NULL, 30);
|
||||
mca_base_param_reg_int(&mca_coll_tuned_component.collm_version,
|
||||
"priority",
|
||||
"Priority of the tuned coll component",
|
||||
false, false, mca_coll_tuned_priority,
|
||||
&mca_coll_tuned_priority);
|
||||
|
||||
/* check the parameter for pre-allocated memory requests etc */
|
||||
mca_coll_tuned_preallocate_memory_comm_size_limit_param =
|
||||
mca_base_param_register_int("coll", "tuned", "pre_allocate_memory", NULL, (32*1024)+1);
|
||||
/* parameter for pre-allocated memory requests etc */
|
||||
mca_base_param_reg_int(&mca_coll_tuned_component.collm_version,
|
||||
"pre_allocate_memory_comm_size_limit",
|
||||
"Size of communicator were we stop pre-allocating memory for the fixed internal buffer used for message requests etc that is hung off the communicator data segment. I.e. if you have a 100'000 nodes you might not want to pre-allocate 200'000 request handle slots per communicator instance!",
|
||||
false, false, mca_coll_tuned_preallocate_memory_comm_size_limit,
|
||||
&mca_coll_tuned_preallocate_memory_comm_size_limit);
|
||||
|
||||
/* by default DISABLE dynamic rules and force the use of fixed [if] rules */
|
||||
mca_coll_tuned_use_dynamic_rules_param =
|
||||
mca_base_param_register_int("coll", "tuned", "use_dynamic_rules",
|
||||
NULL, 0);
|
||||
mca_base_param_reg_int(&mca_coll_tuned_component.collm_version,
|
||||
"use_dynamic_rules",
|
||||
"Switch used to decide if we use static (if statements) or dynamic (built at runtime) decision function rules",
|
||||
false, false, mca_coll_tuned_preallocate_memory_comm_size_limit,
|
||||
&mca_coll_tuned_preallocate_memory_comm_size_limit);
|
||||
|
||||
/* some initial guesses at topology parameters */
|
||||
mca_coll_tuned_init_tree_fanout_param =
|
||||
mca_base_param_register_int("coll", "tuned", "init_tree_fanout",
|
||||
NULL, 4);
|
||||
mca_base_param_reg_int(&mca_coll_tuned_component.collm_version,
|
||||
"init_tree_fanout",
|
||||
"Inital fanout used in the tree topologies for each communicator. This is only an initial guess, if a tuned collective needs a different fanout for an operation, it build it dynamically. This parameter is only for the first guess and might save a little time",
|
||||
false, false, mca_coll_tuned_init_tree_fanout,
|
||||
&mca_coll_tuned_init_tree_fanout);
|
||||
|
||||
mca_coll_tuned_init_chain_fanout_param =
|
||||
mca_base_param_register_int("coll", "tuned", "init_chain_fanout",
|
||||
NULL, 4);
|
||||
|
||||
/* use the newer interface rsn */
|
||||
/* mca_coll_tuned_priority_param = mca_base_param_reg_int(&(ct->super), "priority", "Priority of the tuned coll component", */
|
||||
/* false, false, 30, NULL); */
|
||||
|
||||
/* mca_base_param_reg_int(&(ct->super), "init_tree_fanout", "Fan out used for [balanced] tree topologies in the tuned coll component", */
|
||||
/* false, false, 2, NULL); */
|
||||
|
||||
/* mca_base_param_reg_int(&(ct->super), "init_chain_fanout", */
|
||||
/* "Fan out used for chain [1 fanout followed by pipelines] topology in the tuned coll component", */
|
||||
/* false, false, 2, NULL); */
|
||||
mca_base_param_reg_int(&mca_coll_tuned_component.collm_version,
|
||||
"init_chain_fanout",
|
||||
"Inital fanout used in the chain (fanout followed by pipeline) topologies for each communicator. This is only an initial guess, if a tuned collective needs a different fanout for an operation, it build it dynamically. This parameter is only for the first guess and might save a little time",
|
||||
false, false, mca_coll_tuned_init_chain_fanout,
|
||||
&mca_coll_tuned_init_chain_fanout);
|
||||
|
||||
param = mca_base_param_find("coll", NULL, "base_verbose");
|
||||
if (param >= 0) {
|
||||
int verbose;
|
||||
mca_base_param_lookup_int(param, &verbose);
|
||||
if (verbose > 0) {
|
||||
mca_coll_tuned_stream = opal_output_open(NULL);
|
||||
}
|
||||
}
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream, "coll:tuned:component_open: done!"));
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
@ -38,7 +38,7 @@ rule_t* ptr;
|
||||
|
||||
ptr = (rule_t*) calloc (1, sizeof(rule_t));
|
||||
if (!ptr) {
|
||||
fprintf(stderr,"calloc on mk_rule failed!\n");
|
||||
OPAL_OUTPUT((mca_colL_tuned_stream, "coll:tuned:dynamic_rules:mk_rule calloc on mk_rule failed!"));
|
||||
exit (-1);
|
||||
}
|
||||
|
||||
@ -56,17 +56,17 @@ condition_t* ptr;
|
||||
condition_t* last;
|
||||
|
||||
if (!rule) {
|
||||
fprintf(stderr,"rule given in add_condition_to_rule is NULL?!\n");
|
||||
OPAL_OUTPUT((mca_colL_tuned_stream, "coll:tuned:dynamic_rules:mk_and_add_condition_to_rule rule given is NULL?!\n"));
|
||||
return (-2);
|
||||
}
|
||||
if (param>=PARAMS) {
|
||||
fprintf(stderr,"param given in add_condition_to_rule is %d?!\n", param);
|
||||
OPAL_OUTPUT((mca_colL_tuned_stream, "coll:tuned:dynamic_rules:mk_and_add_condition_to_rule param given is %d?!\n", param));
|
||||
return (-3);
|
||||
}
|
||||
|
||||
ptr = (condition_t*) calloc (1, sizeof(condition_t));
|
||||
if (!ptr) {
|
||||
fprintf(stderr,"calloc on add_condition_to_rule failed!\n");
|
||||
if (!ptr) {
|
||||
OPAL_OUTPUT((mca_colL_tuned_stream, "coll:tuned:dynamic_rules:mk_and_add_condition_to_rule calloc failed!\n"));
|
||||
return (-5);
|
||||
}
|
||||
|
||||
@ -99,25 +99,25 @@ int set_rule_links (rule_t * rule, ifp true_fptr, int* true_extraargs,
|
||||
{
|
||||
|
||||
if (!rule) {
|
||||
fprintf(stderr,"rule given in set_rule_links is NULL?!\n");
|
||||
OPAL_OUTPUT((mca_colL_tuned_stream, "coll:tuned:dynamic_rules:set_rule_links rule is NULL?"));
|
||||
return (-2);
|
||||
}
|
||||
|
||||
/* check rule results.. we must have one set for true and one for false */
|
||||
if ((true_fptr)&&(true_rule)) {
|
||||
fprintf(stderr,"BAD. Two links for TRUE on rule %d!\n", rule->rule_id);
|
||||
OPAL_OUTPUT((mca_colL_tuned_stream, "coll:tuned:dynamic_rules:set_rule_links BAD. Two links for TRUE on rule %d!", rule->rule_id));
|
||||
return (-6);
|
||||
}
|
||||
if ((false_fptr)&&(false_rule)) {
|
||||
fprintf(stderr,"BAD. Two links for FALSE on rule %d!\n", rule->rule_id);
|
||||
OPAL_OUTPUT((mca_colL_tuned_stream, "coll:tuned:dynamic_rules:set_rule_links BAD. Two links for FALSE on rule %d!", rule->rule_id));
|
||||
return (-7);
|
||||
}
|
||||
if ((!true_fptr)&&(!true_rule)) {
|
||||
fprintf(stderr,"BAD. NO links for TRUE on rule %d!\n", rule->rule_id);
|
||||
OPAL_OUTPUT((mca_colL_tuned_stream, "coll:tuned:dynamic_rules:set_rule_links BAD. NO links for TRUE on rule %d!", rule->rule_id));
|
||||
return (-8);
|
||||
}
|
||||
if ((!false_fptr)&&(!false_rule)) {
|
||||
fprintf(stderr,"BAD. NO links for FALSE on rule %d!\n", rule->rule_id);
|
||||
OPAL_OUTPUT((mca_colL_tuned_stream, "coll:tuned:dynamic_rules:set_rule_links BAD. NO links for FALSE on rule %d!", rule->rule_id));
|
||||
return (-9);
|
||||
}
|
||||
|
||||
@ -141,7 +141,7 @@ condition_t* next;
|
||||
int i;
|
||||
|
||||
if (!rule) {
|
||||
fprintf(stderr,"rule given in free_rule is NULL?!\n");
|
||||
OPAL_OUTPUT((mca_colL_tuned_stream, "coll:tuned:dynamic_rules:free_rule rule is NULL?"));
|
||||
return (-2);
|
||||
}
|
||||
|
||||
@ -169,15 +169,15 @@ condition_t* currentcond;
|
||||
int true=1;
|
||||
|
||||
if (!rule) {
|
||||
fprintf(stderr,"rule given in eval_rule is NULL?!\n");
|
||||
OPAL_OUTPUT((mca_colL_tuned_stream, "coll:tuned:dynamic_rules:eval_rule rule given is NULL?!"));
|
||||
return (-2);
|
||||
}
|
||||
|
||||
/* first special case is a very fast path... sorta not really grr */
|
||||
if (!rule->nconditions) {
|
||||
#ifdef VERBOSE
|
||||
printf("Rule %d has no conditions so forcing first available\n",
|
||||
rule->rule_id);
|
||||
OPAL_OUTPUT((mca_colL_tuned_stream, "coll:tuned:dynamic_rules:eval_rule Rule %d has no conditions so forcing first available",
|
||||
rule->rule_id));
|
||||
#endif /* VERBOSE */
|
||||
*fptr = rule->true_fptr;
|
||||
return (0);
|
||||
@ -190,7 +190,7 @@ currentrule = rule;
|
||||
|
||||
while (currentrule) { /* rules to evaluate */
|
||||
#ifdef RULEVERBOSE
|
||||
printf("Eval Rule %d ", currentrule->rule_id);
|
||||
OPAL_OUTPUT((mca_colL_tuned_stream, "coll:tuned:dynamic_rules:eval_rule evaluating rule %d ", currentrule->rule_id));
|
||||
#endif
|
||||
|
||||
/* eval each of the current rules conditions */
|
||||
@ -208,7 +208,7 @@ while (currentrule) { /* rules to evaluate */
|
||||
|
||||
while ((currentcond)&&(true)) { /* while conditions to eval */
|
||||
#ifdef RULEVERBOSE
|
||||
printf("Eval Cond %d ", currentcond->cond_id);
|
||||
OPAL_OUTPUT((mca_colL_tuned_stream, "coll:tuned:dynamic_rules:eval_rule eval cond %d ", currentcond->cond_id));
|
||||
#endif
|
||||
switch (currentcond->op) {
|
||||
case LT: if (params->values[currentcond->param] < currentcond->value) {true = 1;}
|
||||
@ -227,9 +227,9 @@ while (currentrule) { /* rules to evaluate */
|
||||
else {true = 0;}
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr, "Eval: BAD operator of value %d rule %d cond %d\n",
|
||||
OPAL_OUTPUT((mca_colL_tuned_stream, "coll:tuned:dynamic_rules:eval_rule BAD operator of value %d rule %d cond %d",
|
||||
currentcond->op, currentrule->rule_id,
|
||||
currentcond->cond_id);
|
||||
currentcond->cond_id));
|
||||
true = 0;
|
||||
return (-1); /* ?! what else can I do, should have caught before */
|
||||
} /* switch on condition operator */
|
||||
@ -240,19 +240,20 @@ while (currentrule) { /* rules to evaluate */
|
||||
|
||||
if (!true) {
|
||||
#ifdef RULEVERBOSE
|
||||
printf("Eval Cond %d returned FALSE\n", currentcond->cond_id);
|
||||
OPAL_OUTPUT((mca_colL_tuned_stream, "coll:tuned:dynamic_rules:eval_rule eval cond %d returned FALSE", currentcond->cond_id));
|
||||
#endif
|
||||
break; /* if false drop out asap */
|
||||
}
|
||||
if ((true)&&(currentcond->next)) { /* next condition to check */
|
||||
#ifdef RULEVERBOSE
|
||||
printf("Eval Cond %d returned TRUE. Moving to next\n", currentcond->cond_id);
|
||||
OPAL_OUTPUT((mca_colL_tuned_stream, "coll:tuned:dynamic_rules:eval_rule eval cond %d returned TRUE. Moving to next",
|
||||
currentcond->cond_id));
|
||||
#endif
|
||||
currentcond = currentcond->next;
|
||||
}
|
||||
else { /* we are true with no more conditions to check */
|
||||
#ifdef RULEVERBOSE
|
||||
printf("Eval Cond %d (LAST) returned TRUE.\n", currentcond->cond_id);
|
||||
OPAL_OUTPUT((mca_colL_tuned_stream, "coll:tuned:dynamic_rules:eval_rule eval cond %d (LAST) returned TRUE.", currentcond->cond_id));
|
||||
#endif
|
||||
break; /* so return so we can find out what to do next */
|
||||
}
|
||||
@ -263,7 +264,7 @@ while (currentrule) { /* rules to evaluate */
|
||||
/* we do these IFs in the fastest/most important order */
|
||||
if ((true)&&(currentrule->true_fptr)) {
|
||||
#ifdef RULEVERBOSE
|
||||
printf("Eval Rule %d is TRUE returning fptr\n", currentrule->rule_id);
|
||||
OPAL_OUTPUT((mca_colL_tuned_stream, "coll:tuned:dynamic_rules:eval_rule rule %d is TRUE returning fptr", currentrule->rule_id));
|
||||
#endif
|
||||
*fptr = currentrule->true_fptr;
|
||||
*extraargs = currentrule->true_extraargs;
|
||||
@ -271,7 +272,7 @@ while (currentrule) { /* rules to evaluate */
|
||||
}
|
||||
if ((!true)&&(currentrule->false_fptr)) {
|
||||
#ifdef RULEVERBOSE
|
||||
printf("Eval Rule %d is FALSE returning fptr\n", currentrule->rule_id);
|
||||
OPAL_OUTPUT((mca_colL_tuned_stream, "coll:tuned:dynamic_rules:eval_rule rule %d is FALSE returning fptr", currentrule->rule_id));
|
||||
#endif
|
||||
*fptr = currentrule->false_fptr;
|
||||
*extraargs = currentrule->false_extraargs;
|
||||
@ -279,23 +280,21 @@ while (currentrule) { /* rules to evaluate */
|
||||
}
|
||||
if (true) {
|
||||
#ifdef RULEVERBOSE
|
||||
printf("Eval Rule %d is TRUE jumping to next rule %d\n",
|
||||
currentrule->rule_id,
|
||||
((rule_t*)(currentrule->next_true_rule))->rule_id);
|
||||
OPAL_OUTPUT((mca_colL_tuned_stream, "coll:tuned:dynamic_rules:eval_rule rule %d is TRUE jumping to next rule %d",
|
||||
currentrule->rule_id, ((rule_t*)(currentrule->next_true_rule))->rule_id));
|
||||
#endif
|
||||
currentrule = (rule_t *) currentrule->next_true_rule;
|
||||
}
|
||||
else { /* i.e. not true / lazy eval */
|
||||
#ifdef RULEVERBOSE
|
||||
printf("Eval Rule %d is FALSE jumping to next rule %d\n",
|
||||
currentrule->rule_id,
|
||||
((rule_t*)(currentrule->next_false_rule))->rule_id);
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream, "Eval Rule %d is FALSE jumping to next rule %d",
|
||||
currentrule->rule_id, ((rule_t*)(currentrule->next_false_rule))->rule_id));
|
||||
#endif
|
||||
currentrule = (rule_t *) currentrule->next_false_rule;
|
||||
}
|
||||
|
||||
if (!currentrule) {
|
||||
fprintf(stderr, "eval: disaster, we have gone off into the weeds.. panic!\n");
|
||||
OPAL_OUTPUT((mca_colL_tuned_stream, "coll:tuned:dynamic_rules:eval_rule Disaster, we have gone off into the weeds.. panic!"));
|
||||
exit (-10);
|
||||
}
|
||||
}
|
||||
|
@ -253,14 +253,9 @@ const mca_coll_base_module_1_0_0_t *
|
||||
mca_coll_tuned_comm_query(struct ompi_communicator_t *comm, int *priority,
|
||||
struct mca_coll_base_comm_t **data)
|
||||
{
|
||||
int use_dynamic = -1;
|
||||
|
||||
printf("Tuned query called\n");
|
||||
if (OMPI_SUCCESS != mca_base_param_lookup_int(mca_coll_tuned_priority_param,
|
||||
priority)) {
|
||||
return NULL;
|
||||
}
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream, "coll:tuned:module_tuned query called"));
|
||||
|
||||
*priority = mca_coll_tuned_priority;
|
||||
|
||||
/*
|
||||
* Choose whether to use [intra|inter] decision functions
|
||||
@ -270,27 +265,20 @@ mca_coll_tuned_comm_query(struct ompi_communicator_t *comm, int *priority,
|
||||
*
|
||||
*/
|
||||
|
||||
if (OMPI_SUCCESS !=
|
||||
mca_base_param_lookup_int(mca_coll_tuned_use_dynamic_rules_param,
|
||||
&use_dynamic)) {
|
||||
printf("No use_dynamic param found!\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (OMPI_COMM_IS_INTER(comm)) {
|
||||
if (use_dynamic) {
|
||||
printf("using inter_dynamic\n");
|
||||
if (mca_coll_tuned_use_dynamic_rules) {
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:module_query using inter_dynamic"));
|
||||
to_use = &inter_dynamic;
|
||||
} else {
|
||||
printf("using inter_fixed\n");
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:module_query using inter_fixed"));
|
||||
to_use = &inter_fixed;
|
||||
}
|
||||
} else { /* is an intra comm */
|
||||
if (use_dynamic) {
|
||||
printf("using intra_dynamic\n");
|
||||
if (mca_coll_tuned_use_dynamic_rules) {
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:module_query using intra_dynamic"));
|
||||
to_use = &intra_dynamic;
|
||||
} else {
|
||||
printf("using intra_fixed\n");
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:module_query using intra_fixed"));
|
||||
to_use = &intra_fixed;
|
||||
}
|
||||
}
|
||||
@ -307,13 +295,10 @@ mca_coll_tuned_module_init(struct ompi_communicator_t *comm)
|
||||
int size;
|
||||
struct mca_coll_base_comm_t *data;
|
||||
/* fanout parameters */
|
||||
int tree_fanout_default = 0;
|
||||
int chain_fanout_default = 0;
|
||||
int pre_allocate_limit = -1;
|
||||
int pre_allocate = 1;
|
||||
|
||||
|
||||
printf("Tuned init module called.\n");
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:module_init called."));
|
||||
|
||||
/* This routine will become more complex and might have to be */
|
||||
/* broken into more sections/function calls */
|
||||
@ -347,15 +332,8 @@ mca_coll_tuned_module_init(struct ompi_communicator_t *comm)
|
||||
*
|
||||
*/
|
||||
|
||||
if (OMPI_SUCCESS !=
|
||||
mca_base_param_lookup_int(mca_coll_tuned_preallocate_memory_comm_size_limit_param,
|
||||
&pre_allocate_limit)) {
|
||||
printf("No pre_allocate param found!\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* if we are within the memory/size limit, allow preallocated data */
|
||||
if (size<=pre_allocate_limit) {
|
||||
if (size<=mca_coll_tuned_preallocate_memory_comm_size_limit) {
|
||||
data = malloc(sizeof(struct mca_coll_base_comm_t) +
|
||||
(sizeof(ompi_request_t *) * size * 2));
|
||||
|
||||
@ -379,23 +357,10 @@ mca_coll_tuned_module_init(struct ompi_communicator_t *comm)
|
||||
* guess the initial topologies to use rank 0 as root
|
||||
*/
|
||||
|
||||
/* get default fanouts is made available via the MCA */
|
||||
if (OMPI_SUCCESS !=
|
||||
mca_base_param_lookup_int(mca_coll_tuned_init_tree_fanout_param,
|
||||
&tree_fanout_default)) {
|
||||
printf("warning: no mca_coll_tuned_init_tree_fanout_param found?\n");
|
||||
}
|
||||
if (OMPI_SUCCESS !=
|
||||
mca_base_param_lookup_int(mca_coll_tuned_init_chain_fanout_param,
|
||||
&chain_fanout_default)) {
|
||||
printf("warning: no mca_coll_tuned_init_chain_fanout_param found?\n");
|
||||
}
|
||||
|
||||
|
||||
/* general n fan out tree */
|
||||
data->cached_ntree = ompi_coll_tuned_topo_build_tree (tree_fanout_default, comm, 0);
|
||||
data->cached_ntree = ompi_coll_tuned_topo_build_tree (mca_coll_tuned_init_tree_fanout, comm, 0);
|
||||
data->cached_ntree_root = 0;
|
||||
data->cached_ntree_fanout = tree_fanout_default;
|
||||
data->cached_ntree_fanout = mca_coll_tuned_init_tree_fanout;
|
||||
|
||||
/* binary tree */
|
||||
data->cached_bintree = ompi_coll_tuned_topo_build_tree (2, comm, 0);
|
||||
@ -413,9 +378,9 @@ mca_coll_tuned_module_init(struct ompi_communicator_t *comm)
|
||||
* will probably change how we cache this later, for now a midsize
|
||||
* GEF
|
||||
*/
|
||||
data->cached_chain = ompi_coll_tuned_topo_build_chain (chain_fanout_default, comm, 0);
|
||||
data->cached_chain = ompi_coll_tuned_topo_build_chain (mca_coll_tuned_init_chain_fanout, comm, 0);
|
||||
data->cached_chain_root = 0;
|
||||
data->cached_chain_fanout = chain_fanout_default;
|
||||
data->cached_chain_fanout = mca_coll_tuned_init_chain_fanout;
|
||||
|
||||
/* standard pipeline */
|
||||
data->cached_pipeline = ompi_coll_tuned_topo_build_chain (1, comm, 0);
|
||||
@ -425,7 +390,7 @@ mca_coll_tuned_module_init(struct ompi_communicator_t *comm)
|
||||
|
||||
comm->c_coll_selected_data = data;
|
||||
|
||||
printf("Tuned looks like it is in use :)\n");
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:module_init Tuned is in use"));
|
||||
return to_use;
|
||||
}
|
||||
|
||||
|
@ -277,7 +277,7 @@ int mca_coll_tuned_reduce_intra_chain( void *sendbuf, void *recvbuf, int count,
|
||||
for (segindex = 0; segindex < num_segments; segindex++) {
|
||||
if (segindex < num_segments-1) sendcount = segcount;
|
||||
else sendcount = count - segindex*segcount;
|
||||
ret = MCA_PML_CALL( send(sendbuf+segindex*realsegsize, sendcount,
|
||||
ret = MCA_PML_CALL( send((char*)sendbuf+segindex*realsegsize, sendcount,
|
||||
datatype, chain->chain_prev,
|
||||
MCA_COLL_BASE_TAG_REDUCE, MCA_PML_BASE_SEND_STANDARD, comm) );
|
||||
if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
|
||||
@ -288,8 +288,7 @@ int mca_coll_tuned_reduce_intra_chain( void *sendbuf, void *recvbuf, int count,
|
||||
|
||||
/* error handler */
|
||||
error_hndl:
|
||||
opal_output( 0, "ERROR_HNDL: node %d file %s line %d error %d\n",
|
||||
rank, __FILE__, line, ret );
|
||||
OPAL_OUTPUT (( mca_coll_tuned_stream, "ERROR_HNDL: node %d file %s line %d error %d\n", rank, __FILE__, line, ret ));
|
||||
if( inbuf != NULL ) {
|
||||
if( inbuf[0] != NULL ) free(inbuf[0]);
|
||||
if( inbuf[1] != NULL ) free(inbuf[1]);
|
||||
|
@ -49,7 +49,7 @@ int mca_coll_tuned_reduce_intra_dec_dynamic( void *sendbuf, void *recvbuf,
|
||||
int contig;
|
||||
int dsize;
|
||||
|
||||
printf("mca_coll_tuned_reduce_intra_dec_dynamic\n");
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream, "coll:tuned:reduce_intra_dec_dynamic"));
|
||||
|
||||
size = ompi_comm_size(comm);
|
||||
rank = ompi_comm_rank(comm);
|
||||
|
@ -48,7 +48,7 @@ int mca_coll_tuned_reduce_intra_dec_fixed( void *sendbuf, void *recvbuf,
|
||||
int contig;
|
||||
int dsize;
|
||||
|
||||
printf("mca_coll_tuned_reduce_intra_dec_fixed\n");
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream, "mca_coll_tuned_reduce_intra_dec_fixed"));
|
||||
|
||||
size = ompi_comm_size(comm);
|
||||
rank = ompi_comm_rank(comm);
|
||||
|
@ -23,6 +23,7 @@
|
||||
#include "mca/coll/coll.h"
|
||||
#include "mca/coll/base/coll_tags.h"
|
||||
#include "mca/pml/pml.h"
|
||||
#include "coll_tuned.h"
|
||||
#include "coll_tuned_topo.h"
|
||||
|
||||
/*
|
||||
@ -71,14 +72,14 @@ ompi_coll_tuned_topo_build_tree( int fanout,
|
||||
int i;
|
||||
ompi_coll_tree_t* tree;
|
||||
|
||||
printf("Building tuned topo tree: fo %d rt %d\n", fanout, root);
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream, "coll:tuned:topo_build_tree Building fo %d rt %d", fanout, root));
|
||||
|
||||
if (fanout<1) {
|
||||
printf("ompi_coll_tuned_topo_build_tree: invalid fanout %d\n", fanout);
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream, "coll:tuned:topo_build_tree invalid fanout %d", fanout));
|
||||
return NULL;
|
||||
}
|
||||
if (fanout>MAXTREEFANOUT) {
|
||||
printf("ompi_coll_tuned_topo_build_tree: invalid fanout %d bigger than max %d\n", fanout, MAXTREEFANOUT);
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:topo_build_tree invalid fanout %d bigger than max %d", fanout, MAXTREEFANOUT));
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -90,8 +91,7 @@ ompi_coll_tuned_topo_build_tree( int fanout,
|
||||
|
||||
tree = (ompi_coll_tree_t*)malloc(sizeof(ompi_coll_tree_t));
|
||||
if (!tree) {
|
||||
printf("PANIC:ompi_coll_tuned_topo_build_tree:out of memory\n");
|
||||
fflush(stdout);
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:topo_build_tree PANIC::out of memory"));
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -190,7 +190,7 @@ ompi_coll_tuned_topo_build_bmtree( struct ompi_communicator_t* comm,
|
||||
ompi_coll_tree_t *bmtree;
|
||||
int i;
|
||||
|
||||
printf("Building tuned topo bmtree: rt %d\n", root);
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:topo:build_bmtree rt %d", root));
|
||||
|
||||
/*
|
||||
* Get size and rank of the process in this communicator
|
||||
@ -202,8 +202,7 @@ ompi_coll_tuned_topo_build_bmtree( struct ompi_communicator_t* comm,
|
||||
|
||||
bmtree = (ompi_coll_tree_t*)malloc(sizeof(ompi_coll_tree_t));
|
||||
if (!bmtree) {
|
||||
printf("PANIC:ompi_coll_tuned_topo_build_bmtree:out of memory\n");
|
||||
fflush(stdout);
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:topo:build_bmtree PANIC out of memory"));
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -234,7 +233,7 @@ ompi_coll_tuned_topo_build_bmtree( struct ompi_communicator_t* comm,
|
||||
remote += root;
|
||||
if( remote >= size ) remote -= size;
|
||||
if (childs==MAXTREEFANOUT) {
|
||||
printf("ompi_coll_tuned_topo_build_bmtree: max fanout incorrect %d needed %d\n", MAXTREEFANOUT, childs);
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:topo:build_bmtree max fanout incorrect %d needed %d", MAXTREEFANOUT, childs));
|
||||
return NULL;
|
||||
}
|
||||
bmtree->tree_next[childs] = remote;
|
||||
@ -258,7 +257,7 @@ ompi_coll_tuned_topo_build_chain( int fanout,
|
||||
int mark,head,len;
|
||||
ompi_coll_chain_t *chain;
|
||||
|
||||
printf("Building tuned topo chain: fo %d rt %d\n", fanout, root);
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:topo:build_chain fo %d rt %d", fanout, root));
|
||||
|
||||
/*
|
||||
* Get size and rank of the process in this communicator
|
||||
@ -270,7 +269,7 @@ ompi_coll_tuned_topo_build_chain( int fanout,
|
||||
return NULL;
|
||||
}
|
||||
if (fanout>MAXTREEFANOUT) {
|
||||
printf("ompi_coll_tuned_topo_build_chain: invalid fanout %d bigger than max %d\n", fanout, MAXTREEFANOUT);
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:topo:build_chain invalid fanout %d bigger than max %d", fanout, MAXTREEFANOUT));
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -279,7 +278,7 @@ ompi_coll_tuned_topo_build_chain( int fanout,
|
||||
*/
|
||||
chain = (ompi_coll_chain_t*)malloc( sizeof(ompi_coll_chain_t) );
|
||||
if (!chain) {
|
||||
printf("PANIC:ompi_coll_tuned_topo_build_chain:out of memory\n");
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:topo:build_chain PANIC out of memory"));
|
||||
fflush(stdout);
|
||||
return NULL;
|
||||
}
|
||||
@ -421,24 +420,22 @@ int ompi_coll_tuned_topo_destroy_chain( ompi_coll_chain_t** chain )
|
||||
/**
 * Debug helper: report the fields of a tree topology through OPAL_OUTPUT
 * on mca_coll_tuned_stream, then issue one OPAL_OUTPUT call per entry of
 * tree->tree_next (tree_nextsize entries).
 *
 * @param tree  tree to describe; a NULL tree is reported and skipped
 * @param rank  value echoed at the start of the summary message
 * @return always 0
 */
int ompi_coll_tuned_topo_dump_tree (ompi_coll_tree_t* tree, int rank)
{
    int i;

    /* the topo_build_* routines return NULL on failure, so a NULL tree can reach us */
    if (NULL == tree) {
        OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:topo:topo_dump_tree %1d NULL tree", rank));
        return (0);
    }

    /* argument order must follow the labels in the format string:
     * root, fanout, BM (bmtree flag), nextsize, prev */
    OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:topo:topo_dump_tree %1d tree root %d fanout %d BM %1d nextsize %d prev %d", rank,
                 tree->tree_root, tree->tree_fanout, tree->tree_bmtree, tree->tree_nextsize, tree->tree_prev));

    /* a zero nextsize simply skips the loop */
    for (i = 0; i < tree->tree_nextsize; i++) {
        OPAL_OUTPUT((mca_coll_tuned_stream,"[%1d] %d", i, tree->tree_next[i]));
    }

    return (0);
}
|
||||
|
||||
/**
 * Debug helper: report the fields of a chain topology through OPAL_OUTPUT
 * on mca_coll_tuned_stream, then issue one OPAL_OUTPUT call per entry of
 * chain->chain_next (chain_nextsize entries).
 *
 * @param chain  chain to describe; a NULL chain is reported and skipped
 * @param rank   value echoed at the start of the summary message
 * @return always 0
 */
int ompi_coll_tuned_topo_dump_chain (ompi_coll_chain_t* chain, int rank)
{
    int i;

    /* ompi_coll_tuned_topo_build_chain returns NULL on failure, so a NULL chain can reach us */
    if (NULL == chain) {
        OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:topo:topo_dump_chain %1d NULL chain", rank));
        return (0);
    }

    /* chain_numchain is the value reported under the "fanout" label */
    OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:topo:topo_dump_chain %1d chain root %d fanout %d nextsize %d prev %d\n", rank,
                 chain->chain_root, chain->chain_numchain, chain->chain_nextsize, chain->chain_prev));

    /* a zero nextsize simply skips the loop */
    for (i = 0; i < chain->chain_nextsize; i++) {
        OPAL_OUTPUT((mca_coll_tuned_stream,"[%1d] %d ", i, chain->chain_next[i]));
    }

    return (0);
}
|
||||
|
||||
|
@ -55,7 +55,7 @@ ompi_status_public_t statuses[2];
|
||||
return (MPI_SUCCESS);
|
||||
|
||||
error_handler:
|
||||
fprintf(stderr,"%s:%d: Error %d occurred\n",__FILE__,line,err);
|
||||
OPAL_OUTPUT ((mca_coll_tuned_stream, "%s:%d: Error %d occurred\n",__FILE__,line,err));
|
||||
return (err);
|
||||
}
|
||||
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user