diff --git a/ompi/mca/coll/tuned/coll_tuned_allreduce.c b/ompi/mca/coll/tuned/coll_tuned_allreduce.c index 09436a5d68..4bc7c45a55 100644 --- a/ompi/mca/coll/tuned/coll_tuned_allreduce.c +++ b/ompi/mca/coll/tuned/coll_tuned_allreduce.c @@ -42,9 +42,9 @@ */ int ompi_coll_tuned_allreduce_intra_nonoverlapping(void *sbuf, void *rbuf, int count, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, - struct ompi_communicator_t *comm) + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, + struct ompi_communicator_t *comm) { int err; int rank; @@ -97,16 +97,15 @@ ompi_coll_tuned_allreduce_intra_nonoverlapping(void *sbuf, void *rbuf, int count */ int ompi_coll_tuned_allreduce_intra_basic_linear(void *sbuf, void *rbuf, int count, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, - struct ompi_communicator_t *comm) + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, + struct ompi_communicator_t *comm) { int err; int rank; rank = ompi_comm_rank(comm); - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:allreduce_intra_basic_linear rank %d", rank)); /* Reduce to 0 and broadcast. */ @@ -144,63 +143,63 @@ int ompi_coll_tuned_allreduce_intra_check_forced_init (coll_tuned_force_algorith int rc; int max_alg = 2; - ompi_coll_tuned_forced_max_algorithms[ALLREDUCE] = max_alg; + ompi_coll_tuned_forced_max_algorithms[ALLREDUCE] = max_alg; -rc = mca_base_param_reg_int (&mca_coll_tuned_component.super.collm_version, - "allreduce_algorithm_count", - "Number of allreduce algorithms available", - false, true, max_alg, NULL); + rc = mca_base_param_reg_int (&mca_coll_tuned_component.super.collm_version, + "allreduce_algorithm_count", + "Number of allreduce algorithms available", + false, true, max_alg, NULL); -mca_param_indices->algorithm_param_index = mca_base_param_reg_int( - &mca_coll_tuned_component.super.collm_version, - "allreduce_algorithm", - "Which allreduce algorithm is used. 
Can be locked down to any of: 0 ignore, 1 basic linear, 2 nonoverlapping (tuned reduce + tuned bcast)", - false, false, 0, NULL); + mca_param_indices->algorithm_param_index = mca_base_param_reg_int( + &mca_coll_tuned_component.super.collm_version, + "allreduce_algorithm", + "Which allreduce algorithm is used. Can be locked down to any of: 0 ignore, 1 basic linear, 2 nonoverlapping (tuned reduce + tuned bcast)", + false, false, 0, NULL); -mca_param_indices->segsize_param_index = mca_base_param_reg_int( - &mca_coll_tuned_component.super.collm_version, - "allreduce_algorithm_segmentsize", - "Segment size in bytes used by default for allreduce algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation.", - false, false, 0, NULL); + mca_param_indices->segsize_param_index = mca_base_param_reg_int( + &mca_coll_tuned_component.super.collm_version, + "allreduce_algorithm_segmentsize", + "Segment size in bytes used by default for allreduce algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation.", + false, false, 0, NULL); -mca_param_indices->tree_fanout_param_index = mca_base_param_reg_int( - &mca_coll_tuned_component.super.collm_version, - "allreduce_algorithm_tree_fanout", - "Fanout for n-tree used for allreduce algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation.", - false, false, ompi_coll_tuned_init_tree_fanout, /* get system wide default */ - NULL); + mca_param_indices->tree_fanout_param_index = mca_base_param_reg_int( + &mca_coll_tuned_component.super.collm_version, + "allreduce_algorithm_tree_fanout", + "Fanout for n-tree used for allreduce algorithms. 
Only has meaning if algorithm is forced and supports n-tree topo based operation.", + false, false, ompi_coll_tuned_init_tree_fanout, /* get system wide default */ + NULL); -mca_param_indices->chain_fanout_param_index = mca_base_param_reg_int( - &mca_coll_tuned_component.super.collm_version, - "allreduce_algorithm_chain_fanout", - "Fanout for chains used for allreduce algorithms. Only has meaning if algorithm is forced and supports chain topo based operation.", - false, false, - ompi_coll_tuned_init_chain_fanout, /* get system wide default */ - NULL); + mca_param_indices->chain_fanout_param_index = mca_base_param_reg_int( + &mca_coll_tuned_component.super.collm_version, + "allreduce_algorithm_chain_fanout", + "Fanout for chains used for allreduce algorithms. Only has meaning if algorithm is forced and supports chain topo based operation.", + false, false, + ompi_coll_tuned_init_chain_fanout, /* get system wide default */ + NULL); -return (MPI_SUCCESS); + return (MPI_SUCCESS); } int ompi_coll_tuned_allreduce_intra_do_forced(void *sbuf, void *rbuf, int count, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, - struct ompi_communicator_t *comm) + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, + struct ompi_communicator_t *comm) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:allreduce_intra_do_forced selected algorithm %d", - comm->c_coll_selected_data->user_forced[ALLREDUCE].algorithm)); + OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:allreduce_intra_do_forced selected algorithm %d", + comm->c_coll_selected_data->user_forced[ALLREDUCE].algorithm)); -switch (comm->c_coll_selected_data->user_forced[ALLREDUCE].algorithm) { + switch (comm->c_coll_selected_data->user_forced[ALLREDUCE].algorithm) { case (0): return ompi_coll_tuned_allreduce_intra_dec_fixed (sbuf, rbuf, count, dtype, op, comm); case (1): return ompi_coll_tuned_allreduce_intra_basic_linear (sbuf, rbuf, count, dtype, op, comm); case (2): return 
ompi_coll_tuned_allreduce_intra_nonoverlapping (sbuf, rbuf, count, dtype, op, comm); default: OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:allreduce_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?", - comm->c_coll_selected_data->user_forced[ALLREDUCE].algorithm, - ompi_coll_tuned_forced_max_algorithms[ALLREDUCE])); + comm->c_coll_selected_data->user_forced[ALLREDUCE].algorithm, + ompi_coll_tuned_forced_max_algorithms[ALLREDUCE])); return (MPI_ERR_ARG); } /* switch */ @@ -208,25 +207,23 @@ switch (comm->c_coll_selected_data->user_forced[ALLREDUCE].algorithm) { int ompi_coll_tuned_allreduce_intra_do_this(void *sbuf, void *rbuf, int count, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, - struct ompi_communicator_t *comm, - int algorithm, int faninout, int segsize) + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, + struct ompi_communicator_t *comm, + int algorithm, int faninout, int segsize) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:allreduce_intra_do_this algorithm %d topo fan in/out %d segsize %d", - algorithm, faninout, segsize)); + OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:allreduce_intra_do_this algorithm %d topo fan in/out %d segsize %d", + algorithm, faninout, segsize)); -switch (algorithm) { + switch (algorithm) { case (0): return ompi_coll_tuned_allreduce_intra_dec_fixed (sbuf, rbuf, count, dtype, op, comm); case (1): return ompi_coll_tuned_allreduce_intra_basic_linear (sbuf, rbuf, count, dtype, op, comm); case (2): return ompi_coll_tuned_allreduce_intra_nonoverlapping (sbuf, rbuf, count, dtype, op, comm); default: OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:allreduce_intra_do_this attempt to select algorithm %d when only 0-%d is valid?", - algorithm, ompi_coll_tuned_forced_max_algorithms[ALLREDUCE])); + algorithm, ompi_coll_tuned_forced_max_algorithms[ALLREDUCE])); return (MPI_ERR_ARG); } /* switch */ } - - diff --git a/ompi/mca/coll/tuned/coll_tuned_alltoall.c 
b/ompi/mca/coll/tuned/coll_tuned_alltoall.c index f35147ee0d..08aa4238ac 100644 --- a/ompi/mca/coll/tuned/coll_tuned_alltoall.c +++ b/ompi/mca/coll/tuned/coll_tuned_alltoall.c @@ -31,10 +31,10 @@ #include "coll_tuned_util.h" int ompi_coll_tuned_alltoall_intra_pairwise(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void* rbuf, int rcount, - struct ompi_datatype_t *rdtype, - struct ompi_communicator_t *comm) + struct ompi_datatype_t *sdtype, + void* rbuf, int rcount, + struct ompi_datatype_t *rdtype, + struct ompi_communicator_t *comm) { int line = -1, err = 0; int rank, size, step; @@ -54,37 +54,37 @@ int ompi_coll_tuned_alltoall_intra_pairwise(void *sbuf, int scount, if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; } - /* Perform pairwise exchange - starting from 1 so the local copy is last */ - for (step = 1; step < size+1; step++) { + /* Perform pairwise exchange - starting from 1 so the local copy is last */ + for (step = 1; step < size+1; step++) { - /* who do we talk to in this step? */ - sendto = (rank+step)%size; - recvfrom = (rank+size-step)%size; + /* who do we talk to in this step? */ + sendto = (rank+step)%size; + recvfrom = (rank+size-step)%size; - /* where from are we sending and where from are we receiving actual data ? */ - tmpsend = (char*)sbuf+sendto*sext*scount; - tmprecv = (char*)rbuf+recvfrom*rext*rcount; + /* where from are we sending and where from are we receiving actual data ? 
*/ + tmpsend = (char*)sbuf+sendto*sext*scount; + tmprecv = (char*)rbuf+recvfrom*rext*rcount; - /* send and receive */ - err = ompi_coll_tuned_sendrecv( tmpsend, scount, sdtype, sendto, MCA_COLL_BASE_TAG_ALLTOALL, - tmprecv, rcount, rdtype, recvfrom, MCA_COLL_BASE_TAG_ALLTOALL, - comm, MPI_STATUS_IGNORE, rank); - if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; } - } + /* send and receive */ + err = ompi_coll_tuned_sendrecv( tmpsend, scount, sdtype, sendto, MCA_COLL_BASE_TAG_ALLTOALL, + tmprecv, rcount, rdtype, recvfrom, MCA_COLL_BASE_TAG_ALLTOALL, + comm, MPI_STATUS_IGNORE, rank); + if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; } + } - return MPI_SUCCESS; + return MPI_SUCCESS; err_hndl: OPAL_OUTPUT((ompi_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", __FILE__,line,err,rank)); - return err; + return err; } int ompi_coll_tuned_alltoall_intra_bruck(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void* rbuf, int rcount, - struct ompi_datatype_t *rdtype, - struct ompi_communicator_t *comm) + struct ompi_datatype_t *sdtype, + void* rbuf, int rcount, + struct ompi_datatype_t *rdtype, + struct ompi_communicator_t *comm) { int i, k, line = -1; int rank, size; @@ -145,107 +145,107 @@ int ompi_coll_tuned_alltoall_intra_bruck(void *sbuf, int scount, err = ompi_ddt_copy_content_same_ddt (sdtype, (int32_t) ((size-rank)*scount), tmpbuf, ((char*)sbuf)+rank*scount*sext); if (err<0) { - line = __LINE__; err = -1; goto err_hndl; + line = __LINE__; err = -1; goto err_hndl; } if (rank != 0) { err = ompi_ddt_copy_content_same_ddt (sdtype, (int32_t) (rank*scount), - tmpbuf+(size-rank)*scount*sext, (char*)sbuf); - if (err<0) { - line = __LINE__; err = -1; goto err_hndl; - } - } - - /* perform communication step */ - for (distance = 1; distance < size; distance<<=1) { - - /* send data to "sendto" */ - sendto = (rank+distance)%size; - recvfrom = (rank-distance+size)%size; - packsize = 0; - k = 0; - - /* create indexed datatype */ - for (i = 1; i < 
size; i++) { - if ((i&distance) == distance) { - displs[k] = i*scount; blen[k] = scount; - k++; - } - } - /* Set indexes and displacements */ - err = MPI_Type_indexed(k, blen, displs, sdtype, &iddt); - if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; } - /* Commit the new datatype */ - err = MPI_Type_commit(&iddt); - if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; } - - /* have the new distribution ddt, pack and exchange data */ - err = MPI_Pack(tmpbuf, 1, iddt, packbuf, maxpacksize, &packsize, comm); - if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; } - - /* Sendreceive */ - err = ompi_coll_tuned_sendrecv ( packbuf, packsize, MPI_PACKED, sendto, - MCA_COLL_BASE_TAG_ALLTOALL, - rbuf, packsize, MPI_PACKED, recvfrom, - MCA_COLL_BASE_TAG_ALLTOALL, - comm, MPI_STATUS_IGNORE, rank); - if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; } - - /* Unpack data from rbuf to tmpbuf */ - position = 0; - err = MPI_Unpack(rbuf, packsize, &position, - tmpbuf, 1, iddt, comm); - if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; } - - /* free ddt */ - err = MPI_Type_free(&iddt); - if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; } - } /* end of for (distance = 1... 
*/ - - /* Step 3 - local rotation - */ - for (i = 0; i < size; i++) { - - err = ompi_ddt_copy_content_same_ddt (rdtype, (int32_t) rcount, - ((char*)rbuf)+(((rank-i+size)%size)*rcount*rext), - tmpbuf+i*rcount*rext); + tmpbuf+(size-rank)*scount*sext, (char*)sbuf); if (err<0) { line = __LINE__; err = -1; goto err_hndl; } - } + } + + /* perform communication step */ + for (distance = 1; distance < size; distance<<=1) { + + /* send data to "sendto" */ + sendto = (rank+distance)%size; + recvfrom = (rank-distance+size)%size; + packsize = 0; + k = 0; + + /* create indexed datatype */ + for (i = 1; i < size; i++) { + if ((i&distance) == distance) { + displs[k] = i*scount; blen[k] = scount; + k++; + } + } + /* Set indexes and displacements */ + err = MPI_Type_indexed(k, blen, displs, sdtype, &iddt); + if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; } + /* Commit the new datatype */ + err = MPI_Type_commit(&iddt); + if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; } + + /* have the new distribution ddt, pack and exchange data */ + err = MPI_Pack(tmpbuf, 1, iddt, packbuf, maxpacksize, &packsize, comm); + if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; } + + /* Sendreceive */ + err = ompi_coll_tuned_sendrecv ( packbuf, packsize, MPI_PACKED, sendto, + MCA_COLL_BASE_TAG_ALLTOALL, + rbuf, packsize, MPI_PACKED, recvfrom, + MCA_COLL_BASE_TAG_ALLTOALL, + comm, MPI_STATUS_IGNORE, rank); + if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; } + + /* Unpack data from rbuf to tmpbuf */ + position = 0; + err = MPI_Unpack(rbuf, packsize, &position, + tmpbuf, 1, iddt, comm); + if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; } + + /* free ddt */ + err = MPI_Type_free(&iddt); + if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; } + } /* end of for (distance = 1... 
*/ + + /* Step 3 - local rotation - */ + for (i = 0; i < size; i++) { + + err = ompi_ddt_copy_content_same_ddt (rdtype, (int32_t) rcount, + ((char*)rbuf)+(((rank-i+size)%size)*rcount*rext), + tmpbuf+i*rcount*rext); + if (err<0) { + line = __LINE__; err = -1; goto err_hndl; + } + } if (err<0) { - line = __LINE__; err = -1; goto err_hndl; + line = __LINE__; err = -1; goto err_hndl; } - /* Step 4 - clean up */ - if (tmpbuf != NULL) free(tmpbuf); - if (packbuf != NULL) free(packbuf); - if (weallocated) { - if (displs != NULL) free(displs); - if (blen != NULL) free(blen); - } + /* Step 4 - clean up */ + if (tmpbuf != NULL) free(tmpbuf); + if (packbuf != NULL) free(packbuf); + if (weallocated) { + if (displs != NULL) free(displs); + if (blen != NULL) free(blen); + } - return OMPI_SUCCESS; + return OMPI_SUCCESS; err_hndl: OPAL_OUTPUT((ompi_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", __FILE__,line,err,rank)); - if (tmpbuf != NULL) free(tmpbuf); - if (packbuf != NULL) free(packbuf); - if (weallocated) { - if (displs != NULL) free(displs); - if (blen != NULL) free(blen); - } - return err; + if (tmpbuf != NULL) free(tmpbuf); + if (packbuf != NULL) free(packbuf); + if (weallocated) { + if (displs != NULL) free(displs); + if (blen != NULL) free(blen); + } + return err; } int ompi_coll_tuned_alltoall_intra_two_procs(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void* rbuf, int rcount, - struct ompi_datatype_t *rdtype, - struct ompi_communicator_t *comm) + struct ompi_datatype_t *sdtype, + void* rbuf, int rcount, + struct ompi_datatype_t *rdtype, + struct ompi_communicator_t *comm) { int line = -1, err = 0; int rank; @@ -273,8 +273,8 @@ int ompi_coll_tuned_alltoall_intra_two_procs(void *sbuf, int scount, /* send and receive */ err = ompi_coll_tuned_sendrecv ( tmpsend, scount, sdtype, sendto, MCA_COLL_BASE_TAG_ALLTOALL, - tmprecv, rcount, rdtype, recvfrom, MCA_COLL_BASE_TAG_ALLTOALL, - comm, MPI_STATUS_IGNORE, rank ); + tmprecv, rcount, rdtype, recvfrom, 
MCA_COLL_BASE_TAG_ALLTOALL, + comm, MPI_STATUS_IGNORE, rank ); if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; } /* ddt sendrecv your own data */ @@ -287,7 +287,7 @@ int ompi_coll_tuned_alltoall_intra_two_procs(void *sbuf, int scount, err_hndl: OPAL_OUTPUT((ompi_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", __FILE__,line,err,rank)); - return err; + return err; } @@ -311,10 +311,10 @@ int ompi_coll_tuned_alltoall_intra_two_procs(void *sbuf, int scount, int ompi_coll_tuned_alltoall_intra_basic_linear(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void* rbuf, int rcount, - struct ompi_datatype_t *rdtype, - struct ompi_communicator_t *comm) + struct ompi_datatype_t *sdtype, + void* rbuf, int rcount, + struct ompi_datatype_t *rdtype, + struct ompi_communicator_t *comm) { int i; int rank; @@ -443,51 +443,51 @@ int ompi_coll_tuned_alltoall_intra_check_forced_init (coll_tuned_force_algorithm ompi_coll_tuned_forced_max_algorithms[ALLTOALL] = max_alg; -rc = mca_base_param_reg_int (&mca_coll_tuned_component.super.collm_version, - "alltoall_algorithm_count", - "Number of alltoall algorithms available", - false, true, max_alg, NULL); + rc = mca_base_param_reg_int (&mca_coll_tuned_component.super.collm_version, + "alltoall_algorithm_count", + "Number of alltoall algorithms available", + false, true, max_alg, NULL); -mca_param_indices->algorithm_param_index = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version, - "alltoall_algorithm", - "Which alltoall algorithm is used. Can be locked down to choice of: 0 ignore, 1 basic linear, 2 pairwise, 3: modified bruck, 4: two proc only.", - false, false, 0, NULL); + mca_param_indices->algorithm_param_index = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version, + "alltoall_algorithm", + "Which alltoall algorithm is used. 
Can be locked down to choice of: 0 ignore, 1 basic linear, 2 pairwise, 3: modified bruck, 4: two proc only.", + false, false, 0, NULL); -mca_param_indices->segsize_param_index = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version, - "alltoall_algorithm_segmentsize", - "Segment size in bytes used by default for alltoall algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation.", - false, false, 0, NULL); + mca_param_indices->segsize_param_index = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version, + "alltoall_algorithm_segmentsize", + "Segment size in bytes used by default for alltoall algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation.", + false, false, 0, NULL); -mca_param_indices->tree_fanout_param_index = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version, - "alltoall_algorithm_tree_fanout", - "Fanout for n-tree used for alltoall algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation.", - false, false, - ompi_coll_tuned_init_tree_fanout, /* get system wide default */ - NULL); + mca_param_indices->tree_fanout_param_index = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version, + "alltoall_algorithm_tree_fanout", + "Fanout for n-tree used for alltoall algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation.", + false, false, + ompi_coll_tuned_init_tree_fanout, /* get system wide default */ + NULL); -mca_param_indices->chain_fanout_param_index = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version, - "alltoall_algorithm_chain_fanout", - "Fanout for chains used for alltoall algorithms. 
Only has meaning if algorithm is forced and supports chain topo based operation.", - false, false, - ompi_coll_tuned_init_chain_fanout, /* get system wide default */ - NULL); + mca_param_indices->chain_fanout_param_index = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version, + "alltoall_algorithm_chain_fanout", + "Fanout for chains used for alltoall algorithms. Only has meaning if algorithm is forced and supports chain topo based operation.", + false, false, + ompi_coll_tuned_init_chain_fanout, /* get system wide default */ + NULL); -return (MPI_SUCCESS); + return (MPI_SUCCESS); } int ompi_coll_tuned_alltoall_intra_do_forced(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void* rbuf, int rcount, - struct ompi_datatype_t *rdtype, - struct ompi_communicator_t *comm) + struct ompi_datatype_t *sdtype, + void* rbuf, int rcount, + struct ompi_datatype_t *rdtype, + struct ompi_communicator_t *comm) { OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:alltoall_intra_do_forced selected algorithm %d", - comm->c_coll_selected_data->user_forced[ALLTOALL].algorithm)); + comm->c_coll_selected_data->user_forced[ALLTOALL].algorithm)); -switch (comm->c_coll_selected_data->user_forced[ALLTOALL].algorithm) { + switch (comm->c_coll_selected_data->user_forced[ALLTOALL].algorithm) { case (0): return ompi_coll_tuned_alltoall_intra_dec_fixed (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm); case (1): return ompi_coll_tuned_alltoall_intra_basic_linear (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm); case (2): return ompi_coll_tuned_alltoall_intra_pairwise (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm); @@ -495,7 +495,7 @@ switch (comm->c_coll_selected_data->user_forced[ALLTOALL].algorithm) { case (4): return ompi_coll_tuned_alltoall_intra_two_procs (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm); default: OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:alltoall_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?", - 
comm->c_coll_selected_data->user_forced[ALLTOALL].algorithm, ompi_coll_tuned_forced_max_algorithms[ALLTOALL])); + comm->c_coll_selected_data->user_forced[ALLTOALL].algorithm, ompi_coll_tuned_forced_max_algorithms[ALLTOALL])); return (MPI_ERR_ARG); } /* switch */ @@ -503,16 +503,16 @@ switch (comm->c_coll_selected_data->user_forced[ALLTOALL].algorithm) { int ompi_coll_tuned_alltoall_intra_do_this(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void* rbuf, int rcount, - struct ompi_datatype_t *rdtype, - struct ompi_communicator_t *comm, - int algorithm, int faninout, int segsize) + struct ompi_datatype_t *sdtype, + void* rbuf, int rcount, + struct ompi_datatype_t *rdtype, + struct ompi_communicator_t *comm, + int algorithm, int faninout, int segsize) { OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:alltoall_intra_do_this selected algorithm %d topo faninout %d segsize %d", - algorithm, faninout, segsize)); + algorithm, faninout, segsize)); -switch (algorithm) { + switch (algorithm) { case (0): return ompi_coll_tuned_alltoall_intra_dec_fixed (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm); case (1): return ompi_coll_tuned_alltoall_intra_basic_linear (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm); case (2): return ompi_coll_tuned_alltoall_intra_pairwise (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm); @@ -520,7 +520,7 @@ switch (algorithm) { case (4): return ompi_coll_tuned_alltoall_intra_two_procs (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm); default: OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:alltoall_intra_do_this attempt to select algorithm %d when only 0-%d is valid?", - algorithm, ompi_coll_tuned_forced_max_algorithms[ALLTOALL])); + algorithm, ompi_coll_tuned_forced_max_algorithms[ALLTOALL])); return (MPI_ERR_ARG); } /* switch */ diff --git a/ompi/mca/coll/tuned/coll_tuned_barrier.c b/ompi/mca/coll/tuned/coll_tuned_barrier.c index 2fddc46223..e3633621c1 100644 --- a/ompi/mca/coll/tuned/coll_tuned_barrier.c +++ 
b/ompi/mca/coll/tuned/coll_tuned_barrier.c @@ -65,7 +65,7 @@ int ompi_coll_tuned_barrier_intra_doublering(struct ompi_communicator_t *comm) if (rank > 0) { /* receive message from the left */ err = MCA_PML_CALL(recv((void*)NULL, 0, MPI_BYTE, left, - MCA_COLL_BASE_TAG_BARRIER, comm, MPI_STATUS_IGNORE)); + MCA_COLL_BASE_TAG_BARRIER, comm, MPI_STATUS_IGNORE)); if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; } } @@ -77,14 +77,14 @@ int ompi_coll_tuned_barrier_intra_doublering(struct ompi_communicator_t *comm) /* root needs to receive from the last node */ if (rank == 0) { err = MCA_PML_CALL(recv((void*)NULL, 0, MPI_BYTE, left, - MCA_COLL_BASE_TAG_BARRIER, comm, MPI_STATUS_IGNORE)); + MCA_COLL_BASE_TAG_BARRIER, comm, MPI_STATUS_IGNORE)); if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; } } /* Allow nodes to exit */ if (rank > 0) { /* post Receive from left */ err = MCA_PML_CALL(recv((void*)NULL, 0, MPI_BYTE, left, - MCA_COLL_BASE_TAG_BARRIER, comm, MPI_STATUS_IGNORE)); + MCA_COLL_BASE_TAG_BARRIER, comm, MPI_STATUS_IGNORE)); if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; } } @@ -96,15 +96,15 @@ int ompi_coll_tuned_barrier_intra_doublering(struct ompi_communicator_t *comm) /* rank 0 post receive from the last node */ if (rank == 0) { err = MCA_PML_CALL(recv((void*)NULL, 0, MPI_BYTE, left, - MCA_COLL_BASE_TAG_BARRIER, comm, MPI_STATUS_IGNORE)); + MCA_COLL_BASE_TAG_BARRIER, comm, MPI_STATUS_IGNORE)); if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; } } return MPI_SUCCESS; err_hndl: - OPAL_OUTPUT((ompi_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", __FILE__,line,err,rank)); - return err; + OPAL_OUTPUT((ompi_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", __FILE__,line,err,rank)); + return err; } @@ -131,13 +131,13 @@ int ompi_coll_tuned_barrier_intra_recursivedoubling(struct ompi_communicator_t * if (rank >= adjsize) { /* send message to lower ranked node */ err = MCA_PML_CALL(send((void*)NULL, 0, MPI_BYTE, 
rank-adjsize, - MCA_COLL_BASE_TAG_BARRIER, MCA_PML_BASE_SEND_STANDARD, comm)); + MCA_COLL_BASE_TAG_BARRIER, MCA_PML_BASE_SEND_STANDARD, comm)); if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl;} /* post receive from lower ranked node */ err = MCA_PML_CALL(recv((void*)NULL, 0, MPI_BYTE, rank-adjsize, - MCA_COLL_BASE_TAG_BARRIER, comm, MPI_STATUS_IGNORE)); + MCA_COLL_BASE_TAG_BARRIER, comm, MPI_STATUS_IGNORE)); if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl;} @@ -145,7 +145,7 @@ int ompi_coll_tuned_barrier_intra_recursivedoubling(struct ompi_communicator_t * /* receive message from high level rank */ err = MCA_PML_CALL(recv((void*)NULL, 0, MPI_BYTE, rank+adjsize, - MCA_COLL_BASE_TAG_BARRIER, comm, MPI_STATUS_IGNORE)); + MCA_COLL_BASE_TAG_BARRIER, comm, MPI_STATUS_IGNORE)); if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl;} } @@ -160,8 +160,8 @@ int ompi_coll_tuned_barrier_intra_recursivedoubling(struct ompi_communicator_t * if (remote >= adjsize) continue; err = ompi_coll_tuned_sendrecv_localcompleted (NULL, 0, MPI_BYTE, remote, MCA_COLL_BASE_TAG_BARRIER, - NULL, 0, MPI_BYTE, remote, MCA_COLL_BASE_TAG_BARRIER, - comm, MPI_STATUS_IGNORE, rank); + NULL, 0, MPI_BYTE, remote, MCA_COLL_BASE_TAG_BARRIER, + comm, MPI_STATUS_IGNORE, rank); if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl;} } @@ -173,7 +173,7 @@ int ompi_coll_tuned_barrier_intra_recursivedoubling(struct ompi_communicator_t * /* send enter message to higher ranked node */ err = MCA_PML_CALL(send((void*)NULL, 0, MPI_BYTE, rank+adjsize, - MCA_COLL_BASE_TAG_BARRIER, MCA_PML_BASE_SEND_SYNCHRONOUS, comm)); + MCA_COLL_BASE_TAG_BARRIER, MCA_PML_BASE_SEND_SYNCHRONOUS, comm)); if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl;} } @@ -181,9 +181,9 @@ int ompi_coll_tuned_barrier_intra_recursivedoubling(struct ompi_communicator_t * return MPI_SUCCESS; - err_hndl: - OPAL_OUTPUT((ompi_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", __FILE__,line,err,rank)); - return err; + 
err_hndl: + OPAL_OUTPUT((ompi_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", __FILE__,line,err,rank)); + return err; } @@ -206,16 +206,16 @@ int ompi_coll_tuned_barrier_intra_bruck(struct ompi_communicator_t *comm) from = (rank + size - distance)%size; to = (rank + distance)%size; err = ompi_coll_tuned_sendrecv_localcompleted (NULL, 0, MPI_BYTE, to, MCA_COLL_BASE_TAG_BARRIER, - NULL, 0, MPI_BYTE, from, MCA_COLL_BASE_TAG_BARRIER, - comm, MPI_STATUS_IGNORE, rank); - if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl;} + NULL, 0, MPI_BYTE, from, MCA_COLL_BASE_TAG_BARRIER, + comm, MPI_STATUS_IGNORE, rank); + if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl;} } return MPI_SUCCESS; - err_hndl: - OPAL_OUTPUT((ompi_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", __FILE__,line,err,rank)); - return err; + err_hndl: + OPAL_OUTPUT((ompi_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", __FILE__,line,err,rank)); + return err; } @@ -233,13 +233,13 @@ int ompi_coll_tuned_barrier_intra_two_procs(struct ompi_communicator_t *comm) if (0==rank) { err = ompi_coll_tuned_sendrecv_localcompleted (NULL, 0, MPI_BYTE, 1, MCA_COLL_BASE_TAG_BARRIER, - NULL, 0, MPI_BYTE, 1, MCA_COLL_BASE_TAG_BARRIER, - comm, MPI_STATUS_IGNORE, rank); + NULL, 0, MPI_BYTE, 1, MCA_COLL_BASE_TAG_BARRIER, + comm, MPI_STATUS_IGNORE, rank); } else { err = ompi_coll_tuned_sendrecv_localcompleted (NULL, 0, MPI_BYTE, 0, MCA_COLL_BASE_TAG_BARRIER, - NULL, 0, MPI_BYTE, 0, MCA_COLL_BASE_TAG_BARRIER, - comm, MPI_STATUS_IGNORE, rank); + NULL, 0, MPI_BYTE, 0, MCA_COLL_BASE_TAG_BARRIER, + comm, MPI_STATUS_IGNORE, rank); } return (err); @@ -334,39 +334,39 @@ int ompi_coll_tuned_barrier_intra_check_forced_init (coll_tuned_force_algorithm_ int rc; int max_alg = 5; - ompi_coll_tuned_forced_max_algorithms[BARRIER] = max_alg; + ompi_coll_tuned_forced_max_algorithms[BARRIER] = max_alg; -rc = mca_base_param_reg_int (&mca_coll_tuned_component.super.collm_version, - "barrier_algorithm_count", - 
"Number of barrier algorithms available", - false, true, max_alg, NULL); + rc = mca_base_param_reg_int (&mca_coll_tuned_component.super.collm_version, + "barrier_algorithm_count", + "Number of barrier algorithms available", + false, true, max_alg, NULL); -mca_param_indices->algorithm_param_index = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version, - "barrier_algorithm", - "Which barrier algorithm is used. Can be locked down to choice of: 0 ignore, 1 linear, 2 double ring, 3: recursive doubling 4: bruck, 5: two proc only", - false, false, 0, NULL); + mca_param_indices->algorithm_param_index = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version, + "barrier_algorithm", + "Which barrier algorithm is used. Can be locked down to choice of: 0 ignore, 1 linear, 2 double ring, 3: recursive doubling 4: bruck, 5: two proc only", + false, false, 0, NULL); -return (MPI_SUCCESS); + return (MPI_SUCCESS); } int ompi_coll_tuned_barrier_intra_do_forced(struct ompi_communicator_t *comm) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:barrier_intra_do_forced selected algorithm %d", - comm->c_coll_selected_data->user_forced[BARRIER].algorithm)); + OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:barrier_intra_do_forced selected algorithm %d", + comm->c_coll_selected_data->user_forced[BARRIER].algorithm)); -switch (comm->c_coll_selected_data->user_forced[BARRIER].algorithm) { + switch (comm->c_coll_selected_data->user_forced[BARRIER].algorithm) { case (0): return ompi_coll_tuned_barrier_intra_dec_fixed (comm); case (1): return ompi_coll_tuned_barrier_intra_basic_linear (comm); case (2): return ompi_coll_tuned_barrier_intra_doublering (comm); case (3): return ompi_coll_tuned_barrier_intra_recursivedoubling (comm); case (4): return ompi_coll_tuned_barrier_intra_bruck (comm); case (5): return ompi_coll_tuned_barrier_intra_two_procs (comm); -/* case (6): return ompi_coll_tuned_barrier_intra_bmtree_step (comm); */ + /* case (6): return 
ompi_coll_tuned_barrier_intra_bmtree_step (comm); */ default: OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:barrier_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?", - comm->c_coll_selected_data->user_forced[BARRIER].algorithm, ompi_coll_tuned_forced_max_algorithms[BARRIER])); + comm->c_coll_selected_data->user_forced[BARRIER].algorithm, ompi_coll_tuned_forced_max_algorithms[BARRIER])); return (MPI_ERR_ARG); } /* switch */ @@ -375,19 +375,19 @@ switch (comm->c_coll_selected_data->user_forced[BARRIER].algorithm) { int ompi_coll_tuned_barrier_intra_do_this (struct ompi_communicator_t *comm, int algorithm, int faninout, int segsize) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:barrier_intra_do_this selected algorithm %d topo fanin/out%d", algorithm, faninout)); + OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:barrier_intra_do_this selected algorithm %d topo fanin/out%d", algorithm, faninout)); -switch (algorithm) { + switch (algorithm) { case (0): return ompi_coll_tuned_barrier_intra_dec_fixed (comm); case (1): return ompi_coll_tuned_barrier_intra_basic_linear (comm); case (2): return ompi_coll_tuned_barrier_intra_doublering (comm); case (3): return ompi_coll_tuned_barrier_intra_recursivedoubling (comm); case (4): return ompi_coll_tuned_barrier_intra_bruck (comm); case (5): return ompi_coll_tuned_barrier_intra_two_procs (comm); -/* case (6): return ompi_coll_tuned_barrier_intra_bmtree_step (comm); */ + /* case (6): return ompi_coll_tuned_barrier_intra_bmtree_step (comm); */ default: OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:barrier_intra_do_this attempt to select algorithm %d when only 0-%d is valid?", - algorithm, ompi_coll_tuned_forced_max_algorithms[BARRIER])); + algorithm, ompi_coll_tuned_forced_max_algorithms[BARRIER])); return (MPI_ERR_ARG); } /* switch */ diff --git a/ompi/mca/coll/tuned/coll_tuned_bcast.c b/ompi/mca/coll/tuned/coll_tuned_bcast.c index 96aede37a5..806ca5ad3e 100644 --- 
a/ompi/mca/coll/tuned/coll_tuned_bcast.c +++ b/ompi/mca/coll/tuned/coll_tuned_bcast.c @@ -31,10 +31,10 @@ int ompi_coll_tuned_bcast_intra_chain ( void *buff, int count, - struct ompi_datatype_t *datatype, - int root, - struct ompi_communicator_t *comm, - uint32_t segsize, int32_t chains ) + struct ompi_datatype_t *datatype, + int root, + struct ompi_communicator_t *comm, + uint32_t segsize, int32_t chains ) { int err = 0, line, rank, size, segindex, i; int segcount; /* Number of elements sent with each segment */ @@ -111,7 +111,7 @@ ompi_coll_tuned_bcast_intra_chain ( void *buff, int count, /* set the buffer pointer */ tmpbuf = (char *)buff; -/* OPAL_OUTPUT((ompi_coll_tuned_stream,("%1d chain root %d num_segments %d\n", rank, root, num_segments); */ + /* OPAL_OUTPUT((ompi_coll_tuned_stream,("%1d chain root %d num_segments %d\n", rank, root, num_segments); */ /* root code */ if( rank == root ) { @@ -141,8 +141,8 @@ ompi_coll_tuned_bcast_intra_chain ( void *buff, int count, */ new_sendcount = sendcount = segcount; err = MCA_PML_CALL(irecv( tmpbuf, sendcount, datatype, - chain->chain_prev, MCA_COLL_BASE_TAG_BCAST, - comm, &base_req)); + chain->chain_prev, MCA_COLL_BASE_TAG_BCAST, + comm, &base_req)); if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } for( segindex = 1; segindex < num_segments; segindex++ ) { @@ -212,29 +212,29 @@ ompi_coll_tuned_bcast_intra_chain ( void *buff, int count, int ompi_coll_tuned_bcast_intra_pipeline ( void *buffer, - int count, - struct ompi_datatype_t *datatype, - int root, - struct ompi_communicator_t *comm, - uint32_t segsize ) + int count, + struct ompi_datatype_t *datatype, + int root, + struct ompi_communicator_t *comm, + uint32_t segsize ) { int rank; /* remove when removing print statement */ rank = ompi_comm_rank(comm); /* remove when removing print statement */ OPAL_OUTPUT((ompi_coll_tuned_stream,"ompi_coll_tuned_bcast_intra_pipeline rank %d root %d ss %5d", rank, root, segsize)); return 
ompi_coll_tuned_bcast_intra_chain ( buffer, count, datatype, root, comm, - segsize, 1 ); + segsize, 1 ); } int ompi_coll_tuned_bcast_intra_split_bintree ( void* buffer, - int count, - struct ompi_datatype_t* datatype, - int root, - struct ompi_communicator_t* comm, - uint32_t segsize ) + int count, + struct ompi_datatype_t* datatype, + int root, + struct ompi_communicator_t* comm, + uint32_t segsize ) { int err=0, line; int rank, size; @@ -307,7 +307,7 @@ ompi_coll_tuned_bcast_intra_split_bintree ( void* buffer, (segsize > counts[1] * type_size) ) { /* call linear version here ! */ return (ompi_coll_tuned_bcast_intra_chain ( buffer, count, datatype, - root, comm, segsize, 1 )); + root, comm, segsize, 1 )); } err = ompi_ddt_get_extent (datatype, &lb, &type_extent); @@ -349,7 +349,7 @@ ompi_coll_tuned_bcast_intra_split_bintree ( void* buffer, tree->tree_next[i], MCA_COLL_BASE_TAG_BCAST, MCA_PML_BASE_SEND_STANDARD, comm)); if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } - /* update tmp buffer */ + /* update tmp buffer */ tmpbuf[i] += realsegsize[i]; } } @@ -448,10 +448,10 @@ ompi_coll_tuned_bcast_intra_split_bintree ( void* buffer, if ( (size%2) != 0 && rank != root) { err = ompi_coll_tuned_sendrecv( tmpbuf[lr], counts[lr], datatype, - pair, MCA_COLL_BASE_TAG_BCAST, - tmpbuf[(lr+1)%2], counts[(lr+1)%2], datatype, - pair, MCA_COLL_BASE_TAG_BCAST, - comm, MPI_STATUS_IGNORE, rank); + pair, MCA_COLL_BASE_TAG_BCAST, + tmpbuf[(lr+1)%2], counts[(lr+1)%2], datatype, + pair, MCA_COLL_BASE_TAG_BCAST, + comm, MPI_STATUS_IGNORE, rank); if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } } else if ( (size%2) == 0 ) { /* root sends right buffer to the last node */ @@ -472,17 +472,17 @@ ompi_coll_tuned_bcast_intra_split_bintree ( void* buffer, /* everyone else exchanges buffers */ else { err = ompi_coll_tuned_sendrecv( tmpbuf[lr], counts[lr], datatype, - pair, MCA_COLL_BASE_TAG_BCAST, - tmpbuf[(lr+1)%2], counts[(lr+1)%2], datatype, - pair, 
MCA_COLL_BASE_TAG_BCAST, - comm, MPI_STATUS_IGNORE, rank); + pair, MCA_COLL_BASE_TAG_BCAST, + tmpbuf[(lr+1)%2], counts[(lr+1)%2], datatype, + pair, MCA_COLL_BASE_TAG_BCAST, + comm, MPI_STATUS_IGNORE, rank); if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } } } return (MPI_SUCCESS); error_hndl: - OPAL_OUTPUT((ompi_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", __FILE__,line,err,rank)); + OPAL_OUTPUT((ompi_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", __FILE__,line,err,rank)); return (err); } @@ -491,11 +491,11 @@ ompi_coll_tuned_bcast_intra_split_bintree ( void* buffer, int ompi_coll_tuned_bcast_intra_bintree ( void* buffer, - int count, - struct ompi_datatype_t* datatype, - int root, - struct ompi_communicator_t* comm, - uint32_t segsize ) + int count, + struct ompi_datatype_t* datatype, + int root, + struct ompi_communicator_t* comm, + uint32_t segsize ) { int err=0, line, i; int rank, size; @@ -588,8 +588,8 @@ ompi_coll_tuned_bcast_intra_bintree ( void* buffer, for( i = 0; i < tree->tree_nextsize; i++ ) { /* send data to children */ /* send data */ MCA_PML_CALL(isend(tmpbuf, sendcount, datatype, - tree->tree_next[i], MCA_COLL_BASE_TAG_BCAST, - MCA_PML_BASE_SEND_STANDARD, comm, &send_reqs[i])); + tree->tree_next[i], MCA_COLL_BASE_TAG_BCAST, + MCA_PML_BASE_SEND_STANDARD, comm, &send_reqs[i])); if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } } @@ -639,8 +639,8 @@ ompi_coll_tuned_bcast_intra_bintree ( void* buffer, for( i = 0; i < tree->tree_nextsize; i++ ) { /* send data to children */ /* send data */ MCA_PML_CALL(isend(tmpbuf, segcount, datatype, - tree->tree_next[i], MCA_COLL_BASE_TAG_BCAST, - MCA_PML_BASE_SEND_STANDARD, comm, &send_reqs[i])); + tree->tree_next[i], MCA_COLL_BASE_TAG_BCAST, + MCA_PML_BASE_SEND_STANDARD, comm, &send_reqs[i])); if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } } @@ -661,8 +661,8 @@ ompi_coll_tuned_bcast_intra_bintree ( void* buffer, for( i = 0; i < tree->tree_nextsize; i++ ) 
{ /* send data to children */ MCA_PML_CALL(isend(tmpbuf, sendcount, datatype, - tree->tree_next[i], MCA_COLL_BASE_TAG_BCAST, - MCA_PML_BASE_SEND_STANDARD, comm, &send_reqs[i])); + tree->tree_next[i], MCA_COLL_BASE_TAG_BCAST, + MCA_PML_BASE_SEND_STANDARD, comm, &send_reqs[i])); if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } } @@ -692,7 +692,7 @@ ompi_coll_tuned_bcast_intra_bintree ( void* buffer, return (MPI_SUCCESS); error_hndl: - OPAL_OUTPUT((ompi_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", __FILE__,line,err,rank)); + OPAL_OUTPUT((ompi_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", __FILE__,line,err,rank)); return (err); } @@ -720,8 +720,8 @@ ompi_coll_tuned_bcast_intra_bintree ( void* buffer, */ int ompi_coll_tuned_bcast_intra_basic_linear (void *buff, int count, - struct ompi_datatype_t *datatype, int root, - struct ompi_communicator_t *comm) + struct ompi_datatype_t *datatype, int root, + struct ompi_communicator_t *comm) { int i; int size; @@ -735,7 +735,6 @@ ompi_coll_tuned_bcast_intra_basic_linear (void *buff, int count, OPAL_OUTPUT((ompi_coll_tuned_stream,"ompi_coll_tuned_bcast_intra_basic_linear rank %d root %d", rank, root)); - /* Non-root receive the data. 
*/ if (rank != root) { @@ -800,67 +799,67 @@ int ompi_coll_tuned_bcast_intra_check_forced_init (coll_tuned_force_algorithm_mc int rc; int max_alg = 5; - ompi_coll_tuned_forced_max_algorithms[BCAST] = max_alg; + ompi_coll_tuned_forced_max_algorithms[BCAST] = max_alg; -rc = mca_base_param_reg_int (&mca_coll_tuned_component.super.collm_version, - "bcast_algorithm_count", - "Number of bcast algorithms available", - false, true, max_alg, NULL); + rc = mca_base_param_reg_int (&mca_coll_tuned_component.super.collm_version, + "bcast_algorithm_count", + "Number of bcast algorithms available", + false, true, max_alg, NULL); -mca_param_indices->algorithm_param_index = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version, - "bcast_algorithm", - "Which bcast algorithm is used. Can be locked down to choice of: 0 ignore, 1 basic linear, 2 chain, 3: pipeline, 4: split binary tree, 5: binary tree.", - false, false, 0, NULL); + mca_param_indices->algorithm_param_index = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version, + "bcast_algorithm", + "Which bcast algorithm is used. Can be locked down to choice of: 0 ignore, 1 basic linear, 2 chain, 3: pipeline, 4: split binary tree, 5: binary tree.", + false, false, 0, NULL); -mca_param_indices->segsize_param_index = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version, - "bcast_algorithm_segmentsize", - "Segment size in bytes used by default for bcast algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation.", - false, false, 0, NULL); + mca_param_indices->segsize_param_index = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version, + "bcast_algorithm_segmentsize", + "Segment size in bytes used by default for bcast algorithms. Only has meaning if algorithm is forced and supports segmenting. 
0 bytes means no segmentation.", + false, false, 0, NULL); -mca_param_indices->tree_fanout_param_index = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version, - "bcast_algorithm_tree_fanout", - "Fanout for n-tree used for bcast algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation.", - false, false, - ompi_coll_tuned_init_tree_fanout, /* get system wide default */ - NULL); + mca_param_indices->tree_fanout_param_index = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version, + "bcast_algorithm_tree_fanout", + "Fanout for n-tree used for bcast algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation.", + false, false, + ompi_coll_tuned_init_tree_fanout, /* get system wide default */ + NULL); -mca_param_indices->chain_fanout_param_index = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version, - "bcast_algorithm_chain_fanout", - "Fanout for chains used for bcast algorithms. Only has meaning if algorithm is forced and supports chain topo based operation.", - false, false, - ompi_coll_tuned_init_chain_fanout, /* get system wide default */ - NULL); + mca_param_indices->chain_fanout_param_index = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version, + "bcast_algorithm_chain_fanout", + "Fanout for chains used for bcast algorithms. 
Only has meaning if algorithm is forced and supports chain topo based operation.", + false, false, + ompi_coll_tuned_init_chain_fanout, /* get system wide default */ + NULL); -return (MPI_SUCCESS); + return (MPI_SUCCESS); } int ompi_coll_tuned_bcast_intra_do_forced(void *buf, int count, - struct ompi_datatype_t *dtype, - int root, - struct ompi_communicator_t *comm) + struct ompi_datatype_t *dtype, + int root, + struct ompi_communicator_t *comm) { OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:bcast_intra_do_forced algorithm %d", - comm->c_coll_selected_data->user_forced[BCAST].algorithm)); + comm->c_coll_selected_data->user_forced[BCAST].algorithm)); -switch (comm->c_coll_selected_data->user_forced[BCAST].algorithm) { + switch (comm->c_coll_selected_data->user_forced[BCAST].algorithm) { case (0): return ompi_coll_tuned_bcast_intra_dec_fixed (buf, count, dtype, root, comm); case (1): return ompi_coll_tuned_bcast_intra_basic_linear (buf, count, dtype, root, comm); case (2): return ompi_coll_tuned_bcast_intra_chain (buf, count, dtype, root, comm, - comm->c_coll_selected_data->user_forced[BCAST].segsize, - comm->c_coll_selected_data->user_forced[BCAST].chain_fanout ); + comm->c_coll_selected_data->user_forced[BCAST].segsize, + comm->c_coll_selected_data->user_forced[BCAST].chain_fanout ); case (3): return ompi_coll_tuned_bcast_intra_pipeline (buf, count, dtype, root, comm, - comm->c_coll_selected_data->user_forced[BCAST].segsize); + comm->c_coll_selected_data->user_forced[BCAST].segsize); case (4): return ompi_coll_tuned_bcast_intra_split_bintree (buf, count, dtype, root, comm, - comm->c_coll_selected_data->user_forced[BCAST].segsize); + comm->c_coll_selected_data->user_forced[BCAST].segsize); case (5): return ompi_coll_tuned_bcast_intra_bintree (buf, count, dtype, root, comm, comm->c_coll_selected_data->user_forced[BCAST].segsize); -/* case (6): return ompi_coll_tuned_bcast_intra_bmtree (buf, count, dtype, root, comm, - * ompi_coll_tuned_bcast_forced_segsize); */ + 
/* case (6): return ompi_coll_tuned_bcast_intra_bmtree (buf, count, dtype, root, comm, + * ompi_coll_tuned_bcast_forced_segsize); */ default: OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:bcast_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?", - comm->c_coll_selected_data->user_forced[BCAST].algorithm, ompi_coll_tuned_forced_max_algorithms[BCAST])); + comm->c_coll_selected_data->user_forced[BCAST].algorithm, ompi_coll_tuned_forced_max_algorithms[BCAST])); return (MPI_ERR_ARG); } /* switch */ @@ -868,27 +867,27 @@ switch (comm->c_coll_selected_data->user_forced[BCAST].algorithm) { int ompi_coll_tuned_bcast_intra_do_this(void *buf, int count, - struct ompi_datatype_t *dtype, - int root, - struct ompi_communicator_t *comm, - int algorithm, int faninout, int segsize) + struct ompi_datatype_t *dtype, + int root, + struct ompi_communicator_t *comm, + int algorithm, int faninout, int segsize) { OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:bcast_intra_do_this algorithm %d topo faninout %d segsize %d", - algorithm, faninout, segsize)); + algorithm, faninout, segsize)); -switch (algorithm) { + switch (algorithm) { case (0): return ompi_coll_tuned_bcast_intra_dec_fixed (buf, count, dtype, root, comm); case (1): return ompi_coll_tuned_bcast_intra_basic_linear (buf, count, dtype, root, comm); case (2): return ompi_coll_tuned_bcast_intra_chain (buf, count, dtype, root, comm, segsize, faninout ); case (3): return ompi_coll_tuned_bcast_intra_pipeline (buf, count, dtype, root, comm, segsize); case (4): return ompi_coll_tuned_bcast_intra_split_bintree (buf, count, dtype, root, comm, segsize); case (5): return ompi_coll_tuned_bcast_intra_bintree (buf, count, dtype, root, comm, segsize); -/* case (6): return ompi_coll_tuned_bcast_intra_bmtree (buf, count, dtype, root, comm, - * segsize); */ + /* case (6): return ompi_coll_tuned_bcast_intra_bmtree (buf, count, dtype, root, comm, + * segsize); */ default: 
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:bcast_intra_do_this attempt to select algorithm %d when only 0-%d is valid?", - algorithm, ompi_coll_tuned_forced_max_algorithms[BCAST])); + algorithm, ompi_coll_tuned_forced_max_algorithms[BCAST])); return (MPI_ERR_ARG); } /* switch */ diff --git a/ompi/mca/coll/tuned/coll_tuned_component.c b/ompi/mca/coll/tuned/coll_tuned_component.c index f31153ddc5..04746b53cd 100644 --- a/ompi/mca/coll/tuned/coll_tuned_component.c +++ b/ompi/mca/coll/tuned/coll_tuned_component.c @@ -72,48 +72,48 @@ mca_coll_tuned_component_t mca_coll_tuned_component = { { - /* First, the mca_component_t struct containing meta information - about the component itself */ + /* First, the mca_component_t struct containing meta information + about the component itself */ - { - /* Indicate that we are a coll v1.0.0 component (which also implies a - specific MCA version) */ + { + /* Indicate that we are a coll v1.0.0 component (which also implies a + specific MCA version) */ - MCA_COLL_BASE_VERSION_1_0_0, + MCA_COLL_BASE_VERSION_1_0_0, - /* Component name and version */ + /* Component name and version */ - "tuned", - OMPI_MAJOR_VERSION, - OMPI_MINOR_VERSION, - OMPI_RELEASE_VERSION, + "tuned", + OMPI_MAJOR_VERSION, + OMPI_MINOR_VERSION, + OMPI_RELEASE_VERSION, - /* Component open and close functions */ + /* Component open and close functions */ - tuned_open, - tuned_close - }, + tuned_open, + tuned_close + }, - /* Next the MCA v1.0.0 component meta data */ + /* Next the MCA v1.0.0 component meta data */ - { - /* Whether the component is checkpointable or not */ + { + /* Whether the component is checkpointable or not */ - true - }, + true + }, - /* Initialization / querying functions */ + /* Initialization / querying functions */ - ompi_coll_tuned_init_query, - ompi_coll_tuned_comm_query, - NULL - }, + ompi_coll_tuned_init_query, + ompi_coll_tuned_comm_query, + NULL + }, /* priority of the module */ 0, - /* Tuned component specific information */ - /* 
Note some of this WAS in the module */ + /* Tuned component specific information */ + /* Note some of this WAS in the module */ NULL /* ompi_coll_alg_rule_t ptr */ }; @@ -122,7 +122,7 @@ static int tuned_open(void) { int param; -/* ompi_coll_tuned_component_t *ct = &ompi_coll_tuned_component; */ + /* ompi_coll_tuned_component_t *ct = &ompi_coll_tuned_component; */ /* Use a low priority, but allow other components to be lower */ @@ -149,13 +149,13 @@ static int tuned_open(void) /* if dynamic rules allowed then look up dynamic rules config filename, else we leave it an empty filename (NULL) */ if (ompi_coll_tuned_use_dynamic_rules) { -/* char *default_name; */ -/* asprintf(&default_name, "~/.openmpi/openmpi-coll-tuned-params.conf"); */ + /* char *default_name; */ + /* asprintf(&default_name, "~/.openmpi/openmpi-coll-tuned-params.conf"); */ mca_base_param_reg_string(&mca_coll_tuned_component.super.collm_version, - "dynamic_rules_filename", - "Filename of configuration file that contains the dynamic (@runtime) decision function rules", - false, false, ompi_coll_tuned_dynamic_rules_filename, - &ompi_coll_tuned_dynamic_rules_filename); + "dynamic_rules_filename", + "Filename of configuration file that contains the dynamic (@runtime) decision function rules", + false, false, ompi_coll_tuned_dynamic_rules_filename, + &ompi_coll_tuned_dynamic_rules_filename); } /* some initial guesses at topology parameters */ @@ -176,7 +176,7 @@ static int tuned_open(void) int verbose; mca_base_param_lookup_int(param, &verbose); if (verbose > 0) { - ompi_coll_tuned_stream = opal_output_open(NULL); + ompi_coll_tuned_stream = opal_output_open(NULL); } } @@ -190,7 +190,7 @@ static int tuned_open(void) if (ompi_coll_tuned_use_dynamic_rules) { ompi_coll_tuned_allreduce_intra_check_forced_init(&ompi_coll_tuned_forced_params[ALLREDUCE]); ompi_coll_tuned_alltoall_intra_check_forced_init(&ompi_coll_tuned_forced_params[ALLTOALL]); -/* 
ompi_coll_tuned_alltoall_intra_check_forced_init(&ompi_coll_tuned_forced_params[ALLTOALLV]); */ + /* ompi_coll_tuned_alltoall_intra_check_forced_init(&ompi_coll_tuned_forced_params[ALLTOALLV]); */ ompi_coll_tuned_barrier_intra_check_forced_init(&ompi_coll_tuned_forced_params[BARRIER]); ompi_coll_tuned_bcast_intra_check_forced_init(&ompi_coll_tuned_forced_params[BCAST]); ompi_coll_tuned_reduce_intra_check_forced_init(&ompi_coll_tuned_forced_params[REDUCE]); diff --git a/ompi/mca/coll/tuned/coll_tuned_decision_dynamic.c b/ompi/mca/coll/tuned/coll_tuned_decision_dynamic.c index 3a528b95bb..260f4cdd7a 100644 --- a/ompi/mca/coll/tuned/coll_tuned_decision_dynamic.c +++ b/ompi/mca/coll/tuned/coll_tuned_decision_dynamic.c @@ -54,9 +54,9 @@ */ int ompi_coll_tuned_allreduce_intra_dec_dynamic (void *sbuf, void *rbuf, int count, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, - struct ompi_communicator_t *comm) + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, + struct ompi_communicator_t *comm) { OPAL_OUTPUT((ompi_coll_tuned_stream, "ompi_coll_tuned_allreduce_intra_dec_dynamic")); @@ -64,7 +64,7 @@ ompi_coll_tuned_allreduce_intra_dec_dynamic (void *sbuf, void *rbuf, int count, /* check to see if we have some filebased rules */ if (comm->c_coll_selected_data->com_rules[ALLREDUCE]) { - /* we do, so calc the message size or what ever we need and use this for the evaluation */ + /* we do, so calc the message size or what ever we need and use this for the evaluation */ int alg, faninout, segsize; size_t dsize; @@ -72,20 +72,18 @@ ompi_coll_tuned_allreduce_intra_dec_dynamic (void *sbuf, void *rbuf, int count, dsize *= count; alg = ompi_coll_tuned_get_target_method_params (comm->c_coll_selected_data->com_rules[ALLREDUCE], - dsize, &faninout, &segsize); + dsize, &faninout, &segsize); if (alg) { /* we have found a valid choice from the file based rules for this message size */ return ompi_coll_tuned_allreduce_intra_do_this (sbuf, rbuf, count, dtype, op, comm, - alg, 
faninout, segsize); + alg, faninout, segsize); } /* found a method */ } /*end if any com rules to check */ if (comm->c_coll_selected_data->user_forced[ALLREDUCE].algorithm) { return ompi_coll_tuned_allreduce_intra_do_forced (sbuf, rbuf, count, dtype, op, comm); } - else { - return ompi_coll_tuned_allreduce_intra_dec_fixed (sbuf, rbuf, count, dtype, op, comm); - } + return ompi_coll_tuned_allreduce_intra_dec_fixed (sbuf, rbuf, count, dtype, op, comm); } /* @@ -97,10 +95,10 @@ ompi_coll_tuned_allreduce_intra_dec_dynamic (void *sbuf, void *rbuf, int count, */ int ompi_coll_tuned_alltoall_intra_dec_dynamic(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void* rbuf, int rcount, - struct ompi_datatype_t *rdtype, - struct ompi_communicator_t *comm) + struct ompi_datatype_t *sdtype, + void* rbuf, int rcount, + struct ompi_datatype_t *rdtype, + struct ompi_communicator_t *comm) { OPAL_OUTPUT((ompi_coll_tuned_stream, "ompi_coll_tuned_alltoall_intra_dec_dynamic")); @@ -108,7 +106,7 @@ int ompi_coll_tuned_alltoall_intra_dec_dynamic(void *sbuf, int scount, /* check to see if we have some filebased rules */ if (comm->c_coll_selected_data->com_rules[ALLTOALL]) { - /* we do, so calc the message size or what ever we need and use this for the evaluation */ + /* we do, so calc the message size or what ever we need and use this for the evaluation */ int comsize; int alg, faninout, segsize; size_t dsize; @@ -118,11 +116,11 @@ int ompi_coll_tuned_alltoall_intra_dec_dynamic(void *sbuf, int scount, dsize *= comsize * scount; alg = ompi_coll_tuned_get_target_method_params (comm->c_coll_selected_data->com_rules[ALLTOALL], - dsize, &faninout, &segsize); + dsize, &faninout, &segsize); if (alg) { /* we have found a valid choice from the file based rules for this message size */ return ompi_coll_tuned_alltoall_intra_do_this (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, - alg, faninout, segsize); + alg, faninout, segsize); } /* found a method */ } /*end if any com rules to check 
*/ @@ -130,9 +128,7 @@ int ompi_coll_tuned_alltoall_intra_dec_dynamic(void *sbuf, int scount, if (comm->c_coll_selected_data->user_forced[ALLTOALL].algorithm) { return ompi_coll_tuned_alltoall_intra_do_forced (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm); } - else { - return ompi_coll_tuned_alltoall_intra_dec_fixed (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm); - } + return ompi_coll_tuned_alltoall_intra_dec_fixed (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm); } /* @@ -150,25 +146,22 @@ int ompi_coll_tuned_barrier_intra_dec_dynamic(struct ompi_communicator_t *comm) /* check to see if we have some filebased rules */ if (comm->c_coll_selected_data->com_rules[BARRIER]) { - /* we do, so calc the message size or what ever we need and use this for the evaluation */ + /* we do, so calc the message size or what ever we need and use this for the evaluation */ int alg, faninout, segsize; alg = ompi_coll_tuned_get_target_method_params (comm->c_coll_selected_data->com_rules[BARRIER], - 0, &faninout, &segsize); + 0, &faninout, &segsize); if (alg) { /* we have found a valid choice from the file based rules for this message size */ return ompi_coll_tuned_barrier_intra_do_this (comm, - alg, faninout, segsize); + alg, faninout, segsize); } /* found a method */ } /*end if any com rules to check */ if (comm->c_coll_selected_data->user_forced[BARRIER].algorithm) { - return ompi_coll_tuned_barrier_intra_do_forced (comm); + return ompi_coll_tuned_barrier_intra_do_forced (comm); } - else { - return ompi_coll_tuned_barrier_intra_dec_fixed (comm); - } - + return ompi_coll_tuned_barrier_intra_dec_fixed (comm); } /* @@ -179,8 +172,8 @@ int ompi_coll_tuned_barrier_intra_dec_dynamic(struct ompi_communicator_t *comm) * Returns: - MPI_SUCCESS or error code (passed from the bcast implementation) */ int ompi_coll_tuned_bcast_intra_dec_dynamic(void *buff, int count, - struct ompi_datatype_t *datatype, int root, - struct ompi_communicator_t *comm) + struct ompi_datatype_t *datatype, 
int root, + struct ompi_communicator_t *comm) { OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:bcast_intra_dec_dynamic")); @@ -188,7 +181,7 @@ int ompi_coll_tuned_bcast_intra_dec_dynamic(void *buff, int count, /* check to see if we have some filebased rules */ if (comm->c_coll_selected_data->com_rules[BCAST]) { - /* we do, so calc the message size or what ever we need and use this for the evaluation */ + /* we do, so calc the message size or what ever we need and use this for the evaluation */ int alg, faninout, segsize; size_t dsize; @@ -196,7 +189,7 @@ int ompi_coll_tuned_bcast_intra_dec_dynamic(void *buff, int count, dsize *= count; alg = ompi_coll_tuned_get_target_method_params (comm->c_coll_selected_data->com_rules[BCAST], - dsize, &faninout, &segsize); + dsize, &faninout, &segsize); if (alg) { /* we have found a valid choice from the file based rules for this message size */ return ompi_coll_tuned_bcast_intra_do_this (buff, count, datatype, root, comm, @@ -206,12 +199,9 @@ int ompi_coll_tuned_bcast_intra_dec_dynamic(void *buff, int count, if (comm->c_coll_selected_data->user_forced[BCAST].algorithm) { - return ompi_coll_tuned_bcast_intra_do_forced (buff, count, datatype, root, comm); + return ompi_coll_tuned_bcast_intra_do_forced (buff, count, datatype, root, comm); } - else { - return ompi_coll_tuned_bcast_intra_dec_fixed (buff, count, datatype, root, comm); - } - + return ompi_coll_tuned_bcast_intra_dec_fixed (buff, count, datatype, root, comm); } /* @@ -223,9 +213,9 @@ int ompi_coll_tuned_bcast_intra_dec_dynamic(void *buff, int count, * */ int ompi_coll_tuned_reduce_intra_dec_dynamic( void *sendbuf, void *recvbuf, - int count, struct ompi_datatype_t* datatype, - struct ompi_op_t* op, int root, - struct ompi_communicator_t* comm) + int count, struct ompi_datatype_t* datatype, + struct ompi_op_t* op, int root, + struct ompi_communicator_t* comm) { OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:reduce_intra_dec_dynamic")); @@ -233,7 +223,7 @@ int 
ompi_coll_tuned_reduce_intra_dec_dynamic( void *sendbuf, void *recvbuf, /* check to see if we have some filebased rules */ if (comm->c_coll_selected_data->com_rules[REDUCE]) { - /* we do, so calc the message size or what ever we need and use this for the evaluation */ + /* we do, so calc the message size or what ever we need and use this for the evaluation */ int alg, faninout, segsize; size_t dsize; @@ -241,20 +231,17 @@ int ompi_coll_tuned_reduce_intra_dec_dynamic( void *sendbuf, void *recvbuf, dsize *= count; alg = ompi_coll_tuned_get_target_method_params (comm->c_coll_selected_data->com_rules[REDUCE], - dsize, &faninout, &segsize); + dsize, &faninout, &segsize); if (alg) { /* we have found a valid choice from the file based rules for this message size */ return ompi_coll_tuned_reduce_intra_do_this (sendbuf, recvbuf, count, datatype, op, root, comm, - alg, faninout, segsize); + alg, faninout, segsize); } /* found a method */ } /*end if any com rules to check */ if (comm->c_coll_selected_data->user_forced[REDUCE].algorithm) { - return ompi_coll_tuned_reduce_intra_do_forced (sendbuf, recvbuf, count, datatype, op, root, comm); + return ompi_coll_tuned_reduce_intra_do_forced (sendbuf, recvbuf, count, datatype, op, root, comm); } - else { - return ompi_coll_tuned_reduce_intra_dec_fixed (sendbuf, recvbuf, count, datatype, op, root, comm); - } - + return ompi_coll_tuned_reduce_intra_dec_fixed (sendbuf, recvbuf, count, datatype, op, root, comm); } diff --git a/ompi/mca/coll/tuned/coll_tuned_decision_fixed.c b/ompi/mca/coll/tuned/coll_tuned_decision_fixed.c index 8604f29a1a..4a500f2b2a 100644 --- a/ompi/mca/coll/tuned/coll_tuned_decision_fixed.c +++ b/ompi/mca/coll/tuned/coll_tuned_decision_fixed.c @@ -37,21 +37,13 @@ */ int ompi_coll_tuned_allreduce_intra_dec_fixed (void *sbuf, void *rbuf, int count, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, - struct ompi_communicator_t *comm) + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, + struct 
ompi_communicator_t *comm) { -/* int size; */ -/* int contig; */ -/* int dsize; */ - OPAL_OUTPUT((ompi_coll_tuned_stream, "ompi_coll_tuned_allreduce_intra_dec_fixed")); -/* size = ompi_comm_size(comm); */ - return (ompi_coll_tuned_allreduce_intra_nonoverlapping (sbuf, rbuf, count, dtype, op, comm)); - - } /* @@ -63,16 +55,13 @@ ompi_coll_tuned_allreduce_intra_dec_fixed (void *sbuf, void *rbuf, int count, */ int ompi_coll_tuned_alltoall_intra_dec_fixed(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void* rbuf, int rcount, - struct ompi_datatype_t *rdtype, - struct ompi_communicator_t *comm) + struct ompi_datatype_t *sdtype, + void* rbuf, int rcount, + struct ompi_datatype_t *rdtype, + struct ompi_communicator_t *comm) { - int comsize; - int rank; - int err; - unsigned long dsize; - unsigned long total_dsize; + int comsize, rank, err; + size_t dsize, total_dsize; OPAL_OUTPUT((ompi_coll_tuned_stream, "ompi_coll_tuned_alltoall_intra_dec_fixed")); @@ -87,21 +76,19 @@ int ompi_coll_tuned_alltoall_intra_dec_fixed(void *sbuf, int scount, /* else we need data size for decision function */ err = ompi_ddt_get_size (sdtype, &dsize); if (err != MPI_SUCCESS) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", __FILE__,__LINE__,err,rank)); + OPAL_OUTPUT((ompi_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", __FILE__,__LINE__,err,rank)); return (err); } - total_dsize = dsize * scount * (unsigned long)comsize; /* needed for decision */ + total_dsize = dsize * scount * comsize; /* needed for decision */ if (comsize >= 12 && total_dsize <= 768) { return ompi_coll_tuned_alltoall_intra_bruck (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm); } - else if (total_dsize <= 131072) { + if (total_dsize <= 131072) { return ompi_coll_tuned_alltoall_intra_basic_linear (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm); } - else { - return ompi_coll_tuned_alltoall_intra_pairwise (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm); - } + return 
ompi_coll_tuned_alltoall_intra_pairwise (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm); } @@ -122,11 +109,10 @@ int ompi_coll_tuned_barrier_intra_dec_fixed(struct ompi_communicator_t *comm) if (2==comsize) return ompi_coll_tuned_barrier_intra_two_procs(comm); - else -/* return ompi_coll_tuned_barrier_intra_doublering(comm); */ + /* return ompi_coll_tuned_barrier_intra_doublering(comm); */ return ompi_coll_tuned_barrier_intra_recursivedoubling(comm); -/* return ompi_coll_tuned_barrier_intra_bruck(comm); */ -/* return ompi_coll_tuned_barrier_intra_linear(comm); */ + /* return ompi_coll_tuned_barrier_intra_bruck(comm); */ + /* return ompi_coll_tuned_barrier_intra_linear(comm); */ } @@ -139,16 +125,12 @@ int ompi_coll_tuned_barrier_intra_dec_fixed(struct ompi_communicator_t *comm) * Returns: - MPI_SUCCESS or error code (passed from the bcast implementation) */ int ompi_coll_tuned_bcast_intra_dec_fixed(void *buff, int count, - struct ompi_datatype_t *datatype, int root, - struct ompi_communicator_t *comm) + struct ompi_datatype_t *datatype, int root, + struct ompi_communicator_t *comm) { - int comsize; - int rank; - int err; - unsigned long msgsize; - unsigned long dsize; + int comsize, rank, err; int segsize = 0; - + size_t msgsize, dsize; OPAL_OUTPUT((ompi_coll_tuned_stream,"ompi_coll_tuned_bcast_intra_dec_fixed")); @@ -158,7 +140,7 @@ int ompi_coll_tuned_bcast_intra_dec_fixed(void *buff, int count, /* else we need data size for decision function */ err = ompi_ddt_get_size (datatype, &dsize); if (err != MPI_SUCCESS) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", __FILE__,__LINE__,err,rank)); + OPAL_OUTPUT((ompi_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", __FILE__,__LINE__,err,rank)); return (err); } @@ -166,34 +148,29 @@ int ompi_coll_tuned_bcast_intra_dec_fixed(void *buff, int count, /* this is based on gige measurements */ - if ((comsize < 4)) { - segsize = 0; + if (comsize < 4) { return 
ompi_coll_tuned_bcast_intra_basic_linear (buff, count, datatype, root, comm); } - else if (comsize == 4) { - if (msgsize < 524288) segsize = 0; - else msgsize = 16384; - return ompi_coll_tuned_bcast_intra_bintree (buff, count, datatype, root, comm, segsize); + if (comsize == 4) { + if (msgsize < 524288) segsize = 0; + else segsize = 16384; + return ompi_coll_tuned_bcast_intra_bintree (buff, count, datatype, root, comm, segsize); } - else if (comsize > 4 && comsize <= 8 && msgsize < 4096) { - segsize = 0; - return ompi_coll_tuned_bcast_intra_basic_linear (buff, count, datatype, root, comm); + if (comsize <= 8 && msgsize < 4096) { + return ompi_coll_tuned_bcast_intra_basic_linear (buff, count, datatype, root, comm); } - else if (comsize > 8 && msgsize >= 32768 && msgsize < 524288) { - segsize = 16384; - return ompi_coll_tuned_bcast_intra_bintree (buff, count, datatype, root, comm, segsize); + if (comsize > 8 && msgsize >= 32768 && msgsize < 524288) { + segsize = 16384; + return ompi_coll_tuned_bcast_intra_bintree (buff, count, datatype, root, comm, segsize); } - else if (comsize > 4 && msgsize >= 524288) { - segsize = 16384; - return ompi_coll_tuned_bcast_intra_pipeline (buff, count, datatype, root, comm, segsize); + if (msgsize >= 524288) { + segsize = 16384; + return ompi_coll_tuned_bcast_intra_pipeline (buff, count, datatype, root, comm, segsize); } - else { - segsize = 0; - /* once tested can swap this back in */ -/* return ompi_coll_tuned_bcast_intra_bmtree (buff, count, datatype, root, comm, segsize); */ - return ompi_coll_tuned_bcast_intra_bintree (buff, count, datatype, root, comm, segsize); - } - + segsize = 0; + /* once tested can swap this back in */ + /* return ompi_coll_tuned_bcast_intra_bmtree (buff, count, datatype, root, comm, segsize); */ + return ompi_coll_tuned_bcast_intra_bintree (buff, count, datatype, root, comm, segsize); } /* @@ -205,19 +182,12 @@ int ompi_coll_tuned_bcast_intra_dec_fixed(void *buff, int count, * */ int 
ompi_coll_tuned_reduce_intra_dec_fixed( void *sendbuf, void *recvbuf, - int count, struct ompi_datatype_t* datatype, - struct ompi_op_t* op, int root, - struct ompi_communicator_t* comm) + int count, struct ompi_datatype_t* datatype, + struct ompi_op_t* op, int root, + struct ompi_communicator_t* comm) { - int comsize; - int rank; - int err; -/* int contig; */ - unsigned long msgsize; - unsigned long dsize; - int segsize = 0; -/* int fanout = 0; */ - + int comsize, rank, err, segsize = 0, fanout = 0; + size_t msgsize, dsize; OPAL_OUTPUT((ompi_coll_tuned_stream, "ompi_coll_tuned_reduce_intra_dec_fixed")); @@ -227,39 +197,33 @@ int ompi_coll_tuned_reduce_intra_dec_fixed( void *sendbuf, void *recvbuf, /* need data size for decision function */ err = ompi_ddt_get_size (datatype, &dsize); if (err != MPI_SUCCESS) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", __FILE__,__LINE__,err,rank)); + OPAL_OUTPUT((ompi_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", __FILE__,__LINE__,err,rank)); return (err); } - msgsize = dsize * (unsigned long)count; /* needed for decision */ + msgsize = dsize * count; /* needed for decision */ - return ompi_coll_tuned_reduce_intra_basic_linear (sendbuf, recvbuf, count, datatype, op, root, comm); -#ifdef coconuts - /* for small messages use linear algorithm */ - if (msgsize <= 4096) { + /* for small messages use linear algorithm */ + if (msgsize <= 4096) { segsize = 0; - fanout = size-1; -/* when linear implemented or taken from basic put here, right now using chain as a linear system */ -/* it is implemented and I shouldn't be calling a chain with a fanout bigger than MAXTREEFANOUT from topo.h! */ + fanout = comsize - 1; + /* when linear implemented or taken from basic put here, right now using chain as a linear system */ + /* it is implemented and I shouldn't be calling a chain with a fanout bigger than MAXTREEFANOUT from topo.h! 
*/ return ompi_coll_tuned_reduce_intra_basic_linear (sendbuf, recvbuf, count, datatype, op, root, comm); -/* return ompi_coll_tuned_reduce_intra_chain (sendbuf, recvbuf, count, datatype, op, root, comm, segsize, fanout); */ - } else if (msgsize <= 65536 ) { - segsize = 32768; - fanout = 8; + /* return ompi_coll_tuned_reduce_intra_chain (sendbuf, recvbuf, count, datatype, op, root, comm, segsize, fanout); */ + } + if (msgsize < 524288) { + if (msgsize <= 65536 ) { + segsize = 32768; + fanout = 8; + } else { + segsize = 1024; + fanout = comsize/2; + } + /* later swap this for a binary tree */ + /* fanout = 2; */ return ompi_coll_tuned_reduce_intra_chain (sendbuf, recvbuf, count, datatype, op, root, comm, segsize, fanout); - } else if (msgsize < 524288) { - segsize = 1024; - fanout = size/2; -/* later swap this for a binary tree */ -/* fanout = 2; */ - return ompi_coll_tuned_reduce_intra_chain (sendbuf, recvbuf, count, datatype, op, root, comm, segsize, fanout); - } else -#endif - { - segsize = 1024; - return ompi_coll_tuned_reduce_intra_pipeline (sendbuf, recvbuf, count, datatype, op, root, comm, segsize); - } - + } + segsize = 1024; + return ompi_coll_tuned_reduce_intra_pipeline (sendbuf, recvbuf, count, datatype, op, root, comm, segsize); } - - diff --git a/ompi/mca/coll/tuned/coll_tuned_dynamic_file.c b/ompi/mca/coll/tuned/coll_tuned_dynamic_file.c index 10c7a7a63a..689eb3a0fd 100644 --- a/ompi/mca/coll/tuned/coll_tuned_dynamic_file.c +++ b/ompi/mca/coll/tuned/coll_tuned_dynamic_file.c @@ -60,235 +60,235 @@ static int fileline=0; /* used for verbose error messages */ int ompi_coll_tuned_read_rules_config_file (char *fname, ompi_coll_alg_rule_t** rules, int n_collectives) { - FILE *fptr = (FILE*) NULL; - int X; - int CI; - int NCS; - int CS; - int NMS; - int MS, ALG, FANINOUT, SS; - int x, ncs, nms; + FILE *fptr = (FILE*) NULL; + int X; + int CI; + int NCS; + int CS; + int NMS; + int MS, ALG, FANINOUT, SS; + int x, ncs, nms; - ompi_coll_alg_rule_t *alg_rules = 
(ompi_coll_alg_rule_t*) NULL; /* complete table of rules */ + ompi_coll_alg_rule_t *alg_rules = (ompi_coll_alg_rule_t*) NULL; /* complete table of rules */ - /* individual pointers to sections of rules */ - ompi_coll_alg_rule_t *alg_p = (ompi_coll_alg_rule_t*) NULL; - ompi_coll_com_rule_t *com_p = (ompi_coll_com_rule_t*) NULL; - ompi_coll_msg_rule_t *msg_p = (ompi_coll_msg_rule_t*) NULL; + /* individual pointers to sections of rules */ + ompi_coll_alg_rule_t *alg_p = (ompi_coll_alg_rule_t*) NULL; + ompi_coll_com_rule_t *com_p = (ompi_coll_com_rule_t*) NULL; + ompi_coll_msg_rule_t *msg_p = (ompi_coll_msg_rule_t*) NULL; - /* stats info */ - int total_alg_count = 0; - int total_com_count = 0; - int total_msg_count = 0; + /* stats info */ + int total_alg_count = 0; + int total_com_count = 0; + int total_msg_count = 0; - if (!fname) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"Gave NULL as rule table configuration file for tuned collectives... ignoring!\n")); - return (-1); - } + if (!fname) { + OPAL_OUTPUT((ompi_coll_tuned_stream,"Gave NULL as rule table configuration file for tuned collectives... ignoring!\n")); + return (-1); + } - if (!rules) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"Gave NULL as rule table result ptr!... ignoring!\n")); - return (-2); - } + if (!rules) { + OPAL_OUTPUT((ompi_coll_tuned_stream,"Gave NULL as rule table result ptr!... ignoring!\n")); + return (-2); + } - if (n_collectives<1) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"Gave %d as max number of collectives in the rule table configuration file for tuned collectives!... ignoring!\n", n_collectives)); - return (-3); - } + if (n_collectives<1) { + OPAL_OUTPUT((ompi_coll_tuned_stream,"Gave %d as max number of collectives in the rule table configuration file for tuned collectives!... 
ignoring!\n", n_collectives)); + return (-3); + } - fptr = fopen (fname, "r"); - if (!fptr) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"cannot read rules file [%s]\n", fname)); - goto on_file_error; - } + fptr = fopen (fname, "r"); + if (!fptr) { + OPAL_OUTPUT((ompi_coll_tuned_stream,"cannot read rules file [%s]\n", fname)); + goto on_file_error; + } - /* make space and init the algorithm rules for each of the n_collectives MPI collectives */ - alg_rules = ompi_coll_tuned_mk_alg_rules (n_collectives); + /* make space and init the algorithm rules for each of the n_collectives MPI collectives */ + alg_rules = ompi_coll_tuned_mk_alg_rules (n_collectives); - X = getnext(fptr); - if (X<0) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read number of collectives in configuration file around line %d\n", fileline)); - goto on_file_error; - } - if (X>n_collectives) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"Number of collectives in configuration file %d is greater than number of MPI collectives possible %d ??? error around line %d\n", X, n_collectives, fileline)); - goto on_file_error; - } + X = getnext(fptr); + if (X<0) { + OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read number of collectives in configuration file around line %d\n", fileline)); + goto on_file_error; + } + if (X>n_collectives) { + OPAL_OUTPUT((ompi_coll_tuned_stream,"Number of collectives in configuration file %d is greater than number of MPI collectives possible %d ??? error around line %d\n", X, n_collectives, fileline)); + goto on_file_error; + } - for (x=0;x=n_collectives) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"Collective id in configuration file %d is greater than MPI collectives possible %d. 
Error around line %d\n", CI, n_collectives, fileline)); - goto on_file_error; - } + CI = getnext (fptr); + if (CI<0) { + OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read next Collective id in configuration file around line %d\n", fileline)); + goto on_file_error; + } + if (CI>=n_collectives) { + OPAL_OUTPUT((ompi_coll_tuned_stream,"Collective id in configuration file %d is greater than MPI collectives possible %d. Error around line %d\n", CI, n_collectives, fileline)); + goto on_file_error; + } - if (alg_rules[CI].alg_rule_id != CI) { - OPAL_OUTPUT((ompi_coll_tuned_stream, "Internal error in handling collective ID %d\n", CI)); - ompi_coll_tuned_free_all_rules (*rules, n_collectives); - return (-4); - } + if (alg_rules[CI].alg_rule_id != CI) { + OPAL_OUTPUT((ompi_coll_tuned_stream, "Internal error in handling collective ID %d\n", CI)); + ompi_coll_tuned_free_all_rules (*rules, n_collectives); + return (-4); + } - alg_p = &alg_rules[CI]; + alg_p = &alg_rules[CI]; - alg_p->alg_rule_id = CI; - alg_p->n_com_sizes = 0; - alg_p->com_rules = (ompi_coll_com_rule_t *) NULL; + alg_p->alg_rule_id = CI; + alg_p->n_com_sizes = 0; + alg_p->com_rules = (ompi_coll_com_rule_t *) NULL; - NCS = getnext (fptr); - if (NCS<0) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read count of communicators for collective ID %d at around line %d\n", CI, fileline)); - goto on_file_error; - } + NCS = getnext (fptr); + if (NCS<0) { + OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read count of communicators for collective ID %d at around line %d\n", CI, fileline)); + goto on_file_error; + } - alg_p->n_com_sizes = NCS; - alg_p->com_rules = ompi_coll_tuned_mk_com_rules (NCS, CI); + alg_p->n_com_sizes = NCS; + alg_p->com_rules = ompi_coll_tuned_mk_com_rules (NCS, CI); - for (ncs=0;ncscom_rules[ncs]); + com_p = &(alg_p->com_rules[ncs]); - CS = getnext (fptr); - if (CS<0) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read communicator size for collective ID %d com rule %d at around line 
%d\n", CI, ncs, fileline)); - goto on_file_error; - } - - com_p->mpi_comsize = CS; - - NMS = getnext (fptr); - if (NMS<0) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read number of message sizes for collective ID %d com rule %d at around line %d\n", CI, ncs, fileline)); - goto on_file_error; - } - - com_p->n_msg_sizes = NMS; - com_p->msg_rules = ompi_coll_tuned_mk_msg_rules (NMS, CI, ncs, CS); - - msg_p = com_p->msg_rules; - - for (nms=0;nmsmsg_rules[nms]); - - MS = getnext (fptr); - if (MS<0) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read message size for collective ID %d com rule %d msg rule %d at around line %d\n", CI, ncs, nms, fileline)); + CS = getnext (fptr); + if (CS<0) { + OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read communicator size for collective ID %d com rule %d at around line %d\n", CI, ncs, fileline)); goto on_file_error; } - msg_p->msg_size = MS; - ALG = getnext (fptr); - if (ALG<0) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read target algorithm method for collective ID %d com rule %d msg rule %d at around line %d\n", CI, ncs, nms, fileline)); + com_p->mpi_comsize = CS; + + NMS = getnext (fptr); + if (NMS<0) { + OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read number of message sizes for collective ID %d com rule %d at around line %d\n", CI, ncs, fileline)); goto on_file_error; } - msg_p->result_alg = ALG; - FANINOUT = getnext (fptr); - if (FANINOUT<0) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read fan in/out topo for collective ID %d com rule %d msg rule %d at around line %d\n", CI, ncs, nms, fileline)); - goto on_file_error; - } - msg_p->result_topo_faninout = FANINOUT; + com_p->n_msg_sizes = NMS; + com_p->msg_rules = ompi_coll_tuned_mk_msg_rules (NMS, CI, ncs, CS); - SS = getnext (fptr); - if (SS<0) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read target segment size for collective ID %d com rule %d msg rule %d at around line %d\n", CI, ncs, nms, fileline)); - goto on_file_error; - } - 
msg_p->result_segsize = SS; + msg_p = com_p->msg_rules; - if (!nms && MS) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"All algorithms must specify a rule for message size of zero upwards always first!\n")); - OPAL_OUTPUT((ompi_coll_tuned_stream,"Message size was %d for collective ID %d com rule %d msg rule %d at around line %d\n", MS, CI, ncs, nms, fileline)); - goto on_file_error; - } + for (nms=0;nmsmsg_rules[nms]); - } /* msg size */ + MS = getnext (fptr); + if (MS<0) { + OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read message size for collective ID %d com rule %d msg rule %d at around line %d\n", CI, ncs, nms, fileline)); + goto on_file_error; + } + msg_p->msg_size = MS; - total_com_count++; + ALG = getnext (fptr); + if (ALG<0) { + OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read target algorithm method for collective ID %d com rule %d msg rule %d at around line %d\n", CI, ncs, nms, fileline)); + goto on_file_error; + } + msg_p->result_alg = ALG; - } /* comm size */ + FANINOUT = getnext (fptr); + if (FANINOUT<0) { + OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read fan in/out topo for collective ID %d com rule %d msg rule %d at around line %d\n", CI, ncs, nms, fileline)); + goto on_file_error; + } + msg_p->result_topo_faninout = FANINOUT; - total_alg_count++; + SS = getnext (fptr); + if (SS<0) { + OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read target segment size for collective ID %d com rule %d msg rule %d at around line %d\n", CI, ncs, nms, fileline)); + goto on_file_error; + } + msg_p->result_segsize = SS; - } /* per collective */ + if (!nms && MS) { + OPAL_OUTPUT((ompi_coll_tuned_stream,"All algorithms must specify a rule for message size of zero upwards always first!\n")); + OPAL_OUTPUT((ompi_coll_tuned_stream,"Message size was %d for collective ID %d com rule %d msg rule %d at around line %d\n", MS, CI, ncs, nms, fileline)); + goto on_file_error; + } + + total_msg_count++; + + } /* msg size */ + + total_com_count++; + + } /* comm size */ + + 
total_alg_count++; + + } /* per collective */ - fclose (fptr); + fclose (fptr); - OPAL_OUTPUT((ompi_coll_tuned_stream,"\nConfigure file Stats\n")); - OPAL_OUTPUT((ompi_coll_tuned_stream,"Collectives with rules\t\t\t: %5d\n", total_alg_count)); - OPAL_OUTPUT((ompi_coll_tuned_stream,"Communicator sizes with rules\t\t: %5d\n", total_com_count)); - OPAL_OUTPUT((ompi_coll_tuned_stream,"Message sizes with rules\t\t: %5d\n", total_msg_count)); - OPAL_OUTPUT((ompi_coll_tuned_stream,"Lines in configuration file read\t\t: %5d\n", fileline)); + OPAL_OUTPUT((ompi_coll_tuned_stream,"\nConfigure file Stats\n")); + OPAL_OUTPUT((ompi_coll_tuned_stream,"Collectives with rules\t\t\t: %5d\n", total_alg_count)); + OPAL_OUTPUT((ompi_coll_tuned_stream,"Communicator sizes with rules\t\t: %5d\n", total_com_count)); + OPAL_OUTPUT((ompi_coll_tuned_stream,"Message sizes with rules\t\t: %5d\n", total_msg_count)); + OPAL_OUTPUT((ompi_coll_tuned_stream,"Lines in configuration file read\t\t: %5d\n", fileline)); - /* return the rules to the caller */ - *rules = alg_rules; + /* return the rules to the caller */ + *rules = alg_rules; - return (total_alg_count); + return (total_alg_count); -on_file_error: + on_file_error: - /* here we close out the file and delete any memory allocated nicely */ - /* we return back a verbose message and a count of -1 algorithms read */ - /* draconian but its better than having a bad collective decision table */ + /* here we close out the file and delete any memory allocated nicely */ + /* we return back a verbose message and a count of -1 algorithms read */ + /* draconian but its better than having a bad collective decision table */ - OPAL_OUTPUT((ompi_coll_tuned_stream,"read_rules_config_file: bad configure file [%s]. 
Read afar as line %d\n", fname, fileline)); - OPAL_OUTPUT((ompi_coll_tuned_stream,"Ignoring user supplied tuned collectives configuration decision file.\n")); - OPAL_OUTPUT((ompi_coll_tuned_stream,"Switching back to [compiled in] fixed decision table.\n")); - OPAL_OUTPUT((ompi_coll_tuned_stream,"Fix errors as listed above and try again.\n")); + OPAL_OUTPUT((ompi_coll_tuned_stream,"read_rules_config_file: bad configure file [%s]. Read afar as line %d\n", fname, fileline)); + OPAL_OUTPUT((ompi_coll_tuned_stream,"Ignoring user supplied tuned collectives configuration decision file.\n")); + OPAL_OUTPUT((ompi_coll_tuned_stream,"Switching back to [compiled in] fixed decision table.\n")); + OPAL_OUTPUT((ompi_coll_tuned_stream,"Fix errors as listed above and try again.\n")); - /* deallocate memory if allocated */ - if (alg_rules) ompi_coll_tuned_free_all_rules (alg_rules, n_collectives); + /* deallocate memory if allocated */ + if (alg_rules) ompi_coll_tuned_free_all_rules (alg_rules, n_collectives); - /* close file */ - if (fptr) fclose (fptr); + /* close file */ + if (fptr) fclose (fptr); - *rules = (ompi_coll_alg_rule_t*) NULL; - return (-1); + *rules = (ompi_coll_alg_rule_t*) NULL; + return (-1); } static int getnext (FILE *fptr) { - int val; - int rc; - char trash; + int val; + int rc; + char trash; - do { - rc = fscanf(fptr, "%d", &val); - if (rc==EOF) return (MYEOF); - if (1==rc) return (val); - else { - rc = fread(&trash, 1, 1, fptr); - if ('\n'==trash) fileline++; - if ('#'==trash) skiptonewline (fptr); - } - } while (1); + do { + rc = fscanf(fptr, "%d", &val); + if (rc==EOF) return (MYEOF); + if (1==rc) return (val); + else { + rc = fread(&trash, 1, 1, fptr); + if ('\n'==trash) fileline++; + if ('#'==trash) skiptonewline (fptr); + } + } while (1); -return rc; + return rc; } static void skiptonewline (FILE *fptr) { - char val; - int rc; + char val; + int rc; - do { - rc = fread(&val, 1, 1, fptr); - if (0==rc) return; - if ((1==rc)&&('\n'==val)) { - fileline++; - 
return; - } - } while (1); + do { + rc = fread(&val, 1, 1, fptr); + if (0==rc) return; + if ((1==rc)&&('\n'==val)) { + fileline++; + return; + } + } while (1); } diff --git a/ompi/mca/coll/tuned/coll_tuned_dynamic_rules.c b/ompi/mca/coll/tuned/coll_tuned_dynamic_rules.c index c56bed0717..f8947f431c 100644 --- a/ompi/mca/coll/tuned/coll_tuned_dynamic_rules.c +++ b/ompi/mca/coll/tuned/coll_tuned_dynamic_rules.c @@ -41,36 +41,36 @@ ompi_coll_alg_rule_t* ompi_coll_tuned_mk_alg_rules (int n_alg) { - int i; - ompi_coll_alg_rule_t* alg_rules; + int i; + ompi_coll_alg_rule_t* alg_rules; - alg_rules = (ompi_coll_alg_rule_t *) calloc (n_alg, sizeof (ompi_coll_alg_rule_t)); - if (!alg_rules) return (alg_rules); + alg_rules = (ompi_coll_alg_rule_t *) calloc (n_alg, sizeof (ompi_coll_alg_rule_t)); + if (!alg_rules) return (alg_rules); - /* set all we can at this point */ - for (i=0;ialg_rule_id, - msg_p->com_rule_id, msg_p->mpi_comsize, msg_p->msg_rule_id)); + OPAL_OUTPUT((ompi_coll_tuned_stream,"alg_id %3d\tcom_id %3d\tcom_size %3d\tmsg_id %3d\t", msg_p->alg_rule_id, + msg_p->com_rule_id, msg_p->mpi_comsize, msg_p->msg_rule_id)); - OPAL_OUTPUT((ompi_coll_tuned_stream,"msg_size %6d -> algorithm %2d\ttopo in/out %2d\tsegsize %5ld\n", - msg_p->msg_size, msg_p->result_alg, msg_p->result_topo_faninout, msg_p->result_segsize)); + OPAL_OUTPUT((ompi_coll_tuned_stream,"msg_size %6d -> algorithm %2d\ttopo in/out %2d\tsegsize %5ld\n", + msg_p->msg_size, msg_p->result_alg, msg_p->result_topo_faninout, msg_p->result_segsize)); - return (0); + return (0); } int ompi_coll_tuned_dump_com_rule (ompi_coll_com_rule_t* com_p) { - int i; + int i; - if (!com_p) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"Com rule was a NULL ptr?!\n")); - return (-1); - } + if (!com_p) { + OPAL_OUTPUT((ompi_coll_tuned_stream,"Com rule was a NULL ptr?!\n")); + return (-1); + } - OPAL_OUTPUT((ompi_coll_tuned_stream, "alg_id %3d\tcom_id %3d\tcom_size %3d\t", com_p->alg_rule_id, com_p->com_rule_id, com_p->mpi_comsize)); + 
OPAL_OUTPUT((ompi_coll_tuned_stream, "alg_id %3d\tcom_id %3d\tcom_size %3d\t", com_p->alg_rule_id, com_p->com_rule_id, com_p->mpi_comsize)); - if (!com_p->n_msg_sizes) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"no msgsizes defined\n")); - return (0); - } + if (!com_p->n_msg_sizes) { + OPAL_OUTPUT((ompi_coll_tuned_stream,"no msgsizes defined\n")); + return (0); + } - OPAL_OUTPUT((ompi_coll_tuned_stream,"number of message sizes %3d\n", com_p->n_msg_sizes)); + OPAL_OUTPUT((ompi_coll_tuned_stream,"number of message sizes %3d\n", com_p->n_msg_sizes)); - for (i=0;in_msg_sizes;i++) { - ompi_coll_tuned_dump_msg_rule (&(com_p->msg_rules[i])); - } + for (i=0;in_msg_sizes;i++) { + ompi_coll_tuned_dump_msg_rule (&(com_p->msg_rules[i])); + } - return (0); + return (0); } int ompi_coll_tuned_dump_alg_rule (ompi_coll_alg_rule_t* alg_p) { - int i; + int i; - if (!alg_p) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"Algorithm rule was a NULL ptr?!\n")); - return (-1); - } + if (!alg_p) { + OPAL_OUTPUT((ompi_coll_tuned_stream,"Algorithm rule was a NULL ptr?!\n")); + return (-1); + } - OPAL_OUTPUT((ompi_coll_tuned_stream,"alg_id %3d\t", alg_p->alg_rule_id)); + OPAL_OUTPUT((ompi_coll_tuned_stream,"alg_id %3d\t", alg_p->alg_rule_id)); - if (!alg_p->n_com_sizes) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"no coms defined\n")); - return (0); - } + if (!alg_p->n_com_sizes) { + OPAL_OUTPUT((ompi_coll_tuned_stream,"no coms defined\n")); + return (0); + } - OPAL_OUTPUT((ompi_coll_tuned_stream,"number of com sizes %3d\n", alg_p->n_com_sizes)); + OPAL_OUTPUT((ompi_coll_tuned_stream,"number of com sizes %3d\n", alg_p->n_com_sizes)); - for (i=0;in_com_sizes;i++) { - ompi_coll_tuned_dump_com_rule (&(alg_p->com_rules[i])); - } + for (i=0;in_com_sizes;i++) { + ompi_coll_tuned_dump_com_rule (&(alg_p->com_rules[i])); + } - return (0); + return (0); } int ompi_coll_tuned_dump_all_rules (ompi_coll_alg_rule_t* alg_p, int n_rules) { - int i; + int i; - if (!alg_p) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"Algorithm 
rule was a NULL ptr?!\n")); - return (-1); - } + if (!alg_p) { + OPAL_OUTPUT((ompi_coll_tuned_stream,"Algorithm rule was a NULL ptr?!\n")); + return (-1); + } - OPAL_OUTPUT((ompi_coll_tuned_stream,"Number of algorithm rules %3d\n", n_rules)); + OPAL_OUTPUT((ompi_coll_tuned_stream,"Number of algorithm rules %3d\n", n_rules)); - for (i=0;in_msg_sizes) { - msg_p = com_p->msg_rules; + if (com_p->n_msg_sizes) { + msg_p = com_p->msg_rules; - if (!msg_p) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"attempt to free NULL msg_rules when msg count was %d\n", com_p->n_msg_sizes)); - rc = -1; /* some error */ - } - else { - /* ok, memory exists for the msg rules so free that first */ - free (com_p->msg_rules); - com_p->msg_rules = (ompi_coll_msg_rule_t*) NULL; - } + if (!msg_p) { + OPAL_OUTPUT((ompi_coll_tuned_stream,"attempt to free NULL msg_rules when msg count was %d\n", com_p->n_msg_sizes)); + rc = -1; /* some error */ + } + else { + /* ok, memory exists for the msg rules so free that first */ + free (com_p->msg_rules); + com_p->msg_rules = (ompi_coll_msg_rule_t*) NULL; + } - } /* if we have msg rules to free as well */ + } /* if we have msg rules to free as well */ - - return (rc); + return (rc); } int ompi_coll_tuned_free_coms_in_alg_rule (ompi_coll_alg_rule_t* alg_p) { - int rc=0; - int i; + int rc=0; + int i; - ompi_coll_com_rule_t* com_p; + ompi_coll_com_rule_t* com_p; - if (!alg_p) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"attempt to free NULL alg_rule ptr\n")); - return (-1); - } + if (!alg_p) { + OPAL_OUTPUT((ompi_coll_tuned_stream,"attempt to free NULL alg_rule ptr\n")); + return (-1); + } - if (alg_p->n_com_sizes) { - com_p = alg_p->com_rules; + if (alg_p->n_com_sizes) { + com_p = alg_p->com_rules; - if (!com_p) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"attempt to free NULL com_rules when com count was %d\n", alg_p->n_com_sizes)); - } - else { - /* ok, memory exists for the com rules so free their message rules first */ - for (i=0;in_com_sizes;i++) { - com_p = 
&(alg_p->com_rules[i]); - ompi_coll_tuned_free_msg_rules_in_com_rule (com_p); + if (!com_p) { + OPAL_OUTPUT((ompi_coll_tuned_stream,"attempt to free NULL com_rules when com count was %d\n", alg_p->n_com_sizes)); + } + else { + /* ok, memory exists for the com rules so free their message rules first */ + for (i=0;in_com_sizes;i++) { + com_p = &(alg_p->com_rules[i]); + ompi_coll_tuned_free_msg_rules_in_com_rule (com_p); + } + /* we are now free to free the com rules themselives */ + free (alg_p->com_rules); + alg_p->com_rules = (ompi_coll_com_rule_t*) NULL; } - /* we are now free to free the com rules themselives */ - free (alg_p->com_rules); - alg_p->com_rules = (ompi_coll_com_rule_t*) NULL; - } - } /* if we have msg rules to free as well */ + } /* if we have msg rules to free as well */ - return (rc); + return (rc); } int ompi_coll_tuned_free_all_rules (ompi_coll_alg_rule_t* alg_p, int n_algs) { - int i; - int rc = 0; + int i; + int rc = 0; - for(i=0;in_com_sizes) { /* check for count of communicator sizes */ - return ((ompi_coll_com_rule_t*)NULL); /* no com sizes so no rule */ - } + if (!alg_p->n_com_sizes) { /* check for count of communicator sizes */ + return ((ompi_coll_com_rule_t*)NULL); /* no com sizes so no rule */ + } - /* ok have some com sizes, now to find the one closest to my mpi_comsize */ + /* ok have some com sizes, now to find the one closest to my mpi_comsize */ - /* make a copy of the first com rule */ - best_com_p = com_p = alg_p->com_rules; - i = best = 0; + /* make a copy of the first com rule */ + best_com_p = com_p = alg_p->com_rules; + i = best = 0; - while (in_com_sizes) { -/* OPAL_OUTPUT((ompi_coll_tuned_stream,"checking comsize %d against alg_id %d com_id %d index %d com_size %d", */ -/* mpi_comsize, com_p->alg_rule_id, com_p->com_rule_id, i, com_p->mpi_comsize)); */ - if (com_p->mpi_comsize <= mpi_comsize) { - best = i; - best_com_p = com_p; -/* OPAL_OUTPUT((ompi_coll_tuned_stream(":ok\n")); */ - } - else { -/* 
OPAL_OUTPUT((ompi_coll_tuned_stream(":nop\n")); */ - break; - } - /* go to the next entry */ - com_p++; - i++; - } + while (in_com_sizes) { + /* OPAL_OUTPUT((ompi_coll_tuned_stream,"checking comsize %d against alg_id %d com_id %d index %d com_size %d", */ + /* mpi_comsize, com_p->alg_rule_id, com_p->com_rule_id, i, com_p->mpi_comsize)); */ + if (com_p->mpi_comsize <= mpi_comsize) { + best = i; + best_com_p = com_p; + /* OPAL_OUTPUT((ompi_coll_tuned_stream(":ok\n")); */ + } + else { + /* OPAL_OUTPUT((ompi_coll_tuned_stream(":nop\n")); */ + break; + } + /* go to the next entry */ + com_p++; + i++; + } - OPAL_OUTPUT((ompi_coll_tuned_stream,"Selected the following com rule id %d\n", best_com_p->com_rule_id)); - ompi_coll_tuned_dump_com_rule (best_com_p); + OPAL_OUTPUT((ompi_coll_tuned_stream,"Selected the following com rule id %d\n", best_com_p->com_rule_id)); + ompi_coll_tuned_dump_com_rule (best_com_p); - return (best_com_p); + return (best_com_p); } /* @@ -356,61 +355,61 @@ ompi_coll_com_rule_t* ompi_coll_tuned_get_com_rule_ptr (ompi_coll_alg_rule_t* ru */ int ompi_coll_tuned_get_target_method_params (ompi_coll_com_rule_t* base_com_rule, int mpi_msgsize, int *result_topo_faninout, - int* result_segsize) + int* result_segsize) { - ompi_coll_msg_rule_t* msg_p = (ompi_coll_msg_rule_t*) NULL; - ompi_coll_msg_rule_t* best_msg_p = (ompi_coll_msg_rule_t*) NULL; - int i, best; + ompi_coll_msg_rule_t* msg_p = (ompi_coll_msg_rule_t*) NULL; + ompi_coll_msg_rule_t* best_msg_p = (ompi_coll_msg_rule_t*) NULL; + int i, best; - if (!base_com_rule) { - return (0); - } + if (!base_com_rule) { + return (0); + } - if (!result_topo_faninout) { - return (0); - } + if (!result_topo_faninout) { + return (0); + } - if (!result_segsize) { - return (0); - } + if (!result_segsize) { + return (0); + } - if (!base_com_rule->n_msg_sizes) { /* check for count of message sizes */ - return (0); /* no msg sizes so no rule */ - } + if (!base_com_rule->n_msg_sizes) { /* check for count of message sizes 
*/ + return (0); /* no msg sizes so no rule */ + } - /* ok have some msg sizes, now to find the one closest to my mpi_msgsize */ + /* ok have some msg sizes, now to find the one closest to my mpi_msgsize */ - /* make a copy of the first msg rule */ - best_msg_p = msg_p = base_com_rule->msg_rules; - i = best = 0; + /* make a copy of the first msg rule */ + best_msg_p = msg_p = base_com_rule->msg_rules; + i = best = 0; - while (in_msg_sizes) { -/* OPAL_OUTPUT((ompi_coll_tuned_stream,"checking mpi_msgsize %d against com_id %d msg_id %d index %d msg_size %d", */ -/* mpi_msgsize, msg_p->com_rule_id, msg_p->msg_rule_id, i, msg_p->msg_size)); */ - if (msg_p->msg_size <= mpi_msgsize) { - best = i; - best_msg_p = msg_p; -/* OPAL_OUTPUT((ompi_coll_tuned_stream(":ok\n")); */ - } - else { -/* OPAL_OUTPUT((ompi_coll_tuned_stream(":nop\n")); */ - break; - } - /* go to the next entry */ - msg_p++; - i++; - } + while (in_msg_sizes) { + /* OPAL_OUTPUT((ompi_coll_tuned_stream,"checking mpi_msgsize %d against com_id %d msg_id %d index %d msg_size %d", */ + /* mpi_msgsize, msg_p->com_rule_id, msg_p->msg_rule_id, i, msg_p->msg_size)); */ + if (msg_p->msg_size <= mpi_msgsize) { + best = i; + best_msg_p = msg_p; + /* OPAL_OUTPUT((ompi_coll_tuned_stream(":ok\n")); */ + } + else { + /* OPAL_OUTPUT((ompi_coll_tuned_stream(":nop\n")); */ + break; + } + /* go to the next entry */ + msg_p++; + i++; + } - OPAL_OUTPUT((ompi_coll_tuned_stream,"Selected the following msg rule id %d\n", best_msg_p->msg_rule_id)); - ompi_coll_tuned_dump_msg_rule (best_msg_p); + OPAL_OUTPUT((ompi_coll_tuned_stream,"Selected the following msg rule id %d\n", best_msg_p->msg_rule_id)); + ompi_coll_tuned_dump_msg_rule (best_msg_p); - /* return the segment size */ - *result_topo_faninout = best_msg_p->result_topo_faninout; + /* return the segment size */ + *result_topo_faninout = best_msg_p->result_topo_faninout; - /* return the segment size */ - *result_segsize = best_msg_p->result_segsize; + /* return the segment size 
*/ + *result_segsize = best_msg_p->result_segsize; - /* return the algorithm/method to use */ - return (best_msg_p->result_alg); + /* return the algorithm/method to use */ + return (best_msg_p->result_alg); } diff --git a/ompi/mca/coll/tuned/coll_tuned_forced.c b/ompi/mca/coll/tuned/coll_tuned_forced.c index 65940943c2..4bad74ba5c 100644 --- a/ompi/mca/coll/tuned/coll_tuned_forced.c +++ b/ompi/mca/coll/tuned/coll_tuned_forced.c @@ -42,24 +42,24 @@ /* recheck the setting of forced, called on module create (i.e. for each new comm) */ int ompi_coll_tuned_forced_getvalues (coll_tuned_force_algorithm_mca_param_indices_t mca_params, - coll_tuned_force_algorithm_params_t *forced_values) + coll_tuned_force_algorithm_params_t *forced_values) { - mca_base_param_lookup_int (mca_params.algorithm_param_index, &(forced_values->algorithm)); - mca_base_param_lookup_int (mca_params.segsize_param_index, &(forced_values->segsize)); - mca_base_param_lookup_int (mca_params.tree_fanout_param_index, &(forced_values->tree_fanout)); - mca_base_param_lookup_int (mca_params.chain_fanout_param_index, &(forced_values->chain_fanout)); + mca_base_param_lookup_int (mca_params.algorithm_param_index, &(forced_values->algorithm)); + mca_base_param_lookup_int (mca_params.segsize_param_index, &(forced_values->segsize)); + mca_base_param_lookup_int (mca_params.tree_fanout_param_index, &(forced_values->tree_fanout)); + mca_base_param_lookup_int (mca_params.chain_fanout_param_index, &(forced_values->chain_fanout)); -return (MPI_SUCCESS); + return (MPI_SUCCESS); } /* special version of above just for barrier which only has one option available (at the moment...) 
*/ int ompi_coll_tuned_forced_getvalues_barrier (coll_tuned_force_algorithm_mca_param_indices_t mca_params, - coll_tuned_force_algorithm_params_t *forced_values) + coll_tuned_force_algorithm_params_t *forced_values) { - mca_base_param_lookup_int (mca_params.algorithm_param_index, &(forced_values->algorithm)); + mca_base_param_lookup_int (mca_params.algorithm_param_index, &(forced_values->algorithm)); -return (MPI_SUCCESS); + return (MPI_SUCCESS); } diff --git a/ompi/mca/coll/tuned/coll_tuned_forced.h b/ompi/mca/coll/tuned/coll_tuned_forced.h index 7f34148e7e..aeb39dc3f8 100644 --- a/ompi/mca/coll/tuned/coll_tuned_forced.h +++ b/ompi/mca/coll/tuned/coll_tuned_forced.h @@ -61,8 +61,6 @@ int ompi_coll_tuned_forced_getvalues (coll_tuned_force_algorithm_mca_param_indic int ompi_coll_tuned_forced_getvalues_barrier (coll_tuned_force_algorithm_mca_param_indices_t mca_params, coll_tuned_force_algorithm_params_t *forced_values); - - #if defined(c_plusplus) || defined(__cplusplus) } #endif diff --git a/ompi/mca/coll/tuned/coll_tuned_module.c b/ompi/mca/coll/tuned/coll_tuned_module.c index f1da5c15b9..de60453cd7 100644 --- a/ompi/mca/coll/tuned/coll_tuned_module.c +++ b/ompi/mca/coll/tuned/coll_tuned_module.c @@ -45,87 +45,87 @@ static const mca_coll_base_module_1_0_0_t *to_use = NULL; */ static const mca_coll_base_module_1_0_0_t intra_fixed = { - /* Initialization / finalization functions */ + /* Initialization / finalization functions */ - ompi_coll_tuned_module_init, - ompi_coll_tuned_module_finalize, + ompi_coll_tuned_module_init, + ompi_coll_tuned_module_finalize, - /* Collective function pointers */ + /* Collective function pointers */ -/* ompi_coll_tuned_allgather_intra_dec_fixed, */ + /* ompi_coll_tuned_allgather_intra_dec_fixed, */ NULL, -/* ompi_coll_tuned_allgatherv_intra_dec_fixed, */ + /* ompi_coll_tuned_allgatherv_intra_dec_fixed, */ NULL, - ompi_coll_tuned_allreduce_intra_dec_fixed, -/* NULL, */ - ompi_coll_tuned_alltoall_intra_dec_fixed, -/* NULL, */ -/* 
ompi_coll_tuned_alltoallv_intra_dec_fixed, */ + ompi_coll_tuned_allreduce_intra_dec_fixed, + /* NULL, */ + ompi_coll_tuned_alltoall_intra_dec_fixed, + /* NULL, */ + /* ompi_coll_tuned_alltoallv_intra_dec_fixed, */ NULL, -/* ompi_coll_tuned_alltoallw_intra_dec_fixed, */ + /* ompi_coll_tuned_alltoallw_intra_dec_fixed, */ NULL, - ompi_coll_tuned_barrier_intra_dec_fixed, -/* NULL, */ - ompi_coll_tuned_bcast_intra_dec_fixed, -/* NULL, */ -/* ompi_coll_tuned_exscan_intra_dec_fixed, */ + ompi_coll_tuned_barrier_intra_dec_fixed, + /* NULL, */ + ompi_coll_tuned_bcast_intra_dec_fixed, + /* NULL, */ + /* ompi_coll_tuned_exscan_intra_dec_fixed, */ NULL, -/* ompi_coll_tuned_gather_intra_dec_fixed, */ + /* ompi_coll_tuned_gather_intra_dec_fixed, */ NULL, -/* ompi_coll_tuned_gatherv_intra_dec_fixed, */ + /* ompi_coll_tuned_gatherv_intra_dec_fixed, */ NULL, - ompi_coll_tuned_reduce_intra_dec_fixed, -/* NULL, */ -/* ompi_coll_tuned_reduce_scatter_intra_dec_fixed, */ + ompi_coll_tuned_reduce_intra_dec_fixed, + /* NULL, */ + /* ompi_coll_tuned_reduce_scatter_intra_dec_fixed, */ NULL, -/* ompi_coll_tuned_scan_intra_dec_fixed, */ + /* ompi_coll_tuned_scan_intra_dec_fixed, */ NULL, -/* ompi_coll_tuned_scatter_intra_dec_fixed, */ + /* ompi_coll_tuned_scatter_intra_dec_fixed, */ NULL, -/* ompi_coll_tuned_scatterv_intra_dec_fixed */ + /* ompi_coll_tuned_scatterv_intra_dec_fixed */ NULL }; static const mca_coll_base_module_1_0_0_t intra_dynamic = { - /* Initialization / finalization functions */ + /* Initialization / finalization functions */ - ompi_coll_tuned_module_init, - ompi_coll_tuned_module_finalize, + ompi_coll_tuned_module_init, + ompi_coll_tuned_module_finalize, - /* Collective function pointers */ + /* Collective function pointers */ -/* ompi_coll_tuned_allgather_intra_dec_dynamic, */ + /* ompi_coll_tuned_allgather_intra_dec_dynamic, */ NULL, -/* ompi_coll_tuned_allgatherv_intra_dec_dynamic, */ + /* ompi_coll_tuned_allgatherv_intra_dec_dynamic, */ NULL, - 
ompi_coll_tuned_allreduce_intra_dec_dynamic, -/* NULL, */ - ompi_coll_tuned_alltoall_intra_dec_dynamic, -/* NULL, */ -/* ompi_coll_tuned_alltoallv_intra_dec_dynamic, */ + ompi_coll_tuned_allreduce_intra_dec_dynamic, + /* NULL, */ + ompi_coll_tuned_alltoall_intra_dec_dynamic, + /* NULL, */ + /* ompi_coll_tuned_alltoallv_intra_dec_dynamic, */ NULL, -/* ompi_coll_tuned_alltoallw_intra_dec_dynamic, */ + /* ompi_coll_tuned_alltoallw_intra_dec_dynamic, */ NULL, - ompi_coll_tuned_barrier_intra_dec_dynamic, -/* NULL, */ - ompi_coll_tuned_bcast_intra_dec_dynamic, -/* NULL, */ -/* ompi_coll_tuned_exscan_intra_dec_dynamic, */ + ompi_coll_tuned_barrier_intra_dec_dynamic, + /* NULL, */ + ompi_coll_tuned_bcast_intra_dec_dynamic, + /* NULL, */ + /* ompi_coll_tuned_exscan_intra_dec_dynamic, */ NULL, -/* ompi_coll_tuned_gather_intra_dec_dynamic, */ + /* ompi_coll_tuned_gather_intra_dec_dynamic, */ NULL, -/* ompi_coll_tuned_gatherv_intra_dec_dynamic, */ + /* ompi_coll_tuned_gatherv_intra_dec_dynamic, */ NULL, - ompi_coll_tuned_reduce_intra_dec_dynamic, -/* NULL, */ -/* ompi_coll_tuned_reduce_scatter_intra_dec_dynamic, */ + ompi_coll_tuned_reduce_intra_dec_dynamic, + /* NULL, */ + /* ompi_coll_tuned_reduce_scatter_intra_dec_dynamic, */ NULL, -/* ompi_coll_tuned_scan_intra_dec_dynamic, */ + /* ompi_coll_tuned_scan_intra_dec_dynamic, */ NULL, -/* ompi_coll_tuned_scatter_intra_dec_dynamic, */ + /* ompi_coll_tuned_scatter_intra_dec_dynamic, */ NULL, -/* ompi_coll_tuned_scatterv_intra_dec_dynamic */ + /* ompi_coll_tuned_scatterv_intra_dec_dynamic */ NULL }; @@ -137,87 +137,87 @@ static const mca_coll_base_module_1_0_0_t intra_dynamic = { */ static const mca_coll_base_module_1_0_0_t inter_fixed = { - /* Initialization / finalization functions */ + /* Initialization / finalization functions */ - ompi_coll_tuned_module_init, - ompi_coll_tuned_module_finalize, + ompi_coll_tuned_module_init, + ompi_coll_tuned_module_finalize, - /* Collective function pointers */ + /* Collective function 
pointers */ -/* ompi_coll_tuned_allgather_inter_dec_fixed, */ + /* ompi_coll_tuned_allgather_inter_dec_fixed, */ NULL, -/* ompi_coll_tuned_allgatherv_inter_dec_fixed, */ + /* ompi_coll_tuned_allgatherv_inter_dec_fixed, */ NULL, -/* ompi_coll_tuned_allreduce_inter_dec_fixed, */ + /* ompi_coll_tuned_allreduce_inter_dec_fixed, */ NULL, -/* ompi_coll_tuned_alltoall_inter_dec_fixed, */ + /* ompi_coll_tuned_alltoall_inter_dec_fixed, */ NULL, -/* ompi_coll_tuned_alltoallv_inter_dec_fixed, */ + /* ompi_coll_tuned_alltoallv_inter_dec_fixed, */ NULL, -/* ompi_coll_tuned_alltoallw_inter_dec_fixed, */ + /* ompi_coll_tuned_alltoallw_inter_dec_fixed, */ NULL, -/* ompi_coll_tuned_barrier_inter_dec_fixed, */ + /* ompi_coll_tuned_barrier_inter_dec_fixed, */ NULL, -/* ompi_coll_tuned_bcast_inter_dec_fixed, */ + /* ompi_coll_tuned_bcast_inter_dec_fixed, */ NULL, -/* ompi_coll_tuned_exscan_inter_dec_fixed, */ + /* ompi_coll_tuned_exscan_inter_dec_fixed, */ NULL, -/* ompi_coll_tuned_gather_inter_dec_fixed, */ + /* ompi_coll_tuned_gather_inter_dec_fixed, */ NULL, -/* ompi_coll_tuned_gatherv_inter_dec_fixed, */ + /* ompi_coll_tuned_gatherv_inter_dec_fixed, */ NULL, -/* ompi_coll_tuned_reduce_inter_dec_fixed, */ + /* ompi_coll_tuned_reduce_inter_dec_fixed, */ NULL, -/* ompi_coll_tuned_reduce_scatter_inter_dec_fixed, */ + /* ompi_coll_tuned_reduce_scatter_inter_dec_fixed, */ NULL, -/* ompi_coll_tuned_scan_inter_dec_fixed, */ + /* ompi_coll_tuned_scan_inter_dec_fixed, */ NULL, -/* ompi_coll_tuned_scatter_inter_dec_fixed, */ + /* ompi_coll_tuned_scatter_inter_dec_fixed, */ NULL, -/* ompi_coll_tuned_scatterv_inter_dec_fixed */ + /* ompi_coll_tuned_scatterv_inter_dec_fixed */ NULL }; static const mca_coll_base_module_1_0_0_t inter_dynamic = { - /* Initialization / finalization functions */ + /* Initialization / finalization functions */ - ompi_coll_tuned_module_init, - ompi_coll_tuned_module_finalize, + ompi_coll_tuned_module_init, + ompi_coll_tuned_module_finalize, - /* Collective function 
pointers */ + /* Collective function pointers */ -/* ompi_coll_tuned_allgather_inter_dec_dynamic, */ + /* ompi_coll_tuned_allgather_inter_dec_dynamic, */ NULL, -/* ompi_coll_tuned_allgatherv_inter_dec_dynamic, */ + /* ompi_coll_tuned_allgatherv_inter_dec_dynamic, */ NULL, -/* ompi_coll_tuned_allreduce_inter_dec_dynamic, */ + /* ompi_coll_tuned_allreduce_inter_dec_dynamic, */ NULL, -/* ompi_coll_tuned_alltoall_inter_dec_dynamic, */ + /* ompi_coll_tuned_alltoall_inter_dec_dynamic, */ NULL, -/* ompi_coll_tuned_alltoallv_inter_dec_dynamic, */ + /* ompi_coll_tuned_alltoallv_inter_dec_dynamic, */ NULL, -/* ompi_coll_tuned_alltoallw_inter_dec_dynamic, */ + /* ompi_coll_tuned_alltoallw_inter_dec_dynamic, */ NULL, -/* ompi_coll_tuned_barrier_inter_dec_dynamic, */ + /* ompi_coll_tuned_barrier_inter_dec_dynamic, */ NULL, -/* ompi_coll_tuned_bcast_inter_dec_dynamic, */ + /* ompi_coll_tuned_bcast_inter_dec_dynamic, */ NULL, -/* ompi_coll_tuned_exscan_inter_dec_dynamic, */ + /* ompi_coll_tuned_exscan_inter_dec_dynamic, */ NULL, -/* ompi_coll_tuned_gather_inter_dec_dynamic, */ + /* ompi_coll_tuned_gather_inter_dec_dynamic, */ NULL, -/* ompi_coll_tuned_gatherv_inter_dec_dynamic, */ + /* ompi_coll_tuned_gatherv_inter_dec_dynamic, */ NULL, -/* ompi_coll_tuned_reduce_inter_dec_dynamic, */ + /* ompi_coll_tuned_reduce_inter_dec_dynamic, */ NULL, -/* ompi_coll_tuned_reduce_scatter_inter_dec_dynamic, */ + /* ompi_coll_tuned_reduce_scatter_inter_dec_dynamic, */ NULL, -/* ompi_coll_tuned_scan_inter_dec_dynamic, */ + /* ompi_coll_tuned_scan_inter_dec_dynamic, */ NULL, -/* ompi_coll_tuned_scatter_inter_dec_dynamic, */ + /* ompi_coll_tuned_scatter_inter_dec_dynamic, */ NULL, -/* ompi_coll_tuned_scatterv_inter_dec_dynamic */ + /* ompi_coll_tuned_scatterv_inter_dec_dynamic */ NULL }; @@ -233,7 +233,7 @@ static const mca_coll_base_module_1_0_0_t inter_dynamic = { * required level of thread support. 
*/ int ompi_coll_tuned_init_query(bool enable_progress_threads, - bool enable_mpi_threads) + bool enable_mpi_threads) { /* Nothing to do */ @@ -248,38 +248,38 @@ int ompi_coll_tuned_init_query(bool enable_progress_threads, */ const mca_coll_base_module_1_0_0_t * ompi_coll_tuned_comm_query(struct ompi_communicator_t *comm, int *priority, - struct mca_coll_base_comm_t **data) + struct mca_coll_base_comm_t **data) { OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:module_tuned query called")); *priority = ompi_coll_tuned_priority; - /* - * Choose whether to use [intra|inter] decision functions - * and if using fixed OR dynamic rule sets. - * Right now you cannot mix them, maybe later on it can be changed - * but this would probably add an extra if and funct call to the path - * - */ + /* + * Choose whether to use [intra|inter] decision functions + * and if using fixed OR dynamic rule sets. + * Right now you cannot mix them, maybe later on it can be changed + * but this would probably add an extra if and funct call to the path + * + */ - if (OMPI_COMM_IS_INTER(comm)) { - if (ompi_coll_tuned_use_dynamic_rules) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_query using inter_dynamic")); - to_use = &inter_dynamic; - } else { - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_query using inter_fixed")); - to_use = &inter_fixed; + if (OMPI_COMM_IS_INTER(comm)) { + if (ompi_coll_tuned_use_dynamic_rules) { + OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_query using inter_dynamic")); + to_use = &inter_dynamic; + } else { + OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_query using inter_fixed")); + to_use = &inter_fixed; + } + } else { /* is an intra comm */ + if (ompi_coll_tuned_use_dynamic_rules) { + OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_query using intra_dynamic")); + to_use = &intra_dynamic; + } else { + OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_query using intra_fixed")); + to_use = &intra_fixed; + } } - } 
else { /* is an intra comm */ - if (ompi_coll_tuned_use_dynamic_rules) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_query using intra_dynamic")); - to_use = &intra_dynamic; - } else { - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_query using intra_fixed")); - to_use = &intra_fixed; - } - } - return to_use; + return to_use; } @@ -289,199 +289,199 @@ ompi_coll_tuned_comm_query(struct ompi_communicator_t *comm, int *priority, const struct mca_coll_base_module_1_0_0_t * ompi_coll_tuned_module_init(struct ompi_communicator_t *comm) { - int size, rank; - struct mca_coll_base_comm_t *data; - /* fanout parameters */ - int rc=0; - int i; + int size, rank; + struct mca_coll_base_comm_t *data; + /* fanout parameters */ + int rc=0; + int i; - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_init called.")); + OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_init called.")); - /* This routine will become more complex and might have to be */ - /* broken into more sections/function calls */ + /* This routine will become more complex and might have to be */ + /* broken into more sections/function calls */ - /* Order of operations: - * alloc memory for nb reqs (in case we fall through) - * add decision rules if using dynamic rules - * compact rules using communicator size info etc - * build first guess cached topologies (might depend on the rules from above) - * - * then attach all to the communicator and return base module funct ptrs - */ + /* Order of operations: + * alloc memory for nb reqs (in case we fall through) + * add decision rules if using dynamic rules + * compact rules using communicator size info etc + * build first guess cached topologies (might depend on the rules from above) + * + * then attach all to the communicator and return base module funct ptrs + */ - /* Allocate the data that hangs off the communicator */ + /* Allocate the data that hangs off the communicator */ - if (OMPI_COMM_IS_INTER(comm)) { - size = 
ompi_comm_remote_size(comm); - } else { - size = ompi_comm_size(comm); - } - - - /* - * we still malloc data as it is used by the TUNED modules - * if we don't allocate it and fall back to a BASIC module routine then confuses debuggers - * we place any special info after the default data - * - * BUT on very large systems we might not be able to allocate all this memory so - * we do check a MCA parameter to see if if we should allocate this memory - * - * The default is set very high - * - */ - - /* if we within the memory/size limit, allow preallocated data */ - - - if (size<=ompi_coll_tuned_preallocate_memory_comm_size_limit) { - data = (mca_coll_base_comm_t*)malloc(sizeof(struct mca_coll_base_comm_t) + - (sizeof(ompi_request_t *) * size * 2)); - - if (NULL == data) { - return NULL; + if (OMPI_COMM_IS_INTER(comm)) { + size = ompi_comm_remote_size(comm); + } else { + size = ompi_comm_size(comm); } - data->mcct_reqs = (ompi_request_t **) (data + 1); - data->mcct_num_reqs = size * 2; - } - else { - data = (mca_coll_base_comm_t*)malloc(sizeof(struct mca_coll_base_comm_t)); + + + /* + * we still malloc data as it is used by the TUNED modules + * if we don't allocate it and fall back to a BASIC module routine then confuses debuggers + * we place any special info after the default data + * + * BUT on very large systems we might not be able to allocate all this memory so + * we do check a MCA parameter to see if if we should allocate this memory + * + * The default is set very high + * + */ + + /* if we within the memory/size limit, allow preallocated data */ + + + if (size<=ompi_coll_tuned_preallocate_memory_comm_size_limit) { + data = (mca_coll_base_comm_t*)malloc(sizeof(struct mca_coll_base_comm_t) + + (sizeof(ompi_request_t *) * size * 2)); - if (NULL == data) { - return NULL; - } - data->mcct_reqs = (ompi_request_t **) NULL; - data->mcct_num_reqs = 0; - } + if (NULL == data) { + return NULL; + } + data->mcct_reqs = (ompi_request_t **) (data + 1); + 
data->mcct_num_reqs = size * 2; + } + else { + data = (mca_coll_base_comm_t*)malloc(sizeof(struct mca_coll_base_comm_t)); + + if (NULL == data) { + return NULL; + } + data->mcct_reqs = (ompi_request_t **) NULL; + data->mcct_num_reqs = 0; + } - /* - * If using dynamic and you are MPI_COMM_WORLD and you want to use a parameter file.. - * then this effects how much storage space you need - * (This is a basic version of what will go into V2) - * - */ + /* + * If using dynamic and you are MPI_COMM_WORLD and you want to use a parameter file.. + * then this effects how much storage space you need + * (This is a basic version of what will go into V2) + * + */ - size = ompi_comm_size(comm); /* find size so we can (A) decide if to access the file directly */ - /* (B) so we can get our very own customised ompi_coll_com_rule_t ptr */ - /* which only has rules in it for our com size */ + size = ompi_comm_size(comm); /* find size so we can (A) decide if to access the file directly */ + /* (B) so we can get our very own customised ompi_coll_com_rule_t ptr */ + /* which only has rules in it for our com size */ - rank = ompi_comm_rank(comm); /* find rank as only MCW:0 opens any tuned conf files */ - /* actually if they are below a threadhold, they all open it */ - /* have to build a collective in here.. but just for MCW.. */ - /* but we have to make sure we have the same rules everywhere :( */ + rank = ompi_comm_rank(comm); /* find rank as only MCW:0 opens any tuned conf files */ + /* actually if they are below a threadhold, they all open it */ + /* have to build a collective in here.. but just for MCW.. 
*/ + /* but we have to make sure we have the same rules everywhere :( */ - /* if using dynamic rules make sure all overrides are NULL before we start override anything accidently */ - if (ompi_coll_tuned_use_dynamic_rules) { + /* if using dynamic rules make sure all overrides are NULL before we start override anything accidently */ + if (ompi_coll_tuned_use_dynamic_rules) { /* base rules */ data->all_base_rules = (ompi_coll_alg_rule_t*) NULL; /* each collective rule for my com size */ for (i=0;i<COLLCOUNT;i++) { - data->com_rules[i] = (ompi_coll_com_rule_t*) NULL; + data->com_rules[i] = (ompi_coll_com_rule_t*) NULL; } - } + } - /* next dynamic state, recheck all forced rules as well */ - /* warning, we should check to make sure this is really an INTRA comm here... */ - if (ompi_coll_tuned_use_dynamic_rules) { + /* next dynamic state, recheck all forced rules as well */ + /* warning, we should check to make sure this is really an INTRA comm here... */ + if (ompi_coll_tuned_use_dynamic_rules) { ompi_coll_tuned_forced_getvalues (ompi_coll_tuned_forced_params[ALLREDUCE], &(data->user_forced[ALLREDUCE])); ompi_coll_tuned_forced_getvalues (ompi_coll_tuned_forced_params[ALLTOALL], &(data->user_forced[ALLTOALL])); -/* ompi_coll_tuned_forced_getvalues (ompi_coll_tuned_forced_params[ALLTOALLV], &(data->user_forced[ALLTOALLV])); */ + /* ompi_coll_tuned_forced_getvalues (ompi_coll_tuned_forced_params[ALLTOALLV], &(data->user_forced[ALLTOALLV])); */ ompi_coll_tuned_forced_getvalues_barrier (ompi_coll_tuned_forced_params[BARRIER], &(data->user_forced[BARRIER])); ompi_coll_tuned_forced_getvalues (ompi_coll_tuned_forced_params[BCAST], &(data->user_forced[BCAST])); ompi_coll_tuned_forced_getvalues (ompi_coll_tuned_forced_params[REDUCE], &(data->user_forced[REDUCE])); - } + } - if (&ompi_mpi_comm_world==comm) { + if (&ompi_mpi_comm_world==comm) { - if (ompi_coll_tuned_use_dynamic_rules) { + if (ompi_coll_tuned_use_dynamic_rules) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_init MCW &
Dynamic")); + OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_init MCW & Dynamic")); - if (ompi_coll_tuned_dynamic_rules_filename) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_init Opening [%s]", - ompi_coll_tuned_dynamic_rules_filename)); - rc = ompi_coll_tuned_read_rules_config_file (ompi_coll_tuned_dynamic_rules_filename, - &(data->all_base_rules), COLLCOUNT); - if (rc>=0) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_init Read %d valid rules\n", rc)); - /* at this point we all have a base set of rules */ - /* now we can get our customized communicator sized rule set, for each collective */ - for (i=0;i<COLLCOUNT;i++) { - data->com_rules[i] = ompi_coll_tuned_get_com_rule_ptr (data->all_base_rules, i, size); + if (ompi_coll_tuned_dynamic_rules_filename) { + OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_init Opening [%s]", + ompi_coll_tuned_dynamic_rules_filename)); + rc = ompi_coll_tuned_read_rules_config_file (ompi_coll_tuned_dynamic_rules_filename, + &(data->all_base_rules), COLLCOUNT); + if (rc>=0) { + OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_init Read %d valid rules\n", rc)); + /* at this point we all have a base set of rules */ + /* now we can get our customized communicator sized rule set, for each collective */ + for (i=0;i<COLLCOUNT;i++) { + data->com_rules[i] = ompi_coll_tuned_get_com_rule_ptr (data->all_base_rules, i, size); + } + } + else { /* failed to read config file, thus make sure its a NULL... */ + data->all_base_rules = (ompi_coll_alg_rule_t*) NULL; } - } - else { /* failed to read config file, thus make sure its a NULL... */ - data->all_base_rules = (ompi_coll_alg_rule_t*) NULL; - } - } /* end if a config filename exists */ + } /* end if a config filename exists */ - } /* end if dynamic_rules */ + } /* end if dynamic_rules */ - } /* end if MCW */ + } /* end if MCW */ - /* ok, if using dynamic rules, not MCW and we are just any rank and a base set of rules exist..
ref them */ - /* order of eval is important here, if we are MCW ompi_mpi_comm_world.c_coll_selected_data is NULL still.. */ - if ((ompi_coll_tuned_use_dynamic_rules)&&(!(&ompi_mpi_comm_world==comm))&& + /* ok, if using dynamic rules, not MCW and we are just any rank and a base set of rules exist.. ref them */ + /* order of eval is important here, if we are MCW ompi_mpi_comm_world.c_coll_selected_data is NULL still.. */ + if ((ompi_coll_tuned_use_dynamic_rules)&&(!(&ompi_mpi_comm_world==comm))&& ((ompi_mpi_comm_world.c_coll_selected_data)->all_base_rules)) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_init NOT MCW & Dynamic")); + OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_init NOT MCW & Dynamic")); - /* this will, erm fail if MCW doesn't exist which it should! */ - data->all_base_rules = (ompi_mpi_comm_world.c_coll_selected_data)->all_base_rules; + /* this will, erm fail if MCW doesn't exist which it should! */ + data->all_base_rules = (ompi_mpi_comm_world.c_coll_selected_data)->all_base_rules; - /* at this point we all have a base set of rules if they exist atall */ - /* now we can get our customized communicator sized rule set, for each collective */ - for (i=0;i<COLLCOUNT;i++) { - data->com_rules[i] = ompi_coll_tuned_get_com_rule_ptr (data->all_base_rules, i, size); - } - } + /* at this point we all have a base set of rules if they exist atall */ + /* now we can get our customized communicator sized rule set, for each collective */ + for (i=0;i<COLLCOUNT;i++) { + data->com_rules[i] = ompi_coll_tuned_get_com_rule_ptr (data->all_base_rules, i, size); + } + } - /* - * now for the cached topo functions - * guess the initial topologies to use rank 0 as root - */ + /* + * now for the cached topo functions + * guess the initial topologies to use rank 0 as root + */ - /* general n fan out tree */ - data->cached_ntree = ompi_coll_tuned_topo_build_tree (ompi_coll_tuned_init_tree_fanout, comm, 0); - data->cached_ntree_root = 0; - data->cached_ntree_fanout = ompi_coll_tuned_init_tree_fanout; + /*
general n fan out tree */ + data->cached_ntree = ompi_coll_tuned_topo_build_tree (ompi_coll_tuned_init_tree_fanout, comm, 0); + data->cached_ntree_root = 0; + data->cached_ntree_fanout = ompi_coll_tuned_init_tree_fanout; - /* binary tree */ - data->cached_bintree = ompi_coll_tuned_topo_build_tree (2, comm, 0); - data->cached_bintree_root = 0; + /* binary tree */ + data->cached_bintree = ompi_coll_tuned_topo_build_tree (2, comm, 0); + data->cached_bintree_root = 0; - /* binomial tree */ - data->cached_bmtree = ompi_coll_tuned_topo_build_bmtree (comm, 0); - data->cached_bmtree_root = 0; + /* binomial tree */ + data->cached_bmtree = ompi_coll_tuned_topo_build_bmtree (comm, 0); + data->cached_bmtree_root = 0; - /* - * chains (fanout followed by pipelines) - * are more difficuilt as the fan out really really depends on message size [sometimes].. - * as size gets larger fan-out gets smaller [usually] - * - * will probably change how we cache this later, for now a midsize - * GEF - */ - data->cached_chain = ompi_coll_tuned_topo_build_chain (ompi_coll_tuned_init_chain_fanout, comm, 0); - data->cached_chain_root = 0; - data->cached_chain_fanout = ompi_coll_tuned_init_chain_fanout; + /* + * chains (fanout followed by pipelines) + * are more difficuilt as the fan out really really depends on message size [sometimes].. 
+ * as size gets larger fan-out gets smaller [usually] + * + * will probably change how we cache this later, for now a midsize + * GEF + */ + data->cached_chain = ompi_coll_tuned_topo_build_chain (ompi_coll_tuned_init_chain_fanout, comm, 0); + data->cached_chain_root = 0; + data->cached_chain_fanout = ompi_coll_tuned_init_chain_fanout; - /* standard pipeline */ - data->cached_pipeline = ompi_coll_tuned_topo_build_chain (1, comm, 0); - data->cached_pipeline_root = 0; + /* standard pipeline */ + data->cached_pipeline = ompi_coll_tuned_topo_build_chain (1, comm, 0); + data->cached_pipeline_root = 0; - /* All done */ + /* All done */ - comm->c_coll_selected_data = data; + comm->c_coll_selected_data = data; - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_init Tuned is in use")); - return to_use; + OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_init Tuned is in use")); + return to_use; } @@ -490,48 +490,48 @@ ompi_coll_tuned_module_init(struct ompi_communicator_t *comm) */ int ompi_coll_tuned_module_finalize(struct ompi_communicator_t *comm) { - if (NULL == comm->c_coll_selected_module) { - return OMPI_SUCCESS; - } + if (NULL == comm->c_coll_selected_module) { + return OMPI_SUCCESS; + } #if OMPI_ENABLE_DEBUG - /* Reset the reqs to NULL/0 -- they'll be freed as part of freeing - the generel c_coll_selected_data */ + /* Reset the reqs to NULL/0 -- they'll be freed as part of freeing + the generel c_coll_selected_data */ - comm->c_coll_selected_data->mcct_reqs = NULL; - comm->c_coll_selected_data->mcct_num_reqs = 0; + comm->c_coll_selected_data->mcct_reqs = NULL; + comm->c_coll_selected_data->mcct_num_reqs = 0; #endif - /* free any cached information that has been allocated */ - if (comm->c_coll_selected_data->cached_ntree) { /* destroy general tree if defined */ + /* free any cached information that has been allocated */ + if (comm->c_coll_selected_data->cached_ntree) { /* destroy general tree if defined */ ompi_coll_tuned_topo_destroy_tree 
(&comm->c_coll_selected_data->cached_ntree); - } - if (comm->c_coll_selected_data->cached_bintree) { /* destroy bintree if defined */ + } + if (comm->c_coll_selected_data->cached_bintree) { /* destroy bintree if defined */ ompi_coll_tuned_topo_destroy_tree (&comm->c_coll_selected_data->cached_bintree); - } - if (comm->c_coll_selected_data->cached_bmtree) { /* destroy bmtree if defined */ + } + if (comm->c_coll_selected_data->cached_bmtree) { /* destroy bmtree if defined */ ompi_coll_tuned_topo_destroy_tree (&comm->c_coll_selected_data->cached_bmtree); - } - if (comm->c_coll_selected_data->cached_chain) { /* destroy general chain if defined */ + } + if (comm->c_coll_selected_data->cached_chain) { /* destroy general chain if defined */ ompi_coll_tuned_topo_destroy_chain (&comm->c_coll_selected_data->cached_chain); - } - if (comm->c_coll_selected_data->cached_pipeline) { /* destroy pipeline if defined */ + } + if (comm->c_coll_selected_data->cached_pipeline) { /* destroy pipeline if defined */ ompi_coll_tuned_topo_destroy_chain (&comm->c_coll_selected_data->cached_pipeline); - } + } - /* if any algorithm rules are cached on the communicator, only free them if its MCW */ - /* as this is the only place they are allocated by reading the decision configure file */ - if ((ompi_coll_tuned_use_dynamic_rules)&&(&ompi_mpi_comm_world==comm)) { - if (comm->c_coll_selected_data->all_base_rules) { - ompi_coll_tuned_free_all_rules (comm->c_coll_selected_data->all_base_rules, COLLCOUNT); - } - } + /* if any algorithm rules are cached on the communicator, only free them if its MCW */ + /* as this is the only place they are allocated by reading the decision configure file */ + if ((ompi_coll_tuned_use_dynamic_rules)&&(&ompi_mpi_comm_world==comm)) { + if (comm->c_coll_selected_data->all_base_rules) { + ompi_coll_tuned_free_all_rules (comm->c_coll_selected_data->all_base_rules, COLLCOUNT); + } + } - /* if allocated memory free it */ - if (comm->c_coll_selected_data) { - 
free(comm->c_coll_selected_data); - comm->c_coll_selected_data = NULL; - } - return OMPI_SUCCESS; + /* if allocated memory free it */ + if (comm->c_coll_selected_data) { + free(comm->c_coll_selected_data); + comm->c_coll_selected_data = NULL; + } + return OMPI_SUCCESS; } diff --git a/ompi/mca/coll/tuned/coll_tuned_reduce.c b/ompi/mca/coll/tuned/coll_tuned_reduce.c index b1fddf7bf6..2c5097752a 100644 --- a/ompi/mca/coll/tuned/coll_tuned_reduce.c +++ b/ompi/mca/coll/tuned/coll_tuned_reduce.c @@ -37,9 +37,9 @@ */ int ompi_coll_tuned_reduce_intra_chain( void *sendbuf, void *recvbuf, int count, - ompi_datatype_t* datatype, ompi_op_t* op, - int root, ompi_communicator_t* comm, uint32_t segsize, - int fanout) + ompi_datatype_t* datatype, ompi_op_t* op, + int root, ompi_communicator_t* comm, uint32_t segsize, + int fanout) { int ret, line, rank, size, i = 0; int recvcount, sendcount, prevcount, inbi, previnbi; @@ -96,10 +96,10 @@ int ompi_coll_tuned_reduce_intra_chain( void *sendbuf, void *recvbuf, int count, } realsegsize = segcount * ext; -/* printf("rank %d root %d count %d \t\t segsize %d typesize %d typeext %d realsegsize %d segcount %d num_segments %d\n", */ -/* rank, root, count, segsize, typelng, ext, realsegsize, segcount, num_segments); */ + /* printf("rank %d root %d count %d \t\t segsize %d typesize %d typeext %d realsegsize %d segcount %d num_segments %d\n", */ + /* rank, root, count, segsize, typelng, ext, realsegsize, segcount, num_segments); */ -/* ompi_coll_tuned_topo_dump_chain (chain, rank); */ + /* ompi_coll_tuned_topo_dump_chain (chain, rank); */ if (sendbuf != MPI_IN_PLACE) { @@ -111,10 +111,10 @@ int ompi_coll_tuned_reduce_intra_chain( void *sendbuf, void *recvbuf, int count, /* handle special case when size == 1 */ if (1 == size ) { - if (sendbuf != MPI_IN_PLACE) { - ompi_ddt_copy_content_same_ddt( datatype, count, (char*)recvbuf, (char*)sendbuf ); - } - return MPI_SUCCESS; + if (sendbuf != MPI_IN_PLACE) { + ompi_ddt_copy_content_same_ddt( datatype, 
count, (char*)recvbuf, (char*)sendbuf ); + } + return MPI_SUCCESS; } /* handle non existant recv buffer (i.e. its NULL.. like basic allreduce uses!) */ @@ -173,26 +173,26 @@ int ompi_coll_tuned_reduce_intra_chain( void *sendbuf, void *recvbuf, int count, /* check for root might not be needed as it should be checked higher up */ if ((MPI_IN_PLACE==sendbuf)&&(rank==root)) { ret = MCA_PML_CALL(irecv(inbuf[inbi], - recvcount,datatype, - chain->chain_next[i], - MCA_COLL_BASE_TAG_REDUCE, - comm, &reqs[inbi])); + recvcount,datatype, + chain->chain_next[i], + MCA_COLL_BASE_TAG_REDUCE, + comm, &reqs[inbi])); if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } } else { ret = MCA_PML_CALL(irecv(accumbuf+segindex*realsegsize, - recvcount,datatype, - chain->chain_next[i], - MCA_COLL_BASE_TAG_REDUCE, - comm, &reqs[inbi])); + recvcount,datatype, + chain->chain_next[i], + MCA_COLL_BASE_TAG_REDUCE, + comm, &reqs[inbi])); if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } } } /* if first segment */ else { /* perform a irecv into the standard inbuf */ - ret = MCA_PML_CALL(irecv(inbuf[inbi],recvcount,datatype, - chain->chain_next[i], - MCA_COLL_BASE_TAG_REDUCE, - comm, &reqs[inbi])); - if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } + ret = MCA_PML_CALL(irecv(inbuf[inbi],recvcount,datatype, + chain->chain_next[i], + MCA_COLL_BASE_TAG_REDUCE, + comm, &reqs[inbi])); + if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } } } @@ -255,11 +255,11 @@ int ompi_coll_tuned_reduce_intra_chain( void *sendbuf, void *recvbuf, int count, } /* end of for each segment */ /* clean up */ -/* if (inbuf!=NULL) { */ - if (inbuf[0] != NULL) free(inbuf[0]); - if (inbuf[1] != NULL) free(inbuf[1]); - if (allocedaccumbuf) free(accumbuf); -/* } */ + /* if (inbuf!=NULL) { */ + if (inbuf[0] != NULL) free(inbuf[0]); + if (inbuf[1] != NULL) free(inbuf[1]); + if (allocedaccumbuf) free(accumbuf); + /* } */ } /* leaf nodes */ @@ -280,19 +280,19 @@ int 
ompi_coll_tuned_reduce_intra_chain( void *sendbuf, void *recvbuf, int count, /* error handler */ error_hndl: OPAL_OUTPUT (( ompi_coll_tuned_stream, "ERROR_HNDL: node %d file %s line %d error %d\n", rank, __FILE__, line, ret )); -/* if( inbuf != NULL ) { */ - if( inbuf[0] != NULL ) free(inbuf[0]); - if( inbuf[1] != NULL ) free(inbuf[1]); - if (allocedaccumbuf) free(accumbuf); -/* } */ + /* if( inbuf != NULL ) { */ + if( inbuf[0] != NULL ) free(inbuf[0]); + if( inbuf[1] != NULL ) free(inbuf[1]); + if (allocedaccumbuf) free(accumbuf); + /* } */ return ret; } int ompi_coll_tuned_reduce_intra_pipeline( void *sendbuf, void *recvbuf, - int count, ompi_datatype_t* datatype, - ompi_op_t* op, int root, - ompi_communicator_t* comm, uint32_t segsize ) + int count, ompi_datatype_t* datatype, + ompi_op_t* op, int root, + ompi_communicator_t* comm, uint32_t segsize ) { int rank; @@ -301,8 +301,8 @@ int ompi_coll_tuned_reduce_intra_pipeline( void *sendbuf, void *recvbuf, OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_pipeline rank %d ss %5d", rank, segsize)); return ompi_coll_tuned_reduce_intra_chain( sendbuf,recvbuf, count, - datatype, op, root, comm, - segsize, 1 ); + datatype, op, root, comm, + segsize, 1 ); } @@ -329,9 +329,9 @@ int ompi_coll_tuned_reduce_intra_pipeline( void *sendbuf, void *recvbuf, */ int ompi_coll_tuned_reduce_intra_basic_linear(void *sbuf, void *rbuf, int count, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, - int root, struct ompi_communicator_t *comm) + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, + int root, struct ompi_communicator_t *comm) { int i, rank, err, size; ptrdiff_t true_lb, true_extent, lb, extent; @@ -356,8 +356,8 @@ ompi_coll_tuned_reduce_intra_basic_linear(void *sbuf, void *rbuf, int count, return err; } -/* see discussion in ompi_coll_basic_reduce_lin_intra about extent and true extend */ -/* for reducing buffer allocation lengths.... 
*/ + /* see discussion in ompi_coll_basic_reduce_lin_intra about extent and true extend */ + /* for reducing buffer allocation lengths.... */ ompi_ddt_get_extent(dtype, &lb, &extent); ompi_ddt_get_true_extent(dtype, &true_lb, &true_extent); @@ -449,88 +449,85 @@ int ompi_coll_tuned_reduce_intra_check_forced_init (coll_tuned_force_algorithm_m int rc; int max_alg = 3; - ompi_coll_tuned_forced_max_algorithms[REDUCE] = max_alg; + ompi_coll_tuned_forced_max_algorithms[REDUCE] = max_alg; -rc = mca_base_param_reg_int (&mca_coll_tuned_component.super.collm_version, - "reduce_algorithm_count", - "Number of reduce algorithms available", - false, true, max_alg, NULL); + rc = mca_base_param_reg_int (&mca_coll_tuned_component.super.collm_version, + "reduce_algorithm_count", + "Number of reduce algorithms available", + false, true, max_alg, NULL); -mca_param_indices->algorithm_param_index = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version, - "reduce_algorithm", - "Which reduce algorithm is used. Can be locked down to choice of: 0 ignore, 1 linear, 2 chain, 3 pipeline", - false, false, 0, NULL); + mca_param_indices->algorithm_param_index = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version, + "reduce_algorithm", + "Which reduce algorithm is used. Can be locked down to choice of: 0 ignore, 1 linear, 2 chain, 3 pipeline", + false, false, 0, NULL); -mca_param_indices->segsize_param_index = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version, - "reduce_algorithm_segmentsize", - "Segment size in bytes used by default for reduce algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation.", - false, false, 0, NULL); + mca_param_indices->segsize_param_index = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version, + "reduce_algorithm_segmentsize", + "Segment size in bytes used by default for reduce algorithms. 
Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation.", + false, false, 0, NULL); -mca_param_indices->tree_fanout_param_index = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version, - "reduce_algorithm_tree_fanout", - "Fanout for n-tree used for reduce algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation.", - false, false, - ompi_coll_tuned_init_tree_fanout, /* get system wide default */ - NULL); + mca_param_indices->tree_fanout_param_index = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version, + "reduce_algorithm_tree_fanout", + "Fanout for n-tree used for reduce algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation.", + false, false, + ompi_coll_tuned_init_tree_fanout, /* get system wide default */ + NULL); -mca_param_indices->chain_fanout_param_index = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version, - "reduce_algorithm_chain_fanout", - "Fanout for chains used for reduce algorithms. Only has meaning if algorithm is forced and supports chain topo based operation.", - false, false, - ompi_coll_tuned_init_chain_fanout, /* get system wide default */ - NULL); - -return (MPI_SUCCESS); + mca_param_indices->chain_fanout_param_index = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version, + "reduce_algorithm_chain_fanout", + "Fanout for chains used for reduce algorithms. 
Only has meaning if algorithm is forced and supports chain topo based operation.", + false, false, + ompi_coll_tuned_init_chain_fanout, /* get system wide default */ + NULL); + return (MPI_SUCCESS); } int ompi_coll_tuned_reduce_intra_do_forced(void *sbuf, void* rbuf, int count, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, int root, - struct ompi_communicator_t *comm) + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, int root, + struct ompi_communicator_t *comm) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_do_forced selected algorithm %d", - comm->c_coll_selected_data->user_forced[REDUCE].algorithm)); + OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_do_forced selected algorithm %d", + comm->c_coll_selected_data->user_forced[REDUCE].algorithm)); -switch (comm->c_coll_selected_data->user_forced[REDUCE].algorithm) { + switch (comm->c_coll_selected_data->user_forced[REDUCE].algorithm) { case (0): return ompi_coll_tuned_reduce_intra_dec_fixed (sbuf, rbuf, count, dtype, op, root, comm); case (1): return ompi_coll_tuned_reduce_intra_basic_linear (sbuf, rbuf, count, dtype, op, root, comm); case (2): return ompi_coll_tuned_reduce_intra_chain (sbuf, rbuf, count, dtype, op, root, comm, - comm->c_coll_selected_data->user_forced[REDUCE].segsize, - comm->c_coll_selected_data->user_forced[REDUCE].chain_fanout); + comm->c_coll_selected_data->user_forced[REDUCE].segsize, + comm->c_coll_selected_data->user_forced[REDUCE].chain_fanout); case (3): return ompi_coll_tuned_reduce_intra_pipeline (sbuf, rbuf, count, dtype, op, root, comm, - comm->c_coll_selected_data->user_forced[REDUCE].segsize); + comm->c_coll_selected_data->user_forced[REDUCE].segsize); default: OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?", - comm->c_coll_selected_data->user_forced[REDUCE].algorithm, ompi_coll_tuned_forced_max_algorithms[REDUCE])); + 
comm->c_coll_selected_data->user_forced[REDUCE].algorithm, ompi_coll_tuned_forced_max_algorithms[REDUCE])); return (MPI_ERR_ARG); } /* switch */ - } int ompi_coll_tuned_reduce_intra_do_this(void *sbuf, void* rbuf, int count, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, int root, - struct ompi_communicator_t *comm, - int algorithm, int faninout, int segsize) + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, int root, + struct ompi_communicator_t *comm, + int algorithm, int faninout, int segsize) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_do_this selected algorithm %d topo faninout %d segsize %d", - algorithm, faninout, segsize)); + OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_do_this selected algorithm %d topo faninout %d segsize %d", + algorithm, faninout, segsize)); -switch (algorithm) { + switch (algorithm) { case (0): return ompi_coll_tuned_reduce_intra_dec_fixed (sbuf, rbuf, count, dtype, op, root, comm); case (1): return ompi_coll_tuned_reduce_intra_basic_linear (sbuf, rbuf, count, dtype, op, root, comm); case (2): return ompi_coll_tuned_reduce_intra_chain (sbuf, rbuf, count, dtype, op, root, comm, - segsize, faninout); + segsize, faninout); case (3): return ompi_coll_tuned_reduce_intra_pipeline (sbuf, rbuf, count, dtype, op, root, comm, - segsize); + segsize); default: OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_do_this attempt to select algorithm %d when only 0-%d is valid?", - algorithm, ompi_coll_tuned_forced_max_algorithms[REDUCE])); + algorithm, ompi_coll_tuned_forced_max_algorithms[REDUCE])); return (MPI_ERR_ARG); } /* switch */ - } diff --git a/ompi/mca/coll/tuned/coll_tuned_topo.c b/ompi/mca/coll/tuned/coll_tuned_topo.c index 4a276557f0..80ab410642 100644 --- a/ompi/mca/coll/tuned/coll_tuned_topo.c +++ b/ompi/mca/coll/tuned/coll_tuned_topo.c @@ -68,8 +68,8 @@ static int calculate_num_nodes_up_to_level( int fanout, int level ) ompi_coll_tree_t* ompi_coll_tuned_topo_build_tree( 
int fanout, - struct ompi_communicator_t* comm, - int root ) + struct ompi_communicator_t* comm, + int root ) { int rank, size; int schild, sparent; @@ -187,7 +187,7 @@ int ompi_coll_tuned_topo_destroy_tree( ompi_coll_tree_t** tree ) ompi_coll_tree_t* ompi_coll_tuned_topo_build_bmtree( struct ompi_communicator_t* comm, - int root ) + int root ) { int childs = 0; int rank; @@ -256,8 +256,8 @@ ompi_coll_tuned_topo_build_bmtree( struct ompi_communicator_t* comm, ompi_coll_chain_t* ompi_coll_tuned_topo_build_chain( int fanout, - struct ompi_communicator_t* comm, - int root ) + struct ompi_communicator_t* comm, + int root ) { int rank, size; int srank; /* shifted rank */ @@ -428,23 +428,23 @@ int ompi_coll_tuned_topo_destroy_chain( ompi_coll_chain_t** chain ) int ompi_coll_tuned_topo_dump_tree (ompi_coll_tree_t* tree, int rank) { -int i; -OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:topo:topo_dump_tree %1d tree root %d fanout %d BM %1d nextsize %d prev %d", rank, - tree->tree_root, tree->tree_bmtree, tree->tree_fanout, tree->tree_nextsize, tree->tree_prev)); -if (tree->tree_nextsize) { - for (i=0;itree_nextsize;i++) OPAL_OUTPUT((ompi_coll_tuned_stream,"[%1d] %d", i, tree->tree_next[i])); -} -return (0); + int i; + OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:topo:topo_dump_tree %1d tree root %d fanout %d BM %1d nextsize %d prev %d", rank, + tree->tree_root, tree->tree_bmtree, tree->tree_fanout, tree->tree_nextsize, tree->tree_prev)); + if (tree->tree_nextsize) { + for (i=0;itree_nextsize;i++) OPAL_OUTPUT((ompi_coll_tuned_stream,"[%1d] %d", i, tree->tree_next[i])); + } + return (0); } int ompi_coll_tuned_topo_dump_chain (ompi_coll_chain_t* chain, int rank) { -int i; -OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:topo:topo_dump_chain %1d chain root %d fanout %d nextsize %d prev %d\n", rank, - chain->chain_root, chain->chain_numchain, chain->chain_nextsize, chain->chain_prev)); -if (chain->chain_nextsize) { - for (i=0;ichain_nextsize;i++) 
OPAL_OUTPUT((ompi_coll_tuned_stream,"[%1d] %d ", i, chain->chain_next[i])); -} -return (0); + int i; + OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:topo:topo_dump_chain %1d chain root %d fanout %d nextsize %d prev %d\n", rank, + chain->chain_root, chain->chain_numchain, chain->chain_nextsize, chain->chain_prev)); + if (chain->chain_nextsize) { + for (i=0;ichain_nextsize;i++) OPAL_OUTPUT((ompi_coll_tuned_stream,"[%1d] %d ", i, chain->chain_next[i])); + } + return (0); } diff --git a/ompi/mca/coll/tuned/coll_tuned_topo.h b/ompi/mca/coll/tuned/coll_tuned_topo.h index d1b5af17c9..f73fbb6eba 100644 --- a/ompi/mca/coll/tuned/coll_tuned_topo.h +++ b/ompi/mca/coll/tuned/coll_tuned_topo.h @@ -65,8 +65,6 @@ int ompi_coll_tuned_topo_destroy_chain( ompi_coll_chain_t** chain ); int ompi_coll_tuned_topo_dump_tree (ompi_coll_tree_t* tree, int rank); int ompi_coll_tuned_topo_dump_chain (ompi_coll_chain_t* chain, int rank); - - #if defined(c_plusplus) || defined(__cplusplus) } #endif diff --git a/ompi/mca/coll/tuned/coll_tuned_util.c b/ompi/mca/coll/tuned/coll_tuned_util.c index e1e8722514..7b11503615 100644 --- a/ompi/mca/coll/tuned/coll_tuned_util.c +++ b/ompi/mca/coll/tuned/coll_tuned_util.c @@ -29,16 +29,16 @@ #include "coll_tuned_util.h" int ompi_coll_tuned_sendrecv_actual( void* sendbuf, int scount, ompi_datatype_t* sdatatype, - int dest, int stag, - void* recvbuf, int rcount, ompi_datatype_t* rdatatype, - int source, int rtag, - struct ompi_communicator_t* comm, - ompi_status_public_t* status ) + int dest, int stag, + void* recvbuf, int rcount, ompi_datatype_t* rdatatype, + int source, int rtag, + struct ompi_communicator_t* comm, + ompi_status_public_t* status ) { /* post receive first, then send, then waitall... 
should be fast (I hope) */ -int err, line = 0; -ompi_request_t* reqs[2]; -ompi_status_public_t statuses[2]; + int err, line = 0; + ompi_request_t* reqs[2]; + ompi_status_public_t statuses[2]; /* post new irecv */ err = MCA_PML_CALL(irecv( recvbuf, rcount, rdatatype, source, rtag, comm, &reqs[0])); @@ -68,14 +68,14 @@ ompi_status_public_t statuses[2]; */ int ompi_coll_tuned_sendrecv_actual_localcompleted ( - void* sendbuf, int scount, ompi_datatype_t* sdatatype, int dest, int stag, - void* recvbuf, int rcount, ompi_datatype_t* rdatatype, int source, int rtag, - struct ompi_communicator_t* comm, ompi_status_public_t* status ) + void* sendbuf, int scount, ompi_datatype_t* sdatatype, int dest, int stag, + void* recvbuf, int rcount, ompi_datatype_t* rdatatype, int source, int rtag, + struct ompi_communicator_t* comm, ompi_status_public_t* status ) { /* post receive first, then [local] sync send, then wait... should be fast (I hope) */ -int err, line = 0; -ompi_request_t* req; -ompi_status_public_t tmpstatus; + int err, line = 0; + ompi_request_t* req; + ompi_status_public_t tmpstatus; /* post new irecv */ err = MCA_PML_CALL(irecv( recvbuf, rcount, rdatatype, source, rtag, comm, &req)); @@ -98,3 +98,4 @@ ompi_status_public_t tmpstatus; OPAL_OUTPUT ((ompi_coll_tuned_stream, "%s:%d: Error %d occurred\n",__FILE__,line,err)); return (err); } + diff --git a/ompi/mca/coll/tuned/coll_tuned_util.h b/ompi/mca/coll/tuned/coll_tuned_util.h index af4d204d2f..41861fa900 100644 --- a/ompi/mca/coll/tuned/coll_tuned_util.h +++ b/ompi/mca/coll/tuned/coll_tuned_util.h @@ -34,55 +34,51 @@ extern "C" { /* prototypes */ int ompi_coll_tuned_sendrecv_actual( void* sendbuf, int scount, ompi_datatype_t* sdatatype, - int dest, int stag, - void* recvbuf, int rcount, ompi_datatype_t* rdatatype, - int source, int rtag, - struct ompi_communicator_t* comm, - ompi_status_public_t* status ); + int dest, int stag, + void* recvbuf, int rcount, ompi_datatype_t* rdatatype, + int source, int rtag, + struct 
ompi_communicator_t* comm, + ompi_status_public_t* status ); /* inline functions */ static inline int ompi_coll_tuned_sendrecv( void* sendbuf, int scount, ompi_datatype_t* sdatatype, - int dest, int stag, - void* recvbuf, int rcount, ompi_datatype_t* rdatatype, - int source, int rtag, - struct ompi_communicator_t* comm, - ompi_status_public_t* status, int myid ) + int dest, int stag, + void* recvbuf, int rcount, ompi_datatype_t* rdatatype, + int source, int rtag, + struct ompi_communicator_t* comm, + ompi_status_public_t* status, int myid ) { if ((dest==myid)&&(source==myid)) { return (int) ompi_ddt_sndrcv(sendbuf, (int32_t) scount, sdatatype, recvbuf, (int32_t) rcount, rdatatype); } - else { - return ompi_coll_tuned_sendrecv_actual (sendbuf, scount, sdatatype, dest, stag, recvbuf, rcount, rdatatype, - source, rtag, comm, status); - } + return ompi_coll_tuned_sendrecv_actual (sendbuf, scount, sdatatype, dest, stag, recvbuf, rcount, rdatatype, + source, rtag, comm, status); } int ompi_coll_tuned_sendrecv_actual_localcompleted( void* sendbuf, int scount, ompi_datatype_t* sdatatype, - int dest, int stag, - void* recvbuf, int rcount, ompi_datatype_t* rdatatype, - int source, int rtag, - struct ompi_communicator_t* comm, - ompi_status_public_t* status ); + int dest, int stag, + void* recvbuf, int rcount, ompi_datatype_t* rdatatype, + int source, int rtag, + struct ompi_communicator_t* comm, + ompi_status_public_t* status ); /* inline functions */ static inline int ompi_coll_tuned_sendrecv_localcompleted( void* sendbuf, int scount, ompi_datatype_t* sdatatype, - int dest, int stag, - void* recvbuf, int rcount, ompi_datatype_t* rdatatype, - int source, int rtag, - struct ompi_communicator_t* comm, - ompi_status_public_t* status, int myid ) + int dest, int stag, + void* recvbuf, int rcount, ompi_datatype_t* rdatatype, + int source, int rtag, + struct ompi_communicator_t* comm, + ompi_status_public_t* status, int myid ) { if ((dest==myid)&&(source==myid)) { return (int) 
ompi_ddt_sndrcv(sendbuf, (int32_t) scount, sdatatype, recvbuf, (int32_t) rcount, rdatatype); } - else { - return ompi_coll_tuned_sendrecv_actual_localcompleted (sendbuf, scount, sdatatype, dest, stag, recvbuf, rcount, rdatatype, - source, rtag, comm, status); - } + return ompi_coll_tuned_sendrecv_actual_localcompleted (sendbuf, scount, sdatatype, dest, stag, recvbuf, rcount, rdatatype, + source, rtag, comm, status); } #if defined(c_plusplus) || defined(__cplusplus)