From 4e6a6fc146f1f73cb35f802693e7bfdd4934f931 Mon Sep 17 00:00:00 2001 From: Rainer Keller Date: Tue, 11 Oct 2011 22:49:01 +0000 Subject: [PATCH] - Check, whether the compiler supports __builtin_clz (count leading zeroes); if so, use it for bit-operations like opal_cube_dim and opal_hibit. Implement two versions of power-of-two. In case of opal_next_poweroftwo, this reduces the average execution time from 83 cycles to 4 cycles (Intel Nehalem, icc, -O2, inlining, measured rdtsc, with loop over 2^27 values). Numbers for other functions are similar (but of course heavily depend on the usage, e.g. opal_hibit() with a start of 4 does not save much). The bsr instruction on AMD Opteron is also not as fast. - Replace various places where the next power-of-two is computed. Tested on Intel Nehalem Cluster with openib, compilers GNU-4.6.1 and Intel-12.0.4 using mpi_testsuite -t "Collective" with 128 processes. This commit was SVN r25270. --- ompi/mca/btl/openib/btl_openib_mca.c | 13 +- ompi/mca/btl/sm/btl_sm.h | 5 +- ompi/mca/btl/sm/btl_sm_component.c | 9 +- ompi/mca/btl/wv/btl_wv_mca.c | 13 +- .../coll/basic/coll_basic_reduce_scatter.c | 5 +- ompi/mca/coll/tuned/coll_tuned_allgather.c | 3 +- ompi/mca/coll/tuned/coll_tuned_allreduce.c | 4 +- ompi/mca/coll/tuned/coll_tuned_barrier.c | 5 +- .../coll/tuned/coll_tuned_decision_fixed.c | 5 +- .../coll/tuned/coll_tuned_reduce_scatter.c | 5 +- ompi/mca/coll/tuned/coll_tuned_topo.c | 3 +- opal/class/opal_hash_table.c | 8 +- opal/config/opal_setup_cc.m4 | 20 +- opal/util/bit_ops.h | 106 ++++++++- test/util/Makefile.am | 14 +- test/util/opal_bit_ops.c | 217 ++++++++++++++++++ 16 files changed, 375 insertions(+), 60 deletions(-) create mode 100644 test/util/opal_bit_ops.c diff --git a/ompi/mca/btl/openib/btl_openib_mca.c b/ompi/mca/btl/openib/btl_openib_mca.c index 19f0a41f5f..1b14d749a4 100644 --- a/ompi/mca/btl/openib/btl_openib_mca.c +++ b/ompi/mca/btl/openib/btl_openib_mca.c @@ -26,10 +26,11 @@ #include +#include 
"opal/util/bit_ops.h" #include "opal/mca/installdirs/installdirs.h" -#include "orte/util/show_help.h" #include "opal/util/output.h" #include "opal/mca/base/mca_base_param.h" +#include "orte/util/show_help.h" #include "btl_openib.h" #include "btl_openib_mca.h" #include "btl_openib_ini.h" @@ -554,16 +555,10 @@ int btl_openib_register_mca_params(void) &mca_btl_openib_module.super)); /* setup all the qp stuff */ - mid_qp_size = mca_btl_openib_module.super.btl_eager_limit / 4; /* round mid_qp_size to smallest power of two */ - for(i = 31; i > 0; i--) { - if(!(mid_qp_size & (1<> 1; + /* mid_qp_size = MAX (mid_qp_size, 1024); ?! */ if(mid_qp_size <= 128) { mid_qp_size = 1024; } diff --git a/ompi/mca/btl/sm/btl_sm.h b/ompi/mca/btl/sm/btl_sm.h index 46638d6e91..a0267e0773 100644 --- a/ompi/mca/btl/sm/btl_sm.h +++ b/ompi/mca/btl/sm/btl_sm.h @@ -39,6 +39,7 @@ #include "knem_io.h" #endif /* OMPI_BTL_SM_HAVE_KNEM */ +#include "opal/util/bit_ops.h" #include "opal/class/opal_free_list.h" #include "ompi/mca/btl/btl.h" #include "ompi/mca/common/sm/common_sm.h" @@ -265,9 +266,7 @@ static inline int sm_fifo_init(int fifo_size, mca_mpool_base_module_t *mpool, int i, qsize; /* figure out the queue size (a power of two that is at least 1) */ - qsize = 1; - while ( qsize < fifo_size ) - qsize <<= 1; + qsize = opal_next_poweroftwo_inclusive (fifo_size); /* allocate the queue in the receiver's address space */ fifo->queue_recv = (volatile void **)mpool->mpool_alloc( diff --git a/ompi/mca/btl/sm/btl_sm_component.c b/ompi/mca/btl/sm/btl_sm_component.c index 5737dd07f0..34dd9ede56 100644 --- a/ompi/mca/btl/sm/btl_sm_component.c +++ b/ompi/mca/btl/sm/btl_sm_component.c @@ -43,6 +43,7 @@ #include "ompi/constants.h" #include "opal/mca/event/event.h" +#include "opal/util/bit_ops.h" #include "opal/util/output.h" #include "orte/util/proc_info.h" #include "orte/util/show_help.h" @@ -225,13 +226,9 @@ static int sm_register(void) static int mca_btl_sm_component_open(void) { 
mca_btl_sm_component.sm_max_btls = 1; + /* make sure the number of fifos is a power of 2 */ - { - int i = 1; - while ( i < mca_btl_sm_component.nfifos ) - i <<= 1; - mca_btl_sm_component.nfifos = i; - } + mca_btl_sm_component.nfifos = opal_next_poweroftwo_inclusive (mca_btl_sm_component.nfifos); /* make sure that queue size and lazy free parameter are compatible */ if (mca_btl_sm_component.fifo_lazy_free >= (mca_btl_sm_component.fifo_size >> 1) ) diff --git a/ompi/mca/btl/wv/btl_wv_mca.c b/ompi/mca/btl/wv/btl_wv_mca.c index 05b71de800..ce0cf90b4c 100644 --- a/ompi/mca/btl/wv/btl_wv_mca.c +++ b/ompi/mca/btl/wv/btl_wv_mca.c @@ -26,10 +26,11 @@ #include +#include "opal/util/bit_ops.h" #include "opal/mca/installdirs/installdirs.h" -#include "orte/util/show_help.h" #include "opal/util/output.h" #include "opal/mca/base/mca_base_param.h" +#include "orte/util/show_help.h" #include "btl_wv.h" #include "btl_wv_mca.h" #include "btl_wv_ini.h" @@ -471,16 +472,10 @@ int btl_wv_register_mca_params(void) &mca_btl_wv_module.super)); /* setup all the qp stuff */ - mid_qp_size = mca_btl_wv_module.super.btl_eager_limit / 4; /* round mid_qp_size to smallest power of two */ - for(i = 31; i > 0; i--) { - if(!(mid_qp_size & (1<> 1; + /* mid_qp_size = MAX (mid_qp_size, 1024); ?! 
*/ if(mid_qp_size <= 128) { mid_qp_size = 1024; } diff --git a/ompi/mca/coll/basic/coll_basic_reduce_scatter.c b/ompi/mca/coll/basic/coll_basic_reduce_scatter.c index de57c03aa5..2a394a90d2 100644 --- a/ompi/mca/coll/basic/coll_basic_reduce_scatter.c +++ b/ompi/mca/coll/basic/coll_basic_reduce_scatter.c @@ -24,6 +24,7 @@ #include #include "mpi.h" +#include "opal/util/bit_ops.h" #include "ompi/constants.h" #include "ompi/mca/coll/coll.h" #include "ompi/mca/coll/base/coll_tags.h" @@ -112,7 +113,7 @@ mca_coll_basic_reduce_scatter_intra(void *sbuf, void *rbuf, int *rcounts, if ((op->o_flags & OMPI_OP_FLAGS_COMMUTE) && (buf_size < COMMUTATIVE_LONG_MSG) && (!zerocounts)) { - int tmp_size = 1, remain = 0, tmp_rank; + int tmp_size, remain = 0, tmp_rank; /* temporary receive buffer. See coll_basic_reduce.c for details on sizing */ recv_buf_free = (char*) malloc(buf_size); @@ -133,7 +134,7 @@ mca_coll_basic_reduce_scatter_intra(void *sbuf, void *rbuf, int *rcounts, /* figure out power of two mapping: grow until larger than comm size, then go back one, to get the largest power of two less than comm size */ - while (tmp_size <= size) tmp_size <<= 1; + tmp_size = opal_next_poweroftwo(size); tmp_size >>= 1; remain = size - tmp_size; diff --git a/ompi/mca/coll/tuned/coll_tuned_allgather.c b/ompi/mca/coll/tuned/coll_tuned_allgather.c index 71b78abc1b..be328f635b 100644 --- a/ompi/mca/coll/tuned/coll_tuned_allgather.c +++ b/ompi/mca/coll/tuned/coll_tuned_allgather.c @@ -20,6 +20,7 @@ #include "ompi_config.h" #include "mpi.h" +#include "opal/util/bit_ops.h" #include "ompi/constants.h" #include "ompi/datatype/ompi_datatype.h" #include "ompi/communicator/communicator.h" @@ -271,7 +272,7 @@ ompi_coll_tuned_allgather_intra_recursivedoubling(void *sbuf, int scount, size = ompi_comm_size(comm); rank = ompi_comm_rank(comm); - for (pow2size = 1; pow2size <= size; pow2size <<=1); + pow2size = opal_next_poweroftwo (size); pow2size >>=1; /* Current implementation only handles power-of-two 
number of processes. diff --git a/ompi/mca/coll/tuned/coll_tuned_allreduce.c b/ompi/mca/coll/tuned/coll_tuned_allreduce.c index 9460ae8eea..7562c9f1c4 100644 --- a/ompi/mca/coll/tuned/coll_tuned_allreduce.c +++ b/ompi/mca/coll/tuned/coll_tuned_allreduce.c @@ -20,6 +20,7 @@ #include "ompi_config.h" #include "mpi.h" +#include "opal/util/bit_ops.h" #include "ompi/constants.h" #include "ompi/datatype/ompi_datatype.h" #include "ompi/communicator/communicator.h" @@ -170,7 +171,8 @@ ompi_coll_tuned_allreduce_intra_recursivedoubling(void *sbuf, void *rbuf, tmprecv = (char*) rbuf; /* Determine nearest power of two less than or equal to size */ - for (adjsize = 0x1; adjsize <= size; adjsize <<= 1); adjsize = adjsize >> 1; + adjsize = opal_next_poweroftwo (size); + adjsize >>= 1; /* Handle non-power-of-two case: - Even ranks less than 2 * extra_ranks send their data to (rank + 1), and diff --git a/ompi/mca/coll/tuned/coll_tuned_barrier.c b/ompi/mca/coll/tuned/coll_tuned_barrier.c index d5240e47bb..a2cf36d477 100644 --- a/ompi/mca/coll/tuned/coll_tuned_barrier.c +++ b/ompi/mca/coll/tuned/coll_tuned_barrier.c @@ -20,6 +20,7 @@ #include "ompi_config.h" #include "mpi.h" +#include "opal/util/bit_ops.h" #include "ompi/constants.h" #include "ompi/communicator/communicator.h" #include "ompi/mca/coll/coll.h" @@ -134,7 +135,7 @@ int ompi_coll_tuned_barrier_intra_recursivedoubling(struct ompi_communicator_t * rank)); /* do nearest power of 2 less than size calc */ - for( adjsize = 1; adjsize <= size; adjsize <<= 1 ); + adjsize = opal_next_poweroftwo(size); adjsize >>= 1; /* if size is not exact power of two, perform an extra step */ @@ -354,7 +355,7 @@ int ompi_coll_tuned_barrier_intra_tree(struct ompi_communicator_t *comm, rank)); /* Find the nearest power of 2 of the communicator size. 
*/ - for(depth = 1; depth < size; depth <<= 1 ); + depth = opal_next_poweroftwo_inclusive(size); for (jump=1; jump>= 1; remain = size - tmp_size; diff --git a/ompi/mca/coll/tuned/coll_tuned_topo.c b/ompi/mca/coll/tuned/coll_tuned_topo.c index 5485ee1320..0f903015ae 100644 --- a/ompi/mca/coll/tuned/coll_tuned_topo.c +++ b/ompi/mca/coll/tuned/coll_tuned_topo.c @@ -19,6 +19,7 @@ #include "ompi_config.h" #include "mpi.h" +#include "opal/util/bit_ops.h" #include "ompi/constants.h" #include "ompi/communicator/communicator.h" #include "ompi/mca/coll/base/coll_tags.h" @@ -363,7 +364,7 @@ ompi_coll_tuned_topo_build_bmtree( struct ompi_communicator_t* comm, if( index < 0 ) index += size; - while( mask <= index ) mask <<= 1; + mask = opal_next_poweroftwo(index); /* Now I can compute my father rank */ if( root == rank ) { diff --git a/opal/class/opal_hash_table.c b/opal/class/opal_hash_table.c index 19af0a82c5..8af65e8c56 100644 --- a/opal/class/opal_hash_table.c +++ b/opal/class/opal_hash_table.c @@ -25,6 +25,7 @@ #include "opal/class/opal_list.h" #include "opal/class/opal_hash_table.h" #include "opal/constants.h" +#include "opal/util/bit_ops.h" /* * opal_hash_table_t @@ -70,12 +71,7 @@ static void opal_hash_table_destruct(opal_hash_table_t* ht) int opal_hash_table_init(opal_hash_table_t* ht, size_t table_size) { size_t i; - size_t power2 = 1; - size_t tmp = table_size; - while(tmp) { - tmp >>= 1; - power2 <<= 1; - } + size_t power2 = opal_next_poweroftwo (table_size); ht->ht_mask = power2-1; ht->ht_table = (opal_list_t *)malloc(power2 * sizeof(opal_list_t)); diff --git a/opal/config/opal_setup_cc.m4 b/opal/config/opal_setup_cc.m4 index 125491b55e..9e44fac0c4 100644 --- a/opal/config/opal_setup_cc.m4 +++ b/opal/config/opal_setup_cc.m4 @@ -246,7 +246,7 @@ AC_DEFUN([OPAL_SETUP_CC],[ have_cc_builtin_expect=0 fi AC_DEFINE_UNQUOTED([OPAL_C_HAVE_BUILTIN_EXPECT], [$have_cc_builtin_expect], - [Whether C compiler supports __builtin_expect]) + [Whether C compiler supports 
__builtin_expect]) # see if the C compiler supports __builtin_prefetch AC_CACHE_CHECK([if $CC supports __builtin_prefetch], @@ -262,7 +262,23 @@ AC_DEFUN([OPAL_SETUP_CC],[ have_cc_builtin_prefetch=0 fi AC_DEFINE_UNQUOTED([OPAL_C_HAVE_BUILTIN_PREFETCH], [$have_cc_builtin_prefetch], - [Whether C compiler supports __builtin_prefetch]) + [Whether C compiler supports __builtin_prefetch]) + + # see if the C compiler supports __builtin_clz + AC_CACHE_CHECK([if $CC supports __builtin_clz], + [ompi_cv_cc_supports___builtin_clz], + [AC_TRY_LINK([], + [int value = 0xffff; /* we know we have 16 bits set */ + if ((8*sizeof(int)-16) != __builtin_clz(value)) return 0;], + [ompi_cv_cc_supports___builtin_clz="yes"], + [ompi_cv_cc_supports___builtin_clz="no"])]) + if test "$ompi_cv_cc_supports___builtin_clz" = "yes" ; then + have_cc_builtin_clz=1 + else + have_cc_builtin_clz=0 + fi + AC_DEFINE_UNQUOTED([OPAL_C_HAVE_BUILTIN_CLZ], [$have_cc_builtin_clz], + [Whether C compiler supports __builtin_clz]) # Preload the optflags for the case where the user didn't specify # any. If we're using GNU compilers, use -O3 (since it GNU diff --git a/opal/util/bit_ops.h b/opal/util/bit_ops.h index 743742506d..e08a91fe0d 100644 --- a/opal/util/bit_ops.h +++ b/opal/util/bit_ops.h @@ -5,7 +5,7 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2011 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -19,6 +19,8 @@ #ifndef OPAL_BIT_OPS_H #define OPAL_BIT_OPS_H +#include "opal/prefetch.h" + /** * Calculates the highest bit in an integer * @@ -33,21 +35,35 @@ * * WARNING: *NO* error checking is performed. This is meant to be a * fast inline function. 
+ * Using __builtin_clz (count-leading-zeros) uses 3 cycles instead + * of 17 cycles (on average value, with start=32) + * compared to the loop-version (on Intel Nehalem -- with icc-12.1.0 -O2). */ static inline int opal_hibit(int value, int start) { - unsigned int mask; + unsigned int mask; - --start; - mask = 1 << start; +#if OPAL_C_HAVE_BUILTIN_CLZ + /* Only look at the part that the caller wanted looking at */ + mask = value & ((1 << start) - 1); - for (; start >= 0; --start, mask >>= 1) { - if (value & mask) { - break; + if (OPAL_UNLIKELY (0 == mask)) { + return -1; } - } + + start = (8*sizeof(int)-1) - __builtin_clz(mask); +#else + --start; + mask = 1 << start; + + for (; start >= 0; --start, mask >>= 1) { + if (value & mask) { + break; + } + } +#endif - return start; + return start; } @@ -63,16 +79,84 @@ static inline int opal_hibit(int value, int start) * * WARNING: *NO* error checking is performed. This is meant to be a * fast inline function. + * Using __builtin_clz (count-leading-zeros) uses 3 cycles instead of 50 cycles + * compared to the loop-version (on Intel Nehalem -- with icc-12.1.0 -O2). */ static inline int opal_cube_dim(int value) { int dim, size; - for (dim = 0, size = 1; size < value; ++dim, size <<= 1) { - continue; +#if OPAL_C_HAVE_BUILTIN_CLZ + if (OPAL_UNLIKELY (1 >= value)) { + return 0; } + size = 8 * sizeof(int); + dim = size - __builtin_clz(value-1); +#else + for (dim = 0, size = 1; size < value; ++dim, size <<= 1) /* empty */; +#endif return dim; } + +/** + * @brief Returns next power-of-two of the given value. + * + * @param value The integer value to return power of 2 + * + * @returns The next power of two + * + * WARNING: *NO* error checking is performed. This is meant to be a + * fast inline function. + * Using __builtin_clz (count-leading-zeros) uses 4 cycles instead of 77 + * compared to the loop-version (on Intel Nehalem -- with icc-12.1.0 -O2). 
+ */ +static inline int opal_next_poweroftwo(int value) +{ + int power2; + +#if OPAL_C_HAVE_BUILTIN_CLZ + if (OPAL_UNLIKELY (0 >= value)) { /* clz(0) is undefined; a negative value has no leading zeros and would shift by the full int width -- return 1 like the loop version */ + return 1; + } + power2 = 1 << (8 * sizeof (int) - __builtin_clz(value)); +#else + for (power2 = 1; value > 0; value >>= 1, power2 <<= 1) /* empty */; +#endif + + return power2; +} + + +/** + * @brief Returns next power-of-two of the given value (and the value itself if already power-of-two). + * + * @param value The integer value to return power of 2 + * + * @returns The next power of two (inclusive) + * + * WARNING: *NO* error checking is performed. This is meant to be a + * fast inline function. + * Using __builtin_clz (count-leading-zeros) uses 4 cycles instead of 56 + * compared to the loop-version (on Intel Nehalem -- with icc-12.1.0 -O2). + */ +static inline int opal_next_poweroftwo_inclusive(int value) +{ + int power2; + +#if OPAL_C_HAVE_BUILTIN_CLZ + if (OPAL_UNLIKELY (1 >= value)) { + return 1; + } + power2 = 1 << (8 * sizeof (int) - __builtin_clz(value - 1)); +#else + for (power2 = 1 ; power2 < value; power2 <<= 1) /* empty */; +#endif + + return power2; +} + + #endif /* OPAL_BIT_OPS_H */ + diff --git a/test/util/Makefile.am b/test/util/Makefile.am index ae4a06e8b6..75cee624c6 100644 --- a/test/util/Makefile.am +++ b/test/util/Makefile.am @@ -5,7 +5,7 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2011 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. 
@@ -31,8 +31,9 @@ AM_CPPFLAGS = -I$(top_srcdir)/test/support check_PROGRAMS = \ - opal_sos \ - opal_path_nfs + opal_bit_ops \ + opal_path_nfs \ + opal_sos TESTS = \ $(check_PROGRAMS) @@ -66,6 +67,13 @@ TESTS = \ # $(top_builddir)/test/support/libsupport.a #opal_basename_DEPENDENCIES = $(opal_basename_LDADD) +opal_bit_ops_SOURCES = opal_bit_ops.c +opal_bit_ops_LDADD = \ + $(top_builddir)/opal/libopen-pal.la \ + $(top_builddir)/test/support/libsupport.a +opal_bit_ops_DEPENDENCIES = $(opal_bit_ops_LDADD) + + opal_path_nfs_SOURCES = opal_path_nfs.c opal_path_nfs_LDADD = \ $(top_builddir)/opal/libopen-pal.la \ diff --git a/test/util/opal_bit_ops.c b/test/util/opal_bit_ops.c new file mode 100644 index 0000000000..ccfde961a0 --- /dev/null +++ b/test/util/opal_bit_ops.c @@ -0,0 +1,217 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2011 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include +#include + +#include "support.h" +#include "opal/util/bit_ops.h" +#include "opal/util/output.h" + +/* +#define DEBUG +*/ + +static int test_hibit(int value, int start); +static int test_cube_dim(int value); +static int test_next_poweroftwo(int value); +static int test_next_poweroftwo_inclusive(int value); + +int main(int argc, char* argv[]) +{ + int i; + int vals[] = {0, 1, 2, 3, 4, 5, 127, 128, 129, (1 << 29) -1, (1 << 29), (1 << 29) +1, (1 << 30) -1, (1 << 30) /* And NOT (1 << 30) +1 */}; + test_init("opal_bit_ops()"); + +#ifdef DEBUG + printf ("Test usage: ./opal_bit_ops [VALUES]\n"); +#endif + + if (1 < argc) { + for (i = 1; i < argc; i++) { + int value; + value = atoi (argv[i]); + printf ("Testing %d. argument test_next_poweroftwo(%d): %s\n", + i, value, test_next_poweroftwo(value) ? "correct" : "wrong"); + } + } + + for (i = 0; i < (int)(sizeof(vals)/sizeof(vals[0])); i++) { + test_hibit (vals[i], 8 * sizeof(int) -2); + test_hibit (vals[i], 3); + test_cube_dim (vals[i]); + test_next_poweroftwo (vals[i]); + test_next_poweroftwo_inclusive (vals[i]); + } + + /* All done */ + return test_finalize(); +} + + +/* REFERENCE FUNCTION */ +static int hibit(int value, int start) +{ + unsigned int mask; + + --start; + mask = 1 << start; + + for (; start >= 0; --start, mask >>= 1) { + if (value & mask) { + break; + } + } + + return start; +} + +static int test_hibit(int value, int start) +{ + int out; + int bit = hibit (value, start); + +#ifdef DEBUG + printf ("test_hibit(): value:%d expect:%d\n", + value, bit); +#endif + + if (bit == (out = opal_hibit (value, start))) { + test_success(); + return 1; + } else { + char * msg; + asprintf(&msg, "Mismatch for hibit (w/ start:%d): value:%d, expected:%d got:%d\n", + start, value, bit, out); + test_failure(msg); + free(msg); + } + return 0; +} + + +/* REFERENCE FUNCTION */ +static int cube_dim(int value) +{ + 
int dim, size; + + for (dim = 0, size = 1; size < value; ++dim, size <<= 1); + + return dim; +} + +static int test_cube_dim(int value) +{ + int out; + int dim = cube_dim (value); + +#ifdef DEBUG + printf ("test_cube_dim(): value:%d expect:%d\n", + value, dim); +#endif + + if (dim == (out = opal_cube_dim (value))) { + test_success(); + return 1; + } else { + char * msg; + asprintf(&msg, "Mismatch for cube_dim: value:%d, expected:%d got:%d\n", + value, dim, out); + test_failure(msg); + free(msg); + } + return 0; +} + + +/* REFERENCE FUNCTION */ +static int next_poweroftwo(int value) +{ + int power2; + + for (power2 = 1; value; value >>=1, power2 <<=1) /* empty */; + + return power2; +} + + +static int test_next_poweroftwo(int value) +{ + int out; + int power2 = next_poweroftwo (value); + +#ifdef DEBUG + printf ("test_next_poweroftwo(): value:%d expect:%d\n", + value, power2); +#endif + + if (power2 == (out = opal_next_poweroftwo (value))) { + test_success(); + return 1; + } else { + char * msg; + asprintf(&msg, "Mismatch for power-of-two: value:%d, expected:%d got:%d\n", + value, power2, out); + test_failure(msg); + free(msg); + } + return 0; +} + + + +/* REFERENCE FUNCTION */ +static int next_poweroftwo_inclusive(int value) +{ + int power2 = 1; + + while ( power2 < value ) + power2 <<= 1; + + return power2; +} + +static int test_next_poweroftwo_inclusive(int value) +{ + int out; + int power2 = next_poweroftwo_inclusive (value); + +#ifdef DEBUG + printf ("test_next_poweroftwo(): value:%d expect:%d\n", + value, power2); +#endif + + if (power2 == (out = opal_next_poweroftwo_inclusive (value))) { + test_success(); + return 1; + } else { + char * msg; + asprintf(&msg, "Mismatch for power-of-two-inclusive: value:%d, expected:%d got:%d\n", + value, power2, out); + test_failure(msg); + free(msg); + } + + return 0; +} + + + +