- Check whether the compiler supports __builtin_clz (count leading
zeroes); if so, use it for bit operations such as opal_cube_dim and opal_hibit. Implement two versions of power-of-two (opal_next_poweroftwo and opal_next_poweroftwo_inclusive). In the case of opal_next_poweroftwo, this reduces the average execution time from 83 cycles to 4 cycles (Intel Nehalem, icc, -O2, inlining, measured with rdtsc, looping over 2^27 values). Numbers for the other functions are similar (but of course depend heavily on the usage, e.g. opal_hibit() with a start of 4 does not save much). The bsr instruction on AMD Opteron is also not as fast. - Replace various places where the next power of two is computed. Tested on an Intel Nehalem cluster with openib, compilers GNU-4.6.1 and Intel-12.0.4, using mpi_testsuite -t "Collective" with 128 processes. This commit was SVN r25270.
Этот коммит содержится в:
родитель
74c88a9e48
Коммит
4e6a6fc146
@ -26,10 +26,11 @@
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "opal/util/bit_ops.h"
|
||||
#include "opal/mca/installdirs/installdirs.h"
|
||||
#include "orte/util/show_help.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/mca/base/mca_base_param.h"
|
||||
#include "orte/util/show_help.h"
|
||||
#include "btl_openib.h"
|
||||
#include "btl_openib_mca.h"
|
||||
#include "btl_openib_ini.h"
|
||||
@ -554,16 +555,10 @@ int btl_openib_register_mca_params(void)
|
||||
&mca_btl_openib_module.super));
|
||||
|
||||
/* setup all the qp stuff */
|
||||
mid_qp_size = mca_btl_openib_module.super.btl_eager_limit / 4;
|
||||
/* round mid_qp_size to smallest power of two */
|
||||
for(i = 31; i > 0; i--) {
|
||||
if(!(mid_qp_size & (1<<i))) {
|
||||
continue;
|
||||
}
|
||||
mid_qp_size = (1<<i);
|
||||
break;
|
||||
}
|
||||
mid_qp_size = opal_next_poweroftwo (mca_btl_openib_module.super.btl_eager_limit / 4) >> 1;
|
||||
|
||||
/* mid_qp_size = MAX (mid_qp_size, 1024); ?! */
|
||||
if(mid_qp_size <= 128) {
|
||||
mid_qp_size = 1024;
|
||||
}
|
||||
|
@ -39,6 +39,7 @@
|
||||
#include "knem_io.h"
|
||||
#endif /* OMPI_BTL_SM_HAVE_KNEM */
|
||||
|
||||
#include "opal/util/bit_ops.h"
|
||||
#include "opal/class/opal_free_list.h"
|
||||
#include "ompi/mca/btl/btl.h"
|
||||
#include "ompi/mca/common/sm/common_sm.h"
|
||||
@ -265,9 +266,7 @@ static inline int sm_fifo_init(int fifo_size, mca_mpool_base_module_t *mpool,
|
||||
int i, qsize;
|
||||
|
||||
/* figure out the queue size (a power of two that is at least 1) */
|
||||
qsize = 1;
|
||||
while ( qsize < fifo_size )
|
||||
qsize <<= 1;
|
||||
qsize = opal_next_poweroftwo_inclusive (fifo_size);
|
||||
|
||||
/* allocate the queue in the receiver's address space */
|
||||
fifo->queue_recv = (volatile void **)mpool->mpool_alloc(
|
||||
|
@ -43,6 +43,7 @@
|
||||
|
||||
#include "ompi/constants.h"
|
||||
#include "opal/mca/event/event.h"
|
||||
#include "opal/util/bit_ops.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "orte/util/proc_info.h"
|
||||
#include "orte/util/show_help.h"
|
||||
@ -225,13 +226,9 @@ static int sm_register(void)
|
||||
static int mca_btl_sm_component_open(void)
|
||||
{
|
||||
mca_btl_sm_component.sm_max_btls = 1;
|
||||
|
||||
/* make sure the number of fifos is a power of 2 */
|
||||
{
|
||||
int i = 1;
|
||||
while ( i < mca_btl_sm_component.nfifos )
|
||||
i <<= 1;
|
||||
mca_btl_sm_component.nfifos = i;
|
||||
}
|
||||
mca_btl_sm_component.nfifos = opal_next_poweroftwo_inclusive (mca_btl_sm_component.nfifos);
|
||||
|
||||
/* make sure that queue size and lazy free parameter are compatible */
|
||||
if (mca_btl_sm_component.fifo_lazy_free >= (mca_btl_sm_component.fifo_size >> 1) )
|
||||
|
@ -26,10 +26,11 @@
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "opal/util/bit_ops.h"
|
||||
#include "opal/mca/installdirs/installdirs.h"
|
||||
#include "orte/util/show_help.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/mca/base/mca_base_param.h"
|
||||
#include "orte/util/show_help.h"
|
||||
#include "btl_wv.h"
|
||||
#include "btl_wv_mca.h"
|
||||
#include "btl_wv_ini.h"
|
||||
@ -471,16 +472,10 @@ int btl_wv_register_mca_params(void)
|
||||
&mca_btl_wv_module.super));
|
||||
|
||||
/* setup all the qp stuff */
|
||||
mid_qp_size = mca_btl_wv_module.super.btl_eager_limit / 4;
|
||||
/* round mid_qp_size to smallest power of two */
|
||||
for(i = 31; i > 0; i--) {
|
||||
if(!(mid_qp_size & (1<<i))) {
|
||||
continue;
|
||||
}
|
||||
mid_qp_size = (1<<i);
|
||||
break;
|
||||
}
|
||||
mid_qp_size = opal_next_poweroftwo (mca_btl_wv_module.super.btl_eager_limit / 4) >> 1;
|
||||
|
||||
/* mid_qp_size = MAX (mid_qp_size, 1024); ?! */
|
||||
if(mid_qp_size <= 128) {
|
||||
mid_qp_size = 1024;
|
||||
}
|
||||
|
@ -24,6 +24,7 @@
|
||||
#include <errno.h>
|
||||
|
||||
#include "mpi.h"
|
||||
#include "opal/util/bit_ops.h"
|
||||
#include "ompi/constants.h"
|
||||
#include "ompi/mca/coll/coll.h"
|
||||
#include "ompi/mca/coll/base/coll_tags.h"
|
||||
@ -112,7 +113,7 @@ mca_coll_basic_reduce_scatter_intra(void *sbuf, void *rbuf, int *rcounts,
|
||||
|
||||
if ((op->o_flags & OMPI_OP_FLAGS_COMMUTE) &&
|
||||
(buf_size < COMMUTATIVE_LONG_MSG) && (!zerocounts)) {
|
||||
int tmp_size = 1, remain = 0, tmp_rank;
|
||||
int tmp_size, remain = 0, tmp_rank;
|
||||
|
||||
/* temporary receive buffer. See coll_basic_reduce.c for details on sizing */
|
||||
recv_buf_free = (char*) malloc(buf_size);
|
||||
@ -133,7 +134,7 @@ mca_coll_basic_reduce_scatter_intra(void *sbuf, void *rbuf, int *rcounts,
|
||||
/* figure out power of two mapping: grow until larger than
|
||||
comm size, then go back one, to get the largest power of
|
||||
two less than comm size */
|
||||
while (tmp_size <= size) tmp_size <<= 1;
|
||||
tmp_size = opal_next_poweroftwo(size);
|
||||
tmp_size >>= 1;
|
||||
remain = size - tmp_size;
|
||||
|
||||
|
@ -20,6 +20,7 @@
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include "mpi.h"
|
||||
#include "opal/util/bit_ops.h"
|
||||
#include "ompi/constants.h"
|
||||
#include "ompi/datatype/ompi_datatype.h"
|
||||
#include "ompi/communicator/communicator.h"
|
||||
@ -271,7 +272,7 @@ ompi_coll_tuned_allgather_intra_recursivedoubling(void *sbuf, int scount,
|
||||
size = ompi_comm_size(comm);
|
||||
rank = ompi_comm_rank(comm);
|
||||
|
||||
for (pow2size = 1; pow2size <= size; pow2size <<=1);
|
||||
pow2size = opal_next_poweroftwo (size);
|
||||
pow2size >>=1;
|
||||
|
||||
/* Current implementation only handles power-of-two number of processes.
|
||||
|
@ -20,6 +20,7 @@
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include "mpi.h"
|
||||
#include "opal/util/bit_ops.h"
|
||||
#include "ompi/constants.h"
|
||||
#include "ompi/datatype/ompi_datatype.h"
|
||||
#include "ompi/communicator/communicator.h"
|
||||
@ -170,7 +171,8 @@ ompi_coll_tuned_allreduce_intra_recursivedoubling(void *sbuf, void *rbuf,
|
||||
tmprecv = (char*) rbuf;
|
||||
|
||||
/* Determine nearest power of two less than or equal to size */
|
||||
for (adjsize = 0x1; adjsize <= size; adjsize <<= 1); adjsize = adjsize >> 1;
|
||||
adjsize = opal_next_poweroftwo (size);
|
||||
adjsize >>= 1;
|
||||
|
||||
/* Handle non-power-of-two case:
|
||||
- Even ranks less than 2 * extra_ranks send their data to (rank + 1), and
|
||||
|
@ -20,6 +20,7 @@
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include "mpi.h"
|
||||
#include "opal/util/bit_ops.h"
|
||||
#include "ompi/constants.h"
|
||||
#include "ompi/communicator/communicator.h"
|
||||
#include "ompi/mca/coll/coll.h"
|
||||
@ -134,7 +135,7 @@ int ompi_coll_tuned_barrier_intra_recursivedoubling(struct ompi_communicator_t *
|
||||
rank));
|
||||
|
||||
/* do nearest power of 2 less than size calc */
|
||||
for( adjsize = 1; adjsize <= size; adjsize <<= 1 );
|
||||
adjsize = opal_next_poweroftwo(size);
|
||||
adjsize >>= 1;
|
||||
|
||||
/* if size is not exact power of two, perform an extra step */
|
||||
@ -354,7 +355,7 @@ int ompi_coll_tuned_barrier_intra_tree(struct ompi_communicator_t *comm,
|
||||
rank));
|
||||
|
||||
/* Find the nearest power of 2 of the communicator size. */
|
||||
for(depth = 1; depth < size; depth <<= 1 );
|
||||
depth = opal_next_poweroftwo_inclusive(size);
|
||||
|
||||
for (jump=1; jump<depth; jump<<=1) {
|
||||
partner = rank ^ jump;
|
||||
|
@ -20,6 +20,7 @@
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include "mpi.h"
|
||||
#include "opal/util/bit_ops.h"
|
||||
#include "ompi/datatype/ompi_datatype.h"
|
||||
#include "ompi/communicator/communicator.h"
|
||||
#include "ompi/mca/coll/coll.h"
|
||||
@ -489,7 +490,7 @@ int ompi_coll_tuned_reduce_scatter_intra_dec_fixed( void *sbuf, void *rbuf,
|
||||
total_message_size *= dsize;
|
||||
|
||||
/* compute the nearest power of 2 */
|
||||
for (pow2 = 1; pow2 < comm_size; pow2 <<= 1);
|
||||
pow2 = opal_next_poweroftwo_inclusive (comm_size);
|
||||
|
||||
if ((total_message_size <= small_message_size) ||
|
||||
((total_message_size <= large_message_size) && (pow2 == comm_size)) ||
|
||||
@ -540,7 +541,7 @@ int ompi_coll_tuned_allgather_intra_dec_fixed(void *sbuf, int scount,
|
||||
" rank %d com_size %d msg_length %lu",
|
||||
ompi_comm_rank(comm), communicator_size, (unsigned long)total_dsize));
|
||||
|
||||
for (pow2_size = 1; pow2_size < communicator_size; pow2_size <<=1);
|
||||
pow2_size = opal_next_poweroftwo_inclusive (communicator_size);
|
||||
|
||||
/* Decision based on MX 2Gb results from Grig cluster at
|
||||
The University of Tennesse, Knoxville
|
||||
|
@ -21,6 +21,7 @@
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include "mpi.h"
|
||||
#include "opal/util/bit_ops.h"
|
||||
#include "ompi/constants.h"
|
||||
#include "ompi/datatype/ompi_datatype.h"
|
||||
#include "ompi/communicator/communicator.h"
|
||||
@ -132,7 +133,7 @@ ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(void *sbuf,
|
||||
mca_coll_base_module_t *module)
|
||||
{
|
||||
int i, rank, size, count, err = OMPI_SUCCESS;
|
||||
int tmp_size = 1, remain = 0, tmp_rank;
|
||||
int tmp_size, remain = 0, tmp_rank;
|
||||
int *disps = NULL;
|
||||
ptrdiff_t true_lb, true_extent, lb, extent, buf_size;
|
||||
char *recv_buf = NULL, *recv_buf_free = NULL;
|
||||
@ -189,7 +190,7 @@ ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(void *sbuf,
|
||||
/* figure out power of two mapping: grow until larger than
|
||||
comm size, then go back one, to get the largest power of
|
||||
two less than comm size */
|
||||
while (tmp_size <= size) tmp_size <<= 1;
|
||||
tmp_size = opal_next_poweroftwo (size);
|
||||
tmp_size >>= 1;
|
||||
remain = size - tmp_size;
|
||||
|
||||
|
@ -19,6 +19,7 @@
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include "mpi.h"
|
||||
#include "opal/util/bit_ops.h"
|
||||
#include "ompi/constants.h"
|
||||
#include "ompi/communicator/communicator.h"
|
||||
#include "ompi/mca/coll/base/coll_tags.h"
|
||||
@ -363,7 +364,7 @@ ompi_coll_tuned_topo_build_bmtree( struct ompi_communicator_t* comm,
|
||||
|
||||
if( index < 0 ) index += size;
|
||||
|
||||
while( mask <= index ) mask <<= 1;
|
||||
mask = opal_next_poweroftwo(index);
|
||||
|
||||
/* Now I can compute my father rank */
|
||||
if( root == rank ) {
|
||||
|
@ -25,6 +25,7 @@
|
||||
#include "opal/class/opal_list.h"
|
||||
#include "opal/class/opal_hash_table.h"
|
||||
#include "opal/constants.h"
|
||||
#include "opal/util/bit_ops.h"
|
||||
|
||||
/*
|
||||
* opal_hash_table_t
|
||||
@ -70,12 +71,7 @@ static void opal_hash_table_destruct(opal_hash_table_t* ht)
|
||||
int opal_hash_table_init(opal_hash_table_t* ht, size_t table_size)
|
||||
{
|
||||
size_t i;
|
||||
size_t power2 = 1;
|
||||
size_t tmp = table_size;
|
||||
while(tmp) {
|
||||
tmp >>= 1;
|
||||
power2 <<= 1;
|
||||
}
|
||||
size_t power2 = opal_next_poweroftwo (table_size);
|
||||
|
||||
ht->ht_mask = power2-1;
|
||||
ht->ht_table = (opal_list_t *)malloc(power2 * sizeof(opal_list_t));
|
||||
|
@ -246,7 +246,7 @@ AC_DEFUN([OPAL_SETUP_CC],[
|
||||
have_cc_builtin_expect=0
|
||||
fi
|
||||
AC_DEFINE_UNQUOTED([OPAL_C_HAVE_BUILTIN_EXPECT], [$have_cc_builtin_expect],
|
||||
[Whether C compiler supports __builtin_expect])
|
||||
[Whether C compiler supports __builtin_expect])
|
||||
|
||||
# see if the C compiler supports __builtin_prefetch
|
||||
AC_CACHE_CHECK([if $CC supports __builtin_prefetch],
|
||||
@ -262,7 +262,23 @@ AC_DEFUN([OPAL_SETUP_CC],[
|
||||
have_cc_builtin_prefetch=0
|
||||
fi
|
||||
AC_DEFINE_UNQUOTED([OPAL_C_HAVE_BUILTIN_PREFETCH], [$have_cc_builtin_prefetch],
|
||||
[Whether C compiler supports __builtin_prefetch])
|
||||
[Whether C compiler supports __builtin_prefetch])
|
||||
|
||||
# see if the C compiler supports __builtin_clz
|
||||
AC_CACHE_CHECK([if $CC supports __builtin_clz],
|
||||
[ompi_cv_cc_supports___builtin_clz],
|
||||
[AC_TRY_LINK([],
|
||||
[int value = 0xffff; /* we know we have 16 bits set */
|
||||
if ((8*sizeof(int)-16) != __builtin_clz(value)) return 0;],
|
||||
[ompi_cv_cc_supports___builtin_clz="yes"],
|
||||
[ompi_cv_cc_supports___builtin_clz="no"])])
|
||||
if test "$ompi_cv_cc_supports___builtin_clz" = "yes" ; then
|
||||
have_cc_builtin_clz=1
|
||||
else
|
||||
have_cc_builtin_clz=0
|
||||
fi
|
||||
AC_DEFINE_UNQUOTED([OPAL_C_HAVE_BUILTIN_CLZ], [$have_cc_builtin_clz],
|
||||
[Whether C compiler supports __builtin_clz])
|
||||
|
||||
# Preload the optflags for the case where the user didn't specify
|
||||
# any. If we're using GNU compilers, use -O3 (since it GNU
|
||||
|
@ -5,7 +5,7 @@
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* Copyright (c) 2004-2011 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
@ -19,6 +19,8 @@
|
||||
#ifndef OPAL_BIT_OPS_H
|
||||
#define OPAL_BIT_OPS_H
|
||||
|
||||
#include "opal/prefetch.h"
|
||||
|
||||
/**
|
||||
* Calculates the highest bit in an integer
|
||||
*
|
||||
@ -33,21 +35,35 @@
|
||||
*
|
||||
* WARNING: *NO* error checking is performed. This is meant to be a
|
||||
* fast inline function.
|
||||
* Using __builtin_clz (count-leading-zeros) uses 3 cycles instead
|
||||
* of 17 cycles (on average value, with start=32)
|
||||
* compared to the loop-version (on Intel Nehalem -- with icc-12.1.0 -O2).
|
||||
*/
|
||||
static inline int opal_hibit(int value, int start)
|
||||
{
|
||||
unsigned int mask;
|
||||
unsigned int mask;
|
||||
|
||||
--start;
|
||||
mask = 1 << start;
|
||||
#if OPAL_C_HAVE_BUILTIN_CLZ
|
||||
/* Only look at the part that the caller wanted looking at */
|
||||
mask = value & ((1 << start) - 1);
|
||||
|
||||
for (; start >= 0; --start, mask >>= 1) {
|
||||
if (value & mask) {
|
||||
break;
|
||||
if (OPAL_UNLIKELY (0 == mask)) {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
return start;
|
||||
start = (8*sizeof(int)-1) - __builtin_clz(mask);
|
||||
#else
|
||||
--start;
|
||||
mask = 1 << start;
|
||||
|
||||
for (; start >= 0; --start, mask >>= 1) {
|
||||
if (value & mask) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
return start;
|
||||
}
|
||||
|
||||
|
||||
@ -63,16 +79,84 @@ static inline int opal_hibit(int value, int start)
|
||||
*
|
||||
* WARNING: *NO* error checking is performed. This is meant to be a
|
||||
* fast inline function.
|
||||
* Using __builtin_clz (count-leading-zeros) uses 3 cycles instead of 50 cycles
|
||||
* compared to the loop-version (on Intel Nehalem -- with icc-12.1.0 -O2).
|
||||
*/
|
||||
static inline int opal_cube_dim(int value)
|
||||
{
|
||||
int dim, size;
|
||||
|
||||
for (dim = 0, size = 1; size < value; ++dim, size <<= 1) {
|
||||
continue;
|
||||
#if OPAL_C_HAVE_BUILTIN_CLZ
|
||||
if (OPAL_UNLIKELY (1 >= value)) {
|
||||
return 0;
|
||||
}
|
||||
size = 8 * sizeof(int);
|
||||
dim = size - __builtin_clz(value-1);
|
||||
#else
|
||||
for (dim = 0, size = 1; size < value; ++dim, size <<= 1) /* empty */;
|
||||
#endif
|
||||
|
||||
return dim;
|
||||
}
|
||||
|
||||
|
||||
/**
 * @brief Returns the smallest power of two strictly greater than the given value.
 *
 * @param value   The integer value to round up
 *
 * @returns       The next power of two above @c value (e.g. 4 for both 2
 *                and 3); 1 if @c value is zero or negative.
 *
 * WARNING: *NO* error checking is performed.  This is meant to be a
 * fast inline function.  If the result does not fit into an int (i.e.
 * @c value already has its highest non-sign bit set), the unsigned
 * result is converted back to int, which is implementation-defined.
 * Using __builtin_clz (count-leading-zeros) uses 4 cycles instead of 77
 * compared to the loop-version (on Intel Nehalem -- with icc-12.1.0 -O2).
 */
static inline int opal_next_poweroftwo(int value)
{
    /* Accumulate as unsigned: shifting a one into the top bit of a
     * signed int would be undefined behaviour */
    unsigned int power2;

#if OPAL_C_HAVE_BUILTIN_CLZ
    /* __builtin_clz(0) is undefined, and a negative value has no leading
     * zeroes, which would make the shift count equal to the width of int.
     * Return 1 for both, exactly as the loop version below does. */
    if (OPAL_UNLIKELY (0 >= value)) {
        return 1;
    }
    power2 = 1U << (8 * sizeof (int) - __builtin_clz((unsigned int) value));
#else
    for (power2 = 1U; value > 0; value >>= 1, power2 <<= 1) /* empty */;
#endif

    return (int) power2;
}
|
||||
|
||||
|
||||
/**
 * @brief Returns the smallest power of two greater than or equal to the
 *        given value (the value itself if it already is a power of two).
 *
 * @param value   The integer value to round up
 *
 * @returns       The next power of two (inclusive); 1 if @c value is
 *                less than or equal to 1.
 *
 * WARNING: *NO* error checking is performed.  This is meant to be a
 * fast inline function.  If the result does not fit into an int, the
 * unsigned result is converted back to int, which is
 * implementation-defined.
 * Using __builtin_clz (count-leading-zeros) uses 4 cycles instead of 56
 * compared to the loop-version (on Intel Nehalem -- with icc-12.1.0 -O2).
 */
static inline int opal_next_poweroftwo_inclusive(int value)
{
    /* Accumulate as unsigned: shifting a one into the top bit of a
     * signed int would be undefined behaviour */
    unsigned int power2;

#if OPAL_C_HAVE_BUILTIN_CLZ
    /* value - 1 must be strictly positive before it is handed to
     * __builtin_clz, whose result is undefined for 0 */
    if (OPAL_UNLIKELY (1 >= value)) {
        return 1;
    }
    power2 = 1U << (8 * sizeof (int) - __builtin_clz((unsigned int) (value - 1)));
#else
    /* The value > 0 guard keeps negative inputs out of the unsigned
     * comparison, where they would look like huge numbers */
    for (power2 = 1U; value > 0 && power2 < (unsigned int) value; power2 <<= 1) /* empty */;
#endif

    return (int) power2;
}
|
||||
|
||||
|
||||
#endif /* OPAL_BIT_OPS_H */
|
||||
|
||||
|
@ -5,7 +5,7 @@
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# Copyright (c) 2004-2011 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
@ -31,8 +31,9 @@ AM_CPPFLAGS = -I$(top_srcdir)/test/support
|
||||
|
||||
|
||||
check_PROGRAMS = \
|
||||
opal_sos \
|
||||
opal_path_nfs
|
||||
opal_bit_ops \
|
||||
opal_path_nfs \
|
||||
opal_sos
|
||||
|
||||
TESTS = \
|
||||
$(check_PROGRAMS)
|
||||
@ -66,6 +67,13 @@ TESTS = \
|
||||
# $(top_builddir)/test/support/libsupport.a
|
||||
#opal_basename_DEPENDENCIES = $(opal_basename_LDADD)
|
||||
|
||||
# Unit test for the inline helpers in opal/util/bit_ops.h.
# The dependency list refers to this program's own LDADD rather than
# to the one of opal_path_nfs.
opal_bit_ops_SOURCES = opal_bit_ops.c
opal_bit_ops_LDADD = \
	$(top_builddir)/opal/libopen-pal.la \
	$(top_builddir)/test/support/libsupport.a
opal_bit_ops_DEPENDENCIES = $(opal_bit_ops_LDADD)
|
||||
|
||||
|
||||
opal_path_nfs_SOURCES = opal_path_nfs.c
|
||||
opal_path_nfs_LDADD = \
|
||||
$(top_builddir)/opal/libopen-pal.la \
|
||||
|
217
test/util/opal_bit_ops.c
Обычный файл
217
test/util/opal_bit_ops.c
Обычный файл
@ -0,0 +1,217 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2011 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
|
||||
|
||||
#include "support.h"
|
||||
#include "opal/util/bit_ops.h"
|
||||
#include "opal/util/output.h"
|
||||
|
||||
/*
|
||||
#define DEBUG
|
||||
*/
|
||||
|
||||
static int test_hibit(int value, int start);
|
||||
static int test_cube_dim(int value);
|
||||
static int test_next_poweroftwo(int value);
|
||||
static int test_next_poweroftwo_inclusive(int value);
|
||||
|
||||
/*
 * Test driver: optionally checks opal_next_poweroftwo() for values given on
 * the command line, then runs every bit_ops check over a fixed table of
 * boundary values.  The exit status is whatever test_finalize() returns.
 */
int main(int argc, char* argv[])
{
    int i;
    int vals[] = {0, 1, 2, 3, 4, 5, 127, 128, 129, (1 << 29) -1, (1 << 29), (1 << 29) +1, (1 << 30) -1, (1 << 30) /* And NOT (1 << 30) +1 */};
    test_init("opal_bit_ops()");

#ifdef DEBUG
    printf ("Test usage: ./opal_bit_ops [VALUES]\n");
#endif

    for (i = 1; i < argc; i++) {
        char *end;
        long parsed = strtol (argv[i], &end, 10);

        /* Accept only complete decimal numbers inside the range covered
         * by the built-in table; strtol() saturates on overflow, so such
         * input is rejected by the range check as well */
        if (end == argv[i] || '\0' != *end || parsed < 0 || parsed > (1 << 30)) {
            printf ("Skipping %d. argument \"%s\": expected an integer in [0, %d]\n",
                    i, argv[i], 1 << 30);
            continue;
        }

        printf ("Testing %d. argument test_next_poweroftwo(%d): %s\n",
                i, (int) parsed, test_next_poweroftwo((int) parsed) ? "correct" : "wrong");
    }

    for (i = 0; i < (int)(sizeof(vals)/sizeof(vals[0])); i++) {
        /* Check hibit with a wide search window and with a 3-bit window */
        test_hibit (vals[i], 8 * sizeof(int) -2);
        test_hibit (vals[i], 3);
        test_cube_dim (vals[i]);
        test_next_poweroftwo (vals[i]);
        test_next_poweroftwo_inclusive (vals[i]);
    }

    /* All done */
    return test_finalize();
}
|
||||
|
||||
|
||||
/* REFERENCE FUNCTION */
|
||||
/*
 * Loop-based reference for opal_hibit(): scans downwards from bit
 * (start - 1) and returns the index of the first bit that is set in
 * value, or -1 if none of the scanned bits is set.
 */
static int hibit(int value, int start)
{
    int pos = start - 1;
    unsigned int probe = 1 << pos;

    while (pos >= 0 && !(value & probe)) {
        probe >>= 1;
        --pos;
    }

    return pos;
}
|
||||
|
||||
/*
 * Compares opal_hibit() against the loop-based reference hibit() for one
 * (value, start) pair and reports the outcome through test_success() /
 * test_failure().
 *
 * Returns 1 if both implementations agree, 0 otherwise.
 */
static int test_hibit(int value, int start)
{
    int out;
    int bit = hibit (value, start);
    char * msg;

#ifdef DEBUG
    printf ("test_hibit(): value:%d expect:%d\n",
            value, bit);
#endif

    if (bit == (out = opal_hibit (value, start))) {
        test_success();
        return 1;
    }

    /* On failure asprintf() leaves msg undefined, so it must neither be
     * printed nor freed in that case */
    if (0 > asprintf(&msg, "Mismatch for hibit (w/ start:%d): value:%d, expected:%d got:%d\n",
                     start, value, bit, out)) {
        test_failure("Mismatch for hibit (details unavailable: asprintf failed)\n");
    } else {
        test_failure(msg);
        free(msg);
    }

    return 0;
}
|
||||
|
||||
|
||||
/* REFERENCE FUNCTION */
|
||||
/*
 * Loop-based reference for opal_cube_dim(): counts how often 1 has to be
 * doubled until it is no longer smaller than value.
 */
static int cube_dim(int value)
{
    int extent = 1;
    int dim = 0;

    while (extent < value) {
        extent <<= 1;
        ++dim;
    }

    return dim;
}
|
||||
|
||||
/*
 * Compares opal_cube_dim() against the loop-based reference cube_dim()
 * for one value and reports the outcome through test_success() /
 * test_failure().
 *
 * Returns 1 if both implementations agree, 0 otherwise.
 */
static int test_cube_dim(int value)
{
    int out;
    int dim = cube_dim (value);
    char * msg;

#ifdef DEBUG
    printf ("test_cube_dim(): value:%d expect:%d\n",
            value, dim);
#endif

    if (dim == (out = opal_cube_dim (value))) {
        test_success();
        return 1;
    }

    /* On failure asprintf() leaves msg undefined, so it must neither be
     * printed nor freed in that case */
    if (0 > asprintf(&msg, "Mismatch for cube_dim: value:%d, expected:%d got:%d\n",
                     value, dim, out)) {
        test_failure("Mismatch for cube_dim (details unavailable: asprintf failed)\n");
    } else {
        test_failure(msg);
        free(msg);
    }

    return 0;
}
|
||||
|
||||
|
||||
/* REFERENCE FUNCTION */
|
||||
/*
 * Loop-based reference for opal_next_poweroftwo(): returns the smallest
 * power of two strictly greater than value, and 1 for zero or negative
 * input.
 *
 * The result is accumulated as unsigned so that the tested value 1 << 30
 * does not overflow a signed int, and the loop stops on value > 0 so that
 * a negative input (whose right shift never reaches zero on common
 * implementations) cannot keep it running.
 */
static int next_poweroftwo(int value)
{
    unsigned int power2;

    for (power2 = 1U; value > 0; value >>= 1, power2 <<= 1) /* empty */;

    return (int) power2;
}
|
||||
|
||||
|
||||
/*
 * Compares opal_next_poweroftwo() against the loop-based reference
 * next_poweroftwo() for one value and reports the outcome through
 * test_success() / test_failure().
 *
 * Returns 1 if both implementations agree, 0 otherwise.
 */
static int test_next_poweroftwo(int value)
{
    int out;
    int power2 = next_poweroftwo (value);
    char * msg;

#ifdef DEBUG
    printf ("test_next_poweroftwo(): value:%d expect:%d\n",
            value, power2);
#endif

    if (power2 == (out = opal_next_poweroftwo (value))) {
        test_success();
        return 1;
    }

    /* On failure asprintf() leaves msg undefined, so it must neither be
     * printed nor freed in that case */
    if (0 > asprintf(&msg, "Mismatch for power-of-two: value:%d, expected:%d got:%d\n",
                     value, power2, out)) {
        test_failure("Mismatch for power-of-two (details unavailable: asprintf failed)\n");
    } else {
        test_failure(msg);
        free(msg);
    }

    return 0;
}
|
||||
|
||||
|
||||
|
||||
/* REFERENCE FUNCTION */
|
||||
/*
 * Loop-based reference for opal_next_poweroftwo_inclusive(): doubles a
 * candidate, starting at 1, until it is no longer smaller than value.
 */
static int next_poweroftwo_inclusive(int value)
{
    int candidate;

    for (candidate = 1; candidate < value; candidate <<= 1) {
        /* empty */
    }

    return candidate;
}
|
||||
|
||||
/*
 * Compares opal_next_poweroftwo_inclusive() against the loop-based
 * reference next_poweroftwo_inclusive() for one value and reports the
 * outcome through test_success() / test_failure().
 *
 * Returns 1 if both implementations agree, 0 otherwise.
 */
static int test_next_poweroftwo_inclusive(int value)
{
    int out;
    int power2 = next_poweroftwo_inclusive (value);
    char * msg;

#ifdef DEBUG
    /* Name this function in the trace, not test_next_poweroftwo() */
    printf ("test_next_poweroftwo_inclusive(): value:%d expect:%d\n",
            value, power2);
#endif

    if (power2 == (out = opal_next_poweroftwo_inclusive (value))) {
        test_success();
        return 1;
    }

    /* On failure asprintf() leaves msg undefined, so it must neither be
     * printed nor freed in that case */
    if (0 > asprintf(&msg, "Mismatch for power-of-two-inclusive: value:%d, expected:%d got:%d\n",
                     value, power2, out)) {
        test_failure("Mismatch for power-of-two-inclusive (details unavailable: asprintf failed)\n");
    } else {
        test_failure(msg);
        free(msg);
    }

    return 0;
}
|
||||
|
||||
|
||||
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user