1
1

 - Check whether the compiler supports __builtin_clz (count leading
   zeroes);
   if so, use it for bit-operations like opal_cube_dim and opal_hibit.
   Implement two versions of power-of-two.
   In case of opal_next_poweroftwo, this reduces the average execution
   time from 83 cycles to 4 cycles (Intel Nehalem, icc, -O2, inlining,
   measured rdtsc, with loop over 2^27 values).
   Numbers for other functions are similar (but of course heavily depend
   on the usage, e.g. opal_hibit() with a start of 4 does not save
   much).  The bsr instruction on AMD Opteron is also not as fast.

 - Replace various places where the next power-of-two is computed.
   
   Tested on Intel Nehalem Cluster with openib, compilers GNU-4.6.1 and
   Intel-12.0.4 using mpi_testsuite -t "Collective" with 128 processes.

This commit was SVN r25270.
Этот коммит содержится в:
Rainer Keller 2011-10-11 22:49:01 +00:00
родитель 74c88a9e48
Коммит 4e6a6fc146
16 изменённых файлов: 375 добавлений и 60 удалений

Просмотреть файл

@ -26,10 +26,11 @@
#include <string.h> #include <string.h>
#include "opal/util/bit_ops.h"
#include "opal/mca/installdirs/installdirs.h" #include "opal/mca/installdirs/installdirs.h"
#include "orte/util/show_help.h"
#include "opal/util/output.h" #include "opal/util/output.h"
#include "opal/mca/base/mca_base_param.h" #include "opal/mca/base/mca_base_param.h"
#include "orte/util/show_help.h"
#include "btl_openib.h" #include "btl_openib.h"
#include "btl_openib_mca.h" #include "btl_openib_mca.h"
#include "btl_openib_ini.h" #include "btl_openib_ini.h"
@ -554,16 +555,10 @@ int btl_openib_register_mca_params(void)
&mca_btl_openib_module.super)); &mca_btl_openib_module.super));
/* setup all the qp stuff */ /* setup all the qp stuff */
mid_qp_size = mca_btl_openib_module.super.btl_eager_limit / 4;
/* round mid_qp_size to smallest power of two */ /* round mid_qp_size to smallest power of two */
for(i = 31; i > 0; i--) { mid_qp_size = opal_next_poweroftwo (mca_btl_openib_module.super.btl_eager_limit / 4) >> 1;
if(!(mid_qp_size & (1<<i))) {
continue;
}
mid_qp_size = (1<<i);
break;
}
/* mid_qp_size = MAX (mid_qp_size, 1024); ?! */
if(mid_qp_size <= 128) { if(mid_qp_size <= 128) {
mid_qp_size = 1024; mid_qp_size = 1024;
} }

Просмотреть файл

@ -39,6 +39,7 @@
#include "knem_io.h" #include "knem_io.h"
#endif /* OMPI_BTL_SM_HAVE_KNEM */ #endif /* OMPI_BTL_SM_HAVE_KNEM */
#include "opal/util/bit_ops.h"
#include "opal/class/opal_free_list.h" #include "opal/class/opal_free_list.h"
#include "ompi/mca/btl/btl.h" #include "ompi/mca/btl/btl.h"
#include "ompi/mca/common/sm/common_sm.h" #include "ompi/mca/common/sm/common_sm.h"
@ -265,9 +266,7 @@ static inline int sm_fifo_init(int fifo_size, mca_mpool_base_module_t *mpool,
int i, qsize; int i, qsize;
/* figure out the queue size (a power of two that is at least 1) */ /* figure out the queue size (a power of two that is at least 1) */
qsize = 1; qsize = opal_next_poweroftwo_inclusive (fifo_size);
while ( qsize < fifo_size )
qsize <<= 1;
/* allocate the queue in the receiver's address space */ /* allocate the queue in the receiver's address space */
fifo->queue_recv = (volatile void **)mpool->mpool_alloc( fifo->queue_recv = (volatile void **)mpool->mpool_alloc(

Просмотреть файл

@ -43,6 +43,7 @@
#include "ompi/constants.h" #include "ompi/constants.h"
#include "opal/mca/event/event.h" #include "opal/mca/event/event.h"
#include "opal/util/bit_ops.h"
#include "opal/util/output.h" #include "opal/util/output.h"
#include "orte/util/proc_info.h" #include "orte/util/proc_info.h"
#include "orte/util/show_help.h" #include "orte/util/show_help.h"
@ -225,13 +226,9 @@ static int sm_register(void)
static int mca_btl_sm_component_open(void) static int mca_btl_sm_component_open(void)
{ {
mca_btl_sm_component.sm_max_btls = 1; mca_btl_sm_component.sm_max_btls = 1;
/* make sure the number of fifos is a power of 2 */ /* make sure the number of fifos is a power of 2 */
{ mca_btl_sm_component.nfifos = opal_next_poweroftwo_inclusive (mca_btl_sm_component.nfifos);
int i = 1;
while ( i < mca_btl_sm_component.nfifos )
i <<= 1;
mca_btl_sm_component.nfifos = i;
}
/* make sure that queue size and lazy free parameter are compatible */ /* make sure that queue size and lazy free parameter are compatible */
if (mca_btl_sm_component.fifo_lazy_free >= (mca_btl_sm_component.fifo_size >> 1) ) if (mca_btl_sm_component.fifo_lazy_free >= (mca_btl_sm_component.fifo_size >> 1) )

Просмотреть файл

@ -26,10 +26,11 @@
#include <string.h> #include <string.h>
#include "opal/util/bit_ops.h"
#include "opal/mca/installdirs/installdirs.h" #include "opal/mca/installdirs/installdirs.h"
#include "orte/util/show_help.h"
#include "opal/util/output.h" #include "opal/util/output.h"
#include "opal/mca/base/mca_base_param.h" #include "opal/mca/base/mca_base_param.h"
#include "orte/util/show_help.h"
#include "btl_wv.h" #include "btl_wv.h"
#include "btl_wv_mca.h" #include "btl_wv_mca.h"
#include "btl_wv_ini.h" #include "btl_wv_ini.h"
@ -471,16 +472,10 @@ int btl_wv_register_mca_params(void)
&mca_btl_wv_module.super)); &mca_btl_wv_module.super));
/* setup all the qp stuff */ /* setup all the qp stuff */
mid_qp_size = mca_btl_wv_module.super.btl_eager_limit / 4;
/* round mid_qp_size to smallest power of two */ /* round mid_qp_size to smallest power of two */
for(i = 31; i > 0; i--) { mid_qp_size = opal_next_poweroftwo (mca_btl_wv_module.super.btl_eager_limit / 4) >> 1;
if(!(mid_qp_size & (1<<i))) {
continue;
}
mid_qp_size = (1<<i);
break;
}
/* mid_qp_size = MAX (mid_qp_size, 1024); ?! */
if(mid_qp_size <= 128) { if(mid_qp_size <= 128) {
mid_qp_size = 1024; mid_qp_size = 1024;
} }

Просмотреть файл

@ -24,6 +24,7 @@
#include <errno.h> #include <errno.h>
#include "mpi.h" #include "mpi.h"
#include "opal/util/bit_ops.h"
#include "ompi/constants.h" #include "ompi/constants.h"
#include "ompi/mca/coll/coll.h" #include "ompi/mca/coll/coll.h"
#include "ompi/mca/coll/base/coll_tags.h" #include "ompi/mca/coll/base/coll_tags.h"
@ -112,7 +113,7 @@ mca_coll_basic_reduce_scatter_intra(void *sbuf, void *rbuf, int *rcounts,
if ((op->o_flags & OMPI_OP_FLAGS_COMMUTE) && if ((op->o_flags & OMPI_OP_FLAGS_COMMUTE) &&
(buf_size < COMMUTATIVE_LONG_MSG) && (!zerocounts)) { (buf_size < COMMUTATIVE_LONG_MSG) && (!zerocounts)) {
int tmp_size = 1, remain = 0, tmp_rank; int tmp_size, remain = 0, tmp_rank;
/* temporary receive buffer. See coll_basic_reduce.c for details on sizing */ /* temporary receive buffer. See coll_basic_reduce.c for details on sizing */
recv_buf_free = (char*) malloc(buf_size); recv_buf_free = (char*) malloc(buf_size);
@ -133,7 +134,7 @@ mca_coll_basic_reduce_scatter_intra(void *sbuf, void *rbuf, int *rcounts,
/* figure out power of two mapping: grow until larger than /* figure out power of two mapping: grow until larger than
comm size, then go back one, to get the largest power of comm size, then go back one, to get the largest power of
two less than comm size */ two less than comm size */
while (tmp_size <= size) tmp_size <<= 1; tmp_size = opal_next_poweroftwo(size);
tmp_size >>= 1; tmp_size >>= 1;
remain = size - tmp_size; remain = size - tmp_size;

Просмотреть файл

@ -20,6 +20,7 @@
#include "ompi_config.h" #include "ompi_config.h"
#include "mpi.h" #include "mpi.h"
#include "opal/util/bit_ops.h"
#include "ompi/constants.h" #include "ompi/constants.h"
#include "ompi/datatype/ompi_datatype.h" #include "ompi/datatype/ompi_datatype.h"
#include "ompi/communicator/communicator.h" #include "ompi/communicator/communicator.h"
@ -271,7 +272,7 @@ ompi_coll_tuned_allgather_intra_recursivedoubling(void *sbuf, int scount,
size = ompi_comm_size(comm); size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm); rank = ompi_comm_rank(comm);
for (pow2size = 1; pow2size <= size; pow2size <<=1); pow2size = opal_next_poweroftwo (size);
pow2size >>=1; pow2size >>=1;
/* Current implementation only handles power-of-two number of processes. /* Current implementation only handles power-of-two number of processes.

Просмотреть файл

@ -20,6 +20,7 @@
#include "ompi_config.h" #include "ompi_config.h"
#include "mpi.h" #include "mpi.h"
#include "opal/util/bit_ops.h"
#include "ompi/constants.h" #include "ompi/constants.h"
#include "ompi/datatype/ompi_datatype.h" #include "ompi/datatype/ompi_datatype.h"
#include "ompi/communicator/communicator.h" #include "ompi/communicator/communicator.h"
@ -170,7 +171,8 @@ ompi_coll_tuned_allreduce_intra_recursivedoubling(void *sbuf, void *rbuf,
tmprecv = (char*) rbuf; tmprecv = (char*) rbuf;
/* Determine nearest power of two less than or equal to size */ /* Determine nearest power of two less than or equal to size */
for (adjsize = 0x1; adjsize <= size; adjsize <<= 1); adjsize = adjsize >> 1; adjsize = opal_next_poweroftwo (size);
adjsize >>= 1;
/* Handle non-power-of-two case: /* Handle non-power-of-two case:
- Even ranks less than 2 * extra_ranks send their data to (rank + 1), and - Even ranks less than 2 * extra_ranks send their data to (rank + 1), and

Просмотреть файл

@ -20,6 +20,7 @@
#include "ompi_config.h" #include "ompi_config.h"
#include "mpi.h" #include "mpi.h"
#include "opal/util/bit_ops.h"
#include "ompi/constants.h" #include "ompi/constants.h"
#include "ompi/communicator/communicator.h" #include "ompi/communicator/communicator.h"
#include "ompi/mca/coll/coll.h" #include "ompi/mca/coll/coll.h"
@ -134,7 +135,7 @@ int ompi_coll_tuned_barrier_intra_recursivedoubling(struct ompi_communicator_t *
rank)); rank));
/* do nearest power of 2 less than size calc */ /* do nearest power of 2 less than size calc */
for( adjsize = 1; adjsize <= size; adjsize <<= 1 ); adjsize = opal_next_poweroftwo(size);
adjsize >>= 1; adjsize >>= 1;
/* if size is not exact power of two, perform an extra step */ /* if size is not exact power of two, perform an extra step */
@ -354,7 +355,7 @@ int ompi_coll_tuned_barrier_intra_tree(struct ompi_communicator_t *comm,
rank)); rank));
/* Find the nearest power of 2 of the communicator size. */ /* Find the nearest power of 2 of the communicator size. */
for(depth = 1; depth < size; depth <<= 1 ); depth = opal_next_poweroftwo_inclusive(size);
for (jump=1; jump<depth; jump<<=1) { for (jump=1; jump<depth; jump<<=1) {
partner = rank ^ jump; partner = rank ^ jump;

Просмотреть файл

@ -20,6 +20,7 @@
#include "ompi_config.h" #include "ompi_config.h"
#include "mpi.h" #include "mpi.h"
#include "opal/util/bit_ops.h"
#include "ompi/datatype/ompi_datatype.h" #include "ompi/datatype/ompi_datatype.h"
#include "ompi/communicator/communicator.h" #include "ompi/communicator/communicator.h"
#include "ompi/mca/coll/coll.h" #include "ompi/mca/coll/coll.h"
@ -489,7 +490,7 @@ int ompi_coll_tuned_reduce_scatter_intra_dec_fixed( void *sbuf, void *rbuf,
total_message_size *= dsize; total_message_size *= dsize;
/* compute the nearest power of 2 */ /* compute the nearest power of 2 */
for (pow2 = 1; pow2 < comm_size; pow2 <<= 1); pow2 = opal_next_poweroftwo_inclusive (comm_size);
if ((total_message_size <= small_message_size) || if ((total_message_size <= small_message_size) ||
((total_message_size <= large_message_size) && (pow2 == comm_size)) || ((total_message_size <= large_message_size) && (pow2 == comm_size)) ||
@ -540,7 +541,7 @@ int ompi_coll_tuned_allgather_intra_dec_fixed(void *sbuf, int scount,
" rank %d com_size %d msg_length %lu", " rank %d com_size %d msg_length %lu",
ompi_comm_rank(comm), communicator_size, (unsigned long)total_dsize)); ompi_comm_rank(comm), communicator_size, (unsigned long)total_dsize));
for (pow2_size = 1; pow2_size < communicator_size; pow2_size <<=1); pow2_size = opal_next_poweroftwo_inclusive (communicator_size);
/* Decision based on MX 2Gb results from Grig cluster at /* Decision based on MX 2Gb results from Grig cluster at
The University of Tennesse, Knoxville The University of Tennesse, Knoxville

Просмотреть файл

@ -21,6 +21,7 @@
#include "ompi_config.h" #include "ompi_config.h"
#include "mpi.h" #include "mpi.h"
#include "opal/util/bit_ops.h"
#include "ompi/constants.h" #include "ompi/constants.h"
#include "ompi/datatype/ompi_datatype.h" #include "ompi/datatype/ompi_datatype.h"
#include "ompi/communicator/communicator.h" #include "ompi/communicator/communicator.h"
@ -132,7 +133,7 @@ ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(void *sbuf,
mca_coll_base_module_t *module) mca_coll_base_module_t *module)
{ {
int i, rank, size, count, err = OMPI_SUCCESS; int i, rank, size, count, err = OMPI_SUCCESS;
int tmp_size = 1, remain = 0, tmp_rank; int tmp_size, remain = 0, tmp_rank;
int *disps = NULL; int *disps = NULL;
ptrdiff_t true_lb, true_extent, lb, extent, buf_size; ptrdiff_t true_lb, true_extent, lb, extent, buf_size;
char *recv_buf = NULL, *recv_buf_free = NULL; char *recv_buf = NULL, *recv_buf_free = NULL;
@ -189,7 +190,7 @@ ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(void *sbuf,
/* figure out power of two mapping: grow until larger than /* figure out power of two mapping: grow until larger than
comm size, then go back one, to get the largest power of comm size, then go back one, to get the largest power of
two less than comm size */ two less than comm size */
while (tmp_size <= size) tmp_size <<= 1; tmp_size = opal_next_poweroftwo (size);
tmp_size >>= 1; tmp_size >>= 1;
remain = size - tmp_size; remain = size - tmp_size;

Просмотреть файл

@ -19,6 +19,7 @@
#include "ompi_config.h" #include "ompi_config.h"
#include "mpi.h" #include "mpi.h"
#include "opal/util/bit_ops.h"
#include "ompi/constants.h" #include "ompi/constants.h"
#include "ompi/communicator/communicator.h" #include "ompi/communicator/communicator.h"
#include "ompi/mca/coll/base/coll_tags.h" #include "ompi/mca/coll/base/coll_tags.h"
@ -363,7 +364,7 @@ ompi_coll_tuned_topo_build_bmtree( struct ompi_communicator_t* comm,
if( index < 0 ) index += size; if( index < 0 ) index += size;
while( mask <= index ) mask <<= 1; mask = opal_next_poweroftwo(index);
/* Now I can compute my father rank */ /* Now I can compute my father rank */
if( root == rank ) { if( root == rank ) {

Просмотреть файл

@ -25,6 +25,7 @@
#include "opal/class/opal_list.h" #include "opal/class/opal_list.h"
#include "opal/class/opal_hash_table.h" #include "opal/class/opal_hash_table.h"
#include "opal/constants.h" #include "opal/constants.h"
#include "opal/util/bit_ops.h"
/* /*
* opal_hash_table_t * opal_hash_table_t
@ -70,12 +71,7 @@ static void opal_hash_table_destruct(opal_hash_table_t* ht)
int opal_hash_table_init(opal_hash_table_t* ht, size_t table_size) int opal_hash_table_init(opal_hash_table_t* ht, size_t table_size)
{ {
size_t i; size_t i;
size_t power2 = 1; size_t power2 = opal_next_poweroftwo (table_size);
size_t tmp = table_size;
while(tmp) {
tmp >>= 1;
power2 <<= 1;
}
ht->ht_mask = power2-1; ht->ht_mask = power2-1;
ht->ht_table = (opal_list_t *)malloc(power2 * sizeof(opal_list_t)); ht->ht_table = (opal_list_t *)malloc(power2 * sizeof(opal_list_t));

Просмотреть файл

@ -246,7 +246,7 @@ AC_DEFUN([OPAL_SETUP_CC],[
have_cc_builtin_expect=0 have_cc_builtin_expect=0
fi fi
AC_DEFINE_UNQUOTED([OPAL_C_HAVE_BUILTIN_EXPECT], [$have_cc_builtin_expect], AC_DEFINE_UNQUOTED([OPAL_C_HAVE_BUILTIN_EXPECT], [$have_cc_builtin_expect],
[Whether C compiler supports __builtin_expect]) [Whether C compiler supports __builtin_expect])
# see if the C compiler supports __builtin_prefetch # see if the C compiler supports __builtin_prefetch
AC_CACHE_CHECK([if $CC supports __builtin_prefetch], AC_CACHE_CHECK([if $CC supports __builtin_prefetch],
@ -262,7 +262,23 @@ AC_DEFUN([OPAL_SETUP_CC],[
have_cc_builtin_prefetch=0 have_cc_builtin_prefetch=0
fi fi
AC_DEFINE_UNQUOTED([OPAL_C_HAVE_BUILTIN_PREFETCH], [$have_cc_builtin_prefetch], AC_DEFINE_UNQUOTED([OPAL_C_HAVE_BUILTIN_PREFETCH], [$have_cc_builtin_prefetch],
[Whether C compiler supports __builtin_prefetch]) [Whether C compiler supports __builtin_prefetch])
# see if the C compiler supports __builtin_clz
AC_CACHE_CHECK([if $CC supports __builtin_clz],
[ompi_cv_cc_supports___builtin_clz],
[AC_TRY_LINK([],
[int value = 0xffff; /* we know we have 16 bits set */
if ((8*sizeof(int)-16) != __builtin_clz(value)) return 0;],
[ompi_cv_cc_supports___builtin_clz="yes"],
[ompi_cv_cc_supports___builtin_clz="no"])])
if test "$ompi_cv_cc_supports___builtin_clz" = "yes" ; then
have_cc_builtin_clz=1
else
have_cc_builtin_clz=0
fi
AC_DEFINE_UNQUOTED([OPAL_C_HAVE_BUILTIN_CLZ], [$have_cc_builtin_clz],
[Whether C compiler supports __builtin_clz])
# Preload the optflags for the case where the user didn't specify # Preload the optflags for the case where the user didn't specify
# any. If we're using GNU compilers, use -O3 (since it GNU # any. If we're using GNU compilers, use -O3 (since it GNU

Просмотреть файл

@ -5,7 +5,7 @@
* Copyright (c) 2004-2005 The University of Tennessee and The University * Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights * of Tennessee Research Foundation. All rights
* reserved. * reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * Copyright (c) 2004-2011 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved. * University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California. * Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved. * All rights reserved.
@ -19,6 +19,8 @@
#ifndef OPAL_BIT_OPS_H #ifndef OPAL_BIT_OPS_H
#define OPAL_BIT_OPS_H #define OPAL_BIT_OPS_H
#include "opal/prefetch.h"
/** /**
* Calculates the highest bit in an integer * Calculates the highest bit in an integer
* *
@ -33,21 +35,35 @@
* *
* WARNING: *NO* error checking is performed. This is meant to be a * WARNING: *NO* error checking is performed. This is meant to be a
* fast inline function. * fast inline function.
* Using __builtin_clz (count-leading-zeros) uses 3 cycles instead
* of 17 cycles (on average value, with start=32)
* compared to the loop-version (on Intel Nehalem -- with icc-12.1.0 -O2).
*/ */
static inline int opal_hibit(int value, int start) static inline int opal_hibit(int value, int start)
{ {
unsigned int mask; unsigned int mask;
--start; #if OPAL_C_HAVE_BUILTIN_CLZ
mask = 1 << start; /* Only look at the part that the caller wanted looking at */
mask = value & ((1 << start) - 1);
for (; start >= 0; --start, mask >>= 1) { if (OPAL_UNLIKELY (0 == mask)) {
if (value & mask) { return -1;
break;
} }
}
start = (8*sizeof(int)-1) - __builtin_clz(mask);
#else
--start;
mask = 1 << start;
for (; start >= 0; --start, mask >>= 1) {
if (value & mask) {
break;
}
}
#endif
return start; return start;
} }
@ -63,16 +79,84 @@ static inline int opal_hibit(int value, int start)
* *
* WARNING: *NO* error checking is performed. This is meant to be a * WARNING: *NO* error checking is performed. This is meant to be a
* fast inline function. * fast inline function.
* Using __builtin_clz (count-leading-zeros) uses 3 cycles instead of 50 cycles
* compared to the loop-version (on Intel Nehalem -- with icc-12.1.0 -O2).
*/ */
static inline int opal_cube_dim(int value) static inline int opal_cube_dim(int value)
{ {
int dim, size; int dim, size;
for (dim = 0, size = 1; size < value; ++dim, size <<= 1) { #if OPAL_C_HAVE_BUILTIN_CLZ
continue; if (OPAL_UNLIKELY (1 >= value)) {
return 0;
} }
size = 8 * sizeof(int);
dim = size - __builtin_clz(value-1);
#else
for (dim = 0, size = 1; size < value; ++dim, size <<= 1) /* empty */;
#endif
return dim; return dim;
} }
/**
 * @brief Returns the smallest power of two strictly greater than the given value.
 *
 * @param value The integer value to find the next power of two for
 *
 * @returns The next power of two above value (a value that already is a
 *          power of two yields the following one, e.g. 4 -> 8);
 *          1 if value is zero or negative.
 *
 * WARNING: *NO* error checking is performed.  This is meant to be a
 * fast inline function.  The result has to fit into an int, so value
 * must be smaller than 2^30.
 * Using __builtin_clz (count-leading-zeros) uses 4 cycles instead of 77
 * compared to the loop-version (on Intel Nehalem -- with icc-12.1.0 -O2).
 */
static inline int opal_next_poweroftwo(int value)
{
    int power2;

#if OPAL_C_HAVE_BUILTIN_CLZ
    /* __builtin_clz(0) is undefined, and a negative value has no leading
     * zero bits, which would turn the shift below into a shift by the
     * full width of int (undefined as well).  Answer 1 for both cases,
     * exactly like the loop version does. */
    if (OPAL_UNLIKELY (0 >= value)) {
        return 1;
    }
    /* (width - clz) is the position just above the highest set bit */
    power2 = 1 << (8 * sizeof (int) - __builtin_clz(value));
#else
    for (power2 = 1; value > 0; value >>= 1, power2 <<= 1) /* empty */;
#endif

    return power2;
}
/**
 * @brief Returns the smallest power of two that is greater than or equal
 *        to the given value (the value itself if it already is a power
 *        of two).
 *
 * @param value The integer value to round up to a power of two
 *
 * @returns The next power of two (inclusive); 1 for any value <= 1
 *
 * WARNING: *NO* error checking is performed.  This is meant to be a
 * fast inline function.
 * Using __builtin_clz (count-leading-zeros) uses 4 cycles instead of 56
 * compared to the loop-version (on Intel Nehalem -- with icc-12.1.0 -O2).
 */
static inline int opal_next_poweroftwo_inclusive(int value)
{
#if OPAL_C_HAVE_BUILTIN_CLZ
    /* Everything up to and including 1 rounds to 1; this also keeps
     * __builtin_clz() away from a zero argument below. */
    if (OPAL_UNLIKELY (value <= 1)) {
        return 1;
    }
    /* Looking at value-1 makes an exact power of two map onto itself */
    return 1 << (8 * sizeof (int) - __builtin_clz(value - 1));
#else
    int rounded = 1;

    while (rounded < value) {
        rounded <<= 1;
    }
    return rounded;
#endif
}
#endif /* OPAL_BIT_OPS_H */ #endif /* OPAL_BIT_OPS_H */

Просмотреть файл

@ -5,7 +5,7 @@
# Copyright (c) 2004-2005 The University of Tennessee and The University # Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights # of Tennessee Research Foundation. All rights
# reserved. # reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # Copyright (c) 2004-2011 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved. # University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California. # Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved. # All rights reserved.
@ -31,8 +31,9 @@ AM_CPPFLAGS = -I$(top_srcdir)/test/support
check_PROGRAMS = \ check_PROGRAMS = \
opal_sos \ opal_bit_ops \
opal_path_nfs opal_path_nfs \
opal_sos
TESTS = \ TESTS = \
$(check_PROGRAMS) $(check_PROGRAMS)
@ -66,6 +67,13 @@ TESTS = \
# $(top_builddir)/test/support/libsupport.a # $(top_builddir)/test/support/libsupport.a
#opal_basename_DEPENDENCIES = $(opal_basename_LDADD) #opal_basename_DEPENDENCIES = $(opal_basename_LDADD)
opal_bit_ops_SOURCES = opal_bit_ops.c
opal_bit_ops_LDADD = \
$(top_builddir)/opal/libopen-pal.la \
$(top_builddir)/test/support/libsupport.a
opal_bit_ops_DEPENDENCIES = $(opal_path_nfs_LDADD)
opal_path_nfs_SOURCES = opal_path_nfs.c opal_path_nfs_SOURCES = opal_path_nfs.c
opal_path_nfs_LDADD = \ opal_path_nfs_LDADD = \
$(top_builddir)/opal/libopen-pal.la \ $(top_builddir)/opal/libopen-pal.la \

217
test/util/opal_bit_ops.c Обычный файл
Просмотреть файл

@ -0,0 +1,217 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2011 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "support.h"
#include "opal/util/bit_ops.h"
#include "opal/util/output.h"
/*
#define DEBUG
*/
static int test_hibit(int value, int start);
static int test_cube_dim(int value);
static int test_next_poweroftwo(int value);
static int test_next_poweroftwo_inclusive(int value);
/*
 * Test driver.
 *
 * Every command-line argument is parsed as a decimal integer and fed to
 * test_next_poweroftwo(); afterwards a fixed table of values is run
 * through all four checks (hibit with two different start positions,
 * cube_dim, next_poweroftwo and next_poweroftwo_inclusive).
 *
 * Returns whatever test_finalize() reports.
 */
int main(int argc, char* argv[])
{
    int i;
    int vals[] = {0, 1, 2, 3, 4, 5, 127, 128, 129, (1 << 29) -1, (1 << 29), (1 << 29) +1, (1 << 30) -1, (1 << 30) /* And NOT (1 << 30) +1 */};

    test_init("opal_bit_ops()");

#ifdef DEBUG
    printf ("Test usage: ./opal_bit_ops [VALUES]\n");
#endif

    for (i = 1; i < argc; i++) {
        char *end;
        long parsed = strtol (argv[i], &end, 10);

        /* Only accept clean decimal numbers inside the range that the
         * built-in table above covers; anything else is reported and
         * skipped instead of being silently tested as some other value. */
        if (end == argv[i] || '\0' != *end ||
            parsed < 0 || parsed > (1 << 30)) {
            printf ("Skipping %d. argument \"%s\": expected an integer in [0, %d]\n",
                    i, argv[i], 1 << 30);
            continue;
        }

        printf ("Testing %d. argument test_next_poweroftwo(%d): %s\n",
                i, (int) parsed,
                test_next_poweroftwo((int) parsed) ? "correct" : "wrong");
    }

    for (i = 0; i < (int)(sizeof(vals)/sizeof(vals[0])); i++) {
        test_hibit (vals[i], 8 * sizeof(int) -2);
        test_hibit (vals[i], 3);
        test_cube_dim (vals[i]);
        test_next_poweroftwo (vals[i]);
        test_next_poweroftwo_inclusive (vals[i]);
    }

    /* All done */
    return test_finalize();
}
/*
 * REFERENCE FUNCTION
 *
 * Plain loop used as the yardstick for opal_hibit(): walks downwards
 * from bit position (start - 1) and returns the position of the first
 * bit that is set in value, or -1 if none of those bits is set.
 */
static int hibit(int value, int start)
{
    int bit = start - 1;
    unsigned int probe = 1 << bit;

    while (bit >= 0 && !(value & probe)) {
        --bit;
        probe >>= 1;
    }

    return bit;
}
/*
 * Compares opal_hibit() against the reference hibit() for one
 * (value, start) pair and records the outcome via test_success() /
 * test_failure().
 *
 * Returns 1 if both agree, 0 otherwise.
 */
static int test_hibit(int value, int start)
{
    /* Fixed buffer: large enough for the format below with four ints,
     * and it cannot fail the way an allocating formatter can. */
    char msg[128];
    int out;
    int bit = hibit (value, start);

#ifdef DEBUG
    printf ("test_hibit(): value:%d expect:%d\n",
            value, bit);
#endif

    out = opal_hibit (value, start);
    if (bit == out) {
        test_success();
        return 1;
    }

    snprintf(msg, sizeof(msg), "Mismatch for hibit (w/ start:%d): value:%d, expected:%d got:%d\n",
             start, value, bit, out);
    test_failure(msg);

    return 0;
}
/*
 * REFERENCE FUNCTION
 *
 * Plain loop used as the yardstick for opal_cube_dim(): returns the
 * smallest exponent whose power of two is not below value
 * (0 for any value <= 1).
 */
static int cube_dim(int value)
{
    int exponent = 0;
    int reach = 1;

    while (reach < value) {
        ++exponent;
        reach <<= 1;
    }

    return exponent;
}
/*
 * Compares opal_cube_dim() against the reference cube_dim() for one
 * value and records the outcome via test_success() / test_failure().
 *
 * Returns 1 if both agree, 0 otherwise.
 */
static int test_cube_dim(int value)
{
    /* Fixed buffer: large enough for the format below with three ints,
     * and it cannot fail the way an allocating formatter can. */
    char msg[128];
    int out;
    int dim = cube_dim (value);

#ifdef DEBUG
    printf ("test_cube_dim(): value:%d expect:%d\n",
            value, dim);
#endif

    out = opal_cube_dim (value);
    if (dim == out) {
        test_success();
        return 1;
    }

    snprintf(msg, sizeof(msg), "Mismatch for cube_dim: value:%d, expected:%d got:%d\n",
             value, dim, out);
    test_failure(msg);

    return 0;
}
/*
 * REFERENCE FUNCTION
 *
 * Plain loop used as the yardstick for opal_next_poweroftwo(): returns
 * the smallest power of two strictly greater than value, and 1 for a
 * value that is zero or negative.
 */
static int next_poweroftwo(int value)
{
    int power2 = 1;

    /* Stop on "> 0" rather than "!= 0": right-shifting a negative int
     * need not ever reach zero, so the loop would not terminate. */
    while (value > 0) {
        value >>= 1;
        power2 <<= 1;
    }

    return power2;
}
/*
 * Compares opal_next_poweroftwo() against the reference
 * next_poweroftwo() for one value and records the outcome via
 * test_success() / test_failure().
 *
 * Returns 1 if both agree, 0 otherwise.
 */
static int test_next_poweroftwo(int value)
{
    /* Fixed buffer: large enough for the format below with three ints,
     * and it cannot fail the way an allocating formatter can. */
    char msg[128];
    int out;
    int power2 = next_poweroftwo (value);

#ifdef DEBUG
    printf ("test_next_poweroftwo(): value:%d expect:%d\n",
            value, power2);
#endif

    out = opal_next_poweroftwo (value);
    if (power2 == out) {
        test_success();
        return 1;
    }

    snprintf(msg, sizeof(msg), "Mismatch for power-of-two: value:%d, expected:%d got:%d\n",
             value, power2, out);
    test_failure(msg);

    return 0;
}
/*
 * REFERENCE FUNCTION
 *
 * Plain loop used as the yardstick for opal_next_poweroftwo_inclusive():
 * returns the smallest power of two that is not below value
 * (1 for any value <= 1).
 */
static int next_poweroftwo_inclusive(int value)
{
    int candidate;

    for (candidate = 1; candidate < value; candidate <<= 1) {
        /* keep doubling */
    }

    return candidate;
}
/*
 * Compares opal_next_poweroftwo_inclusive() against the reference
 * next_poweroftwo_inclusive() for one value and records the outcome via
 * test_success() / test_failure().
 *
 * Returns 1 if both agree, 0 otherwise.
 */
static int test_next_poweroftwo_inclusive(int value)
{
    /* Fixed buffer: large enough for the format below with three ints,
     * and it cannot fail the way an allocating formatter can. */
    char msg[128];
    int out;
    int power2 = next_poweroftwo_inclusive (value);

#ifdef DEBUG
    /* Name this function in the trace, not its non-inclusive sibling */
    printf ("test_next_poweroftwo_inclusive(): value:%d expect:%d\n",
            value, power2);
#endif

    out = opal_next_poweroftwo_inclusive (value);
    if (power2 == out) {
        test_success();
        return 1;
    }

    snprintf(msg, sizeof(msg), "Mismatch for power-of-two-inclusive: value:%d, expected:%d got:%d\n",
             value, power2, out);
    test_failure(msg);

    return 0;
}