1
1

 - Check whether the compiler supports __builtin_clz (count leading
   zeroes);
   if so, use it for bit-operations like opal_cube_dim and opal_hibit.
   Implement two versions of power-of-two.
   In case of opal_next_poweroftwo, this reduces the average execution
   time from 83 cycles to 4 cycles (Intel Nehalem, icc, -O2, inlining,
   measured rdtsc, with loop over 2^27 values).
   Numbers for other functions are similar (but of course heavily depend
   on the usage, e.g. opal_hibit() with a start of 4 does not save
   much).  The bsr instruction on AMD Opteron is also not as fast.

 - Replace various places where the next power-of-two is computed.
   
   Tested on Intel Nehalem Cluster with openib, compilers GNU-4.6.1 and
   Intel-12.0.4 using mpi_testsuite -t "Collective" with 128 processes.

This commit was SVN r25270.
Этот коммит содержится в:
Rainer Keller 2011-10-11 22:49:01 +00:00
родитель 74c88a9e48
Коммит 4e6a6fc146
16 изменённых файлов: 375 добавлений и 60 удалений

Просмотреть файл

@ -26,10 +26,11 @@
#include <string.h>
#include "opal/util/bit_ops.h"
#include "opal/mca/installdirs/installdirs.h"
#include "orte/util/show_help.h"
#include "opal/util/output.h"
#include "opal/mca/base/mca_base_param.h"
#include "orte/util/show_help.h"
#include "btl_openib.h"
#include "btl_openib_mca.h"
#include "btl_openib_ini.h"
@ -554,16 +555,10 @@ int btl_openib_register_mca_params(void)
&mca_btl_openib_module.super));
/* setup all the qp stuff */
mid_qp_size = mca_btl_openib_module.super.btl_eager_limit / 4;
/* round mid_qp_size to smallest power of two */
for(i = 31; i > 0; i--) {
if(!(mid_qp_size & (1<<i))) {
continue;
}
mid_qp_size = (1<<i);
break;
}
mid_qp_size = opal_next_poweroftwo (mca_btl_openib_module.super.btl_eager_limit / 4) >> 1;
/* mid_qp_size = MAX (mid_qp_size, 1024); ?! */
if(mid_qp_size <= 128) {
mid_qp_size = 1024;
}

Просмотреть файл

@ -39,6 +39,7 @@
#include "knem_io.h"
#endif /* OMPI_BTL_SM_HAVE_KNEM */
#include "opal/util/bit_ops.h"
#include "opal/class/opal_free_list.h"
#include "ompi/mca/btl/btl.h"
#include "ompi/mca/common/sm/common_sm.h"
@ -265,9 +266,7 @@ static inline int sm_fifo_init(int fifo_size, mca_mpool_base_module_t *mpool,
int i, qsize;
/* figure out the queue size (a power of two that is at least 1) */
qsize = 1;
while ( qsize < fifo_size )
qsize <<= 1;
qsize = opal_next_poweroftwo_inclusive (fifo_size);
/* allocate the queue in the receiver's address space */
fifo->queue_recv = (volatile void **)mpool->mpool_alloc(

Просмотреть файл

@ -43,6 +43,7 @@
#include "ompi/constants.h"
#include "opal/mca/event/event.h"
#include "opal/util/bit_ops.h"
#include "opal/util/output.h"
#include "orte/util/proc_info.h"
#include "orte/util/show_help.h"
@ -225,13 +226,9 @@ static int sm_register(void)
static int mca_btl_sm_component_open(void)
{
mca_btl_sm_component.sm_max_btls = 1;
/* make sure the number of fifos is a power of 2 */
{
int i = 1;
while ( i < mca_btl_sm_component.nfifos )
i <<= 1;
mca_btl_sm_component.nfifos = i;
}
mca_btl_sm_component.nfifos = opal_next_poweroftwo_inclusive (mca_btl_sm_component.nfifos);
/* make sure that queue size and lazy free parameter are compatible */
if (mca_btl_sm_component.fifo_lazy_free >= (mca_btl_sm_component.fifo_size >> 1) )

Просмотреть файл

@ -26,10 +26,11 @@
#include <string.h>
#include "opal/util/bit_ops.h"
#include "opal/mca/installdirs/installdirs.h"
#include "orte/util/show_help.h"
#include "opal/util/output.h"
#include "opal/mca/base/mca_base_param.h"
#include "orte/util/show_help.h"
#include "btl_wv.h"
#include "btl_wv_mca.h"
#include "btl_wv_ini.h"
@ -471,16 +472,10 @@ int btl_wv_register_mca_params(void)
&mca_btl_wv_module.super));
/* setup all the qp stuff */
mid_qp_size = mca_btl_wv_module.super.btl_eager_limit / 4;
/* round mid_qp_size to smallest power of two */
for(i = 31; i > 0; i--) {
if(!(mid_qp_size & (1<<i))) {
continue;
}
mid_qp_size = (1<<i);
break;
}
mid_qp_size = opal_next_poweroftwo (mca_btl_wv_module.super.btl_eager_limit / 4) >> 1;
/* mid_qp_size = MAX (mid_qp_size, 1024); ?! */
if(mid_qp_size <= 128) {
mid_qp_size = 1024;
}

Просмотреть файл

@ -24,6 +24,7 @@
#include <errno.h>
#include "mpi.h"
#include "opal/util/bit_ops.h"
#include "ompi/constants.h"
#include "ompi/mca/coll/coll.h"
#include "ompi/mca/coll/base/coll_tags.h"
@ -112,7 +113,7 @@ mca_coll_basic_reduce_scatter_intra(void *sbuf, void *rbuf, int *rcounts,
if ((op->o_flags & OMPI_OP_FLAGS_COMMUTE) &&
(buf_size < COMMUTATIVE_LONG_MSG) && (!zerocounts)) {
int tmp_size = 1, remain = 0, tmp_rank;
int tmp_size, remain = 0, tmp_rank;
/* temporary receive buffer. See coll_basic_reduce.c for details on sizing */
recv_buf_free = (char*) malloc(buf_size);
@ -133,7 +134,7 @@ mca_coll_basic_reduce_scatter_intra(void *sbuf, void *rbuf, int *rcounts,
/* figure out power of two mapping: grow until larger than
comm size, then go back one, to get the largest power of
two less than comm size */
while (tmp_size <= size) tmp_size <<= 1;
tmp_size = opal_next_poweroftwo(size);
tmp_size >>= 1;
remain = size - tmp_size;

Просмотреть файл

@ -20,6 +20,7 @@
#include "ompi_config.h"
#include "mpi.h"
#include "opal/util/bit_ops.h"
#include "ompi/constants.h"
#include "ompi/datatype/ompi_datatype.h"
#include "ompi/communicator/communicator.h"
@ -271,7 +272,7 @@ ompi_coll_tuned_allgather_intra_recursivedoubling(void *sbuf, int scount,
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
for (pow2size = 1; pow2size <= size; pow2size <<=1);
pow2size = opal_next_poweroftwo (size);
pow2size >>=1;
/* Current implementation only handles power-of-two number of processes.

Просмотреть файл

@ -20,6 +20,7 @@
#include "ompi_config.h"
#include "mpi.h"
#include "opal/util/bit_ops.h"
#include "ompi/constants.h"
#include "ompi/datatype/ompi_datatype.h"
#include "ompi/communicator/communicator.h"
@ -170,7 +171,8 @@ ompi_coll_tuned_allreduce_intra_recursivedoubling(void *sbuf, void *rbuf,
tmprecv = (char*) rbuf;
/* Determine nearest power of two less than or equal to size */
for (adjsize = 0x1; adjsize <= size; adjsize <<= 1); adjsize = adjsize >> 1;
adjsize = opal_next_poweroftwo (size);
adjsize >>= 1;
/* Handle non-power-of-two case:
- Even ranks less than 2 * extra_ranks send their data to (rank + 1), and

Просмотреть файл

@ -20,6 +20,7 @@
#include "ompi_config.h"
#include "mpi.h"
#include "opal/util/bit_ops.h"
#include "ompi/constants.h"
#include "ompi/communicator/communicator.h"
#include "ompi/mca/coll/coll.h"
@ -134,7 +135,7 @@ int ompi_coll_tuned_barrier_intra_recursivedoubling(struct ompi_communicator_t *
rank));
/* do nearest power of 2 less than size calc */
for( adjsize = 1; adjsize <= size; adjsize <<= 1 );
adjsize = opal_next_poweroftwo(size);
adjsize >>= 1;
/* if size is not exact power of two, perform an extra step */
@ -354,7 +355,7 @@ int ompi_coll_tuned_barrier_intra_tree(struct ompi_communicator_t *comm,
rank));
/* Find the nearest power of 2 of the communicator size. */
for(depth = 1; depth < size; depth <<= 1 );
depth = opal_next_poweroftwo_inclusive(size);
for (jump=1; jump<depth; jump<<=1) {
partner = rank ^ jump;

Просмотреть файл

@ -20,6 +20,7 @@
#include "ompi_config.h"
#include "mpi.h"
#include "opal/util/bit_ops.h"
#include "ompi/datatype/ompi_datatype.h"
#include "ompi/communicator/communicator.h"
#include "ompi/mca/coll/coll.h"
@ -489,7 +490,7 @@ int ompi_coll_tuned_reduce_scatter_intra_dec_fixed( void *sbuf, void *rbuf,
total_message_size *= dsize;
/* compute the nearest power of 2 */
for (pow2 = 1; pow2 < comm_size; pow2 <<= 1);
pow2 = opal_next_poweroftwo_inclusive (comm_size);
if ((total_message_size <= small_message_size) ||
((total_message_size <= large_message_size) && (pow2 == comm_size)) ||
@ -540,7 +541,7 @@ int ompi_coll_tuned_allgather_intra_dec_fixed(void *sbuf, int scount,
" rank %d com_size %d msg_length %lu",
ompi_comm_rank(comm), communicator_size, (unsigned long)total_dsize));
for (pow2_size = 1; pow2_size < communicator_size; pow2_size <<=1);
pow2_size = opal_next_poweroftwo_inclusive (communicator_size);
/* Decision based on MX 2Gb results from Grig cluster at
The University of Tennesse, Knoxville

Просмотреть файл

@ -21,6 +21,7 @@
#include "ompi_config.h"
#include "mpi.h"
#include "opal/util/bit_ops.h"
#include "ompi/constants.h"
#include "ompi/datatype/ompi_datatype.h"
#include "ompi/communicator/communicator.h"
@ -132,7 +133,7 @@ ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(void *sbuf,
mca_coll_base_module_t *module)
{
int i, rank, size, count, err = OMPI_SUCCESS;
int tmp_size = 1, remain = 0, tmp_rank;
int tmp_size, remain = 0, tmp_rank;
int *disps = NULL;
ptrdiff_t true_lb, true_extent, lb, extent, buf_size;
char *recv_buf = NULL, *recv_buf_free = NULL;
@ -189,7 +190,7 @@ ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(void *sbuf,
/* figure out power of two mapping: grow until larger than
comm size, then go back one, to get the largest power of
two less than comm size */
while (tmp_size <= size) tmp_size <<= 1;
tmp_size = opal_next_poweroftwo (size);
tmp_size >>= 1;
remain = size - tmp_size;

Просмотреть файл

@ -19,6 +19,7 @@
#include "ompi_config.h"
#include "mpi.h"
#include "opal/util/bit_ops.h"
#include "ompi/constants.h"
#include "ompi/communicator/communicator.h"
#include "ompi/mca/coll/base/coll_tags.h"
@ -363,7 +364,7 @@ ompi_coll_tuned_topo_build_bmtree( struct ompi_communicator_t* comm,
if( index < 0 ) index += size;
while( mask <= index ) mask <<= 1;
mask = opal_next_poweroftwo(index);
/* Now I can compute my father rank */
if( root == rank ) {

Просмотреть файл

@ -25,6 +25,7 @@
#include "opal/class/opal_list.h"
#include "opal/class/opal_hash_table.h"
#include "opal/constants.h"
#include "opal/util/bit_ops.h"
/*
* opal_hash_table_t
@ -70,12 +71,7 @@ static void opal_hash_table_destruct(opal_hash_table_t* ht)
int opal_hash_table_init(opal_hash_table_t* ht, size_t table_size)
{
size_t i;
size_t power2 = 1;
size_t tmp = table_size;
while(tmp) {
tmp >>= 1;
power2 <<= 1;
}
size_t power2 = opal_next_poweroftwo (table_size);
ht->ht_mask = power2-1;
ht->ht_table = (opal_list_t *)malloc(power2 * sizeof(opal_list_t));

Просмотреть файл

@ -246,7 +246,7 @@ AC_DEFUN([OPAL_SETUP_CC],[
have_cc_builtin_expect=0
fi
AC_DEFINE_UNQUOTED([OPAL_C_HAVE_BUILTIN_EXPECT], [$have_cc_builtin_expect],
[Whether C compiler supports __builtin_expect])
[Whether C compiler supports __builtin_expect])
# see if the C compiler supports __builtin_prefetch
AC_CACHE_CHECK([if $CC supports __builtin_prefetch],
@ -262,7 +262,23 @@ AC_DEFUN([OPAL_SETUP_CC],[
have_cc_builtin_prefetch=0
fi
AC_DEFINE_UNQUOTED([OPAL_C_HAVE_BUILTIN_PREFETCH], [$have_cc_builtin_prefetch],
[Whether C compiler supports __builtin_prefetch])
[Whether C compiler supports __builtin_prefetch])
# see if the C compiler supports __builtin_clz
AC_CACHE_CHECK([if $CC supports __builtin_clz],
[ompi_cv_cc_supports___builtin_clz],
[AC_TRY_LINK([],
[int value = 0xffff; /* we know we have 16 bits set */
if ((8*sizeof(int)-16) != __builtin_clz(value)) return 0;],
[ompi_cv_cc_supports___builtin_clz="yes"],
[ompi_cv_cc_supports___builtin_clz="no"])])
if test "$ompi_cv_cc_supports___builtin_clz" = "yes" ; then
have_cc_builtin_clz=1
else
have_cc_builtin_clz=0
fi
AC_DEFINE_UNQUOTED([OPAL_C_HAVE_BUILTIN_CLZ], [$have_cc_builtin_clz],
[Whether C compiler supports __builtin_clz])
# Preload the optflags for the case where the user didn't specify
# any. If we're using GNU compilers, use -O3 (since it GNU

Просмотреть файл

@ -5,7 +5,7 @@
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* Copyright (c) 2004-2011 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
@ -19,6 +19,8 @@
#ifndef OPAL_BIT_OPS_H
#define OPAL_BIT_OPS_H
#include "opal/prefetch.h"
/**
* Calculates the highest bit in an integer
*
@ -33,21 +35,35 @@
*
* WARNING: *NO* error checking is performed. This is meant to be a
* fast inline function.
* Using __builtin_clz (count-leading-zeros) uses 3 cycles instead
* of 17 cycles (on average value, with start=32)
* compared to the loop-version (on Intel Nehalem -- with icc-12.1.0 -O2).
*/
static inline int opal_hibit(int value, int start)
{
unsigned int mask;
unsigned int mask;
--start;
mask = 1 << start;
#if OPAL_C_HAVE_BUILTIN_CLZ
/* Only look at the part that the caller wanted looking at */
mask = value & ((1 << start) - 1);
for (; start >= 0; --start, mask >>= 1) {
if (value & mask) {
break;
if (OPAL_UNLIKELY (0 == mask)) {
return -1;
}
}
start = (8*sizeof(int)-1) - __builtin_clz(mask);
#else
--start;
mask = 1 << start;
for (; start >= 0; --start, mask >>= 1) {
if (value & mask) {
break;
}
}
#endif
return start;
return start;
}
@ -63,16 +79,84 @@ static inline int opal_hibit(int value, int start)
*
* WARNING: *NO* error checking is performed. This is meant to be a
* fast inline function.
* Using __builtin_clz (count-leading-zeros) uses 3 cycles instead of 50 cycles
* compared to the loop-version (on Intel Nehalem -- with icc-12.1.0 -O2).
*/
static inline int opal_cube_dim(int value)
{
int dim, size;
for (dim = 0, size = 1; size < value; ++dim, size <<= 1) {
continue;
#if OPAL_C_HAVE_BUILTIN_CLZ
if (OPAL_UNLIKELY (1 >= value)) {
return 0;
}
size = 8 * sizeof(int);
dim = size - __builtin_clz(value-1);
#else
for (dim = 0, size = 1; size < value; ++dim, size <<= 1) /* empty */;
#endif
return dim;
}
/**
 * @brief Returns the smallest power of two strictly greater than the
 *        given value.
 *
 * @param value The integer value to round up
 *
 * @returns The next power of two above value (e.g. 4 for both 2 and 3,
 *          8 for 4); 1 if value is zero or negative.
 *
 * WARNING: *NO* range checking is performed. This is meant to be a
 * fast inline function.  For value >= 2^30 the mathematical result
 * does not fit into an int and the returned value is not meaningful.
 * Using __builtin_clz (count-leading-zeros) uses 4 cycles instead of 77
 * compared to the loop-version (on Intel Nehalem -- with icc-12.1.0 -O2).
 */
static inline int opal_next_poweroftwo(int value)
{
    /* unsigned, so that shifting a bit into position 31 is well defined */
    unsigned int power2;

#if OPAL_C_HAVE_BUILTIN_CLZ
    /* __builtin_clz(0) is undefined, and a negative value has zero
     * leading zeroes, which would shift by the full width of int.
     * Return 1 for both, exactly as the loop version below does. */
    if (OPAL_UNLIKELY (0 >= value)) {
        return 1;
    }
    power2 = 1U << (8 * sizeof (int) - __builtin_clz(value));
#else
    for (power2 = 1U; value > 0; value >>= 1, power2 <<= 1) /* empty */;
#endif

    return (int) power2;
}
/**
 * @brief Rounds the given value up to a power of two; a value that
 *        already is a power of two is returned unchanged.
 *
 * @param value The integer value to round up
 *
 * @returns The smallest power of two that is greater than or equal to
 *          value; 1 for any value less than or equal to 1.
 *
 * WARNING: *NO* error checking is performed. This is meant to be a
 * fast inline function.
 * Using __builtin_clz (count-leading-zeros) uses 4 cycles instead of 56
 * compared to the loop-version (on Intel Nehalem -- with icc-12.1.0 -O2).
 */
static inline int opal_next_poweroftwo_inclusive(int value)
{
#if OPAL_C_HAVE_BUILTIN_CLZ
    /* Everything up to and including 1 rounds to 1; this also keeps
     * __builtin_clz away from a zero argument. */
    if (OPAL_UNLIKELY (1 >= value)) {
        return 1;
    }

    /* The bit-width of (value - 1) is the exponent of the result. */
    return 1 << (8 * sizeof (int) - __builtin_clz(value - 1));
#else
    int result = 1;

    while (result < value) {
        result <<= 1;
    }

    return result;
#endif
}
#endif /* OPAL_BIT_OPS_H */

Просмотреть файл

@ -5,7 +5,7 @@
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# Copyright (c) 2004-2011 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
@ -31,8 +31,9 @@ AM_CPPFLAGS = -I$(top_srcdir)/test/support
check_PROGRAMS = \
opal_sos \
opal_path_nfs
opal_bit_ops \
opal_path_nfs \
opal_sos
TESTS = \
$(check_PROGRAMS)
@ -66,6 +67,13 @@ TESTS = \
# $(top_builddir)/test/support/libsupport.a
#opal_basename_DEPENDENCIES = $(opal_basename_LDADD)
opal_bit_ops_SOURCES = opal_bit_ops.c
opal_bit_ops_LDADD = \
$(top_builddir)/opal/libopen-pal.la \
$(top_builddir)/test/support/libsupport.a
opal_bit_ops_DEPENDENCIES = $(opal_path_nfs_LDADD)
opal_path_nfs_SOURCES = opal_path_nfs.c
opal_path_nfs_LDADD = \
$(top_builddir)/opal/libopen-pal.la \

217
test/util/opal_bit_ops.c Обычный файл
Просмотреть файл

@ -0,0 +1,217 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2011 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "support.h"
#include "opal/util/bit_ops.h"
#include "opal/util/output.h"
/*
#define DEBUG
*/
static int test_hibit(int value, int start);
static int test_cube_dim(int value);
static int test_next_poweroftwo(int value);
static int test_next_poweroftwo_inclusive(int value);
/*
 * Test driver for the bit operations in opal/util/bit_ops.h.
 *
 * Every command line argument is parsed as a decimal integer and run
 * through test_next_poweroftwo(); afterwards a fixed table of values is
 * run through all four comparison tests.  The return value is whatever
 * test_finalize() reports.
 */
int main(int argc, char* argv[])
{
    int i;
    int vals[] = {0, 1, 2, 3, 4, 5, 127, 128, 129, (1 << 29) -1, (1 << 29), (1 << 29) +1, (1 << 30) -1, (1 << 30) /* And NOT (1 << 30) +1 */};
    /* Largest value the table above exercises; used as the upper
     * limit for user-supplied values as well. */
    const long max_value = 1L << 30;

    test_init("opal_bit_ops()");

#ifdef DEBUG
    printf ("Test usage: ./opal_bit_ops [VALUES]\n");
#endif

    for (i = 1; i < argc; i++) {
        char *end = NULL;
        long parsed = strtol (argv[i], &end, 10);

        /* Reject anything that is not a complete decimal number, and
         * anything outside [0, 2^30]: the reference next_poweroftwo()
         * loop in this file is only meaningful in that range. */
        if (end == argv[i] || '\0' != *end ||
            parsed < 0 || parsed > max_value) {
            printf ("Skipping %d. argument \"%s\": expected an integer in [0, %ld]\n",
                    i, argv[i], max_value);
            continue;
        }

        printf ("Testing %d. argument test_next_poweroftwo(%d): %s\n",
                i, (int) parsed,
                test_next_poweroftwo((int) parsed) ? "correct" : "wrong");
    }

    for (i = 0; i < (int)(sizeof(vals)/sizeof(vals[0])); i++) {
        test_hibit (vals[i], 8 * sizeof(int) -2);
        test_hibit (vals[i], 3);
        test_cube_dim (vals[i]);
        test_next_poweroftwo (vals[i]);
        test_next_poweroftwo_inclusive (vals[i]);
    }

    /* All done */
    return test_finalize();
}
/* REFERENCE FUNCTION
 *
 * Plain loop version of opal_hibit(): scans downwards from bit
 * (start - 1) and returns the index of the first bit that is set in
 * value, or -1 if none of the scanned bits is set.
 */
static int hibit(int value, int start)
{
    int bit = start - 1;
    unsigned int probe = 1 << bit;

    while (bit >= 0) {
        if (value & probe) {
            return bit;
        }
        --bit;
        probe >>= 1;
    }

    /* ran off the low end without finding a set bit: bit is -1 here */
    return bit;
}
/*
 * Compares opal_hibit() with the reference hibit() for one
 * (value, start) pair and reports the outcome through
 * test_success() / test_failure().
 *
 * Returns 1 if both agree, 0 otherwise.
 */
static int test_hibit(int value, int start)
{
    int out;
    int bit = hibit (value, start);

#ifdef DEBUG
    printf ("test_hibit(): value:%d expect:%d\n",
            value, bit);
#endif

    if (bit == (out = opal_hibit (value, start))) {
        test_success();
        return 1;
    } else {
        /* Fixed-size buffer: the text plus four decimal ints stays well
         * below 128 bytes, and snprintf() always terminates the string,
         * so no heap allocation (and no allocation failure) is involved. */
        char msg[128];

        snprintf(msg, sizeof(msg),
                 "Mismatch for hibit (w/ start:%d): value:%d, expected:%d got:%d\n",
                 start, value, bit, out);
        test_failure(msg);
    }

    return 0;
}
/* REFERENCE FUNCTION
 *
 * Plain loop version of opal_cube_dim(): counts how many times 1 has
 * to be doubled until it is no longer smaller than value.
 */
static int cube_dim(int value)
{
    int dim = 0;
    int size = 1;

    while (size < value) {
        size <<= 1;
        ++dim;
    }

    return dim;
}
/*
 * Compares opal_cube_dim() with the reference cube_dim() for one value
 * and reports the outcome through test_success() / test_failure().
 *
 * Returns 1 if both agree, 0 otherwise.
 */
static int test_cube_dim(int value)
{
    int out;
    int dim = cube_dim (value);

#ifdef DEBUG
    printf ("test_cube_dim(): value:%d expect:%d\n",
            value, dim);
#endif

    if (dim == (out = opal_cube_dim (value))) {
        test_success();
        return 1;
    } else {
        /* Fixed-size buffer: the text plus three decimal ints stays well
         * below 128 bytes, and snprintf() always terminates the string,
         * so no heap allocation (and no allocation failure) is involved. */
        char msg[128];

        snprintf(msg, sizeof(msg),
                 "Mismatch for cube_dim: value:%d, expected:%d got:%d\n",
                 value, dim, out);
        test_failure(msg);
    }

    return 0;
}
/* REFERENCE FUNCTION
 *
 * Plain loop version of opal_next_poweroftwo(): returns the smallest
 * power of two strictly greater than value, and 1 for value <= 0.
 */
static int next_poweroftwo(int value)
{
    int power2 = 1;

    /* Test for "> 0" rather than "!= 0": right-shifting a negative
     * value may never reach zero, which would make this loop endless. */
    while (value > 0) {
        value >>= 1;
        power2 <<= 1;
    }

    return power2;
}
/*
 * Compares opal_next_poweroftwo() with the reference next_poweroftwo()
 * for one value and reports the outcome through test_success() /
 * test_failure().
 *
 * Returns 1 if both agree, 0 otherwise.
 */
static int test_next_poweroftwo(int value)
{
    int out;
    int power2 = next_poweroftwo (value);

#ifdef DEBUG
    printf ("test_next_poweroftwo(): value:%d expect:%d\n",
            value, power2);
#endif

    if (power2 == (out = opal_next_poweroftwo (value))) {
        test_success();
        return 1;
    } else {
        /* Fixed-size buffer: the text plus three decimal ints stays well
         * below 128 bytes, and snprintf() always terminates the string,
         * so no heap allocation (and no allocation failure) is involved. */
        char msg[128];

        snprintf(msg, sizeof(msg),
                 "Mismatch for power-of-two: value:%d, expected:%d got:%d\n",
                 value, power2, out);
        test_failure(msg);
    }

    return 0;
}
/* REFERENCE FUNCTION
 *
 * Plain loop version of opal_next_poweroftwo_inclusive(): doubles 1
 * until it is no longer smaller than value and returns the result.
 */
static int next_poweroftwo_inclusive(int value)
{
    int candidate;

    for (candidate = 1; candidate < value; candidate <<= 1) {
        /* keep doubling */
    }

    return candidate;
}
/*
 * Compares opal_next_poweroftwo_inclusive() with the reference
 * next_poweroftwo_inclusive() for one value and reports the outcome
 * through test_success() / test_failure().
 *
 * Returns 1 if both agree, 0 otherwise.
 */
static int test_next_poweroftwo_inclusive(int value)
{
    int out;
    int power2 = next_poweroftwo_inclusive (value);

#ifdef DEBUG
    /* name this function, not test_next_poweroftwo(), in the trace */
    printf ("test_next_poweroftwo_inclusive(): value:%d expect:%d\n",
            value, power2);
#endif

    if (power2 == (out = opal_next_poweroftwo_inclusive (value))) {
        test_success();
        return 1;
    } else {
        /* Fixed-size buffer: the text plus three decimal ints stays well
         * below 128 bytes, and snprintf() always terminates the string,
         * so no heap allocation (and no allocation failure) is involved. */
        char msg[128];

        snprintf(msg, sizeof(msg),
                 "Mismatch for power-of-two-inclusive: value:%d, expected:%d got:%d\n",
                 value, power2, out);
        test_failure(msg);
    }

    return 0;
}