openmpi/test/util/opal_bit_ops.c

/*
 * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
 *                         University Research and Technology
 *                         Corporation.  All rights reserved.
 * Copyright (c) 2004-2005 The University of Tennessee and The University
 *                         of Tennessee Research Foundation.  All rights
 *                         reserved.
 * Copyright (c) 2004-2011 High Performance Computing Center Stuttgart, 
 *                         University of Stuttgart.  All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 *                         All rights reserved.
 * Copyright (c) 2013 Cisco Systems, Inc.  All rights reserved.
 * $COPYRIGHT$
 * 
 * Additional copyrights may follow
 * 
 * $HEADER$
 */

#include "opal_config.h"

#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "support.h"
#include "opal/util/bit_ops.h"
#include "opal/util/output.h"

/*
#define DEBUG
*/

static int test_hibit(int value, int start);
static int test_cube_dim(int value);
static int test_next_poweroftwo(int value);
static int test_next_poweroftwo_inclusive(int value);

/*
 * Test driver.
 *
 * Each command-line argument is parsed as a non-negative decimal int and
 * passed to test_next_poweroftwo(); arguments that do not parse cleanly or
 * do not fit are reported on stderr and skipped.  Afterwards every entry of
 * the built-in value table is run through all four comparison tests.
 *
 * Returns whatever test_finalize() returns.
 */
int main(int argc, char* argv[])
{
    int i;
    int vals[] = {0, 1, 2, 3, 4, 5, 127, 128, 129, (1 << 29) -1, (1 << 29), (1 << 29) +1, (1 << 30) -1, (1 << 30) /* And NOT (1 << 30) +1 */};
    test_init("opal_bit_ops()");

#ifdef DEBUG
    printf ("Test usage: ./opal_bit_ops [VALUES]\n");
#endif

    for (i = 1; i < argc; i++) {
        char *end = NULL;
        long parsed;

        /* strtol() (unlike atoi()) lets us detect garbage and overflow */
        errno = 0;
        parsed = strtol (argv[i], &end, 10);
        if (end == argv[i] || '\0' != *end || 0 != errno ||
            parsed < 0 || parsed > INT_MAX) {
            fprintf (stderr, "Skipping %d. argument \"%s\": not a non-negative int\n",
                     i, argv[i]);
            continue;
        }

        printf ("Testing %d. argument test_next_poweroftwo(%d): %s\n",
                i, (int) parsed,
                test_next_poweroftwo((int) parsed) ? "correct" : "wrong");
    }

    for (i = 0; i < (int)(sizeof(vals)/sizeof(vals[0])); i++) {
        /* Once scanning (almost) the whole word, once only the low 3 bits */
        test_hibit (vals[i], (int)(8 * sizeof(int)) - 2);
        test_hibit (vals[i], 3);
        test_cube_dim (vals[i]);
        test_next_poweroftwo (vals[i]);
        test_next_poweroftwo_inclusive (vals[i]);
    }

    /* All done */
    return test_finalize();
}


/*
 * REFERENCE FUNCTION
 *
 * Loop-based reference used to check opal_hibit().
 *
 * Scans bit positions start-1 down to 0 of value and returns the index of
 * the highest set bit found in that range, or -1 if none of them is set.
 * A start of zero or less selects no bits and yields -1; a start larger
 * than the width of unsigned int is clamped to that width.  Both guards keep
 * the shift below well-defined.
 */
static int hibit(int value, int start)
{
    const int width = (int)(8 * sizeof(unsigned int));
    unsigned int mask;

    if (start <= 0) {
        return -1;
    }
    if (start > width) {
        start = width;
    }

    --start;
    /* Unsigned shift: reaching the top bit must not overflow a signed int */
    mask = 1u << start;

    for (; start >= 0; --start, mask >>= 1) {
        if ((unsigned int) value & mask) {
            break;
        }
    }

    return start;
}

/*
 * Compare opal_hibit(value, start) against the reference hibit().
 *
 * Records the outcome through test_success() / test_failure() and returns
 * 1 when both agree, 0 otherwise.
 */
static int test_hibit(int value, int start)
{
    int out;
    char *msg = NULL;
    int bit = hibit (value, start);

#ifdef DEBUG
    printf ("test_hibit(): value:%d expect:%d\n",
            value, bit);
#endif

    out = opal_hibit (value, start);
    if (bit == out) {
        test_success();
        return 1;
    }

    /* On failure asprintf() leaves msg unusable, so fall back to a literal */
    if (asprintf(&msg, "Mismatch for hibit (w/ start:%d): value:%d, expected:%d got:%d\n",
                 start, value, bit, out) < 0) {
        test_failure("Mismatch for hibit (could not allocate detailed message)\n");
        return 0;
    }

    test_failure(msg);
    free(msg);
    return 0;
}


/*
 * REFERENCE FUNCTION
 *
 * Loop-based reference used to check opal_cube_dim().
 *
 * Returns the smallest dim such that (1 << dim) >= value; values of 1 or
 * less (including negative ones) yield 0.  The running size is kept in an
 * unsigned int so that inputs above 2^30 terminate without signed overflow.
 */
static int cube_dim(int value)
{
    int dim;
    unsigned int size;

    if (value <= 1) {
        return 0;
    }

    for (dim = 0, size = 1u; size < (unsigned int) value; ++dim, size <<= 1) {
        /* empty */
    }

    return dim;
}

/*
 * Compare opal_cube_dim(value) against the reference cube_dim().
 *
 * Records the outcome through test_success() / test_failure() and returns
 * 1 when both agree, 0 otherwise.
 */
static int test_cube_dim(int value)
{
    int out;
    char *msg = NULL;
    int dim = cube_dim (value);

#ifdef DEBUG
    printf ("test_cube_dim(): value:%d expect:%d\n",
            value, dim);
#endif

    out = opal_cube_dim (value);
    if (dim == out) {
        test_success();
        return 1;
    }

    /* On failure asprintf() leaves msg unusable, so fall back to a literal */
    if (asprintf(&msg, "Mismatch for cube_dim: value:%d, expected:%d got:%d\n",
                 value, dim, out) < 0) {
        test_failure("Mismatch for cube_dim (could not allocate detailed message)\n");
        return 0;
    }

    test_failure(msg);
    free(msg);
    return 0;
}


/*
 * REFERENCE FUNCTION
 *
 * Loop-based reference used to check opal_next_poweroftwo().
 *
 * Returns the smallest power of two strictly greater than value (so 0 -> 1,
 * 1 -> 2, 4 -> 8).  The loop runs on unsigned copies so that it always
 * terminates and never overflows a signed int:
 *  - for value >= 2^30 the mathematical result 2^31 does not fit in an int;
 *    the unsigned result is converted back (implementation-defined value);
 *  - for negative value every bit position is consumed and 0 is returned.
 */
static int next_poweroftwo(int value)
{
    unsigned int remaining = (unsigned int) value;
    unsigned int power2 = 1u;

    /* One doubling per significant bit of the input */
    for (; 0 != remaining; remaining >>= 1, power2 <<= 1) {
        /* empty */
    }

    return (int) power2;
}


/*
 * Compare opal_next_poweroftwo(value) against the reference
 * next_poweroftwo().
 *
 * Records the outcome through test_success() / test_failure() and returns
 * 1 when both agree, 0 otherwise.
 */
static int test_next_poweroftwo(int value)
{
    int out;
    char *msg = NULL;
    int power2 = next_poweroftwo (value);

#ifdef DEBUG
    printf ("test_next_poweroftwo(): value:%d expect:%d\n",
            value, power2);
#endif

    out = opal_next_poweroftwo (value);
    if (power2 == out) {
        test_success();
        return 1;
    }

    /* On failure asprintf() leaves msg unusable, so fall back to a literal */
    if (asprintf(&msg, "Mismatch for power-of-two: value:%d, expected:%d got:%d\n",
                 value, power2, out) < 0) {
        test_failure("Mismatch for power-of-two (could not allocate detailed message)\n");
        return 0;
    }

    test_failure(msg);
    free(msg);
    return 0;
}


/*
 * REFERENCE FUNCTION
 *
 * Loop-based reference used to check opal_next_poweroftwo_inclusive().
 *
 * Returns the smallest power of two greater than or equal to value (so
 * 4 -> 4, 5 -> 8); values of 1 or less (including negative ones) yield 1.
 * The doubling is done on an unsigned int so that inputs above 2^30
 * terminate without signed overflow; in that case the mathematical result
 * 2^31 does not fit in an int and the converted value is
 * implementation-defined.
 */
static int next_poweroftwo_inclusive(int value)
{
    unsigned int power2 = 1u;

    if (value <= 1) {
        return 1;
    }

    while (power2 < (unsigned int) value) {
        power2 <<= 1;
    }

    return (int) power2;
}

/*
 * Compare opal_next_poweroftwo_inclusive(value) against the reference
 * next_poweroftwo_inclusive().
 *
 * Records the outcome through test_success() / test_failure() and returns
 * 1 when both agree, 0 otherwise.
 */
static int test_next_poweroftwo_inclusive(int value)
{
    int out;
    char *msg = NULL;
    int power2 = next_poweroftwo_inclusive (value);

#ifdef DEBUG
    /* Label names this function so traces can be told apart from the
       non-inclusive test */
    printf ("test_next_poweroftwo_inclusive(): value:%d expect:%d\n",
            value, power2);
#endif

    out = opal_next_poweroftwo_inclusive (value);
    if (power2 == out) {
        test_success();
        return 1;
    }

    /* On failure asprintf() leaves msg unusable, so fall back to a literal */
    if (asprintf(&msg, "Mismatch for power-of-two-inclusive: value:%d, expected:%d got:%d\n",
                 value, power2, out) < 0) {
        test_failure("Mismatch for power-of-two-inclusive (could not allocate detailed message)\n");
        return 0;
    }

    test_failure(msg);
    free(msg);
    return 0;
}
- Check, whether the compiler supports __builtin_clz (count leading zeroes); if so, use it for bit-operations like opal_cube_dim and opal_hibit. Implement two versions of power-of-two. In case of opal_next_poweroftwo, this reduces the average execution time from 83 cycles to 4 cycles (Intel Nehalem, icc, -O2, inlining, measured rdtsc, with loop over 2^27 values). Numbers for other functions are similar (but of course heavily depend on the usage, e.g. opal_hibit() with a start of 4 does not save much). The bsr instruction on AMD Opteron is also not as fast. - Replace various places where the next power-of-two is computed. Tested on Intel Nehalem Cluster with openib, compilers GNU-4.6.1 and Intel-12.0.4 using mpi_testsuite -t "Collective" with 128 processes. This commit was SVN r25270. 2011-10-12 02:49:01 +04:00			`/*`
			`* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana`
			`* University Research and Technology`
			`* Corporation. All rights reserved.`
			`* Copyright (c) 2004-2005 The University of Tennessee and The University`
			`* of Tennessee Research Foundation. All rights`
			`* reserved.`
			`* Copyright (c) 2004-2011 High Performance Computing Center Stuttgart,`
			`* University of Stuttgart. All rights reserved.`
			`* Copyright (c) 2004-2005 The Regents of the University of California.`
			`* All rights reserved.`
This is an opal test; it should include opal_config.h, not ompi_config.h. This matters if you autogen.pl --no-ompi. This commit was SVN r29855. 2013-12-11 07:31:25 +04:00			`* Copyright (c) 2013 Cisco Systems, Inc. All rights reserved.`
- Check, whether the compiler supports __builtin_clz (count leading zeroes); if so, use it for bit-operations like opal_cube_dim and opal_hibit. Implement two versions of power-of-two. In case of opal_next_poweroftwo, this reduces the average execution time from 83 cycles to 4 cycles (Intel Nehalem, icc, -O2, inlining, measured rdtsc, with loop over 2^27 values). Numbers for other functions are similar (but of course heavily depend on the usage, e.g. opal_hibit() with a start of 4 does not save much). The bsr instruction on AMD Opteron is also not as fast. - Replace various places where the next power-of-two is computed. Tested on Intel Nehalem Cluster with openib, compilers GNU-4.6.1 and Intel-12.0.4 using mpi_testsuite -t "Collective" with 128 processes. This commit was SVN r25270. 2011-10-12 02:49:01 +04:00			`* $COPYRIGHT$`
			`*`
			`* Additional copyrights may follow`
			`*`
			`* $HEADER$`
			`*/`

This is an opal test; it should include opal_config.h, not ompi_config.h. This matters if you autogen.pl --no-ompi. This commit was SVN r29855. 2013-12-11 07:31:25 +04:00			`#include "opal_config.h"`
- Check, whether the compiler supports __builtin_clz (count leading zeroes); if so, use it for bit-operations like opal_cube_dim and opal_hibit. Implement two versions of power-of-two. In case of opal_next_poweroftwo, this reduces the average execution time from 83 cycles to 4 cycles (Intel Nehalem, icc, -O2, inlining, measured rdtsc, with loop over 2^27 values). Numbers for other functions are similar (but of course heavily depend on the usage, e.g. opal_hibit() with a start of 4 does not save much). The bsr instruction on AMD Opteron is also not as fast. - Replace various places where the next power-of-two is computed. Tested on Intel Nehalem Cluster with openib, compilers GNU-4.6.1 and Intel-12.0.4 using mpi_testsuite -t "Collective" with 128 processes. This commit was SVN r25270. 2011-10-12 02:49:01 +04:00
			`#include <stdio.h>`
			`#include <string.h>`

			`#include "support.h"`
			`#include "opal/util/bit_ops.h"`
			`#include "opal/util/output.h"`

			`/*`
			`#define DEBUG`
			`*/`

			`static int test_hibit(int value, int start);`
			`static int test_cube_dim(int value);`
			`static int test_next_poweroftwo(int value);`
			`static int test_next_poweroftwo_inclusive(int value);`

			`int main(int argc, char* argv[])`
			`{`
			`int i;`
			`int vals[] = {0, 1, 2, 3, 4, 5, 127, 128, 129, (1 << 29) -1, (1 << 29), (1 << 29) +1, (1 << 30) -1, (1 << 30) /* And NOT (1 << 30) +1 */};`
			`test_init("opal_bit_ops()");`

			`#ifdef DEBUG`
			`printf ("Test usage: ./opal_bit_ops [VALUES]\n");`
			`#endif`

			`if (1 < argc) {`
			`for (i = 1; i < argc; i++) {`
			`int value;`
			`value = atoi (argv[i]);`
			`printf ("Testing %d. argument test_next_poweroftwo(%d): %s\n",`
			`i, value, test_next_poweroftwo(value) ? "correct" : "wrong");`
			`}`
			`}`

			`for (i = 0; i < (int)(sizeof(vals)/sizeof(vals[0])); i++) {`
			`test_hibit (vals[i], 8 * sizeof(int) -2);`
			`test_hibit (vals[i], 3);`
			`test_cube_dim (vals[i]);`
			`test_next_poweroftwo (vals[i]);`
			`test_next_poweroftwo_inclusive (vals[i]);`
			`}`

			`/* All done */`
			`return test_finalize();`
			`}`


			`/* REFERENCE FUNCTION */`
			`static int hibit(int value, int start)`
			`{`
			`unsigned int mask;`

			`--start;`
			`mask = 1 << start;`

			`for (; start >= 0; --start, mask >>= 1) {`
			`if (value & mask) {`
			`break;`
			`}`
			`}`

			`return start;`
			`}`

			`static int test_hibit(int value, int start)`
			`{`
			`int out;`
			`int bit = hibit (value, start);`

			`#ifdef DEBUG`
			`printf ("test_hibit(): value:%d expect:%d\n",`
			`value, bit);`
			`#endif`

			`if (bit == (out = opal_hibit (value, start))) {`
			`test_success();`
			`return 1;`
			`} else {`
			`char * msg;`
			`asprintf(&msg, "Mismatch for hibit (w/ start:%d): value:%d, expected:%d got:%d\n",`
			`start, value, bit, out);`
			`test_failure(msg);`
			`free(msg);`
			`}`
			`return 0;`
			`}`


			`/* REFERENCE FUNCTION */`
			`static int cube_dim(int value)`
			`{`
			`int dim, size;`

			`for (dim = 0, size = 1; size < value; ++dim, size <<= 1);`

			`return dim;`
			`}`

			`static int test_cube_dim(int value)`
			`{`
			`int out;`
			`int dim = cube_dim (value);`

			`#ifdef DEBUG`
			`printf ("test_cube_dim(): value:%d expect:%d\n",`
			`value, dim);`
			`#endif`

			`if (dim == (out = opal_cube_dim (value))) {`
			`test_success();`
			`return 1;`
			`} else {`
			`char * msg;`
			`asprintf(&msg, "Mismatch for cube_dim: value:%d, expected:%d got:%d\n",`
			`value, dim, out);`
			`test_failure(msg);`
			`free(msg);`
			`}`
			`return 0;`
			`}`


			`/* REFERENCE FUNCTION */`
			`static int next_poweroftwo(int value)`
			`{`
			`int power2;`

			`for (power2 = 1; value; value >>=1, power2 <<=1) /* empty */;`

			`return power2;`
			`}`


			`static int test_next_poweroftwo(int value)`
			`{`
			`int out;`
			`int power2 = next_poweroftwo (value);`

			`#ifdef DEBUG`
			`printf ("test_next_poweroftwo(): value:%d expect:%d\n",`
			`value, power2);`
			`#endif`

			`if (power2 == (out = opal_next_poweroftwo (value))) {`
			`test_success();`
			`return 1;`
			`} else {`
			`char * msg;`
			`asprintf(&msg, "Mismatch for power-of-two: value:%d, expected:%d got:%d\n",`
			`value, power2, out);`
			`test_failure(msg);`
			`free(msg);`
			`}`
			`return 0;`
			`}`



			`/* REFERENCE FUNCTION */`
			`static int next_poweroftwo_inclusive(int value)`
			`{`
			`int power2 = 1;`

			`while ( power2 < value )`
			`power2 <<= 1;`

			`return power2;`
			`}`

			`static int test_next_poweroftwo_inclusive(int value)`
			`{`
			`int out;`
			`int power2 = next_poweroftwo_inclusive (value);`

			`#ifdef DEBUG`
			`printf ("test_next_poweroftwo(): value:%d expect:%d\n",`
			`value, power2);`
			`#endif`

			`if (power2 == (out = opal_next_poweroftwo_inclusive (value))) {`
			`test_success();`
			`return 1;`
			`} else {`
			`char * msg;`
			`asprintf(&msg, "Mismatch for power-of-two-inclusive: value:%d, expected:%d got:%d\n",`
			`value, power2, out);`
			`test_failure(msg);`
			`free(msg);`
			`}`

			`return 0;`
			`}`