2004-05-25 01:45:00 +04:00
|
|
|
#
|
2005-11-05 22:57:48 +03:00
|
|
|
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
|
|
|
# University Research and Technology
|
|
|
|
# Corporation. All rights reserved.
|
|
|
|
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
|
|
|
# of Tennessee Research Foundation. All rights
|
|
|
|
# reserved.
|
- Check whether the compiler supports __builtin_clz (count leading
zeroes);
if so, use it for bit-operations like opal_cube_dim and opal_hibit.
Implement two versions of power-of-two.
In case of opal_next_poweroftwo, this reduces the average execution
time from 83 cycles to 4 cycles (Intel Nehalem, icc, -O2, inlining,
measured rdtsc, with loop over 2^27 values).
Numbers for other functions are similar (but of course heavily depend
on the usage, e.g. opal_hibit() with a start of 4 does not save
much). The bsr instruction on AMD Opteron is also not as fast.
- Replace various places where the next power-of-two is computed.
Tested on Intel Nehalem Cluster with openib, compilers GNU-4.6.1 and
Intel-12.0.4 using mpi_testsuite -t "Collective" with 128 processes.
This commit was SVN r25270.
2011-10-12 02:49:01 +04:00
|
|
|
# Copyright (c) 2004-2011 High Performance Computing Center Stuttgart,
|
2004-11-28 23:09:25 +03:00
|
|
|
# University of Stuttgart. All rights reserved.
|
2005-03-24 15:43:37 +03:00
|
|
|
# Copyright (c) 2004-2005 The Regents of the University of California.
|
|
|
|
# All rights reserved.
|
2012-04-06 18:23:13 +04:00
|
|
|
# Copyright (c) 2012 Los Alamos National Security, LLC. All rights
|
|
|
|
# reserved.
|
2004-11-22 04:38:40 +03:00
|
|
|
# $COPYRIGHT$
|
|
|
|
#
|
|
|
|
# Additional copyrights may follow
|
|
|
|
#
|
2004-05-25 01:45:00 +04:00
|
|
|
# $HEADER$
|
|
|
|
#
|
|
|
|
|
2005-09-24 05:17:32 +04:00
|
|
|
# Add the test/support directory of the source tree to the preprocessor
# include path for everything compiled in this directory.
AM_CPPFLAGS = -I$(top_srcdir)/test/support
|
2004-05-25 01:45:00 +04:00
|
|
|
|
2010-02-12 01:07:07 +03:00
|
|
|
#check_PROGRAMS = \
|
|
|
|
# ompi_numtostr \
|
|
|
|
# opal_error \
|
|
|
|
# opal_if \
|
|
|
|
# opal_os_path \
|
|
|
|
# opal_timer \
|
|
|
|
# opal_os_create_dirpath \
|
|
|
|
# opal_argv \
|
|
|
|
# opal_basename \
|
|
|
|
# opal_path_nfs
|
|
|
|
|
|
|
|
|
2005-03-22 07:25:01 +03:00
|
|
|
# Test programs that are currently enabled. Automake builds check_PROGRAMS
# only as part of "make check", not during a normal "make".
check_PROGRAMS = \
	opal_bit_ops \
	opal_path_nfs
|
2004-05-25 01:45:00 +04:00
|
|
|
|
2005-03-22 07:25:01 +03:00
|
|
|
# Run every program listed in check_PROGRAMS as a test under "make check".
TESTS = $(check_PROGRAMS)
|
|
|
|
|
2010-02-12 01:07:07 +03:00
|
|
|
#ompi_numtostr_SOURCES = ompi_numtostr.c
|
|
|
|
#ompi_numtostr_LDADD = \
|
|
|
|
# $(top_builddir)/opal/libopen-pal.la \
|
|
|
|
# $(top_builddir)/test/support/libsupport.a
|
|
|
|
#ompi_numtostr_DEPENDENCIES = $(ompi_numtostr_LDADD)
|
|
|
|
|
|
|
|
#opal_error_SOURCES = opal_error.c
|
|
|
|
#opal_error_LDADD = \
|
|
|
|
# $(top_builddir)/orte/libopen-rte.la \
|
|
|
|
#opal_error_DEPENDENCIES = $(opal_error_LDADD)
|
|
|
|
|
|
|
|
#opal_if_SOURCES = opal_if.c
|
|
|
|
#opal_if_LDADD = \
|
|
|
|
# $(top_builddir)/opal/libopen-pal.la \
|
|
|
|
# $(top_builddir)/test/support/libsupport.a
|
|
|
|
#opal_if_DEPENDENCIES = $(opal_if_LDADD)
|
|
|
|
|
|
|
|
#opal_argv_SOURCES = opal_argv.c
|
|
|
|
#opal_argv_LDADD = \
|
|
|
|
# $(top_builddir)/opal/libopen-pal.la \
|
|
|
|
# $(top_builddir)/test/support/libsupport.a
|
|
|
|
#opal_argv_DEPENDENCIES = $(opal_argv_LDADD)
|
|
|
|
|
|
|
|
#opal_basename_SOURCES = opal_basename.c
|
|
|
|
#opal_basename_LDADD = \
|
|
|
|
# $(top_builddir)/opal/libopen-pal.la \
|
|
|
|
# $(top_builddir)/test/support/libsupport.a
|
|
|
|
#opal_basename_DEPENDENCIES = $(opal_basename_LDADD)
|
2005-04-14 18:04:41 +04:00
|
|
|
|
- Check whether the compiler supports __builtin_clz (count leading
zeroes);
if so, use it for bit-operations like opal_cube_dim and opal_hibit.
Implement two versions of power-of-two.
In case of opal_next_poweroftwo, this reduces the average execution
time from 83 cycles to 4 cycles (Intel Nehalem, icc, -O2, inlining,
measured rdtsc, with loop over 2^27 values).
Numbers for other functions are similar (but of course heavily depend
on the usage, e.g. opal_hibit() with a start of 4 does not save
much). The bsr instruction on AMD Opteron is also not as fast.
- Replace various places where the next power-of-two is computed.
Tested on Intel Nehalem Cluster with openib, compilers GNU-4.6.1 and
Intel-12.0.4 using mpi_testsuite -t "Collective" with 128 processes.
This commit was SVN r25270.
2011-10-12 02:49:01 +04:00
|
|
|
# opal_bit_ops: built from opal_bit_ops.c and linked against libopen-pal
# and the test support library.
opal_bit_ops_SOURCES = opal_bit_ops.c
opal_bit_ops_LDADD = \
	$(top_builddir)/opal/libopen-pal.la \
	$(top_builddir)/test/support/libsupport.a
# Relink when any of this program's own link inputs change. This refers to
# opal_bit_ops_LDADD rather than another program's LDADD so the two lists
# cannot drift apart.
opal_bit_ops_DEPENDENCIES = $(opal_bit_ops_LDADD)
|
|
|
|
|
|
|
|
|
2012-09-12 18:38:37 +04:00
|
|
|
# opal_path_nfs: built from opal_path_nfs.c and linked against libopen-pal
# and the test support library.
opal_path_nfs_SOURCES = opal_path_nfs.c
opal_path_nfs_LDADD = \
	$(top_builddir)/opal/libopen-pal.la \
	$(top_builddir)/test/support/libsupport.a
# Relink when any of the libraries listed in opal_path_nfs_LDADD change.
opal_path_nfs_DEPENDENCIES = $(opal_path_nfs_LDADD)
|
2010-02-11 02:18:29 +03:00
|
|
|
|
2010-02-12 01:07:07 +03:00
|
|
|
#opal_os_path_SOURCES = opal_os_path.c
|
|
|
|
#opal_os_path_LDADD = \
|
|
|
|
# $(top_builddir)/opal/libopen-pal.la \
|
|
|
|
# $(top_builddir)/test/support/libsupport.a
|
|
|
|
#opal_os_path_DEPENDENCIES = $(opal_os_path_LDADD)
|
|
|
|
|
|
|
|
#opal_timer_SOURCES = opal_timer.c
|
|
|
|
#opal_timer_LDADD = \
|
|
|
|
# $(top_builddir)/opal/libopen-pal.la \
|
|
|
|
# $(top_builddir)/test/support/libsupport.a
|
|
|
|
#opal_timer_DEPENDENCIES = $(opal_timer_LDADD)
|
|
|
|
|
|
|
|
|
|
|
|
#orte_sys_info_SOURCES = orte_sys_info.c
|
|
|
|
#orte_sys_info_LDADD = \
|
|
|
|
# $(top_builddir)/orte/libopen-rte.la \
|
|
|
|
# $(top_builddir)/test/support/libsupport.a
|
|
|
|
#orte_sys_info_DEPENDENCIES = $(orte_sys_info_LDADD)
|
|
|
|
|
|
|
|
#opal_os_create_dirpath_SOURCES = opal_os_create_dirpath.c
|
|
|
|
#opal_os_create_dirpath_LDADD = \
|
|
|
|
# $(top_builddir)/opal/libopen-pal.la \
|
|
|
|
# $(top_builddir)/test/support/libsupport.a
|
|
|
|
#opal_os_create_dirpath_DEPENDENCIES = $(opal_os_create_dirpath_LDADD)
|
|
|
|
|
|
|
|
#orte_session_dir_SOURCES = orte_session_dir.c
|
|
|
|
#orte_session_dir_LDADD = \
|
|
|
|
# $(top_builddir)/orte/libopen-rte.la \
|
|
|
|
# $(top_builddir)/test/support/libsupport.a
|
|
|
|
#orte_session_dir_DEPENDENCIES = $(orte_session_dir_LDADD)
|
|
|
|
|
|
|
|
#orte_universe_setup_file_io_SOURCES = orte_universe_setup_file_io.c
|
|
|
|
#orte_universe_setup_file_io_LDADD = \
|
|
|
|
# $(top_builddir)/orte/libopen-rte.la \
|
|
|
|
# $(top_builddir)/test/support/libsupport.a
|
|
|
|
#orte_universe_setup_file_io_DEPENDENCIES = $(orte_universe_setup_file_io_LDADD)
|
2005-05-23 18:22:35 +04:00
|
|
|
|
2005-08-22 03:48:12 +04:00
|
|
|
# Extra cleanup hooked into "make clean" through Automake's clean-local
# target: delete three scratch files (presumably left behind by test runs
# in this directory -- confirm against the tests themselves).
# The recipe line must begin with a hard TAB.
clean-local:
	rm -f test_session_dir_out test-file opal_path_nfs.out
|