diff --git a/CMakeLists.txt b/CMakeLists.txt
index e333cab7b37c5466067f1cd5f05c9385e5fa120c..a496109044e05590d8458a73f2c2a5e7c2d78074 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -29,6 +29,7 @@ dune_register_package_flags(LIBRARIES dunecodegen)
 
 dune_enable_all_packages()
 
+add_subdirectory(doc)
 add_subdirectory(dune/codegen)
 add_subdirectory(cmake/modules)
 
diff --git a/bin/donkey_benchmark_execution_wrapper.py b/bin/donkey_benchmark_execution_wrapper.py
index d383963318291ae947c70b1f083c320a60250303..22835c7cbce61336168396007e73703394eb7433 100755
--- a/bin/donkey_benchmark_execution_wrapper.py
+++ b/bin/donkey_benchmark_execution_wrapper.py
@@ -5,8 +5,16 @@ import sys
 import subprocess
 import os
 
+os.environ['DUNE_CODEGEN_THREADS'] = '20'
+
 # Run the actual command
-command = "srun -p haswell10c -n 20 -c 2 --cpu_bin=verbose,core".split()
+
+# The actual measurements will be run like this (MPI parallelism):
+# command = "srun -p haswell10c -n 20 -c 2 --cpu_bind=verbose,core".split()
+
+# Command for autotune benchmarks with threading
+command = "srun -p haswell10c -n 1 -c 20 --hint=nomultithread --cpu_bind=verbose,core".split()
+
 command.extend(sys.argv[1:])
 
 ret = subprocess.call(command)
diff --git a/cmake/modules/DuneCodegenMacros.cmake b/cmake/modules/DuneCodegenMacros.cmake
index 61713109b32cc1af4716b0f9f133879592fec49c..da3225866785c75a8cf73e6aa78b6e3e0eea42f9 100644
--- a/cmake/modules/DuneCodegenMacros.cmake
+++ b/cmake/modules/DuneCodegenMacros.cmake
@@ -1,6 +1,6 @@
 # File for module specific CMake tests.
 #
-# .. cmake_function:: add_generated_executable
+# .. cmake_function:: dune_add_generated_executable
 #
 #    .. cmake_param:: UFLFILE
 #       :single:
@@ -22,19 +22,20 @@
 #       The name given to the added executable target.
 #
 #    .. cmake_param:: SOURCE
+#       :single:
 #
 #       The cc source file to build from. If omitted, a minimal
 #       source file and a driver file will be generated.
 #
 #    .. cmake_param:: FORM_COMPILER_ARGS
 #       :multi:
-#       :argname arg:
+#       :argname: arg
 #
 #       Additional arguments as recognized by the form compiler.
 #
 #    .. cmake_param:: DEPENDS
 #       :multi:
-#       :argname dep:
+#       :argname: dep
 #
 #       Additional dependencies of the generated executable (changes in those
 #       will retrigger generation)
@@ -57,7 +58,7 @@
 #
 #    .. cmake_param:: ANALYZE_GRID_COMMAND
 #       :multi:
-#       :argname command:
+#       :argname: command
 #
 #       Use this to pass a custom grid analysis command. This is necessary
 #       if you use a custom grid generation methdod. The inifile and the
@@ -104,7 +105,7 @@ else()
 endif()
 file(GLOB_RECURSE UFL2PDELAB_SOURCES ${UFL2PDELAB_GLOB_PATTERN})
 
-function(add_generated_executable)
+function(dune_add_generated_executable)
   set(OPTIONS EXCLUDE_FROM_ALL ANALYZE_GRID)
   set(SINGLE TARGET SOURCE UFLFILE INIFILE)
   set(MULTI FORM_COMPILER_ARGS DEPENDS ANALYZE_GRID_COMMAND)
@@ -112,15 +113,15 @@ function(add_generated_executable)
   cmake_parse_arguments(GEN "${OPTIONS}" "${SINGLE}" "${MULTI}" ${ARGN})
 
   if(GEN_UNPARSED_ARGUMENTS)
-    message(FATAL_ERROR "Unrecognized arguments in add_generated_executable. This usually indicates a typo.")
+    message(FATAL_ERROR "Unrecognized arguments in dune_add_generated_executable. This usually indicates a typo.")
This usually indicates a typo.") endif() # Apply defaults and enforce requirements if(NOT GEN_TARGET) - message(FATAL_ERROR "Need to specify the TARGET parameter for add_generated_executable") + message(FATAL_ERROR "Need to specify the TARGET parameter for dune_add_generated_executable") endif() if(NOT GEN_UFLFILE) - message(FATAL_ERROR "Need to specify the UFLFILE parameter for add_generated_executable") + message(FATAL_ERROR "Need to specify the UFLFILE parameter for dune_add_generated_executable") endif() if(NOT IS_ABSOLUTE ${GEN_UFLFILE}) set(GEN_UFLFILE ${CMAKE_CURRENT_SOURCE_DIR}/${GEN_UFLFILE}) diff --git a/cmake/modules/GeneratedSystemtests.cmake b/cmake/modules/GeneratedSystemtests.cmake index 206ce3bcbf10a457182ffbb1a5f8e415ea3fcb24..87af7161c8b9c8196dbddc3ae42e55bcffd3bd81 100644 --- a/cmake/modules/GeneratedSystemtests.cmake +++ b/cmake/modules/GeneratedSystemtests.cmake @@ -1,11 +1,85 @@ -# System testing for generated executables. All ideas taken from dune-testtools. +# System testing for generated executables. All ideas taken from dune-testtools. +# +# .. cmake_function:: dune_add_formcompiler_system_test +# +# .. cmake_param:: UFLFILE +# :single: +# :required: +# +# The UFL file to create the generate code from. +# +# .. cmake_param:: INIFILE +# :single: +# :required: +# +# The ini file that controls the form compilation process. +# It is expected to contain a [formcompiler] section. This +# file may contain meta ini annotations. +# +# .. cmake_param:: BASENAME +# :single: +# :required: +# +# The basename for the generated executables. +# +# .. cmake_param:: SCRIPT +# :single: +# +# The python script that decides about test failure/success. +# Defaults to a script that simply runs the program and checks +# the exit code. More scripts to be found in dune-testtools. +# +# .. cmake_param:: SOURCE +# :single: +# +# The cc source file to build from. If omitted, a minimal +# source file and a driver file will be generated. +# +# .. cmake_param:: CREATED_TARGETS +# :single: +# +# A variable name that should be filled with the list of generated +# targets. This can be used to modify these lateron. +# +# .. cmake_param:: DEPENDS +# :multi: +# :argname: dep +# +# Additional dependencies of the generated executable (changes in those +# will retrigger generation) +# +# .. cmake_param:: NO_TESTS +# :option: +# +# If given, code will be generated and built normally, but no tests will +# be added to the test suite. +# +# .. cmake_param:: ANALYZE_GRID +# :option: +# +# Set this option to enable code generation time grid analysis. +# This is useful to reduce the variety of sum factorization kernels +# in unstructured grids. Note that the grid analysis tool needs to +# be able to construct your grid from the given inifile. If you have +# a custom grid construction method, you can use ANALYZE_GRID_COMMAND +# instead. +# +# .. cmake_param:: ANALYZE_GRID_COMMAND +# :multi: +# :argname: command +# +# Use this to pass a custom grid analysis command. This is necessary +# if you use a custom grid generation methdod. The inifile and the +# outputfile will be appended to this command. You can use the analysis code in +# dune/codegen/sumfact/analyzegrid.hh to write your own tool. +# Specifying this option will automatically set ANALYZE_GRID. 
 #
 function(dune_add_formcompiler_system_test)
   # parse arguments
   set(OPTION DEBUG NO_TESTS ANALYZE_GRID)
-  set(SINGLE INIFILE BASENAME SCRIPT UFLFILE SOURCE)
-  set(MULTI CREATED_TARGETS DEPENDS)
+  set(SINGLE INIFILE BASENAME SCRIPT UFLFILE SOURCE CREATED_TARGETS)
+  set(MULTI DEPENDS ANALYZE_GRID_COMMAND)
   cmake_parse_arguments(SYSTEMTEST "${OPTION}" "${SINGLE}" "${MULTI}" ${ARGN})
 
   if(SYSTEMTEST_UNPARSED_ARGUMENTS)
@@ -25,6 +99,10 @@ function(dune_add_formcompiler_system_test)
   if(SYSTEMTEST_ANALYZE_GRID)
     set(ANALYZE_GRID_STR "ANALYZE_GRID")
   endif()
+  set(ANALYZE_GRID_COMMAND_STR "")
+  if(SYSTEMTEST_ANALYZE_GRID_COMMAND)
+    set(ANALYZE_GRID_COMMAND_STR ANALYZE_GRID_COMMAND ${SYSTEMTEST_ANALYZE_GRID_COMMAND})
+  endif()
 
   # set a default for the script. call_executable.py just calls the executable.
   # There, it is also possible to hook in things depending on the inifile
@@ -55,14 +133,15 @@ function(dune_add_formcompiler_system_test)
       endif()
     endif()
 
-    add_generated_executable(TARGET ${tname}
-                             UFLFILE ${SYSTEMTEST_UFLFILE}
-                             INIFILE "${CMAKE_CURRENT_BINARY_DIR}/${inifile}"
-                             DEPENDS ${SYSTEMTEST_INIFILE} ${SYSTEMTEST_DEPENDS}
-                             EXCLUDE_FROM_ALL
-                             ${SOURCE}
-                             ${ANALYZE_GRID_STR}
-                             )
+    dune_add_generated_executable(TARGET ${tname}
+                                  UFLFILE ${SYSTEMTEST_UFLFILE}
+                                  INIFILE "${CMAKE_CURRENT_BINARY_DIR}/${inifile}"
+                                  DEPENDS ${SYSTEMTEST_INIFILE} ${SYSTEMTEST_DEPENDS}
+                                  EXCLUDE_FROM_ALL
+                                  ${SOURCE}
+                                  ${ANALYZE_GRID_STR}
+                                  ${ANALYZE_GRID_COMMAND_STR}
+                                  )
 
     # Enrich the target with preprocessor variables from the __static section
     # just the way that dune-testtools does.
diff --git a/doc/CMakeLists.txt b/doc/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..956de868a985aec3af90d7263417eb16b9be09df
--- /dev/null
+++ b/doc/CMakeLists.txt
@@ -0,0 +1 @@
+dune_cmake_sphinx_doc(MODULE_ONLY)
diff --git a/python/dune/codegen/__init__.py b/python/dune/codegen/__init__.py
index e892baf44c4c9fb301536c15c40059a5acc5d602..196419782487f33f13cf797d19c1fbdcbd13d946 100644
--- a/python/dune/codegen/__init__.py
+++ b/python/dune/codegen/__init__.py
@@ -10,3 +10,6 @@ import dune.codegen.loopy.symbolic  # noqa
 import dune.codegen.pdelab  # noqa
 import dune.codegen.sumfact  # noqa
 import dune.codegen.blockstructured  # noqa
+
+# Trigger imports that register finite elements in UFL
+import dune.codegen.ufl.finiteelement  # noqa
diff --git a/python/dune/codegen/sumfact/autotune.py b/python/dune/codegen/sumfact/autotune.py
index adda4c44eaad264f8e1f4500eeba2dbb6cf9ebf4..1e801f4550d17d89ef8972fe3ffa867d4a44ea25 100644
--- a/python/dune/codegen/sumfact/autotune.py
+++ b/python/dune/codegen/sumfact/autotune.py
@@ -8,7 +8,9 @@ import hashlib
 import logging
 import json
 from operator import mul
+import pkg_resources
 from six.moves import reduce
+import textwrap
 import time
 
 import loopy as lp
@@ -318,113 +320,158 @@ def generate_standalone_kernel_code(kernel, signature, filename, transformations
     sig = p.sub(r'\1', sig)
     assert 'OpCounter' not in signature
 
-    with open(filename, 'w') as f:
-        if transformations:
-            f.write('// Transformations:\n')
-            for trafo in transformations:
-                f.write('// {}\n'.format(trafo))
-            f.write('\n')
-
-        # Write headers
-        headers = ['#include "config.h"',
-                   '#include <iostream>',
-                   '#include <fstream>',
-                   '#include <random>',
-                   '#include "benchmark/benchmark.h"',
-                   '#include <dune/codegen/common/vectorclass.hh>',
-                   '#include <dune/codegen/sumfact/horizontaladd.hh>',
-                   ]
-        f.write("\n".join(headers))
-
-        # Get a list of the function argument names
-        arguments = sig[sig.find('(') + 
1:sig.find(')')].split(',') - arguments = [a.split(' ')[-1] for a in arguments] - - global_args = [a for a in kernel.args if a.name not in arguments] - - # Declare global arguments - f.write('\n\n') - target = DuneTarget() - for g in global_args: - decl_info = g.decl_info(target, True, g.dtype) - for idi in decl_info: - ast_builder = target.get_device_ast_builder() - arg_decl = lp.target.c.POD(ast_builder, idi.dtype, idi.name) - arg_decl = ArrayOf(arg_decl, reduce(mul, g.shape)) - arg_decl = AlignedAttribute(g.dtype.itemsize * g.vector_size(target), arg_decl) - f.write('{}\n'.format(arg_decl)) - - # Generate function we want to benchmark - f.write('\n') - f.write(sig[0:sig.find(')') + 1]) - f.writelines(lp.generate_body(kernel)) - f.write('\n\n') - - # Generate function that will do the benchmarking - f.write('static void BM_sumfact_kernel(benchmark::State& state){\n') - - # Declare random generators + # Which transformations were applied + codegen_transformations = '' + if transformations: + codegen_transformations = '' + for trafo in transformations: + codegen_transformations += '// {}\n'.format(trafo) + + template = 'kernel_benchmark_template1.cc.in' + use_datasets = True + + # Old benchmark template + # template = 'kernel_benchmark_template0.cc.in' + # use_datasets = False + + template_filename = pkg_resources.resource_filename(__name__, template) + with open(template_filename, 'r') as f: + benchmark = f.read() + + # Find function arguments and global arguments + arguments = sig[sig.find('(') + 1:sig.find(')')].split(',') + arguments = [a.split(' ')[-1] for a in arguments] + global_args = [a for a in kernel.args if a.name not in arguments] + buffer_arguments = [a for a in arguments if a.startswith('buff')] + input_arguments = [a for a in arguments if a not in buffer_arguments] + + # Declare global arguments + codegen_declare_global_arguments = '' + target = DuneTarget() + for g in global_args: + decl_info = g.decl_info(target, True, g.dtype) + for idi in decl_info: + ast_builder = target.get_device_ast_builder() + arg_decl = lp.target.c.POD(ast_builder, idi.dtype, idi.name) + arg_decl = ArrayOf(arg_decl, reduce(mul, g.shape)) + arg_decl = AlignedAttribute(g.dtype.itemsize * g.vector_size(target), arg_decl) + codegen_declare_global_arguments += '{}\n'.format(arg_decl) + codegen_declare_global_arguments = textwrap.indent(codegen_declare_global_arguments, ' ') + + # Helper function for argument initialization + def _initialize_arg(arg): + if isinstance(arg, lp.ValueArg): + return [] real = type_floatingpoint() - lines = [' std::uniform_real_distribution<{}> unif(0,1);'.format(real), - ' std::uniform_int_distribution<int> unif_int(0,1);', - ' std::default_random_engine re;'] - f.write('\n'.join(lines) + '\n') - - # Declare function arguments - function_arguments = [a for a in kernel.args if a.name in arguments] - for arg in function_arguments: - if 'buffer' in arg.name: - byte_size = reduce(mul, arg.shape) * 8 - f.write(' char {}[{}] __attribute__ ((aligned ({})));\n'.format(arg.name, - byte_size, - arg.alignment),) - elif isinstance(arg, lp.ValueArg): - assert 'jacobian_offset' in arg.name - decl = arg.get_arg_decl(ast_builder) - decl = Initializer(decl, 'unif_int(re)') - f.write(' {}\n'.format(decl)) - else: - assert 'fastdg' in arg.name - size = reduce(mul, arg.shape) - min_stride = min([tag.stride for tag in arg.dim_tags]) - size *= min_stride - alignment = arg.dtype.itemsize - f.write(' {} {}[{}] __attribute__ ((aligned ({})));\n'.format(real, - arg.name, - size, - alignment)) - - # 
Initialize arguments - def _initialize_arg(arg): - if isinstance(arg, lp.ValueArg): - return [] - real = type_floatingpoint() + size = reduce(mul, arg.shape) + fill_name = arg.name + '_fill' + lines = [' {}* {} = (double *) {};'.format(real, fill_name, arg.name), + ' for (std::size_t i=0; i<{}; ++i){{'.format(size), + ' {}[i] = unif(re);'.format(fill_name), + ' }'] + return lines + + # Initialize global arguments + codegen_initialize_global_arguments = '' + for arg in global_args: + lines = _initialize_arg(arg) + codegen_initialize_global_arguments += '\n'.join(lines) + '\n' + codegen_initialize_global_arguments = textwrap.indent(codegen_initialize_global_arguments, ' ') + + codegen_initialize_input = '' + + # Function we want to benchmark + codegen_benchmark_function = '' + codegen_benchmark_function += sig[0:sig.find(')') + 1] + codegen_benchmark_function += lp.generate_body(kernel) + codegen_benchmark_function = textwrap.indent(codegen_benchmark_function, ' ') + + # Declare function arguments + codegen_declare_arguments = [] + codegen_declare_input = [] + function_arguments = [a for a in kernel.args if a.name in arguments] + for arg in function_arguments: + if 'buffer' in arg.name: + byte_size = reduce(mul, arg.shape) * 8 + codegen_declare_arguments.append(' char {}[{}] __attribute__ ((aligned ({})));\n'.format(arg.name, + byte_size, + arg.alignment),) + elif isinstance(arg, lp.ValueArg): + assert 'jacobian_offset' in arg.name + decl = arg.get_arg_decl(ast_builder) + decl = Initializer(decl, 'unif_int(re)') + codegen_declare_arguments.append((' {}\n'.format(decl))) + else: + assert 'fastdg' in arg.name size = reduce(mul, arg.shape) - fill_name = arg.name + '_fill' - lines = [' {}* {} = (double *) {};'.format(real, fill_name, arg.name), - ' for (std::size_t i=0; i<{}; ++i){{'.format(size), - ' {}[i] = unif(re);'.format(fill_name), - ' }'] - return lines - - for arg in kernel.args: + min_stride = min([tag.stride for tag in arg.dim_tags]) + size *= min_stride + alignment = arg.dtype.itemsize + real = type_floatingpoint() + if use_datasets: + codegen_declare_input.append(('{} {}[datasets][{}] __attribute__ ((aligned ({})));\n'.format(real, + arg.name, + size, + alignment))) + else: + codegen_declare_input.append(('{} {}[{}] __attribute__ ((aligned ({})));\n'.format(real, + arg.name, + size, + alignment))) + + codegen_declare_arguments = ''.join(codegen_declare_arguments) + codegen_declare_arguments = textwrap.indent(codegen_declare_arguments, ' ') + codegen_declare_input = ''.join(codegen_declare_input) + codegen_declare_input = textwrap.indent(codegen_declare_input, ' ') + + # Initialize function arguments + codegen_initialize_arguments = '' + codegen_initialize_input = '' + for arg in function_arguments: + if 'fastdg' in arg.name: + if use_datasets: + lines = _initialize_arg(arg) + lines = [' ' + a for a in lines] + lines = [a.replace(arg.name + ';', arg.name + '[i];') for a in lines] + lines.insert(0, 'for(std::size_t i=0; i<datasets; ++i){') + lines.append('}') + codegen_initialize_input += '\n'.join(lines) + '\n' + else: + lines = _initialize_arg(arg) + codegen_initialize_arguments += '\n'.join(lines) + '\n' + else: lines = _initialize_arg(arg) - f.write('\n'.join(lines) + '\n') - - # Benchmark loop - function_call = kernel.name + '({})'.format(','.join(arguments)) - f.writelines([' for (auto _ : state){\n', - ' {};\n'.format(function_call), - ' }\n', - ]) - f.write('}\n') - - # Benchmark main - main = ['', - 'BENCHMARK(BM_sumfact_kernel);', - '', - 'BENCHMARK_MAIN();'] - 
f.write('\n'.join(main)) + codegen_initialize_arguments += '\n'.join(lines) + '\n' + codegen_initialize_arguments = textwrap.indent(codegen_initialize_arguments, ' ') + codegen_initialize_input = textwrap.indent(codegen_initialize_input, ' ') + + # Call the benchmark function + if use_datasets: + arguments_with_datasets = arguments.copy() + arguments_with_datasets = [a if 'fastdg' not in a else a + '[i]' for a in arguments] + codegen_call_benchmark_function = 'for (std::size_t i=0; i<datasets; ++i){\n' + codegen_call_benchmark_function += ' ' + kernel.name + '({})'.format(','.join(arguments_with_datasets)) + ';\n' + for arg in input_arguments: + codegen_call_benchmark_function += 'benchmark::DoNotOptimize({}[i][0]);\n'.format(arg) + codegen_call_benchmark_function += '}' + else: + codegen_call_benchmark_function = kernel.name + '({})'.format(','.join(arguments)) + ';\n' + codegen_call_benchmark_function = textwrap.indent(codegen_call_benchmark_function, ' ') + + # Replace placeholders in benchmark template + benchmark = benchmark.replace('${CODEGEN_TRANSFORMATIONS}', codegen_transformations) + benchmark = benchmark.replace('${CODEGEN_DECLARE_GLOBAL_ARGUMENTS}', codegen_declare_global_arguments) + benchmark = benchmark.replace('${CODEGEN_DECLARE_INPUT}', codegen_declare_input) + benchmark = benchmark.replace('${CODEGEN_INITIALIZE_GLOBAL_ARGUMENTS}', codegen_initialize_global_arguments) + benchmark = benchmark.replace('${CODEGEN_INITIALIZE_INPUT}', codegen_initialize_input) + benchmark = benchmark.replace('${CODEGEN_BENCHMARK_FUNCTION}', codegen_benchmark_function) + benchmark = benchmark.replace('${CODEGEN_DECLARE_ARGUMENTS}', codegen_declare_arguments) + benchmark = benchmark.replace('${CODEGEN_INITIALIZE_ARGUMENTS}', codegen_initialize_arguments) + benchmark = benchmark.replace('${CODEGEN_CALL_BENCHMARK_FUNCTION}', codegen_call_benchmark_function) + + # Write benchmark source file + with open(filename, 'w') as f: + f.writelines(benchmark) # Maybe turn opcounting on again set_option("opcounter", opcounting) @@ -491,6 +538,7 @@ def autotune_realization(sf=None, kernel=None, signature=None, transformations=N call.extend(compiler_invocation(executable, filename)) devnull = open(os.devnull, 'w') + os.environ['DUNE_CODEGEN_THREADS'] = '1' ret = subprocess.call(call, stdout=devnull, stderr=subprocess.STDOUT) if ret != 0: raise CodegenAutotuneError("Compilation of autotune executable failed. 
Invocation: {}".format(" ".join(call))) @@ -509,7 +557,7 @@ def autotune_realization(sf=None, kernel=None, signature=None, transformations=N call.append(executable) if get_option("autotune_google_benchmark"): call.append("--benchmark_out={}".format(logname)) - call.append("--benchmark_repetitions=10") + call.append("--benchmark_repetitions=5") # call.append("--benchmark_out_format=csv") else: call.append(logname) @@ -529,7 +577,9 @@ def autotune_realization(sf=None, kernel=None, signature=None, transformations=N data = json.load(json_file) minimal_time = 1e80 for b in data['benchmarks']: - if b['name'] == 'BM_sumfact_kernel': + if b['name'].endswith('_mean') or b['name'].endswith('_median') or b['name'].endswith('_stddev'): + pass + else: if b['cpu_time'] < minimal_time: minimal_time = b['cpu_time'] assert minimal_time < 1e80 diff --git a/python/dune/codegen/sumfact/kernel_benchmark_template0.cc.in b/python/dune/codegen/sumfact/kernel_benchmark_template0.cc.in new file mode 100644 index 0000000000000000000000000000000000000000..c4b49da5aec59006a9f007534dc7711b98ef0387 --- /dev/null +++ b/python/dune/codegen/sumfact/kernel_benchmark_template0.cc.in @@ -0,0 +1,37 @@ +// Transformations: +${CODEGEN_TRANSFORMATIONS} + +#include "config.h" +#include <iostream> +#include <fstream> +#include <random> +#include "benchmark/benchmark.h" +#include <dune/codegen/common/vectorclass.hh> +#include <dune/codegen/sumfact/horizontaladd.hh> + +${CODEGEN_DECLARE_GLOBAL_ARGUMENTS} + +${CODEGEN_BENCHMARK_FUNCTION} + +static void BM_sumfact_kernel(benchmark::State& state){ + std::uniform_real_distribution<double> unif(0,1); + std::uniform_int_distribution<int> unif_int(0,1); + std::default_random_engine re; + + ${CODEGEN_DECLARE_GLOBAL_ARGUMENTS} + ${CODEGEN_INITIALIZE_GLOBAL_ARGUMENTS} + + ${CODEGEN_DECLARE_INPUT} + ${CODEGEN_INITIALIZE_INPUT} + + ${CODEGEN_DECLARE_ARGUMENTS} + ${CODEGEN_INITIALIZE_ARGUMENTS} + + for (auto _ : state){ + ${CODEGEN_CALL_BENCHMARK_FUNCTION} + } +} + +${CODEGEN_BENCHMARK} + +BENCHMARK_MAIN(); diff --git a/python/dune/codegen/sumfact/kernel_benchmark_template1.cc.in b/python/dune/codegen/sumfact/kernel_benchmark_template1.cc.in new file mode 100644 index 0000000000000000000000000000000000000000..c03577d672123b3063020841b731fc1fd9780fce --- /dev/null +++ b/python/dune/codegen/sumfact/kernel_benchmark_template1.cc.in @@ -0,0 +1,58 @@ +// Transformations: +${CODEGEN_TRANSFORMATIONS} + +#include "config.h" + +#include <iostream> +#include <fstream> +#include <random> +#include <cstdlib> + +#include "benchmark/benchmark.h" +#include <dune/codegen/common/vectorclass.hh> +#include <dune/codegen/sumfact/horizontaladd.hh> + +const int datasets = 512; + +class Benchmark{ + ${CODEGEN_DECLARE_GLOBAL_ARGUMENTS} + + ${CODEGEN_DECLARE_INPUT} + +public: + Benchmark (){ + std::uniform_real_distribution<double> unif(0,1); + std::uniform_int_distribution<int> unif_int(0,1); + std::default_random_engine re; + + ${CODEGEN_INITIALIZE_GLOBAL_ARGUMENTS} + + ${CODEGEN_INITIALIZE_INPUT} + } + + ${CODEGEN_BENCHMARK_FUNCTION} + + void run_benchmark(){ + ${CODEGEN_DECLARE_ARGUMENTS} + + std::uniform_real_distribution<double> unif(0,1); + std::uniform_int_distribution<int> unif_int(0,1); + std::default_random_engine re; + + ${CODEGEN_INITIALIZE_ARGUMENTS} + + ${CODEGEN_CALL_BENCHMARK_FUNCTION} + } + +}; + +static void BM_sumfact_kernel(benchmark::State& state){ + Benchmark benchmark; + for (auto _ : state){ + benchmark.run_benchmark(); + } +} + +BENCHMARK(BM_sumfact_kernel) -> 
Threads(std::getenv("DUNE_CODEGEN_THREADS") ? atoi(std::getenv("DUNE_CODEGEN_THREADS")) : 1) -> UseRealTime();
+
+BENCHMARK_MAIN();
diff --git a/python/dune/codegen/ufl/finiteelement.py b/python/dune/codegen/ufl/finiteelement.py
new file mode 100644
index 0000000000000000000000000000000000000000..ae0ef35f2a3b30cd2837525447233dfe017cd76d
--- /dev/null
+++ b/python/dune/codegen/ufl/finiteelement.py
@@ -0,0 +1,43 @@
+""" Registering finite elements that we have, but FEniCS does not """
+
+from ufl import register_element, L2
+
+
+register_element("Discontinuous Gauss-Lobatto-Legendre",
+                 "DGLL",
+                 0,
+                 L2,
+                 "identity",
+                 (0, None),
+                 ('interval',),
+                 )
+
+
+register_element("Monomials",
+                 "Monom",
+                 0,
+                 L2,
+                 "identity",
+                 (0, None),
+                 ('interval',),
+                 )
+
+
+register_element("L2-Orthonormal Polynomials",
+                 "OPB",
+                 0,
+                 L2,
+                 "identity",
+                 (0, None),
+                 ('interval',),
+                 )
+
+
+register_element("Rannacher-Turek",
+                 "RaTu",
+                 0,
+                 L2,
+                 "identity",
+                 (1, 1),
+                 ('quadrilateral', 'hexahedron'),
+                 )
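
Note for downstream modules: this patch renames add_generated_executable to dune_add_generated_executable, so out-of-tree CMakeLists.txt files calling the old name must be updated. A minimal sketch of the updated call follows; the target and file names are hypothetical, and only TARGET and UFLFILE are enforced as required in the code shown above:

    # hypothetical example target; UFLFILE/INIFILE paths are placeholders
    dune_add_generated_executable(TARGET myoperator
                                  UFLFILE myoperator.ufl
                                  INIFILE myoperator.ini
                                  )

Callers of dune_add_formcompiler_system_test need no changes, since that wrapper now forwards to the renamed function internally.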