diff --git a/.gitmodules b/.gitmodules index 4cf4ba72af8959c4b35e4f3e734481602dde8ba8..1733ea4f5c7666f27dfc88945612f491bc6cc4df 100644 --- a/.gitmodules +++ b/.gitmodules @@ -3,7 +3,7 @@ url = https://gitlab.tiker.net/inducer/loopy.git [submodule "python/ufl"] path = python/ufl - url = https://parcomp-git.iwr.uni-heidelberg.de/dominic/ufl.git + url = https://bitbucket.org/fenics-project/ufl.git [submodule "python/pymbolic"] path = python/pymbolic url = https://github.com/inducer/pymbolic.git diff --git a/applications/knl/poisson_dg/knl_poisson_dg.mini b/applications/knl/poisson_dg/knl_poisson_dg.mini index bf478e854b00a14ae5c48567945051e92e22f02c..ba2f345fd39edcca22fbbb9aded8e4bd3193157c 100644 --- a/applications/knl/poisson_dg/knl_poisson_dg.mini +++ b/applications/knl/poisson_dg/knl_poisson_dg.mini @@ -36,18 +36,21 @@ name = {__name} extension = vtu [formcompiler] +instrumentation_level = 2, 3, 4 | expand +opcounter = 1, 0 | expand opcount +performance_measuring = 0, 1 | expand opcount +architecture = knl + +[formcompiler.r] fastdg = 1 sumfact = 1 vectorization_quadloop = 1 vectorization_strategy = explicit vectorization_horizontal = 4 vectorization_vertical = 2 -instrumentation_level = 2, 3, 4 | expand -opcounter = 1, 0 | expand opcount -time_opcounter = 0, 1 | expand opcount +matrix_free = 1 +generate_jacobians = 0 quadrature_order = {formcompiler.ufl_variants.degree} * 2 | eval -architecture = knl -assure_statement_ordering = 1 [formcompiler.ufl_variants] cell = hexahedron diff --git a/applications/knl/poisson_dg/poisson_dg.ufl b/applications/knl/poisson_dg/poisson_dg.ufl index 7b5c3e548f81688a0735256411fd54e9d49fa9f4..b5b2b90624dcd891f08bde4aba6cf68481b152f0 100644 --- a/applications/knl/poisson_dg/poisson_dg.ufl +++ b/applications/knl/poisson_dg/poisson_dg.ufl @@ -28,5 +28,3 @@ r = inner(grad(u), grad(v))*dx \ - f*v*dx \ - theta*g*inner(grad(v), n)*ds \ - gamma_ext*g*v*ds - -forms = [r] diff --git a/applications/knl/poisson_dg/verify.mini 
b/applications/knl/poisson_dg/verify.mini index d6dcdc8532a2809b4842ee2d078d72f7f20ff97a..b82dce6952c34792c633461bfadb834afd900885 100644 --- a/applications/knl/poisson_dg/verify.mini +++ b/applications/knl/poisson_dg/verify.mini @@ -9,6 +9,9 @@ name = {__name} extension = vtu [formcompiler] +architecture = knl + +[formcompiler.r] fastdg = 1 sumfact = 1 vectorization_quadloop = 1 @@ -16,7 +19,6 @@ vectorization_strategy = explicit vectorization_horizontal = 4 vectorization_vertical = 2 quadrature_order = 6 -architecture = knl [formcompiler.ufl_variants] cell = hexahedron diff --git a/applications/knl/poisson_dg_tensor/knl_poisson_dg_tensor.mini b/applications/knl/poisson_dg_tensor/knl_poisson_dg_tensor.mini index 8b746a3d3dc86ae9be6a96f5c95b8bda6c6a259b..4f3f5826f71fd9fc742efa4d88ae222e45759529 100644 --- a/applications/knl/poisson_dg_tensor/knl_poisson_dg_tensor.mini +++ b/applications/knl/poisson_dg_tensor/knl_poisson_dg_tensor.mini @@ -36,18 +36,21 @@ name = {__name} extension = vtu [formcompiler] +instrumentation_level = 2, 3, 4 | expand +opcounter = 1, 0 | expand opcount +performance_measuring = 0, 1 | expand opcount +architecture = knl + +[formcompiler.r] fastdg = 1 sumfact = 1 vectorization_quadloop = 1 vectorization_strategy = explicit vectorization_horizontal = 4 vectorization_vertical = 2 -instrumentation_level = 2, 3, 4 | expand -opcounter = 1, 0 | expand opcount -time_opcounter = 0, 1 | expand opcount quadrature_order = {formcompiler.ufl_variants.degree} * 2 | eval -architecture = knl -assure_statement_ordering = 1 +matrix_free = 1 +generate_jacobians = 0 [formcompiler.ufl_variants] cell = hexahedron diff --git a/applications/knl/poisson_dg_tensor/poisson_dg_tensor.ufl b/applications/knl/poisson_dg_tensor/poisson_dg_tensor.ufl index d3c95a5226e26981cf78e5d8b78a686150a9310d..918f92138a9fe04f9b2c4ddddefa472845d4eb5f 100644 --- a/applications/knl/poisson_dg_tensor/poisson_dg_tensor.ufl +++ b/applications/knl/poisson_dg_tensor/poisson_dg_tensor.ufl @@ -31,5 
+31,3 @@ r = (inner(A*grad(u), grad(v)) + (c*u-f)*v)*dx \ + theta*u*inner(A*grad(v), n)*ds \ - theta*g*inner(A*grad(v), n)*ds \ - gamma_ext*g*v*ds - -forms = [r] diff --git a/applications/knl/poisson_dg_tensor/verify.mini b/applications/knl/poisson_dg_tensor/verify.mini index c2447c077f3ad00c39585b846e57b01b19ef23f3..cedf2fb3418dd92e5678ced1d20bbe0e73530d7d 100644 --- a/applications/knl/poisson_dg_tensor/verify.mini +++ b/applications/knl/poisson_dg_tensor/verify.mini @@ -9,6 +9,9 @@ name = {__name} extension = vtu [formcompiler] +architecture = knl + +[formcompiler.r] fastdg = 1 sumfact = 1 vectorization_quadloop = 1 @@ -16,7 +19,6 @@ vectorization_strategy = explicit vectorization_horizontal = 4 vectorization_vertical = 2 quadrature_order = 6 -architecture = knl [formcompiler.ufl_variants] cell = hexahedron diff --git a/applications/poisson_dg/poisson_dg.mini b/applications/poisson_dg/poisson_dg.mini index e99c20202fad49ef344c37776b9c4264a88bbfcf..7e65a4815f5fccc2dc6aae955bbdef559e9ebe92 100644 --- a/applications/poisson_dg/poisson_dg.mini +++ b/applications/poisson_dg/poisson_dg.mini @@ -36,15 +36,18 @@ name = {__name} extension = vtu [formcompiler] +instrumentation_level = 2, 3, 4 | expand +opcounter = 1, 0 | expand opcount +performance_measuring = 0, 1 | expand opcount + +[formcompiler.r] fastdg = 1 sumfact = 1 vectorization_quadloop = 1 vectorization_strategy = explicit -instrumentation_level = 2, 3, 4 | expand -opcounter = 1, 0 | expand opcount -time_opcounter = 0, 1 | expand opcount quadrature_order = {formcompiler.ufl_variants.degree} * 2 | eval -assure_statement_ordering = 1 +matrix_free = 1 +generate_jacobians = 0 [formcompiler.ufl_variants] cell = hexahedron diff --git a/applications/poisson_dg/poisson_dg.ufl b/applications/poisson_dg/poisson_dg.ufl index 7b5c3e548f81688a0735256411fd54e9d49fa9f4..54c53913fcdb844eb19c89d5100d6461536a1df7 100644 --- a/applications/poisson_dg/poisson_dg.ufl +++ b/applications/poisson_dg/poisson_dg.ufl @@ -29,4 +29,3 @@ r = 
inner(grad(u), grad(v))*dx \ - theta*g*inner(grad(v), n)*ds \ - gamma_ext*g*v*ds -forms = [r] diff --git a/applications/poisson_dg/verify.mini b/applications/poisson_dg/verify.mini index a2eed15308793b163781a9ecba590cd31c4f7400..475cb4db8e0e93276ea797cef786ad3d0924ccb6 100644 --- a/applications/poisson_dg/verify.mini +++ b/applications/poisson_dg/verify.mini @@ -9,12 +9,14 @@ name = {__name} extension = vtu [formcompiler] +exact_solution_expression = g +compare_l2errorsquared = 1e-6 + +[formcompiler.r] fastdg = 1 sumfact = 1 vectorization_quadloop = 1 vectorization_strategy = explicit -exact_solution_expression = g -compare_l2errorsquared = 1e-6 [formcompiler.ufl_variants] cell = hexahedron diff --git a/applications/poisson_dg_tensor/poisson_dg_tensor.mini b/applications/poisson_dg_tensor/poisson_dg_tensor.mini index 720b89578df0c59b88c764265f761c2be9e54546..4e7ac555b33fa5fcef568cca5b6dddf6dd3552e3 100644 --- a/applications/poisson_dg_tensor/poisson_dg_tensor.mini +++ b/applications/poisson_dg_tensor/poisson_dg_tensor.mini @@ -36,15 +36,18 @@ name = {__name} extension = vtu [formcompiler] +instrumentation_level = 2, 3, 4 | expand +opcounter = 1, 0 | expand opcount +performance_measuring = 0, 1 | expand opcount + +[formcompiler.r] fastdg = 1 sumfact = 1 vectorization_quadloop = 1 vectorization_strategy = explicit -instrumentation_level = 2, 3, 4 | expand -opcounter = 1, 0 | expand opcount -time_opcounter = 0, 1 | expand opcount quadrature_order = {formcompiler.ufl_variants.degree} * 2 | eval -assure_statement_ordering = 1 +matrix_free = 1 +generate_jacobians = 0 [formcompiler.ufl_variants] cell = hexahedron diff --git a/applications/poisson_dg_tensor/poisson_dg_tensor.ufl b/applications/poisson_dg_tensor/poisson_dg_tensor.ufl index 2b1a7b9803016a50b609a9d5379461c20252310c..be57149e091d1e7f9d1eb4229992965fbf3ee633 100644 --- a/applications/poisson_dg_tensor/poisson_dg_tensor.ufl +++ b/applications/poisson_dg_tensor/poisson_dg_tensor.ufl @@ -32,5 +32,4 @@ r = 
(inner(A*grad(u), grad(v)) + (c*u-f)*v)*dx \ - theta*g*inner(A*grad(v), n)*ds \ - gamma_ext*g*v*ds -forms = [r] exact_solution = g diff --git a/applications/poisson_dg_tensor/verify.mini b/applications/poisson_dg_tensor/verify.mini index 774b5e1b9c033c519c19bb35b8deb5db0efd58aa..d33be7360b97c07c7eeca56e878adf824381c053 100644 --- a/applications/poisson_dg_tensor/verify.mini +++ b/applications/poisson_dg_tensor/verify.mini @@ -9,11 +9,13 @@ name = {__name} extension = vtu [formcompiler] +compare_l2errorsquared = 1e-6 + +[formcompiler.r] fastdg = 1 sumfact = 1 vectorization_quadloop = 1 vectorization_strategy = explicit -compare_l2errorsquared = 1e-6 [formcompiler.ufl_variants] cell = hexahedron diff --git a/applications/stokes_dg/stokes_dg.mini b/applications/stokes_dg/stokes_dg.mini index 9cf5f0ba066b7879cbde5120c7a5b2d7f92de077..a80567f9b73473ae09e52c3a768177431435e849 100644 --- a/applications/stokes_dg/stokes_dg.mini +++ b/applications/stokes_dg/stokes_dg.mini @@ -37,16 +37,19 @@ name = {__name} extension = vtu [formcompiler] +instrumentation_level = 2, 3, 4 | expand +opcounter = 1, 0 | expand opcount +performance_measuring = 0, 1 | expand opcount + +[formcompiler.r] fastdg = 1 sumfact = 1 vectorization_quadloop = 1 vectorization_strategy = model vectorization_allow_quadrature_changes = 1 -instrumentation_level = 2, 3, 4 | expand -opcounter = 1, 0 | expand opcount -time_opcounter = 0, 1 | expand opcount quadrature_order = {formcompiler.ufl_variants.v_degree} * 2 | eval -assure_statement_ordering = 1 +matrix_free = 1 +generate_jacobians = 0 [formcompiler.ufl_variants] cell = hexahedron diff --git a/applications/stokes_dg/stokes_dg.ufl b/applications/stokes_dg/stokes_dg.ufl index 8c2ac036b6bb8961a312037b88866e5a1aff4ccb..d36556dbaf38b76dbc565437a28de38889b62d0f 100644 --- a/applications/stokes_dg/stokes_dg.ufl +++ b/applications/stokes_dg/stokes_dg.ufl @@ -1,4 +1,5 @@ cell = hexahedron +dim = 3 x = SpatialCoordinate(cell) g_v = as_vector((4.*x[1]*(1.-x[1]), 0.0, 
0.0)) @@ -14,23 +15,29 @@ u, p = TrialFunctions(TH) ds = ds(subdomain_id=1, subdomain_data=bctype) n = FacetNormal(cell)('+') -eps = -1.0 -sigma = 1.0 -h_e = Min(CellVolume(cell)('+'), CellVolume(cell)('-')) / FacetArea(cell) + +# SIPG: -1.0, IIPG: 0.0, NIPG: 1.0 +theta = -1.0 + +# penalty factor +alpha = 1.0 +h_ext = CellVolume(cell) / FacetArea(cell) +gamma_ext = (alpha * v_degree * (v_degree + dim - 1)) / h_ext +h_int = Min(CellVolume(cell)('+'), CellVolume(cell)('-')) / FacetArea(cell) +gamma_int = (alpha * v_degree * (v_degree + dim - 1)) / h_int r = inner(grad(u), grad(v))*dx \ - p*div(v)*dx \ - q*div(u)*dx \ - + inner(avg(grad(u))*n, jump(v))*dS \ - + sigma / h_e * inner(jump(u), jump(v))*dS \ - - eps * inner(avg(grad(v))*n, jump(u))*dS \ - - avg(p)*inner(jump(v), n)*dS \ - - avg(q)*inner(jump(u), n)*dS \ + - inner(avg(grad(u))*n, jump(v))*dS \ + + gamma_int * inner(jump(u), jump(v))*dS \ + + theta * inner(avg(grad(v))*n, jump(u))*dS \ + + avg(p)*inner(jump(v), n)*dS \ + + avg(q)*inner(jump(u), n)*dS \ - inner(grad(u)*n, v)*ds \ - + sigma / h_e * inner(u-g_v, v)*ds \ - + eps * inner(grad(v)*n, u-g_v)*ds \ + + gamma_ext * inner(u-g_v, v)*ds \ + + theta * inner(grad(v)*n, u-g_v)*ds \ + p*inner(v, n)*ds \ + q*inner(u-g_v, n)*ds -forms = [r] exact_solution = g_v, 8*(1.-x[0]) \ No newline at end of file diff --git a/bin/timings.sh b/bin/timings.sh new file mode 100755 index 0000000000000000000000000000000000000000..b85e6589dc9086b1488f788cccc86118e8a51e1e --- /dev/null +++ b/bin/timings.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +# If an argument was given use it as the working directory +if [ $# -eq 1 ] +then + cd $1 +fi + +# Search for runnable executables +FILES=$(ls *.ini | grep -v '^verify') +for inifile in $FILES +do + line=$(grep ^"opcounter = " $inifile) + extract=${line##opcounter = } + UPPER=10 + if [ $extract -eq 1 ] + then + UPPER=1 + fi + COUNT=0 + while [ $COUNT -lt $UPPER ]; do + exec=${inifile%.ini} + MAXCORES=40 + mpirun --bind-to core -np $MAXCORES 
./$exec $inifile + COUNT=$((COUNT + 1)) + done +done diff --git a/cmake/modules/CMakeLists.txt b/cmake/modules/CMakeLists.txt index 415cd9c6354e3318db1675739c36a3f90bee2916..85e38089ee5a7389a34f1697ae7a6939e8cf8a45 100644 --- a/cmake/modules/CMakeLists.txt +++ b/cmake/modules/CMakeLists.txt @@ -1,2 +1,5 @@ -install(FILES DunePerftoolMacros.cmake +install(FILES deplist.py + DunePerftoolMacros.cmake + GeneratedSystemtests.cmake + perftool_sourcepath.py DESTINATION ${DUNE_INSTALL_MODULEDIR}) diff --git a/cmake/modules/DunePerftoolMacros.cmake b/cmake/modules/DunePerftoolMacros.cmake index 66d0b4d7537b4ba44c69bfa4ebfd6b8fe9aae2c4..ca33bfae93d7bcedc198d864518f424944924efb 100644 --- a/cmake/modules/DunePerftoolMacros.cmake +++ b/cmake/modules/DunePerftoolMacros.cmake @@ -8,28 +8,23 @@ # # The UFL file to create the executable from. # -# .. cmake_param:: TARGET +# .. cmake_param:: INIFILE # :single: # :required: # -# The name given to the added executable target. -# -# .. cmake_param:: OPERATOR -# :single: -# -# The local operator file name to generate. Defaults -# to a suitably mangled, but not easily readable name. +# The ini file that controls the form compilation process. +# It is expected to contain a [formcompiler] section # -# .. cmake_param:: DRIVER +# .. cmake_param:: TARGET # :single: +# :required: # -# The driver file name to generate. Defaults -# to a suitably mangled, but not easily readable name. +# The name given to the added executable target. # -# .. cmake_param:: MAIN +# .. cmake_param:: SOURCE # -# The main source file to generate. Defaults -# to a suitably mangled, but not easily readable name. +# The cc source file to build from. If omitted, a minimal +# source file and a driver file will be generated. # # .. cmake_param:: FORM_COMPILER_ARGS # :multi: @@ -44,18 +39,17 @@ # Additional dependencies of the generated executable (changes in those # will retrigger generation) # +# .. 
cmake_param:: EXCLUDE_FROM_ALL +# :option: +# +# Set this option, if you do not want the target to be automatically +# built. This option is forwarded to the builtin command add_executable. +# # Add an executable to the project that gets automatically # generated at configure time with the form compiler uf2pdelab. # Regeneration is triggered correctly if the UFL file or the # form compiler changed. # -# .. cmake_variable:: UFL2PDELAB_INTERACTIVE -# -# If this variable is set, all code generation will be done in -# interactive mode. This option is interesting in development -# of the form compiler, but might be quite tedious in production -# and automated testing. -# add_custom_target(generation) @@ -63,6 +57,7 @@ add_custom_target(generation) # to have correct retriggers of generated executables if(CMAKE_PROJECT_NAME STREQUAL dune-perftool) set(UFL2PDELAB_GLOB_PATTERN "${CMAKE_SOURCE_DIR}/python/*.py") + set(perftool_path ${CMAKE_SOURCE_DIR}/cmake/modules) else() dune_module_path(MODULE dune-perftool RESULT perftool_path @@ -76,8 +71,8 @@ endif() file(GLOB_RECURSE UFL2PDELAB_SOURCES ${UFL2PDELAB_GLOB_PATTERN}) function(add_generated_executable) - set(OPTIONS) - set(SINGLE TARGET OPERATOR DRIVER UFLFILE) + set(OPTIONS EXCLUDE_FROM_ALL) + set(SINGLE TARGET SOURCE UFLFILE INIFILE) set(MULTI FORM_COMPILER_ARGS DEPENDS) include(CMakeParseArguments) cmake_parse_arguments(GEN "${OPTIONS}" "${SINGLE}" "${MULTI}" ${ARGN}) @@ -96,42 +91,56 @@ function(add_generated_executable) if(NOT IS_ABSOLUTE GEN_UFLFILE) set(GEN_UFLFILE ${CMAKE_CURRENT_SOURCE_DIR}/${GEN_UFLFILE}) endif() - if(NOT GEN_OPERATOR) - set(GEN_OPERATOR ${GEN_TARGET}_operator.hh) - set(GEN_OPERATOR ${CMAKE_CURRENT_BINARY_DIR}/${GEN_OPERATOR}) - endif() - if(NOT GEN_DRIVER) - set(GEN_DRIVER ${GEN_TARGET}_driver.hh) - set(GEN_DRIVER ${CMAKE_CURRENT_BINARY_DIR}/${GEN_DRIVER}) + if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${GEN_INIFILE}) + set(GEN_INIFILE ${CMAKE_CURRENT_SOURCE_DIR}/${GEN_INIFILE}) endif() - if(NOT 
GEN_MAIN) - set(GEN_MAIN ${GEN_TARGET}_main.cc) - set(GEN_MAIN ${CMAKE_CURRENT_BINARY_DIR}/${GEN_MAIN}) + if(NOT GEN_SOURCE) + # Generate a driver file + set(GEN_SOURCE ${GEN_TARGET}_driver.cc) + add_custom_command(OUTPUT ${GEN_SOURCE} + COMMAND ${CMAKE_BINARY_DIR}/run-in-dune-env generate_driver + --uflfile ${GEN_UFLFILE} + --ini-file ${GEN_INIFILE} + --target-name ${GEN_TARGET} + --driver-file ${GEN_SOURCE} + --project-basedir ${CMAKE_BINARY_DIR} + ${GEN_FORM_COMPILER_ARGS} + DEPENDS ${GEN_UFLFILE} ${UFL2PDELAB_SOURCES} ${GEN_DEPENDS} ${DUNE_PERFTOOL_ADDITIONAL_PYTHON_SOURCES} + COMMENT "Generating driver for the target ${GEN_TARGET}" + ) endif() - if(UFL2PDELAB_INTERACTIVE) - set(GEN_FORM_COMPILER_ARGS ${GEN_FORM_COMPILER_ARGS} --interactive) + if(GEN_EXCLUDE_FROM_ALL) + set(GEN_EXCLUDE_FROM_ALL "EXCLUDE_FROM_ALL") + else() + set(GEN_EXCLUDE_FROM_ALL "") endif() - # Write a standard main function - dune_module_path(MODULE dune-perftool - RESULT perftool_path - CMAKE_MODULES) - configure_file(${perftool_path}/StandardMain.cmake ${GEN_MAIN}) - - add_custom_command(OUTPUT ${GEN_OPERATOR} ${GEN_DRIVER} - COMMAND ${CMAKE_BINARY_DIR}/run-in-dune-env ufl2pdelab - --project-basedir ${CMAKE_BINARY_DIR} - --operator-file ${GEN_OPERATOR} - --driver-file ${GEN_DRIVER} - ${GEN_FORM_COMPILER_ARGS} - --uflfile ${GEN_UFLFILE} - DEPENDS ${GEN_UFLFILE} ${UFL2PDELAB_SOURCES} ${GEN_DEPENDS} ${DUNE_PERFTOOL_ADDITIONAL_PYTHON_SOURCES} - COMMENT "Running ufl2pdelab for the target ${GEN_TARGET}" - ) + # Parse a mapping of operators to build and their respective filenames + dune_execute_process(COMMAND ${CMAKE_BINARY_DIR}/run-in-dune-env python ${perftool_path}/deplist.py ${GEN_INIFILE} ${GEN_TARGET} + OUTPUT_VARIABLE depdata + ) + parse_python_data(PREFIX depdata INPUT ${depdata}) - add_executable(${GEN_TARGET} ${GEN_MAIN} ${GEN_OPERATOR} ${GEN_DRIVER}) + # Define build rules for all operator header files and gather a list of them + set(header_deps) + foreach(op ${depdata___operators}) 
+ add_custom_command(OUTPUT ${depdata___${op}} + COMMAND ${CMAKE_BINARY_DIR}/run-in-dune-env generate_operators + --project-basedir ${CMAKE_BINARY_DIR} + ${GEN_FORM_COMPILER_ARGS} + --uflfile ${GEN_UFLFILE} + --ini-file ${GEN_INIFILE} + --target-name ${GEN_TARGET} + --operator-to-build ${op} + DEPENDS ${GEN_UFLFILE} ${UFL2PDELAB_SOURCES} ${GEN_DEPENDS} ${DUNE_PERFTOOL_ADDITIONAL_PYTHON_SOURCES} + COMMENT "Generating operator file ${depdata___${op}} for the target ${GEN_TARGET}" + ) + set(header_deps ${header_deps} ${depdata___${op}}) + endforeach() + add_executable(${GEN_TARGET} ${GEN_EXCLUDE_FROM_ALL} ${GEN_SOURCE} ${header_deps}) + target_include_directories(${GEN_TARGET} PUBLIC ${CMAKE_CURRENT_BINARY_DIR}) add_dependencies(generation ${GEN_TARGET}) endfunction() -include(GeneratedSystemtests) \ No newline at end of file +include(GeneratedSystemtests) diff --git a/cmake/modules/GeneratedSystemtests.cmake b/cmake/modules/GeneratedSystemtests.cmake index de9dc259ffd2442bde0c522e2883e79b461ba621..634d7ad35605ef6707436eaad98f882c40bb4c1d 100644 --- a/cmake/modules/GeneratedSystemtests.cmake +++ b/cmake/modules/GeneratedSystemtests.cmake @@ -4,7 +4,7 @@ function(dune_add_formcompiler_system_test) # parse arguments set(OPTION DEBUG NO_TESTS) - set(SINGLE INIFILE BASENAME SCRIPT UFLFILE) + set(SINGLE INIFILE BASENAME SCRIPT UFLFILE SOURCE) set(MULTI CREATED_TARGETS) cmake_parse_arguments(SYSTEMTEST "${OPTION}" "${SINGLE}" "${MULTI}" ${ARGN}) @@ -12,11 +12,15 @@ function(dune_add_formcompiler_system_test) message(WARNING "dune_add_system_test: Encountered unparsed arguments: This often indicates typos in named arguments") endif() - # construct a string containg DEBUG to pass the debug flag to the other macros + # Construct strings to pass options to other functions set(DEBUG "") if(SYSTEMTEST_DEBUG) set(DEBUG "DEBUG") endif() + set(SOURCE "") + if(SYSTEMTEST_SOURCE) + set(SOURCE SOURCE ${SYSTEMTEST_SOURCE}) + endif() # set a default for the script. 
call_executable.py just calls the executable. # There, it is also possible to hook in things depending on the inifile @@ -49,12 +53,26 @@ function(dune_add_formcompiler_system_test) add_generated_executable(TARGET ${tname} UFLFILE ${SYSTEMTEST_UFLFILE} - FORM_COMPILER_ARGS --ini-file ${inifile} + INIFILE "${CMAKE_CURRENT_BINARY_DIR}/${inifile}" DEPENDS ${SYSTEMTEST_INIFILE} + EXCLUDE_FROM_ALL + ${SOURCE} ) - # Exclude the target from all - set_property(TARGET ${tname} PROPERTY EXCLUDE_FROM_ALL 1) + # Enrich the target with preprocessor variables from the __static section + # just the way that dune-testtools does. + dune_execute_process(COMMAND ${CMAKE_BINARY_DIR}/run-in-dune-env dune_extract_static.py + --ini ${inifile} + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + OUTPUT_VARIABLE output + ERROR_MESSAGE "Error extracting static info from ${inifile}") + parse_python_data(PREFIX STAT INPUT "${output}") + + foreach(config ${STAT___CONFIGS}) + foreach(cd ${STAT___STATIC_DATA}) + target_compile_definitions(${tname} PUBLIC "${cd}=${STAT_${config}_${cd}}") + endforeach() + endforeach() # Add dependency on the metatarget for this systemtest if(NOT ${INIINFO_${inifile}_suffix} STREQUAL "__empty") @@ -67,10 +85,15 @@ function(dune_add_formcompiler_system_test) _add_test(NAME ${tname} COMMAND ${CMAKE_BINARY_DIR}/run-in-dune-env ${SYSTEMTEST_SCRIPT} - --exec ${tname} - --ini "${CMAKE_CURRENT_BINARY_DIR}/${inifile}" - --source ${CMAKE_CURRENT_SOURCE_DIR} - ) + --exec ${tname} + --ini "${CMAKE_CURRENT_BINARY_DIR}/${inifile}" + --source ${CMAKE_CURRENT_SOURCE_DIR} + --mpi-exec "${MPIEXEC}" + --mpi-numprocflag=${MPIEXEC_NUMPROC_FLAG} + --mpi-preflags "${MPIEXEC_PREFLAGS}" + --mpi-postflags "${MPIEXEC_POSTFLAGS}" + --max-processors=${DUNE_MAX_TEST_CORES} + ) set_tests_properties(${tname} PROPERTIES SKIP_RETURN_CODE 77) set_tests_properties(${tname} PROPERTIES TIMEOUT 60) diff --git a/cmake/modules/deplist.py b/cmake/modules/deplist.py new file mode 100755 index 
0000000000000000000000000000000000000000..9cb5d7d42cbfc712e18e37798dffc2d553416f8c --- /dev/null +++ b/cmake/modules/deplist.py @@ -0,0 +1,26 @@ +# Return the list of generated files for a given ini file +# This is used by the build system, do not use this yourself! + +from dune.testtools.parser import parse_ini_file +from dune.testtools.cmakeoutput import printForCMake + +import sys + +ini = parse_ini_file(sys.argv[1]) +section = ini["formcompiler"] +operators = section.get("operators", "r") +operators = [i.strip() for i in operators.split(",")] + +def get_filename(operator): + ssection = ini.get("formcompiler.{}".format(operator), {}) + if ssection.get("filename", None): + return ssection["filename"] + else: + classname = ssection.get("classname", "{}Operator".format(ssection.get("form", operator))) + return "{}_{}_file.hh".format(sys.argv[2], classname) + +result = {"__{}".format(o): get_filename(o) for o in operators} +result["__operators"] = ";".join(operators) + +printForCMake(result) +sys.exit(0) diff --git a/dune/perftool/blockstructured/blockstructuredqkfem.hh b/dune/perftool/blockstructured/blockstructuredqkfem.hh index fd27ef36d65ac8f25c18f3f44fd0ebf68d16189e..90ad10e684fe2735527e527701dc4962b510e3d7 100644 --- a/dune/perftool/blockstructured/blockstructuredqkfem.hh +++ b/dune/perftool/blockstructured/blockstructuredqkfem.hh @@ -15,7 +15,7 @@ namespace Dune { //! 
\ingroup FiniteElementMap template<typename GV, typename D, typename R, std::size_t k> class BlockstructuredQkLocalFiniteElementMap - : public SimpleLocalFiniteElementMap< Dune::QkLocalFiniteElement<D,R,GV::dimension,k> > + : public SimpleLocalFiniteElementMap< Dune::QkLocalFiniteElement<D,R,GV::dimension,k>, GV::dimension> { public: diff --git a/dune/perftool/common/opcounter.hh b/dune/perftool/common/opcounter.hh index a103a06550665b26e840bf083c60b8019296b2be..edb16eaacc39f3c383fc7ace2fc7b733354116e7 100644 --- a/dune/perftool/common/opcounter.hh +++ b/dune/perftool/common/opcounter.hh @@ -16,6 +16,16 @@ namespace oc { template<typename F> class OpCounter; + template<typename T> + struct isOpCounter : public std::false_type + {}; + + template<typename F> + struct isOpCounter<OpCounter<F>> : public std::true_type + {}; + + template<typename T> + constexpr bool isOpCounterV = isOpCounter<T>::value; } namespace Dune { diff --git a/dune/perftool/common/vectorclass.hh b/dune/perftool/common/vectorclass.hh index 3d3fae8b61d6a435391160456952144dfd12d96d..6204b0a213796861533b9668f4c9a869198eb7cc 100644 --- a/dune/perftool/common/vectorclass.hh +++ b/dune/perftool/common/vectorclass.hh @@ -11,12 +11,38 @@ #define BARRIER asm volatile("": : :"memory") +template<typename T> +struct base_floatingpoint +{}; + #ifndef ENABLE_COUNTER #include <dune/perftool/vectorclass/vectorclass.h> #include <dune/perftool/vectorclass/vectormath_exp.h> #include <dune/perftool/vectorclass/vectormath_trig.h> +template<> +struct base_floatingpoint<Vec4d> +{ + using value = double; +}; + +template<> +struct base_floatingpoint<Vec8f> +{ + using value = float; +}; + +#if MAX_VECTOR_SIZE >= 512 + +template<> +struct base_floatingpoint<Vec8d> +{ + using value = double; +}; + +#endif + #else #include <algorithm> @@ -46,10 +72,11 @@ struct Vec4d BARRIER; } - Vec4d(double d) + Vec4d(F dl, F du) { BARRIER; - std::fill(_d,_d+4,d); + std::fill(_d,_d+2,dl); + std::fill(_d+2,_d+4,du); BARRIER; } @@ -114,6 
+141,11 @@ struct Vec4d }; +template<> +struct base_floatingpoint<Vec4d> +{ + using value = typename Vec4d::F; +}; /***************************************************************************** * @@ -142,7 +174,7 @@ static inline Vec4d & operator += (Vec4d & a, Vec4d const & b) { static inline Vec4d operator ++ (Vec4d & a, int) { BARRIER; Vec4d a0 = a; - a = a + 1.0; + a = a + Vec4d(1.0); BARRIER; return a0; } @@ -150,7 +182,7 @@ static inline Vec4d operator ++ (Vec4d & a, int) { // prefix operator ++ static inline Vec4d & operator ++ (Vec4d & a) { BARRIER; - a = a + 1.0; + a = a + Vec4d(1.0); BARRIER; return a; } @@ -187,7 +219,7 @@ static inline Vec4d & operator -= (Vec4d & a, Vec4d const & b) { static inline Vec4d operator -- (Vec4d & a, int) { BARRIER; Vec4d a0 = a; - a = a - 1.0; + a = a - Vec4d(1.0); BARRIER; return a0; } @@ -195,7 +227,7 @@ static inline Vec4d operator -- (Vec4d & a, int) { // prefix operator -- static inline Vec4d & operator -- (Vec4d & a) { BARRIER; - a = a - 1.0; + a = a - Vec4d(1.0); BARRIER; return a; } @@ -248,6 +280,30 @@ static inline _vcl::Vec4db operator == (Vec4d const & a, Vec4d const & b) { return a_ == b_; } +// vector operator == : returns true for elements for which a == b +static inline _vcl::Vec4db operator == (oc::OpCounter<double> a, Vec4d const & b) { + BARRIER; + _vcl::Vec4d a_(a._v), b_; + BARRIER; + b_.load(b._d[0].data()); + BARRIER; + Vec4d::F::comparisons(4); + BARRIER; + return a_ == b_; +} + +// vector operator == : returns true for elements for which a == b +static inline _vcl::Vec4db operator == (Vec4d const & b, oc::OpCounter<double> a) { + BARRIER; + _vcl::Vec4d a_(a._v), b_; + BARRIER; + b_.load(b._d[0].data()); + BARRIER; + Vec4d::F::comparisons(4); + BARRIER; + return a_ == b_; +} + // vector operator != : returns true for elements for which a != b static inline _vcl::Vec4db operator != (Vec4d const & a, Vec4d const & b) { BARRIER; @@ -262,6 +318,30 @@ static inline _vcl::Vec4db operator != (Vec4d const & 
a, Vec4d const & b) { return a_ != b_; } +// vector operator != : returns true for elements for which a != b +static inline _vcl::Vec4db operator != (oc::OpCounter<double> a, Vec4d const & b) { + BARRIER; + _vcl::Vec4d a_(a._v), b_; + BARRIER; + b_.load(b._d[0].data()); + BARRIER; + Vec4d::F::comparisons(4); + BARRIER; + return a_ != b_; +} + +// vector operator != : returns true for elements for which a != b +static inline _vcl::Vec4db operator != (Vec4d const & b, oc::OpCounter<double> a) { + BARRIER; + _vcl::Vec4d a_(a._v), b_; + BARRIER; + b_.load(b._d[0].data()); + BARRIER; + Vec4d::F::comparisons(4); + BARRIER; + return a_ != b_; +} + // vector operator < : returns true for elements for which a < b static inline _vcl::Vec4db operator < (Vec4d const & a, Vec4d const & b) { BARRIER; @@ -276,6 +356,30 @@ static inline _vcl::Vec4db operator < (Vec4d const & a, Vec4d const & b) { return a_ < b_; } +// vector operator < : returns true for elements for which a < b +static inline _vcl::Vec4db operator < (oc::OpCounter<double> a, Vec4d const & b) { + BARRIER; + _vcl::Vec4d a_(a._v), b_; + BARRIER; + b_.load(b._d[0].data()); + BARRIER; + Vec4d::F::comparisons(4); + BARRIER; + return a_ < b_; +} + +// vector operator < : returns true for elements for which a < b +static inline _vcl::Vec4db operator < (Vec4d const & b, oc::OpCounter<double> a) { + BARRIER; + _vcl::Vec4d a_(a._v), b_; + BARRIER; + b_.load(b._d[0].data()); + BARRIER; + Vec4d::F::comparisons(4); + BARRIER; + return b_ < a_; +} + // vector operator <= : returns true for elements for which a <= b static inline _vcl::Vec4db operator <= (Vec4d const & a, Vec4d const & b) { BARRIER; @@ -290,16 +394,61 @@ static inline _vcl::Vec4db operator <= (Vec4d const & a, Vec4d const & b) { return a_ <= b_; } +// vector operator <= : returns true for elements for which a <= b +static inline _vcl::Vec4db operator <= (oc::OpCounter<double> a, Vec4d const & b) { + BARRIER; + _vcl::Vec4d a_(a._v), b_; + BARRIER; + 
b_.load(b._d[0].data()); + BARRIER; + Vec4d::F::comparisons(4); + BARRIER; + return a_ <= b_; +} + +// vector operator <= : returns true for elements for which a <= b +static inline _vcl::Vec4db operator <= (Vec4d const & b, oc::OpCounter<double> a) { + BARRIER; + _vcl::Vec4d a_(a._v), b_; + BARRIER; + b_.load(b._d[0].data()); + BARRIER; + Vec4d::F::comparisons(4); + BARRIER; + return b_ <= a_; +} + // vector operator > : returns true for elements for which a > b static inline _vcl::Vec4db operator > (Vec4d const & a, Vec4d const & b) { return b < a; } +// vector operator > : returns true for elements for which a > b +static inline _vcl::Vec4db operator > (oc::OpCounter<double> a, Vec4d const & b) { + return b < a; /* scalar a > vector b holds lane-wise exactly when b < a */ +} + +// vector operator > : returns true for elements for which a > b +static inline _vcl::Vec4db operator > (Vec4d const & b, oc::OpCounter<double> a) { + return a < b; +} + // vector operator >= : returns true for elements for which a >= b static inline _vcl::Vec4db operator >= (Vec4d const & a, Vec4d const & b) { return b <= a; } +// vector operator >= : returns true for elements for which a >= b +static inline _vcl::Vec4db operator >= (oc::OpCounter<double> a, Vec4d const & b) { + return b <= a; +} + +// vector operator >= : returns true for elements for which a >= b +static inline _vcl::Vec4db operator >= (Vec4d const & b, oc::OpCounter<double> a) { + return a <= b; +} + + // avoid logical operators for now, I don't think we need them #if 0 @@ -415,81 +564,40 @@ static inline Vec4d exp(Vec4d const & a){ return r; } - -// ignore pow() for now -#if 0 - -// pow(Vec4d, int): -template <typename TT> static Vec4d pow(Vec4d const & a, TT n); - -// Raise floating point numbers to integer power n -template <> -inline Vec4d pow<int>(Vec4d const & x0, int n) { - return pow_template_i<Vec4d>(x0, n); +// pow +template <typename TT> +static inline Vec4d pow(Vec4d const & a, oc::OpCounter<TT> n) +{ + BARRIER; + Vec4d r; + std::transform(a._d,a._d+4,r._d,[=](auto 
x){ return pow(x, n); }); + BARRIER; + return r; } -// allow conversion from unsigned int -template <> -inline Vec4d pow<uint32_t>(Vec4d const & x0, uint32_t n) { - return pow_template_i<Vec4d>(x0, (int)n); +// pow +template <typename TT> +static inline +std::enable_if_t<not oc::isOpCounterV<TT>, Vec4d> pow(Vec4d const & a, TT n) +{ + BARRIER; + Vec4d r; + std::transform(a._d,a._d+4,r._d,[=](auto x){ return pow(x, n); }); + BARRIER; + return r; } -// Raise floating point numbers to integer power n, where n is a compile-time constant -template <int n> -static inline Vec4d pow_n(Vec4d const & a) { - if (n < 0) return Vec4d(1.0) / pow_n<-n>(a); - if (n == 0) return Vec4d(1.0); - if (n >= 256) return pow(a, n); - Vec4d x = a; // a^(2^i) - Vec4d y; // accumulator - const int lowest = n - (n & (n-1));// lowest set bit in n - if (n & 1) y = x; - if (n < 2) return y; - x = x*x; // x^2 - if (n & 2) { - if (lowest == 2) y = x; else y *= x; - } - if (n < 4) return y; - x = x*x; // x^4 - if (n & 4) { - if (lowest == 4) y = x; else y *= x; - } - if (n < 8) return y; - x = x*x; // x^8 - if (n & 8) { - if (lowest == 8) y = x; else y *= x; - } - if (n < 16) return y; - x = x*x; // x^16 - if (n & 16) { - if (lowest == 16) y = x; else y *= x; - } - if (n < 32) return y; - x = x*x; // x^32 - if (n & 32) { - if (lowest == 32) y = x; else y *= x; - } - if (n < 64) return y; - x = x*x; // x^64 - if (n & 64) { - if (lowest == 64) y = x; else y *= x; - } - if (n < 128) return y; - x = x*x; // x^128 - if (n & 128) { - if (lowest == 128) y = x; else y *= x; - } - return y; -} - -template <int n> -static inline Vec4d pow(Vec4d const & a, Const_int_t<n>) { - return pow_n<n>(a); +static inline Vec4d select(const _vcl::Vec4db& s, const Vec4d& a, const Vec4d& b) +{ + BARRIER; + Vec4d r; + for(int i=0; i<4; ++i) + r._d[i] = s.extract(i) ? a._d[i] : b._d[i]; + BARRIER; + return r; } -#endif - // function round: round to nearest integer (even). 
(result as double vector) static inline Vec4d round(Vec4d const & a) { BARRIER; @@ -632,10 +740,19 @@ struct Vec8d BARRIER; } - Vec8d(double d) + Vec8d(F dl, F du) { BARRIER; - std::fill(_d,_d+8,d); + std::fill(_d,_d+4,dl); + std::fill(_d+4,_d+8,du); + BARRIER; + } + + Vec8d(Vec4d low, Vec4d high) + { + BARRIER; + std::copy(low._d, low._d+4, _d); // std::copy is (src_first, src_last, dest): fill _d[0..3] from low + std::copy(high._d, high._d+4, _d+4); // fill _d[4..7] from high BARRIER; } @@ -645,6 +762,24 @@ struct Vec8d BARRIER; } + Vec4d get_low() const + { + BARRIER; + Vec4d ret; + ret.load(_d); + BARRIER; + return ret; + } + + Vec4d get_high() const + { + BARRIER; + Vec4d ret; + ret.load(_d + 4); + BARRIER; + return ret; + } + Vec8d& load(const F* p) { BARRIER; @@ -700,6 +835,11 @@ struct Vec8d }; +template<> +struct base_floatingpoint<Vec8d> +{ + using value = typename Vec8d::F; +}; /***************************************************************************** * @@ -728,7 +868,7 @@ static inline Vec8d & operator += (Vec8d & a, Vec8d const & b) { static inline Vec8d operator ++ (Vec8d & a, int) { BARRIER; Vec8d a0 = a; - a = a + 1.0; + a = a + Vec8d(1.0); BARRIER; return a0; } @@ -736,7 +876,7 @@ static inline Vec8d operator ++ (Vec8d & a, int) { // prefix operator ++ static inline Vec8d & operator ++ (Vec8d & a) { BARRIER; - a = a + 1.0; + a = a + Vec8d(1.0); BARRIER; return a; } @@ -773,7 +913,7 @@ static inline Vec8d & operator -= (Vec8d & a, Vec8d const & b) { static inline Vec8d operator -- (Vec8d & a, int) { BARRIER; Vec8d a0 = a; - a = a - 1.0; + a = a - Vec8d(1.0); BARRIER; return a0; } @@ -781,7 +921,7 @@ static inline Vec8d operator -- (Vec8d & a, int) { // prefix operator -- static inline Vec8d & operator -- (Vec8d & a) { BARRIER; - a = a - 1.0; + a = a - Vec8d(1.0); BARRIER; return a; } @@ -1139,10 +1279,15 @@ struct Vec8f }; +template<> +struct base_floatingpoint<Vec8f> +{ + using value = typename Vec8f::F; +}; /***************************************************************************** * -* Operators for Vec4d +* Operators for Vec8f *
*****************************************************************************/ @@ -1167,7 +1312,7 @@ static inline Vec8f & operator += (Vec8f & a, Vec8f const & b) { static inline Vec8f operator ++ (Vec8f & a, int) { BARRIER; Vec8f a0 = a; - a = a + 1.0; + a = a + Vec8f(1.0); BARRIER; return a0; } @@ -1175,7 +1320,7 @@ static inline Vec8f operator ++ (Vec8f & a, int) { // prefix operator ++ static inline Vec8f & operator ++ (Vec8f & a) { BARRIER; - a = a + 1.0; + a = a + Vec8f(1.0); BARRIER; return a; } @@ -1212,7 +1357,7 @@ static inline Vec8f & operator -= (Vec8f & a, Vec8f const & b) { static inline Vec8f operator -- (Vec8f & a, int) { BARRIER; Vec8f a0 = a; - a = a - 1.0; + a = a - Vec8f(1.0); BARRIER; return a0; } @@ -1220,7 +1365,7 @@ static inline Vec8f operator -- (Vec8f & a, int) { // prefix operator -- static inline Vec8f & operator -- (Vec8f & a) { BARRIER; - a = a - 1.0; + a = a - Vec8f(1.0); BARRIER; return a; } diff --git a/dune/perftool/sumfact/horizontaladd.hh b/dune/perftool/sumfact/horizontaladd.hh new file mode 100644 index 0000000000000000000000000000000000000000..db7634f0e5507214318dabb719240ff3674808ed --- /dev/null +++ b/dune/perftool/sumfact/horizontaladd.hh @@ -0,0 +1,19 @@ +#ifndef DUNE_PERFTOOL_SUMFACT_HORIZONTALADD_HH +#define DUNE_PERFTOOL_SUMFACT_HORIZONTALADD_HH + +#include<dune/perftool/common/vectorclass.hh> + + +template<class V> +typename base_floatingpoint<V>::value horizontal_add_lower(const V& x) +{ + return horizontal_add(x.get_low()); +} + +template<class V> +typename base_floatingpoint<V>::value horizontal_add_upper(const V& x) +{ + return horizontal_add(x.get_high()); +} + +#endif diff --git a/dune/perftool/sumfact/onedquadrature.hh b/dune/perftool/sumfact/onedquadrature.hh index 72c4cde64cc7afb7e337b7bb7744ce33f8977e22..6ff3195c54f29c843ca5822fdaf05710834f57de 100644 --- a/dune/perftool/sumfact/onedquadrature.hh +++ b/dune/perftool/sumfact/onedquadrature.hh @@ -30,7 +30,7 @@ void onedQuadraturePointsWeights(RF (&qp)[m], RF 
(&qw)[m]){ } // end 1D quadrature loop // Order 1D quadrature points lexicographically for (size_t j=0; j<m/2; j++){ - if (qp[j]>0.5){ + if (qp[j]>DF(0.5)){ RF temp=qp[j]; qp[j] = qp[m-1-j]; qp[m-1-j] = temp; diff --git a/dune/perftool/sumfact/transposereg.hh b/dune/perftool/sumfact/transposereg.hh index f73c6a2f717b243e32411a5feea948c7777ce430..d2ce09c39bf6ff5b87cc236cbf5d2a5bfcf850f1 100644 --- a/dune/perftool/sumfact/transposereg.hh +++ b/dune/perftool/sumfact/transposereg.hh @@ -66,6 +66,9 @@ void transpose_reg(Vec8d& a0, Vec8d& a1, Vec8d& a2, Vec8d& a3) a3 = blend8d<4,5,6,7,12,13,14,15>(b1, b3); } +/** TODO: Is this transpose using blend8d superior to the swap_halves + * version below using get_low/get_high? + */ void transpose_reg (Vec8d& a0, Vec8d& a1) { Vec8d b0, b1; @@ -75,6 +78,48 @@ void transpose_reg (Vec8d& a0, Vec8d& a1) a1 = b1; } +namespace impl +{ + /* (alow, aupp), (blow, bupp) -> (alow, blow), (aupp, bupp) */ + void swap_halves(Vec8d& a, Vec8d& b) + { + Vec4d tmp = a.get_high(); + a = Vec8d(a.get_low(), b.get_low()); + b = Vec8d(tmp, b.get_high()); + } + + /* A 4x8 transpose that behaves exactly like Vec4d's 4x4 transpose + * on the lower and upper halves of the Vec8d + */ + void _transpose4x8(Vec8d& a0, Vec8d& a1, Vec8d& a2, Vec8d& a3) + { + Vec8d b0,b1,b2,b3; + b0 = blend8d<0,8,2,10,4,12,6,14>(a0,a1); + b1 = blend8d<1,9,3,11,5,13,7,15>(a0,a1); + b2 = blend8d<0,8,2,10,4,12,6,14>(a2,a3); + b3 = blend8d<1,9,3,11,5,13,7,15>(a2,a3); + a0 = blend8d<0,1,8,9,4,5,12,13>(b0,b2); + a1 = blend8d<0,1,8,9,4,5,12,13>(b1,b3); + a2 = blend8d<2,3,10,11,6,7,14,15>(b0,b2); + a3 = blend8d<2,3,10,11,6,7,14,15>(b1,b3); + } +} + +/* This is the 8x8 transpose of Vec8d's. It uses the same shuffling + * as Vec4d, but on the 4x4 subblocks. Afterwards, the off diagonal + * blocks are swapped. 
+ */ +void transpose_reg(Vec8d& a0, Vec8d& a1, Vec8d& a2, Vec8d& a3, + Vec8d& a4, Vec8d& a5, Vec8d& a6, Vec8d& a7) +{ + impl::_transpose4x8(a0,a1,a2,a3); + impl::_transpose4x8(a4,a5,a6,a7); + impl::swap_halves(a0,a4); + impl::swap_halves(a1,a5); + impl::swap_halves(a2,a6); + impl::swap_halves(a3,a7); +} + #endif #endif diff --git a/patches/apply_patches.sh b/patches/apply_patches.sh index 4abe4fa9de8f80bac0465ccfa7724035ce586063..5fa3ab5e28c162fb391e0b66ec5aec066d4b903a 100755 --- a/patches/apply_patches.sh +++ b/patches/apply_patches.sh @@ -2,6 +2,7 @@ pushd python/loopy git apply ../../patches/loopy/Current.patch +git apply ../../patches/loopy/0001-Disable-a-logging-statement-that-breaks.patch popd pushd dune/perftool/vectorclass @@ -12,7 +13,3 @@ pushd python/ufl git apply ../../patches/ufl/conditional-uflid.patch git apply ../../patches/ufl/0001-Remove-special-case-for-variable-in-ufl2dot.patch popd - -pushd python/ufl -git apply ../../patches/ufl/tensor-product-element.patch -popd diff --git a/patches/loopy/0001-Disable-a-logging-statement-that-breaks.patch b/patches/loopy/0001-Disable-a-logging-statement-that-breaks.patch new file mode 100644 index 0000000000000000000000000000000000000000..436533b399471411d105addab125814de66ec4e5 --- /dev/null +++ b/patches/loopy/0001-Disable-a-logging-statement-that-breaks.patch @@ -0,0 +1,33 @@ +From abac8a2068e0333a0f00c276519c24c5c16bedf4 Mon Sep 17 00:00:00 2001 +From: Dominic Kempf <dominic.kempf@iwr.uni-heidelberg.de> +Date: Mon, 26 Mar 2018 11:13:42 +0200 +Subject: [PATCH] Disable a logging statement that breaks + +--- + loopy/kernel/tools.py | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/loopy/kernel/tools.py b/loopy/kernel/tools.py +index 15840180..cb877eb6 100644 +--- a/loopy/kernel/tools.py ++++ b/loopy/kernel/tools.py +@@ -197,11 +197,11 @@ def find_all_insn_inames(kernel): + assert isinstance(write_deps, frozenset), type(insn) + assert isinstance(iname_deps, frozenset), 
type(insn) + +- logger.debug("%s: find_all_insn_inames: %s (init): %s - " +- "read deps: %s - write deps: %s" % ( +- kernel.name, insn.id, ", ".join(sorted(iname_deps)), +- ", ".join(sorted(read_deps)), ", ".join(sorted(write_deps)), +- )) ++# logger.debug("%s: find_all_insn_inames: %s (init): %s - " ++# "read deps: %s - write deps: %s" % ( ++# kernel.name, insn.id, ", ".join(sorted(iname_deps)), ++# ", ".join(sorted(read_deps)), ", ".join(sorted(write_deps)), ++# )) + + insn_id_to_inames[insn.id] = iname_deps + insn_assignee_inames[insn.id] = write_deps & kernel.all_inames() +-- +2.11.0 + diff --git a/patches/ufl/tensor-product-element.patch b/patches/ufl/tensor-product-element.patch deleted file mode 100644 index 9fc64f124e95bcbf28391ed5be4c87e1040a4e28..0000000000000000000000000000000000000000 --- a/patches/ufl/tensor-product-element.patch +++ /dev/null @@ -1,19 +0,0 @@ -commit f87dcd18d765b0200808b79b2e7374f82a0c6199 -Author: René Heß <rene.hess@iwr.uni-heidelberg.de> -Date: Tue Aug 29 14:56:17 2017 +0200 - - Patch for TensorProductElements - -diff --git a/ufl/algorithms/compute_form_data.py b/ufl/algorithms/compute_form_data.py -index 3388bbfc..1cef3924 100644 ---- a/ufl/algorithms/compute_form_data.py -+++ b/ufl/algorithms/compute_form_data.py -@@ -56,7 +56,7 @@ def _auto_select_degree(elements): - """ - # Use max degree of all elements, at least 1 (to work with - # Lagrange elements) -- return max({e.degree() for e in elements} - {None} | {1}) -+ return max({e.degree() if not isinstance(e.degree(), tuple) else max(e.degree()) for e in elements} - {None} | {1}) - - - def _compute_element_mapping(form): diff --git a/python/cgen b/python/cgen index 0062a75a614db6602012b6e926c4b5ced06fcc89..f411383630b272a3a5d3e28b82acaaa530a64723 160000 --- a/python/cgen +++ b/python/cgen @@ -1 +1 @@ -Subproject commit 0062a75a614db6602012b6e926c4b5ced06fcc89 +Subproject commit f411383630b272a3a5d3e28b82acaaa530a64723 diff --git 
a/python/dune/perftool/blockstructured/geometry.py b/python/dune/perftool/blockstructured/geometry.py index 951ada695f3c8d29d78cbf8992d2fe7172470083..1ffbdf0417c8261d2f0882b2384ed044af1ddd03 100644 --- a/python/dune/perftool/blockstructured/geometry.py +++ b/python/dune/perftool/blockstructured/geometry.py @@ -2,7 +2,7 @@ from dune.perftool.generation import (get_backend, temporary_variable, instruction) from dune.perftool.tools import get_pymbolic_basename -from dune.perftool.options import (get_option, +from dune.perftool.options import (get_form_option, option_switch) from dune.perftool.pdelab.geometry import (name_jacobian_determinant, local_dimension, @@ -15,20 +15,20 @@ import pymbolic.primitives as prim # scale determinant according to the order of the blockstructure def pymbolic_jacobian_determinant(): return prim.Quotient(prim.Variable(name_jacobian_determinant()), - prim.Power(get_option("number_of_blocks"), local_dimension())) + prim.Power(get_form_option("number_of_blocks"), local_dimension())) # scale Jacobian according to the order of the blockstructure def pymbolic_jacobian_inverse_transposed(i, j, restriction): name_jit = get_backend(interface="name_jit", selector=option_switch("constant_transformation_matrix"))(restriction) - return prim.Product((get_option("number_of_blocks"), + return prim.Product((get_form_option("number_of_blocks"), prim.Subscript(prim.Variable(name_jit), (j, i)))) # scale determinant according to the order of the blockstructure def pymbolic_facet_jacobian_determinant(): return prim.Quotient(prim.Variable(name_facet_jacobian_determinant()), - prim.Power(get_option("number_of_blocks"), local_dimension())) + prim.Power(get_form_option("number_of_blocks"), local_dimension())) # translate a point in the micro element into macro coordinates @@ -45,7 +45,7 @@ def define_point_in_macro(name, point_in_micro): else: expr = prim.Subscript(point_in_micro, (i,)) expr = prim.Sum((expr, prim.Variable(subelem_inames[i]),)) - expr = 
prim.Quotient(expr, get_option('number_of_blocks')) + expr = prim.Quotient(expr, get_form_option('number_of_blocks')) instruction(assignee=prim.Subscript(prim.Variable(name), (i,)), expression=expr, within_inames=frozenset(subelem_inames), diff --git a/python/dune/perftool/blockstructured/tools.py b/python/dune/perftool/blockstructured/tools.py index e3f1416e150920beb1d9ba186a34d5f0c1b384b6..a9bf01f26f5a52daa91f6cbbcf01bd31dffabb36 100644 --- a/python/dune/perftool/blockstructured/tools.py +++ b/python/dune/perftool/blockstructured/tools.py @@ -11,7 +11,7 @@ from dune.perftool.pdelab.geometry import (local_dimension, from dune.perftool.pdelab.quadrature import quadrature_inames from dune.perftool.generation.counter import get_counted_variable -from dune.perftool.options import get_option +from dune.perftool.options import get_form_option import pymbolic.primitives as prim @@ -19,12 +19,13 @@ import pymbolic.primitives as prim # i.e. each element has (i_1,i_2,...,i_d) indices @iname def sub_element_inames(): + name = "subel" dim = local_dimension() dim_names = ["x", "y", "z"] + [str(i) for i in range(4, dim + 1)] inames = tuple() for i in range(dim): inames = inames + ("subel_" + dim_names[i],) - domain("subel_" + dim_names[i], get_option("number_of_blocks")) + domain("subel_" + dim_names[i], get_form_option("number_of_blocks")) return inames @@ -37,7 +38,7 @@ def sub_element_inames(): def sub_facet_inames(): subelem_inames = sub_element_inames() - center = pymbolic_in_cell_coordinates(prim.Variable(name_localcenter()), Restriction.NEGATIVE) + center = pymbolic_in_cell_coordinates(prim.Variable(name_localcenter()), Restriction.POSITIVE) # check if iname[index] must be constant or not def predicate(index): @@ -58,7 +59,7 @@ def sub_facet_inames(): predicates=frozenset([prim.LogicalNot(predicate(index))]) ) - k = get_option("number_of_blocks") + k = get_form_option("number_of_blocks") inames = ("x",) temporary_variable(inames[0]) @@ -112,7 +113,7 @@ def 
micro_index_to_macro_index(element, inames): elif it == "exterior_facet" or it == "interior_facet": subelem_inames = sub_facet_inames() - k = get_option("number_of_blocks") + k = get_form_option("number_of_blocks") p = element.degree() return prim.Sum(tuple((p * prim.Variable(si) + prim.Variable(bi)) * (p * k + 1) ** i for i, (si, bi) in enumerate(zip(subelem_inames, inames)))) diff --git a/python/dune/perftool/cgen/__init__.py b/python/dune/perftool/cgen/__init__.py index 128c4dedffb347b256ed64b518fdde9009ce9344..24af73e0d8a2b6dfce3c1146d86c9eeccf7e5a2a 100644 --- a/python/dune/perftool/cgen/__init__.py +++ b/python/dune/perftool/cgen/__init__.py @@ -3,6 +3,7 @@ from __future__ import absolute_import from cgen import * from dune.perftool.cgen.clazz import Class +from dune.perftool.cgen.exceptions import TryCatchBlock, CatchBlock class Namespace(PrivateNamespace): diff --git a/python/dune/perftool/cgen/clazz.py b/python/dune/perftool/cgen/clazz.py index dca212eca2aecee421bee757c900f2f1778949d8..7f74353f8a8ad09960e4aaa5340c83c8fe0357b6 100644 --- a/python/dune/perftool/cgen/clazz.py +++ b/python/dune/perftool/cgen/clazz.py @@ -34,9 +34,10 @@ class BaseClass(Generable): class ClassMember(Generable): - def __init__(self, member, access=AccessModifier.PUBLIC): + def __init__(self, member, access=AccessModifier.PUBLIC, name=""): self.member = member self.access = access + self.name = name if isinstance(member, str): from cgen import Line diff --git a/python/dune/perftool/cgen/exceptions.py b/python/dune/perftool/cgen/exceptions.py new file mode 100644 index 0000000000000000000000000000000000000000..434b3a62eb7dbc7d4fcc2fbe40cb41b53e202401 --- /dev/null +++ b/python/dune/perftool/cgen/exceptions.py @@ -0,0 +1,40 @@ +""" Add Try/Catch blocks to cgen """ + +from cgen import Block, Generable, Value + + +class CatchBlock(Generable): + def __init__(self, exc_decl, catch_block): + assert isinstance(exc_decl, Value) + self.exc_decl = exc_decl + assert isinstance(catch_block, 
Block) + self.catch_block = catch_block + + def generate(self): + yield "catch ({})\n".format("".join(self.exc_decl.generate(with_semicolon=False))) + for item in self.catch_block.generate(): + yield item + yield "\n" + + +class TryCatchBlock(Generable): + def __init__(self, try_block, catch_blocks): + # Store the try block + assert isinstance(try_block, Block) + self.try_block = try_block + + assert all(isinstance(b, CatchBlock) for b in catch_blocks) + self.catch_blocks = catch_blocks + + def generate(self): + # Yield the try block + yield "\n" + yield "try\n" + for item in self.try_block.generate(): + yield item + yield "\n" + + # and now yield all the catch blocks + for catch_block in self.catch_blocks: + for item in catch_block.generate(): + yield item diff --git a/python/dune/perftool/compile.py b/python/dune/perftool/compile.py index cd5344c0da55f68b88a3ea1809f800105160fbbc..ed4c1310a9274528a3764f2739d7ee59c025ef21 100644 --- a/python/dune/perftool/compile.py +++ b/python/dune/perftool/compile.py @@ -16,13 +16,14 @@ from ufl.algorithms.formfiles import interpret_ufl_namespace from dune.perftool.generation import (delete_cache_items, global_context, ) -from dune.perftool.interactive import start_interactive_session -from dune.perftool.options import get_option, initialize_options +from dune.perftool.options import (get_form_option, + get_option, + initialize_options, + ) from dune.perftool.pdelab.driver import generate_driver -from dune.perftool.pdelab.localoperator import (generate_localoperator_basefile, - generate_localoperator_file, +from dune.perftool.pdelab.localoperator import (generate_localoperator_file, generate_localoperator_kernels, - name_localoperator_file) + ) from dune.perftool.ufl.preprocess import preprocess_form from os.path import splitext, basename, join, dirname, abspath @@ -53,8 +54,8 @@ def read_ufl(uflfile): Returns: -------- - formdatas: List of formdatas found in uflfile. - forms: List of forms found in uflfile. 
+ data: The data in the namespace after execution of the UFL file + and some custom postprocessing. """ # Read the given ufl file and execute it uflcode = read_ufl_file(uflfile) @@ -92,54 +93,39 @@ def read_ufl(uflfile): if get_option("exact_solution_expression"): data.object_by_name[get_option("exact_solution_expression")] = namespace[get_option("exact_solution_expression")] - magic_names = ("dirichlet_expression", + magic_names = ("interpolate_expression", "is_dirichlet", "exact_solution", ) for name in magic_names: data.object_by_name[name] = namespace.get(name, None) - formdatas = [] - forms = data.forms - for index, form in enumerate(forms): - formdatas.append(preprocess_form(form)) - forms[index] = formdatas[index].preprocessed_form + return data - # We expect at least one form - assert len(data.forms) >= 1 - return formdatas, data - - -# This function is the entrypoint of the ufl2pdelab executable -def compile_form(): +def entry_generate_driver(): + """ This is the entry point for driver generation """ initialize_options() - formdatas, data = read_ufl(get_option("uflfile")) + data = read_ufl(get_option("uflfile")) - with global_context(data=data, formdatas=formdatas): - # Generate driver file - if get_option("driver_file"): - generate_driver(formdatas, data) + with global_context(data=data): + generate_driver() - # In case of multiple forms: Genarate one file that includes all localoperator files - if len(formdatas) > 1: - generate_localoperator_basefile(formdatas, data) - # Generate local operator files - for formdata in formdatas: - with global_context(data=data, formdata=formdata): +def entry_generate_operators(): + """ This is the entry point for operator generation """ + initialize_options() + data = read_ufl(get_option("uflfile")) + + with global_context(data=data): + operator = get_option("operator_to_build") + with global_context(form_identifier=operator): # Make sure cache is empty delete_cache_items() - # Create localoperator kernels - if 
get_option("operator_file"): - kernels = generate_localoperator_kernels(formdata, data) - - # TODO insert sophisticated analysis/feedback loops here - if get_option("interactive"): - start_interactive_session(kernels) + # Choose the form from the UFL input + kernels = generate_localoperator_kernels(operator) - # Create c++ file from kernels - if get_option("operator_file"): - filename = name_localoperator_file(formdata, data) - generate_localoperator_file(formdata, kernels, filename) + # Write the result to a file + filename = get_form_option("filename") + generate_localoperator_file(kernels, filename) diff --git a/python/dune/perftool/error.py b/python/dune/perftool/error.py index 3f99a83abeb947890b5c7fb36ffd2663514796fb..4d428b41b516845c3eb18803539edbf293f44cf1 100644 --- a/python/dune/perftool/error.py +++ b/python/dune/perftool/error.py @@ -15,3 +15,7 @@ class PerftoolCodegenError(PerftoolError): class PerftoolLoopyError(PerftoolError): pass + + +class PerftoolVectorizationError(PerftoolCodegenError): + pass diff --git a/python/dune/perftool/generation/__init__.py b/python/dune/perftool/generation/__init__.py index c8c085c178d7dc2224d1efd1d76efaacf9da5259..e541e71366371039157cb757201f710ffd0a19ca 100644 --- a/python/dune/perftool/generation/__init__.py +++ b/python/dune/perftool/generation/__init__.py @@ -43,6 +43,7 @@ from dune.perftool.generation.loopy import (barrier, kernel_cached, noop_instruction, silenced_warning, + subst_rule, temporary_variable, transform, valuearg, diff --git a/python/dune/perftool/generation/cache.py b/python/dune/perftool/generation/cache.py index 474e0e104040bb0caefea5361e5868790380784f..b4ae54f28479d607907ac59c3c6078283535bc85 100644 --- a/python/dune/perftool/generation/cache.py +++ b/python/dune/perftool/generation/cache.py @@ -69,6 +69,7 @@ class _RegisteredFunction(object): on_store=lambda x: x, item_tags=(), context_tags=(), + section=None, **kwargs ): self.func = func @@ -78,6 +79,8 @@ class _RegisteredFunction(object): 
self.item_tags = item_tags self.context_tags = context_tags self.kwargs = kwargs + if section: + self.item_tags = self.item_tags + (section,) # Initialize the memoization cache self._memoize_cache = {} diff --git a/python/dune/perftool/generation/cpp.py b/python/dune/perftool/generation/cpp.py index 0f44b6950ef9b111f90f0d7a922835912c5b4500..858dc1fcaf50d07b08c044525db994f85ca8de29 100644 --- a/python/dune/perftool/generation/cpp.py +++ b/python/dune/perftool/generation/cpp.py @@ -18,7 +18,7 @@ template_parameter = generator_factory(item_tags=("template_param",), context_ta class_basename = generator_factory(item_tags=("basename",), context_tags=("classtag",)) -@generator_factory(item_tags=("file", "include"), context_tags=("filetag",)) +@generator_factory(item_tags=("file", "include"), context_tags=("filetag",), counted=True) def include_file(include, system=False): return cgen.Include(include, system=system) diff --git a/python/dune/perftool/generation/loopy.py b/python/dune/perftool/generation/loopy.py index a97df4744fd1e661554a6febb306a8f858589c31..a4d8292f5f88bc315980efb521bf7c2cf6a95153 100644 --- a/python/dune/perftool/generation/loopy.py +++ b/python/dune/perftool/generation/loopy.py @@ -140,10 +140,9 @@ def _insn_cache_key(code=None, expression=None, **kwargs): def instruction(code=None, expression=None, **kwargs): assert (code is not None) or (expression is not None) assert not ((code is not None) and (expression is not None)) - assert 'id' not in kwargs # Get an ID for this instruction - id = 'insn_{}'.format(str(get_counter('__insn_id')).zfill(4)) + id = kwargs.pop("id", 'insn_{}'.format(str(get_counter('__insn_id')).zfill(4))) # Now create the actual instruction if code: @@ -172,8 +171,8 @@ def noop_instruction(**kwargs): context_tags="kernel", cache_key_generator=no_caching, ) -def transform(trafo, *args): - return (trafo, args) +def transform(trafo, *args, **kwargs): + return (trafo, args, kwargs) @generator_factory(item_tags=("instruction", 
"barrier"), @@ -216,3 +215,8 @@ def loopy_class_member(name, classtag=None, potentially_vectorized=False, **kwar globalarg(name, **kwargs) return name + + +@generator_factory(item_tags=("substrule",), context_tags="kernel") +def subst_rule(name, args, expr): + return lp.SubstitutionRule(name, args, expr) diff --git a/python/dune/perftool/interactive.py b/python/dune/perftool/interactive.py deleted file mode 100644 index 77094c7524fc2bc5d6a1dc1e0b4254458056ac35..0000000000000000000000000000000000000000 --- a/python/dune/perftool/interactive.py +++ /dev/null @@ -1,144 +0,0 @@ -from __future__ import print_function -from functools import partial - -from dune.perftool.generation import global_context -from dune.perftool.loopy.transformations import get_loopy_transformations -from dune.perftool.pdelab.localoperator import LoopyKernelMethod -from dune.perftool.pdelab.signatures import assembly_routine_signature - -import os - - -# Use the builtin 'input' in python2 and 'raw_input' in python3 -try: - input = raw_input -except: - pass - - -def clear(): - os.system('cls' if os.name == 'nt' else 'clear') - - -def kernel_name(v): - first = None - if v[1] == "residual": - first = "alpha" - if v[1] == "jacobian": - first = "jacobian" - assert first - - second = None - if v[0] == "cell": - second = "volume" - if v[0] == "exterior_facet": - second = "boundary" - if v[0] == "interior_facet": - second = "skeleton" - assert second - - return "{}_{}".format(first, second) - - -def show_kernel(which, kernel): - clear() - print("Showing the loo.py kernel for {}:\n".format(kernel_name(which))) - print(kernel.stringify(with_dependencies=True)) - print("Press Return to return to the previous menu") - input() - return kernel - - -def choose_transformation(which, kernel): - choice = None - while choice != "q": - clear() - keymap = {} - print("Choose one of the following transformations to apply to {}:\n".format(kernel_name(which))) - - print("Transformations:") - for i, v in 
enumerate(get_loopy_transformations().values()): - print(" {}) {}".format(chr(ord('a') + i), v.name)) - if v.description: - print(" {}".format(v.description)) - keymap[chr(ord('a') + i)] = v - - print("\n q) Return to kernel options") - print("\nYour choice:") - - choice = input().lower() - try: - kernel = keymap[choice](kernel) - except KeyError: - pass - - return kernel - - -def show_code(which, kernel): - clear() - print("Showing the generated dune-pdelab code for {}:\n".format(kernel_name(which))) - - with global_context(integral_type=which[0], form_type=which[1]): - signature = assembly_routine_signature() - print("".join(LoopyKernelMethod(signature, kernel).generate())) - - print("Press Return to return to the previous menu") - input() - return kernel - - -def optimize_kernel(which, kernels): - kernel = kernels[which] - choice = None - - while choice != "q": - clear() - print("Optimizing kernel {}:\n".format(kernel_name(which))) - - print("Available options:") - print(" a) Show the loopy kernel") - print(" b) Apply loopy transformation") - print(" c) Show generated PDELab code for this kernel.") - - print("\n q) Return to the kernel overview") - print("\nYour choice:") - - choice = input().lower() - try: - kernel = {'a': partial(show_kernel, which), - 'b': partial(choose_transformation, which), - 'c': partial(show_code, which) - }[choice](kernel) - except KeyError: - pass - - kernels[which] = kernel - - -def kernel_choice(kernels): - choice = None - while choice != "q": - clear() - print("The following kernels are in the input. 
Pick one to optimize:") - - keymap = {} - for i, k in enumerate(kernels.keys()): - print(" {}) {}".format(chr(ord('a') + i), kernel_name(k))) - keymap[chr(ord('a') + i)] = partial(optimize_kernel, k) - - print("\n q) End this interactive session and proceed to code generation") - - print("\nYour choice: ") - choice = input().lower() - try: - keymap[choice](kernels) - except KeyError: - pass - - -def start_interactive_session(kernels): - clear() - print("Welcome to the dune-perftool interactive mode!\n") - - kernel_choice(kernels) diff --git a/python/dune/perftool/loopy/buffer.py b/python/dune/perftool/loopy/buffer.py deleted file mode 100644 index c0e2ab310ce4cf6ddc8c8ea403d2e5cad1c63365..0000000000000000000000000000000000000000 --- a/python/dune/perftool/loopy/buffer.py +++ /dev/null @@ -1,55 +0,0 @@ -from dune.perftool.error import PerftoolLoopyError -from dune.perftool.generation import (get_counted_variable, - kernel_cached, - temporary_variable, - ) - - -class FlipFlopBuffer(object): - def __init__(self, identifier): - self.identifier = identifier - - # Initialize the counter that switches between the base storages! 
- self._current = 0 - - # Generate the base storage names - self.base_storage = tuple("{}_base_{}".format(self.identifier, i) for i in (0, 1)) - - def switch_base_storage(self): - self._current = (self._current + 1) % 2 - - def get_temporary(self, **kwargs): - assert("base_storage" not in kwargs) - assert("storage_shape" not in kwargs) - - # Select the base storage and increase counter - base = self.base_storage[self._current] - - # Construct a temporary name - name = kwargs.pop("name", None) - if name is None: - name = get_counted_variable(self.identifier) - - # Construct the temporary and return it - temporary_variable(name, - base_storage=base, - managed=True, - _base_storage_access_may_be_aliasing=True, - **kwargs - ) - - return name - - -@kernel_cached -def initialize_buffer(identifier): - assert isinstance(identifier, str) - return FlipFlopBuffer(identifier) - - -def get_buffer_temporary(identifier, **kwargs): - return initialize_buffer(identifier).get_temporary(**kwargs) - - -def switch_base_storage(identifier): - initialize_buffer(identifier).switch_base_storage() diff --git a/python/dune/perftool/loopy/mangler.py b/python/dune/perftool/loopy/mangler.py index 297968c6b8332e3af00c4d101656a2e970d63f70..29b0d503bcc0269c7a39d0c3a3accc1657781577 100644 --- a/python/dune/perftool/loopy/mangler.py +++ b/python/dune/perftool/loopy/mangler.py @@ -6,6 +6,7 @@ from dune.perftool.generation import (function_mangler, ) from loopy import CallMangleInfo +from loopy.types import to_loopy_type import numpy as np @@ -48,3 +49,14 @@ def dune_math_manglers(kernel, name, arg_dtypes): (dt,), (dt,) * len(arg_dtypes), ) + + +@function_mangler +def get_time_function_mangler(kernel, name, arg_dtypes): + """ The getTime method is defined on local operators once they inherit from + InstationaryLocalOperatorDefaultMethods + """ + if name == "getTime": + assert(len(arg_dtypes) == 0) + from dune.perftool.loopy.target import dtype_floatingpoint + return CallMangleInfo("this->getTime", 
(to_loopy_type(dtype_floatingpoint()),), ()) diff --git a/python/dune/perftool/loopy/target.py b/python/dune/perftool/loopy/target.py index 408a7a3df3a2b4058420db27a1f750ebe98fa7c1..8a89579e70f3c5bad2fc3bd3548836836a0b6d45 100644 --- a/python/dune/perftool/loopy/target.py +++ b/python/dune/perftool/loopy/target.py @@ -9,6 +9,7 @@ from dune.perftool.generation import (include_file, retrieve_cache_functions, ) from dune.perftool.options import get_option +from dune.perftool.tools import round_to_multiple from loopy.symbolic import Literal from loopy.target import (TargetBase, @@ -146,7 +147,7 @@ class DuneCExpressionToCodeMapper(CExpressionToCodeMapper): class DuneASTBuilder(CASTBuilder): def function_manglers(self): - return CASTBuilder.function_manglers(self) + retrieve_cache_functions("mangler") + return retrieve_cache_functions("mangler") + CASTBuilder.function_manglers(self) def get_expression_to_c_expression_mapper(self, codegen_state): return DuneExpressionToCExpressionMapper(codegen_state) @@ -154,14 +155,16 @@ class DuneASTBuilder(CASTBuilder): def get_c_expression_to_code_mapper(self): return DuneCExpressionToCodeMapper() - def get_temporary_decl(self, knl, schedule_index, temp_var, decl_info): + def get_temporary_decl(self, codegen_state, schedule_index, temp_var, decl_info): # If this is not a DuneTemporaryVariable, it was introduced by loopy # and it should be totally under loopys control: Call the base class implementation! 
if not (isinstance(temp_var, DuneTemporaryVariable) and temp_var.custom_declaration): - return CASTBuilder.get_temporary_decl(self, knl, schedule_index, temp_var, decl_info) + return CASTBuilder.get_temporary_decl(self, codegen_state, schedule_index, temp_var, decl_info) - if temp_var.decl_method: - return cgen.Line(temp_var.decl_method(temp_var.name, temp_var.shape, temp_var.shape_impl)) + if temp_var.custom_declaration: + decl = temp_var.decl_method(temp_var.name, codegen_state.kernel, decl_info) + if decl: + return cgen.Line(decl) def add_vector_access(self, access_expr, index): # There is no generic way of implementing a vector access with VCL, as @@ -176,10 +179,34 @@ class DuneASTBuilder(CASTBuilder): return cgen.Line("BARRIER;") def get_temporary_decls(self, codegen_state, schedule_index): + temps = codegen_state.kernel.temporary_variables.values() + # Declare all the custom base storages + ret = [] + for bs in set(t.custom_base_storage for t in temps if isinstance(t, DuneTemporaryVariable)) - set({None}): + if bs in [a.name for a in codegen_state.kernel.args]: + continue + + # Find the alignment bytes + alignment = [] + size = [] + for t in temps: + if isinstance(t, DuneTemporaryVariable) and t.custom_base_storage == bs: + # TODO Extract alignment from the temporaries after switching to loopy 2018.1 + alignment.append(get_option("max_vector_width") // 8) + from pytools import product + size.append(product(t.shape)) + + alignment = max(alignment) + size = max(size) + size = round_to_multiple(size, alignment) + + decl = "char {}[{}] __attribute__ ((aligned({})));".format(bs, size * 8, alignment) + ret.append(cgen.Line(decl)) + if self.target.declare_temporaries: - return CASTBuilder.get_temporary_decls(self, codegen_state, schedule_index) + return ret + CASTBuilder.get_temporary_decls(self, codegen_state, schedule_index) else: - return [] + return ret class BlockstructuredDuneExpressionToCExpressionMapper(DuneExpressionToCExpressionMapper): diff --git 
a/python/dune/perftool/loopy/temporary.py b/python/dune/perftool/loopy/temporary.py index d916f6b0312ac763d60829047a52056d088014bc..2bf78ce94c573ca0f1614fc89b72fee52fa16333 100644 --- a/python/dune/perftool/loopy/temporary.py +++ b/python/dune/perftool/loopy/temporary.py @@ -5,6 +5,7 @@ from dune.perftool.error import PerftoolLoopyError from loopy import TemporaryVariable +import loopy as lp import numpy @@ -27,7 +28,10 @@ def _temporary_type(shape_impl, shape, first=True): return "Dune::FieldMatrix<{}, {}, {}>".format(_type, shape[0], shape[1]) -def default_declaration(name, shape=(), shape_impl=()): +def default_declaration(name, kernel, decl_info): + shape = kernel.temporary_variables[name].shape + shape_impl = kernel.temporary_variables[name].shape_impl + # Determine the C++ type to use for this temporary. t = _temporary_type(shape_impl, shape) if len(shape_impl) == 0: @@ -44,11 +48,20 @@ def default_declaration(name, shape=(), shape_impl=()): return '{} {}(0.0);'.format(t, name) +def custom_base_storage_temporary_declaration(storage): + def _decl(name, kernel, decl_info): + dtype = kernel.temporary_variables[name].dtype + _type = kernel.target.dtype_to_typename(decl_info.dtype) + return "{0} *{1} = ({0} *){2};".format(_type, name, storage) + + return _decl + + class DuneTemporaryVariable(TemporaryVariable): - allowed_extra_kwargs = TemporaryVariable.allowed_extra_kwargs + ["managed", "shape_impl", "decl_method"] + allowed_extra_kwargs = TemporaryVariable.allowed_extra_kwargs + ["managed", "shape_impl", "decl_method", "custom_base_storage"] - def __init__(self, name, managed=False, shape_impl=None, decl_method=None, **kwargs): + def __init__(self, name, managed=False, shape_impl=None, decl_method=None, custom_base_storage=None, **kwargs): self.managed = managed self.decl_method = decl_method self.shape_impl = shape_impl @@ -59,6 +72,15 @@ class DuneTemporaryVariable(TemporaryVariable): from dune.perftool.loopy.target import dtype_floatingpoint 
kwargs.setdefault('dtype', dtype_floatingpoint()) + if custom_base_storage and self.decl_method is None: + assert shape_impl is None + self.decl_method = custom_base_storage_temporary_declaration(custom_base_storage) + self.custom_declaration = self.decl_method is not None - TemporaryVariable.__init__(self, name, managed=self.managed, shape_impl=self.shape_impl, decl_method=self.decl_method, **kwargs) + TemporaryVariable.__init__(self, name, + managed=self.managed, + shape_impl=self.shape_impl, + decl_method=self.decl_method, + custom_base_storage=custom_base_storage, + **kwargs) diff --git a/python/dune/perftool/loopy/transformations/__init__.py b/python/dune/perftool/loopy/transformations/__init__.py index db43b04544a9d0852d9181843e2a0fd554ac2bb6..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 100644 --- a/python/dune/perftool/loopy/transformations/__init__.py +++ b/python/dune/perftool/loopy/transformations/__init__.py @@ -1,35 +0,0 @@ -""" Infrastructure for loopy transformations. -These are registered to list them in interactive mode -""" - -_loopy_trafo_registry = {} - - -def get_loopy_transformations(): - return _loopy_trafo_registry - - -class LoopyTransformationWrapper(object): - def __init__(self, f, name=None, description=""): - self.func = f - self.name = name - self.description = description - - assert name - assert name not in _loopy_trafo_registry - - _loopy_trafo_registry[name] = self - - def __call__(self, kernel): - return self.func(kernel) - - -def loopy_transformation(_positional_arg=None, **kwargs): - assert not _positional_arg - return lambda f: LoopyTransformationWrapper(f, **kwargs) - - -# Just for debugging purposes we add an identity transformation here. -@loopy_transformation(name="identity", description='''Does not change the kernel. 
Proof of concept implementation''') -def _identity(kernel): - return kernel diff --git a/python/dune/perftool/loopy/transformations/disjointgroups.py b/python/dune/perftool/loopy/transformations/disjointgroups.py deleted file mode 100644 index 78ea4c9d6574f294416c5bb0f6c440083ddd3603..0000000000000000000000000000000000000000 --- a/python/dune/perftool/loopy/transformations/disjointgroups.py +++ /dev/null @@ -1,13 +0,0 @@ -""" A helper transformation that makes all groups conflicting """ - -from dune.perftool.options import get_option - - -def make_groups_conflicting(knl): - # As this transformation introduces a performance bug that basically - # kills our CI, we only apply it if really needed - meaning in production. - if get_option("assure_statement_ordering"): - groups = frozenset().union(*tuple(i.groups for i in knl.instructions)) - return knl.copy(instructions=[i.copy(conflicts_with_groups=groups - i.groups) for i in knl.instructions]) - else: - return knl diff --git a/python/dune/perftool/loopy/transformations/instrumentation.py b/python/dune/perftool/loopy/transformations/instrumentation.py new file mode 100644 index 0000000000000000000000000000000000000000..89b08b6f0e191ca06db56d820c585ebe585e250b --- /dev/null +++ b/python/dune/perftool/loopy/transformations/instrumentation.py @@ -0,0 +1,101 @@ +""" Add instrumentation instructions to a kernel """ + +from dune.perftool.generation import (dump_accumulate_timer, + post_include, + ) +from dune.perftool.options import get_option + +import loopy as lp + + +def _intersect(a): + """ Return intersection of a given tuple of frozensets. Also works for empty tuple """ + if len(a) == 0: + return frozenset() + return frozenset.intersection(*a) + + +def _union(a): + """ Return union of a given tuple of frozensets. 
Also works for empty tuple """ + if len(a) == 0: + return frozenset() + return frozenset.union(*a) + + +def add_instrumentation(knl, match, identifier, level, filetag='operatorfile', operator=False): + """ Transform loopy kernel to contain instrumentation code + + Arguments: + knl : The loopy kernel, follows the loopy transformation convention + match : A loopy match object or a string (interpreted as instruction ID or tag) to describe + which instructions should be wrapped in an instrumentation block. + identifier : The name of the counter to start and stop + level : The instrumentation level this measurement is defined at + filetag : The tag of the file that should contain the counter definitions + """ + # If the instrumentation level is not high enough, this is a no-op + if level > get_option("instrumentation_level"): + return knl + + # If a string was given for match, heuristically make it a match object + if isinstance(match, str): + match = lp.match.Or((lp.match.Id(match), lp.match.Tagged(match))) + + # Find the instructions to wrap in instrumentation + insns = lp.find_instructions(knl, match) + rewritten_insns = [] + + # If the match is empty, this is also no op + if not insns: + return knl + + # Determine the iname nesting of the timing block + insn_inames = _intersect(tuple(i.within_inames for i in insns)) + other_inames = _union(tuple(i.within_inames for i in lp.find_instructions(knl, lp.match.Not(match)))) + within = _intersect((insn_inames, other_inames)) + + # Get a unique identifer - note that the same timer could be started and stopped several times + # within one kernel... 
+ ident = identifier + if lp.find_instructions(knl, lp.match.Id("{}_start".format(identifier))): + ident = "{}_".format(ident) + + # Define the start instruction and correct dependencies for it + start_id = "{}_start".format(ident) + start_depends = _union(tuple(i.depends_on for i in insns)).difference(frozenset(i.id for i in insns)) + start_insn = lp.CInstruction([], + "HP_TIMER_START({});".format(identifier), + id=start_id, + within_inames=within, + depends_on=start_depends, + boostable_into=frozenset(), + ) + + # Add dependencies on the timing instructions + rewritten_insns.extend([i.copy(depends_on=i.depends_on.union(frozenset({start_id}))) for i in insns]) + + # Define the stop instruction and correct dependencies for it + stop_id = "{}_stop".format(ident) + stop_insn = lp.CInstruction([], + "HP_TIMER_STOP({});".format(identifier), + id=stop_id, + within_inames=within, + depends_on=frozenset(i.id for i in insns), + boostable_into=frozenset(), + ) + + # Find all the instructions that should depend on stop + dep_insns = filter(lambda i: _intersect((i.depends_on, frozenset(i.id for i in insns))), + lp.find_instructions(knl, lp.match.Not(match)) + ) + rewritten_insns.extend([i.copy(depends_on=i.depends_on.union(frozenset({stop_id}))) for i in dep_insns]) + + # Trigger code generation on the file/operator level + post_include('HP_DECLARE_TIMER({});'.format(identifier), filetag=filetag) + dump_accumulate_timer(identifier) + + # Filter all the instructions which were untouched + other_insns = list(filter(lambda i: i.id not in [j.id for j in rewritten_insns], knl.instructions)) + + # Add all the modified instructions into the kernel object + return knl.copy(instructions=rewritten_insns + other_insns + [start_insn, stop_insn]) diff --git a/python/dune/perftool/loopy/transformations/vectorize_quad.py b/python/dune/perftool/loopy/transformations/vectorize_quad.py index fa5b03c204b4d77f628a7566897d33dae3e67a7c..f82f482929a5eaa8f742bbe388c8eeff5bce4ceb 100644 --- 
a/python/dune/perftool/loopy/transformations/vectorize_quad.py +++ b/python/dune/perftool/loopy/transformations/vectorize_quad.py @@ -7,8 +7,7 @@ from dune.perftool.generation import (function_mangler, ) from dune.perftool.loopy.target import dtype_floatingpoint from dune.perftool.loopy.vcl import get_vcl_type, get_vcl_type_size -from dune.perftool.loopy.transformations.vectorview import (add_temporary_with_vector_view, - add_vector_view, +from dune.perftool.loopy.transformations.vectorview import (add_vector_view, get_vector_view_name, ) from dune.perftool.loopy.symbolic import substitute @@ -149,7 +148,7 @@ def _vectorize_quadrature_loop(knl, inames, suffix): knl = knl.copy(temporary_variables=tmps) # Introduce a vector view of the precomputation result - knl = add_vector_view(knl, prec_quantity, flatview=True) + knl = add_vector_view(knl, prec_quantity) # # Construct a flat loop for the given instructions @@ -196,7 +195,7 @@ def _vectorize_quadrature_loop(knl, inames, suffix): horizontal, vertical = tuple(int(i) for i in re.match("vecsumfac_h(.*)_v(.*)", tag).groups()) # 1. 
Rotating the input data - knl = add_vector_view(knl, quantity, flatview=True) + knl = add_vector_view(knl, quantity) if horizontal > 1: new_insns.append(lp.CallInstruction((), # assignees prim.Call(TransposeReg(vertical=vertical, horizontal=horizontal), @@ -207,6 +206,7 @@ def _vectorize_quadrature_loop(knl, inames, suffix): within_inames=common_inames.union(frozenset({outer_iname, vec_iname})), within_inames_is_final=True, id="{}_rotate{}".format(quantity, suffix), + tags=frozenset({"sumfact_stage2"}), )) # Add substitution rules @@ -219,7 +219,7 @@ def _vectorize_quadrature_loop(knl, inames, suffix): elif tag is not None and tag == 'sumfac': # Add a vector view to this quantity expr, = quantity_exprs - knl = add_vector_view(knl, quantity, flatview=True) + knl = add_vector_view(knl, quantity) replacemap[expr] = prim.Subscript(prim.Variable(get_vector_view_name(quantity)), (vector_indices.get(1), prim.Variable(vec_iname)), ) @@ -243,7 +243,7 @@ def _vectorize_quadrature_loop(knl, inames, suffix): for insn in insns: # Get a vector view of the lhs expression lhsname = get_pymbolic_basename(insn.assignee) - knl = add_vector_view(knl, lhsname, pad_to=vec_size, flatview=True) + knl = add_vector_view(knl, lhsname) lhsname = get_vector_view_name(lhsname) rotating = "gradvec" in insn.tags @@ -268,7 +268,7 @@ def _vectorize_quadrature_loop(knl, inames, suffix): within_inames=common_inames.union(frozenset({outer_iname, vec_iname})), within_inames_is_final=True, id=insn.id, - tags=frozenset({"vec_write{}".format(suffix)}) + tags=frozenset({"vec_write{}".format(suffix), "sumfact_stage2"}) ) ) @@ -283,6 +283,7 @@ def _vectorize_quadrature_loop(knl, inames, suffix): within_inames=common_inames.union(frozenset({outer_iname, vec_iname})), within_inames_is_final=True, id="{}_rotateback{}".format(lhsname, suffix), + tags=frozenset({"sumfact_stage2"}), )) # Add the necessary vector indices @@ -297,6 +298,7 @@ def _vectorize_quadrature_loop(knl, inames, suffix): 
within_inames=common_inames, within_inames_is_final=True, id="assign_{}{}".format(name, suffix), + tags=frozenset({"sumfact_stage2"}), )) new_insns.append(lp.Assignment(prim.Variable(name), # assignee prim.Sum((prim.Variable(name), increment)), # expression @@ -305,6 +307,7 @@ def _vectorize_quadrature_loop(knl, inames, suffix): depends_on=frozenset({Tagged("vec_write{}".format(suffix)), "assign_{}{}".format(name, suffix)}), depends_on_is_final=True, id="update_{}{}".format(name, suffix), + tags=frozenset({"sumfact_stage2"}), )) from loopy.kernel.creation import resolve_dependencies diff --git a/python/dune/perftool/loopy/transformations/vectorview.py b/python/dune/perftool/loopy/transformations/vectorview.py index 1160c8bda81f0f81f6a6c9c6b8c2db327b0fe79d..3a812abf50289b58e14bd2f155d155cc7bc3155d 100644 --- a/python/dune/perftool/loopy/transformations/vectorview.py +++ b/python/dune/perftool/loopy/transformations/vectorview.py @@ -5,7 +5,9 @@ being a an array of SIMD vectors """ from dune.perftool.loopy.target import dtype_floatingpoint +from dune.perftool.loopy.temporary import DuneTemporaryVariable from dune.perftool.loopy.vcl import get_vcl_type_size +from dune.perftool.tools import round_to_multiple import loopy as lp import numpy as np @@ -17,83 +19,47 @@ def get_vector_view_name(tmpname): return tmpname + "_vec" -def add_vector_view(knl, tmpname, pad_to=None, flatview=False): - """ - Kernel transformation to add a vector view temporary - that interprets the same memory as another temporary - """ +def add_vector_view(knl, tmpname, pad_to=1): temporaries = knl.temporary_variables - assert tmpname in temporaries temp = temporaries[tmpname] - vecname = get_vector_view_name(tmpname) + vectemp = get_vector_view_name(tmpname) bsname = tmpname + "_base" + vecsize = get_vcl_type_size(temp.dtype) - if vecname in knl.temporary_variables: + # Enforce idempotency + if vectemp in temporaries: return knl - # Add base storage to the original temporary! 
- if not temp.base_storage: - temp = temp.copy(base_storage=bsname) - temporaries[tmpname] = temp - else: - bsname = temp.base_storage - - # Determine the shape by dividing total size by vector size - # Also apply the padding we need for rotation - # TODO: *Only* apply this padding if really needed (a bit hard to figure out) - vecsize = get_vcl_type_size(temp.dtype) - if all(isinstance(s, int) for s in temp.shape): - size = pt.product(temp.shape) // vecsize - if size % vecsize != 0: - size = (size // vecsize + 1) * vecsize + # Modify the original temporary to use our custom base storage mechanism + if isinstance(temp, DuneTemporaryVariable): + if temp.custom_base_storage: + bsname = temp.custom_base_storage + else: + temp = temp.copy(custom_base_storage=bsname) + temporaries[tmpname] = temp else: - size = prim.FloorDiv(prim.Product(temp.shape), vecsize) - size = (size // vecsize + 1) * vecsize - - # Maybe do some padding. - if pad_to: - size = (size // pad_to + 1) * pad_to + temp = DuneTemporaryVariable(custom_base_storage=bsname, + managed=True, + **temp.get_copy_kwargs() + ) + temporaries[tmpname] = temp - # Some vectorview are intentionally flat! (e.g. the output buffers of - # sum factorization kernels - if flatview: - shape = (size, vecsize) - dim_tags = "c,vec" - else: - shape = temp.shape - # This works around a loopy weirdness (which might as well be a bug) - # TODO: investigate this! 
- if len(shape) == 1: - shape = (1, vecsize) - dim_tags = "c,vec" - else: - dim_tags = temp.dim_tags[:-1] + ("vec",) + size = round_to_multiple(pt.product(temp.shape), vecsize) // vecsize + size = round_to_multiple(size, pad_to) # Now add a vector view temporary - vecname = tmpname + "_vec" - temporaries[vecname] = lp.TemporaryVariable(vecname, - dim_tags=dim_tags, - shape=shape, - base_storage=bsname, - dtype=dtype_floatingpoint(), - scope=lp.temp_var_scope.PRIVATE, - ) - - # Avoid that any of these temporaries are eliminated - silenced = ['temp_to_write({})'.format(tmpname), - 'temp_to_write({})'.format(vecname), - 'read_no_write({})'.format(tmpname), - 'read_no_write({})'.format(vecname), + temporaries[vectemp] = DuneTemporaryVariable(vectemp, + dim_tags="c,vec", + shape=(size, vecsize), + custom_base_storage=bsname, + scope=lp.temp_var_scope.PRIVATE, + managed=True, + ) + + # Avoid that these temporaries are eliminated + silenced = ['temp_to_write({})'.format(vectemp), + 'read_no_write({})'.format(vectemp), ] return knl.copy(temporary_variables=temporaries, silenced_warnings=knl.silenced_warnings + silenced) - - -def add_temporary_with_vector_view(knl, name, *args, **kwargs): - temps = knl.temporary_variables - assert name not in temps - temps[name] = lp.TemporaryVariable(name, *args, **kwargs) - knl = knl.copy(temporary_variables=temps) - knl = add_vector_view(knl, name) - return knl diff --git a/python/dune/perftool/loopy/vcl.py b/python/dune/perftool/loopy/vcl.py index 191889c00c9e5a4790a4b4bb9e0a25a1e3f2c16d..345dec931596c07c8641a03f5c0c5035f86d06b6 100644 --- a/python/dune/perftool/loopy/vcl.py +++ b/python/dune/perftool/loopy/vcl.py @@ -2,7 +2,7 @@ Our extensions to the loopy type system """ from dune.perftool.options import get_option -from dune.perftool.generation import function_mangler +from dune.perftool.generation import function_mangler, include_file import loopy as lp import numpy as np @@ -62,8 +62,10 @@ def get_vcl_typename(nptype, 
register_size=None, vector_width=None): class ExplicitVCLCast(lp.symbolic.FunctionIdentifier): - def __init__(self, nptype, vector_width): + def __init__(self, nptype, vector_width=None): self.nptype = nptype + if vector_width is None: + vector_width = get_vcl_type_size(nptype) self.vector_width = vector_width def __getinitargs__(self): @@ -74,8 +76,17 @@ class ExplicitVCLCast(lp.symbolic.FunctionIdentifier): return get_vcl_typename(self.nptype, vector_width=self.vector_width) +class VCLLowerUpperLoad(ExplicitVCLCast): + pass + + @function_mangler def vcl_cast_mangler(knl, func, arg_dtypes): + if isinstance(func, VCLLowerUpperLoad): + return lp.CallMangleInfo(func.name, + (lp.types.NumpyType(func.nptype),), + arg_dtypes) + if isinstance(func, ExplicitVCLCast): return lp.CallMangleInfo(func.name, (lp.types.NumpyType(func.nptype),), (arg_dtypes[0],)) @@ -107,10 +118,11 @@ def vcl_function_mangler(knl, func, arg_dtypes): vcl = lp.types.NumpyType(get_vcl_type(dtype)) return lp.CallMangleInfo("select", (vcl,), (vcl, vcl, vcl)) - if func == "horizontal_add": + if func in ("horizontal_add", "horizontal_add_lower", "horizontal_add_upper"): dtype = arg_dtypes[0] vcl = lp.types.NumpyType(get_vcl_type(dtype)) - return lp.CallMangleInfo("horizontal_add", (lp.types.NumpyType(dtype.dtype),), (vcl,)) + include_file("dune/perftool/sumfact/horizontaladd.hh", filetag="operatorfile") + return lp.CallMangleInfo(func, (lp.types.NumpyType(dtype.dtype),), (vcl,)) if isinstance(func, VCLPermute): dtype = arg_dtypes[0] diff --git a/python/dune/perftool/options.py b/python/dune/perftool/options.py index 9f7c0e19619317b040083d98796939148fe29fb0..1ecc85c5903b69f83953502e8afa485f5a962702 100644 --- a/python/dune/perftool/options.py +++ b/python/dune/perftool/options.py @@ -25,10 +25,10 @@ class PerftoolOption(ImmutableRecord): ) -class PerftoolOptionsArray(ImmutableRecord): +class PerftoolGlobalOptionsArray(ImmutableRecord): """ A collection of form compiler arguments """ def __init__(self, 
**kwargs): - opts = {k: v.default for k, v in PerftoolOptionsArray.__dict__.items() if isinstance(v, PerftoolOption)} + opts = {k: v.default for k, v in PerftoolGlobalOptionsArray.__dict__.items() if isinstance(v, PerftoolOption)} opts.update(**kwargs) ImmutableRecord.__init__(self, **opts) @@ -36,23 +36,46 @@ class PerftoolOptionsArray(ImmutableRecord): uflfile = PerftoolOption(helpstr="the UFL file to compile") debug_cache_with_stack = PerftoolOption(default=False, helpstr="Store stack along with cache objects. Makes debugging caching issues easier.") driver_file = PerftoolOption(helpstr="The filename for the generated driver header") - operator_file = PerftoolOption(helpstr="The filename for the generated local operator header") - numerical_jacobian = PerftoolOption(default=False, helpstr="use numerical jacobians (only makes sense, if uflpdelab for some reason fails to generate analytic jacobians)") - matrix_free = PerftoolOption(default=False, helpstr="Use iterative solver with matrix free jacobian application") explicit_time_stepping = PerftoolOption(default=False, helpstr="use explicit time stepping") exact_solution_expression = PerftoolOption(helpstr="name of the exact solution expression in the ufl file") compare_l2errorsquared = PerftoolOption(helpstr="maximal allowed l2 error squared of difference between numerical solution and interpolation of exact solution (NOTE: requires --exact-solution-expression)") - interactive = PerftoolOption(default=False, helpstr="whether the optimization process should be guided interactively (also useful for debugging)") + l2error_tree_path = PerftoolOption(default=None, helpstr="Tree pathes that should be considered for l2 error calculation. Default None means we take all of them into account.") + ini_file = PerftoolOption(helpstr="An inifile to use. 
A generated driver will be hard-coded to it, a [formcompiler] section will be used as default values to form compiler arguments (use snake case)") + opcounter = PerftoolOption(default=False, helpstr="Count operations. Note: In this case only operator applications are generated since solving and operator counting does not work. You probably want to set instrumentation level>0.") + performance_measuring = PerftoolOption(default=False, helpstr="Generate opcounter codepath, but only measure times!") + instrumentation_level = PerftoolOption(default=0, helpstr="Control time/opcounter measurements. 0-do nothing, 1-measure program as a whole, 2-operator applications, 3-measure kernel (eg. alpha-volume, ...), 4-parts of kernel (eg. stage 1-3 of SF)") + project_basedir = PerftoolOption(helpstr="The base (build) directory of the dune-perftool project") + architecture = PerftoolOption(default="haswell", helpstr="The architecture to optimize for. Possible values: haswell|knl|skylake") + yaspgrid_offset = PerftoolOption(default=False, helpstr="Set to true if you want a yasp grid where the lower left corner is not in the origin.") + precision_bits = PerftoolOption(default=64, helpstr="The number of bits for the floating point type") + overlapping = PerftoolOption(default=False, helpstr="Use an overlapping solver and constraints. You still need to make sure to construct a grid with overlap! The parallel option will be set automatically.") + operators = PerftoolOption(default="r", helpstr="A comma separated list of operators, each name will be interpreted as a subsection name within the formcompiler section") + target_name = PerftoolOption(default=None, helpstr="The target name from CMake") + operator_to_build = PerftoolOption(default=None, helpstr="The operators from the list that is about to be build now. 
CMake sets this one!!!") + + # Arguments that are mainly to be set by logic depending on other options + max_vector_width = PerftoolOption(default=256, helpstr=None) + parallel = PerftoolOption(default=False, helpstr="Mark that this program should be run in parallel. If set to true the c++ code will check that there are more than 1 MPI-ranks involved and the error computation will use communication.") + + +class PerftoolFormOptionsArray(ImmutableRecord): + """ A collection of form-specific form compiler arguments """ + def __init__(self, **kwargs): + opts = {k: v.default for k, v in PerftoolFormOptionsArray.__dict__.items() if isinstance(v, PerftoolOption)} + opts.update(**kwargs) + ImmutableRecord.__init__(self, **opts) + + # Form specific options + form = PerftoolOption(default=None, helpstr="The name of the UFL object representing the form in the UFL file") + filename = PerftoolOption(default=None, helpstr="The filename to use for this LocalOperator") + classname = PerftoolOption(default=None, helpstr="The name of the C++ class to generate") + numerical_jacobian = PerftoolOption(default=False, helpstr="use numerical jacobians (only makes sense, if uflpdelab for some reason fails to generate analytic jacobians)") + matrix_free = PerftoolOption(default=False, helpstr="Generate jacobian_apply_* methods for matrix free solvers") print_transformations = PerftoolOption(default=False, helpstr="print out dot files after ufl tree transformations") print_transformations_dir = PerftoolOption(default=".", helpstr="place where to put dot files (can be omitted)") quadrature_order = PerftoolOption(_type=int, helpstr="Quadrature order used for all integrals.") diagonal_transformation_matrix = PerftoolOption(default=False, helpstr="set option if the jacobian of the transformation is diagonal (axiparallel grids)") constant_transformation_matrix = PerftoolOption(default=False, helpstr="set option if the jacobian of the transformation is constant on a cell") - ini_file = 
PerftoolOption(helpstr="An inifile to use. A generated driver will be hard-coded to it, a [formcompiler] section will be used as default values to form compiler arguments (use snake case)") - opcounter = PerftoolOption(default=False, helpstr="Count operations. Note: In this case only oparor applications are generated since solving and operator counting does not work. You probably want to set instrumentation level>0.") - time_opcounter = PerftoolOption(default=False, helpstr="Generate opcounter codepath. Can be used for timing opcounter programs without setting the opcounter option.") - instrumentation_level = PerftoolOption(default=0, helpstr="Control time/opcounter measurements. 0-do nothing, 1-measure program as a whole, 2-operator applications, 3-measure kernel (eg. alpha-volume, ...), 4-parts of kernel (eg. stage 1-3 of SF)") - project_basedir = PerftoolOption(helpstr="The base (build) directory of the dune-perftool project") fastdg = PerftoolOption(default=False, helpstr="Use FastDGGridOperator from PDELab.") sumfact = PerftoolOption(default=False, helpstr="Use sumfactorization") vectorization_quadloop = PerftoolOption(default=False, helpstr="whether to generate code with explicit vectorization") @@ -61,36 +84,37 @@ class PerftoolOptionsArray(ImmutableRecord): vectorization_vertical = PerftoolOption(default=None, helpstr="an explicit value for vertical vectorization read by the 'explicit' strategy") vectorization_padding = PerftoolOption(default=None, helpstr="an explicit value for the allowed padding in vectorization") vectorization_allow_quadrature_changes = PerftoolOption(default=False, helpstr="whether the vectorization strategy is allowed to alter quadrature point numbers") - turn_off_diagonal_jacobian = PerftoolOption(default=False, helpstr="Do not use diagonal_jacobian transformation on the ufl tree and cast result of jacobianInverseTransposed into a FieldMatrix.") - architecture = PerftoolOption(default="haswell", helpstr="The architecture to optimize 
for. Possible values: haswell|knl") - grid_offset = PerftoolOption(default=False, helpstr="Set to true if you want a yasp grid where the lower left corner is not in the origin.") - simplify = PerftoolOption(default=True, helpstr="Whether to simplify expressions using sympy") - precision_bits = PerftoolOption(default=64, helpstr="The number of bits for the floating point type") - assure_statement_ordering = PerftoolOption(default=False, helpstr="Whether special care should be taken for a good statement ordering in sumfact kernels, runs into a loopy scheduler performance bug, but is necessary for production.") - - # Arguments that are mainly to be set by logic depending on other options - max_vector_width = PerftoolOption(default=256, helpstr=None) + vectorization_list_index = PerftoolOption(default=None, helpstr="Which vectorization to pick from a list (only valid with vectorization_strategy=fromlist).") + simplify = PerftoolOption(default=False, helpstr="Whether to simplify expressions using sympy") + generate_jacobians = PerftoolOption(default=True, helpstr="Whether jacobian_* methods should be generated. 
This is set to false automatically, when numerical_jacobian is set to true.") + generate_residuals = PerftoolOption(default=True, helpstr="Whether alpha_* methods should be generated.") unroll_dimension_loops = PerftoolOption(default=False, helpstr="whether loops over the geometric dimension should be unrolled") precompute_quadrature_info = PerftoolOption(default=True, helpstr="compute quadrature points and weights in the constructor of the local operator") blockstructured = PerftoolOption(default=False, helpstr="Use block structure") number_of_blocks = PerftoolOption(default=1, helpstr="Number of sub blocks in one direction") vectorization_blockstructured = PerftoolOption(default=False, helpstr="Vectorize block structuring") - + adjoint = PerftoolOption(default=False, helpstr="Generate adjoint operator") + control = PerftoolOption(default=False, helpstr="Generate operator of derivative w.r.t. the control variable") + objective_function = PerftoolOption(default=None, helpstr="Name of form representing the objective function in UFL file") + control_variable = PerftoolOption(default=None, helpstr="Name of control variable in UFL file") + block_preconditioner_diagonal = PerftoolOption(default=False, helpstr="Whether this operator should implement the diagonal part of a block preconditioner") + block_preconditioner_offdiagonal = PerftoolOption(default=False, helpstr="Whether this operator should implement the off-diagonal part of a block preconditioner") # Until more sophisticated logic is needed, we keep the actual option data in this module -_options = PerftoolOptionsArray() +_global_options = PerftoolGlobalOptionsArray() +_form_options = {} def initialize_options(): """ Initialize the options from the command line """ - global _options - _options = update_options_from_commandline(_options) - _options = update_options_from_inifile(_options) + global _global_options + _global_options = update_options_from_commandline(_global_options) + _global_options = 
update_options_from_inifile(_global_options) def update_options_from_commandline(opt): """ Return an options array object with updated values from the commandline """ - assert isinstance(opt, PerftoolOptionsArray) + assert isinstance(opt, PerftoolGlobalOptionsArray) parser = ArgumentParser(description="Compile UFL files to PDELab C++ code", epilog="Please report bugs to dominic.kempf@iwr.uni-heidelberg.de", ) @@ -106,26 +130,58 @@ def update_options_from_commandline(opt): def update_options_from_inifile(opt): """ Return an options array object with updated values from an inifile """ if opt.ini_file: - def _fix_types(k, v): - if hasattr(type(opt), k) and getattr(type(opt), k).type is bool: - return bool(eval(v)) - if hasattr(type(opt), k): - return getattr(type(opt), k).type(v) - return v - ini = parse_ini_file(opt.ini_file).get("formcompiler", {}) - ini = {k: _fix_types(k, v) for k, v in ini.items()} - opt = opt.copy(**ini) + def parse_ini(section, opttype): + def _fix_types(k, v): + if hasattr(opttype, k) and getattr(opttype, k).type is bool: + return bool(eval(v)) + if hasattr(opttype, k): + return getattr(opttype, k).type(v) + return v + ini = parse_ini_file(opt.ini_file).get(section, {}) + return {k: _fix_types(k, v) for k, v in ini.items()} + + opt = opt.copy(**parse_ini("formcompiler", PerftoolGlobalOptionsArray)) + # Also parse form-specific options + for form in [i.strip() for i in opt.operators.split(",")]: + _form_options[form] = PerftoolFormOptionsArray(**parse_ini("formcompiler.{}".format(form), PerftoolFormOptionsArray)) + return opt @memoize -def process_options(opt): +def process_global_options(opt): """ Make sure that the options have been fully processed """ opt = expand_architecture_options(opt) + if opt.overlapping: + opt = opt.copy(parallel=True) + + return opt + + +@memoize +def process_form_options(opt, form): if opt.sumfact: opt = opt.copy(unroll_dimension_loops=True) + if opt.numerical_jacobian: + opt = opt.copy(generate_jacobians=False) + + 
if opt.form is None: + opt = opt.copy(form=form) + + if opt.classname is None: + opt = opt.copy(classname="{}Operator".format(form)) + + if opt.filename is None: + opt = opt.copy(filename="{}_{}_file.hh".format(get_option("target_name"), opt.classname)) + + if opt.block_preconditioner_diagonal or opt.block_preconditioner_offdiagonal: + assert opt.numerical_jacobian is False + opt = opt.copy(generate_residuals=False, + generate_jacobians=False, + matrix_free=True, + ) return opt @@ -134,6 +190,8 @@ def expand_architecture_options(opt): return opt.copy(max_vector_width=256) elif opt.architecture == "knl": return opt.copy(max_vector_width=512) + elif opt.architecture == "skylake": + return opt.copy(max_vector_width=512) else: raise NotImplementedError("Architecture {} not known!".format(opt.architecture)) @@ -145,18 +203,44 @@ def set_option(key, value): overwritten. Form compiler arguments will always be set before any other options. """ - global _options - _options = process_options(_options).copy(**{key: value}) + global _global_options + _global_options = process_global_options(_global_options).copy(**{key: value}) + + +def set_form_option(key, value, form=None): + if form is None: + from dune.perftool.generation import get_global_context_value + form = get_global_context_value("form_identifier", 0) + if isinstance(form, int): + form = get_option("operators").split(",")[form].strip() + _form_options[form] = _form_options[form].copy(**{key: value}) def get_option(key): - return getattr(process_options(_options), key) + processed_global_opts = process_global_options(_global_options) + return getattr(processed_global_opts, key) + + +def get_form_option(key, form=None): + if form is None: + from dune.perftool.generation import get_global_context_value + form = get_global_context_value("form_identifier", 0) + if isinstance(form, int): + form = get_option("operators").split(",")[form].strip() + processed_form_opts = process_form_options(_form_options[form], form) + 
return getattr(processed_form_opts, key) def option_switch(opt): def _switch(): - if get_option(opt): - return opt - else: - return "default" + try: + if get_option(opt): + return opt + else: + return "default" + except AttributeError: + if get_form_option(opt): + return opt + else: + return "default" return _switch diff --git a/python/dune/perftool/pdelab/__init__.py b/python/dune/perftool/pdelab/__init__.py index 2450b8a6912550eefd6c98cd70a06228b6ad216e..d3153ca0b8612b5391843a8689fa2c1c9d16bc56 100644 --- a/python/dune/perftool/pdelab/__init__.py +++ b/python/dune/perftool/pdelab/__init__.py @@ -13,6 +13,7 @@ from dune.perftool.pdelab.argument import (pymbolic_apply_function, from dune.perftool.pdelab.basis import (pymbolic_basis, pymbolic_reference_gradient, ) +from dune.perftool.pdelab.function import pymbolic_gridfunction from dune.perftool.pdelab.geometry import (component_iname, pymbolic_cell_volume, pymbolic_facet_area, @@ -25,9 +26,6 @@ from dune.perftool.pdelab.geometry import (component_iname, ) from dune.perftool.pdelab.index import (name_index, ) -from dune.perftool.pdelab.parameter import (cell_parameter_function, - intersection_parameter_function, - ) from dune.perftool.pdelab.quadrature import (pymbolic_quadrature_weight, pymbolic_quadrature_position, quadrature_inames, @@ -101,15 +99,8 @@ class PDELabInterface(object): def pymbolic_apply_function(self, element, restriction, index): return pymbolic_apply_function(self.visitor, element, restriction, index) - # - # Parameter function related generator functions - # - - def intersection_parameter_function(self, name, expr, cellwise_constant): - return intersection_parameter_function(name, expr, cellwise_constant) - - def cell_parameter_function(self, name, expr, restriction, cellwise_constant): - return cell_parameter_function(name, expr, restriction, cellwise_constant) + def pymbolic_gridfunction(self, coeff, restriction, grad): + return pymbolic_gridfunction(coeff, restriction, grad) # # Tensor 
expression related generator functions diff --git a/python/dune/perftool/pdelab/adjoint.py b/python/dune/perftool/pdelab/adjoint.py new file mode 100644 index 0000000000000000000000000000000000000000..aef03bbbf08e45f2510bd90f164bdd70b071580c --- /dev/null +++ b/python/dune/perftool/pdelab/adjoint.py @@ -0,0 +1,171 @@ +import logging + +import numpy + +from loopy import CallMangleInfo +from loopy.symbolic import FunctionIdentifier +from loopy.types import NumpyType + +import pymbolic.primitives as prim + +from dune.perftool.generation import (class_member, + constructor_parameter, + function_mangler, + get_global_context_value, + global_context, + globalarg, + initializer_list, + template_parameter, + ) +from dune.perftool.options import (get_form_option, + ) +from dune.perftool.loopy.target import dtype_floatingpoint +from dune.perftool.pdelab import PDELabInterface +from dune.perftool.pdelab.localoperator import (boundary_predicates, + determine_accumulation_space, + extract_kernel_from_cache, + ) + + +@template_parameter(classtag="operator") +def type_dJdm(): + return "DJDM_VEC" + + +def name_dJdm_constructor_argument(name): + _type = type_dJdm() + constructor_name = name + "_" + constructor_parameter("{}&".format(_type), constructor_name, classtag="operator") + return constructor_name + + +@class_member(classtag="operator") +def define_dJdm_member(name): + _type = type_dJdm() + param = name_dJdm_constructor_argument(name) + initializer_list(name, [param, ], classtag="operator") + return "{}& {};".format(_type, name) + + +def generate_accumulation_instruction(expr, visitor, accumulation_index, number_of_controls): + # Create class member dJdm for accumulating + accumvar = "dJdm" + shape = (number_of_controls,) + define_dJdm_member(accumvar) + + # Tell loopy about + globalarg(accumvar, shape=shape) + assignee = prim.Subscript(prim.Variable(accumvar), accumulation_index) + + # We need to accumulate + expr = prim.Sum((assignee, expr)) + + from 
dune.perftool.generation import instruction + quad_inames = visitor.interface.quadrature_inames() + instruction(assignee=assignee, + expression=expr, + forced_iname_deps=frozenset(quad_inames), + forced_iname_deps_is_final=True, + ) + + +def list_accumulation_infos(expr, visitor): + return ["control", ] + + +class ControlInterface(PDELabInterface): + """Interface for generating the control localoperator + + In this case we will not accumulate in the residual vector but use + a class member representing dJdm instead. + + """ + def __init__(self, accumulation_index, number_of_controls): + """Create ControlInterface + + Arguments: + ---------- + accumulation_index: In which component of the dJdm should be accumulated. + number_of_controls: Number of components of dJdm. Needed for creating the member variable. + """ + self.accumulation_index = accumulation_index + self.number_of_controls = number_of_controls + + def list_accumulation_infos(self, expr, visitor): + return list_accumulation_infos(expr, visitor) + + def generate_accumulation_instruction(self, expr, visitor): + return generate_accumulation_instruction(expr, + visitor, + self.accumulation_index, + self.number_of_controls) + + +def get_visitor(measure, subdomain_id, accumulation_index, number_of_controls): + interface = ControlInterface(accumulation_index, number_of_controls) + from dune.perftool.ufl.visitor import UFL2LoopyVisitor + return UFL2LoopyVisitor(interface, measure, subdomain_id) + + +def visit_integral(integral, accumulation_index, number_of_controls): + integrand = integral.integrand() + measure = integral.integral_type() + subdomain_id = integral.subdomain_id() + + # The visitor needs to know about the current index and the number + # of controls in order to generate the accumulation instruction + visitor = get_visitor(measure, subdomain_id, accumulation_index, number_of_controls) + + # Start the visiting process! 
+ visitor.accumulate(integrand) + + +def generate_kernel(forms): + # Similar to the standard residual generation, except: + # - Have multiple forms + # - Pass index and number of forms along + logger = logging.getLogger(__name__) + + # Visit all integrals once to collect information (dry-run)! + logger.debug('generate_kernel: visit_integrals (dry run)') + with global_context(dry_run=True): + for i, form in enumerate(forms): + for integral in form: + visit_integral(integral, i, len(forms)) + + # Now perform some checks on what should be done + from dune.perftool.sumfact.vectorization import decide_vectorization_strategy + logger.debug('generate_kernel: decide_vectorization_strategy') + decide_vectorization_strategy() + + # Delete the cache contents and do the real thing! + logger.debug('generate_kernel: visit_integrals (no dry run)') + from dune.perftool.generation import delete_cache_items + delete_cache_items("kernel_default") + for i, form in enumerate(forms): + for integral in form: + visit_integral(integral, i, len(forms)) + + from dune.perftool.pdelab.signatures import kernel_name, assembly_routine_signature + name = kernel_name() + signature = assembly_routine_signature() + knl = extract_kernel_from_cache("kernel_default", name, signature) + delete_cache_items("kernel_default") + + # Reset the quadrature degree + from dune.perftool.sumfact.tabulation import set_quadrature_points + set_quadrature_points(None) + + # Clean the cache from any data collected after the dry run + delete_cache_items("dryrundata") + + return knl + + +# @backend(interface="generate_kernels_per_integral") +def control_generate_kernels_per_integral(forms): + """For the control problem forms will have one form for every + measure. Every form will only contain integrals of one type. 
+ + """ + yield generate_kernel(forms) diff --git a/python/dune/perftool/pdelab/argument.py b/python/dune/perftool/pdelab/argument.py index 848d9d9ad275b4ff910da9e9f6514fe883f9b436..30449edea105a0a962c5883559e61ced1fc4cdb0 100644 --- a/python/dune/perftool/pdelab/argument.py +++ b/python/dune/perftool/pdelab/argument.py @@ -5,7 +5,6 @@ Namely: * accumulation object (r, jac...) """ -from dune.perftool.options import get_option from dune.perftool.generation import (domain, function_mangler, iname, @@ -178,14 +177,14 @@ def name_accumulation_variable(restrictions=None): if measure == "cell": restrictions = (Restriction.NONE,) else: - restrictions = (Restriction.NEGATIVE,) + restrictions = (Restriction.POSITIVE,) return name_residual(*restrictions) if ft == 'jacobian': if restrictions is None: if measure == "cell": restrictions = (Restriction.NONE, Restriction.NONE) else: - restrictions = (Restriction.NEGATIVE, Restriction.NEGATIVE) + restrictions = (Restriction.POSITIVE, Restriction.POSITIVE) return name_jacobian(*restrictions) assert False diff --git a/python/dune/perftool/pdelab/basis.py b/python/dune/perftool/pdelab/basis.py index 7effdb5835daa3c333bfbd134da45366310c9a07..55c9a7ef7676c7647d485486d73440aed9d22aa2 100644 --- a/python/dune/perftool/pdelab/basis.py +++ b/python/dune/perftool/pdelab/basis.py @@ -6,10 +6,11 @@ from dune.perftool.generation import (backend, include_file, instruction, kernel_cached, + preamble, temporary_variable, ) from dune.perftool.options import (option_switch, - get_option + get_form_option, ) from dune.perftool.pdelab.spaces import (lfs_iname, lfs_inames, @@ -23,6 +24,7 @@ from dune.perftool.pdelab.geometry import (component_iname, world_dimension, name_jacobian_inverse_transposed, to_cell_coordinates, + name_cell, ) from dune.perftool.pdelab.localoperator import (lop_template_ansatz_gfs, lop_template_test_gfs, @@ -84,7 +86,7 @@ def declare_cache_temporary(element, restriction, which): t_cache = type_localbasis_cache(element) lfs = 
name_leaf_lfs(element, restriction) - def decl(name, shape, shape_impl): + def decl(name, kernel, decl_info): return "typename {}::{}ReturnType {};".format(t_cache, which, name, @@ -171,7 +173,7 @@ def evaluate_coefficient(visitor, element, name, container, restriction, index): lfs = name_lfs(element, restriction, index) basis = visitor.interface.pymbolic_basis(sub_element, restriction, 0, context='trial') basisindex = get_pymbolic_indices(basis)[:-1] - if get_option("blockstructured"): + if get_form_option("blockstructured"): from dune.perftool.blockstructured.argument import pymbolic_coefficient coeff = pymbolic_coefficient(container, lfs, sub_element, basisindex) else: @@ -207,7 +209,7 @@ def evaluate_coefficient_gradient(visitor, element, name, container, restriction from dune.perftool.tools import maybe_wrap_subscript basis = maybe_wrap_subscript(basis, Variable(dimindex)) - if get_option("blockstructured"): + if get_form_option("blockstructured"): from dune.perftool.blockstructured.argument import pymbolic_coefficient coeff = pymbolic_coefficient(container, lfs, sub_element, basisindex) else: @@ -221,5 +223,5 @@ def evaluate_coefficient_gradient(visitor, element, name, container, restriction instruction(expression=Reduction("sum", basisindex, reduction_expr, allow_simultaneous=True), assignee=assignee, forced_iname_deps=frozenset(get_backend("quad_inames")()).union(frozenset({dimindex})), - forced_iname_deps_is_final=True + forced_iname_deps_is_final=True, ) diff --git a/python/dune/perftool/pdelab/driver/__init__.py b/python/dune/perftool/pdelab/driver/__init__.py index e052fa954f61a3d0890ba373a3527cc770ff3fac..3877165c574ac8ef7b6d7197b56f97d071f38f42 100644 --- a/python/dune/perftool/pdelab/driver/__init__.py +++ b/python/dune/perftool/pdelab/driver/__init__.py @@ -12,90 +12,60 @@ NB: Previously this __init__.py was a module driver.py. 
As it was growing, """ from dune.perftool.error import PerftoolCodegenError from dune.perftool.generation import (generator_factory, + get_global_context_value, global_context, include_file, cached, pre_include, preamble, ) -from dune.perftool.options import get_option +from dune.perftool.options import (get_form_option, + get_option, + ) -# Have a global variable with the entire form data. This allows functions that depend -# deterministically on the entire data set to directly access it instead of passing it -# through the entire generator chain. -_driver_data = {} +# +# The following functions are not doing anything useful, but providing easy access +# to quantities that are needed throughout the process of generating the driver! +# -# Have a function access this global data structure -def set_driver_data(formdatas, data): - assert (len(formdatas) <= 2) - if len(formdatas) == 1: - _driver_data['form'] = formdatas[0].preprocessed_form - _driver_data['formdata'] = formdatas[0] - else: - mass_index = mass_form_index(formdatas, data) - if mass_index is None: - raise NotImplementedError("Form for mass matrix needs to have name 'mass' in ufl file.") - _driver_data['mass_form'] = formdatas[mass_index].preprocessed_form - _driver_data['mass_formdata'] = formdatas[mass_index] - _driver_data['form'] = formdatas[1 - mass_index].preprocessed_form - _driver_data['formdata'] = formdatas[1 - mass_index] +def get_form_ident(): + idents = [i.strip() for i in get_option("operators").split(",")] + if len(idents) == 2: + idents.remove("mass") + assert(len(idents) == 1) + return idents[0] + - _driver_data['data'] = data +def get_form(): + data = get_global_context_value("data") + return data.object_by_name[get_form_option("form", get_form_ident())] def get_dimension(): - return _driver_data['form'].ufl_cell().geometric_dimension() + return get_form().ufl_cell().geometric_dimension() def get_cell(): - return _driver_data['form'].ufl_cell().cellname() + return 
get_form().ufl_cell().cellname() def get_test_element(): - return _driver_data['form'].arguments()[0].ufl_element() + return get_form().arguments()[0].ufl_element() def get_trial_element(): - return _driver_data['form'].coefficients()[0].ufl_element() - - -def get_formdata(): - return _driver_data['formdata'] - - -def get_mass_formdata(): - return _driver_data["mass_formdata"] + return get_form().coefficients()[0].ufl_element() def is_stationary(): - return 'mass_form' not in _driver_data - - -def form_name_suffix(name, formdata): - from dune.perftool.pdelab.localoperator import name_form - data = _driver_data['data'] - form_name = name_form(formdata, data) - return name + '_' + form_name - - -def get_object(name): - return _driver_data['data'].object_by_name.get(name, None) - - -def mass_form_index(formdatas, data): - for index, formdata in enumerate(formdatas): - try: - if data.object_names[id(formdata.original_form)] == 'mass': - return index - except KeyError: - continue + return "mass" not in [i.strip() for i in get_option("operators").split(",")] def is_linear(form=None): '''Test if form is linear in trial function''' if form is None: - form = get_formdata().original_form + form = get_form() from ufl import derivative from ufl.algorithms import expand_derivatives jacform = expand_derivatives(derivative(form, form.coefficients()[0])) @@ -192,8 +162,11 @@ def unroll_list_tensors(data): yield e -def preprocess_leaf_data(element, data): - data = get_object(data) +def preprocess_leaf_data(element, data, applyZeroDefault=True): + data = get_global_context_value("data").object_by_name.get(data, None) + if data is None and not applyZeroDefault: + return None + from ufl import MixedElement if isinstance(element, MixedElement): # data is None -> use 0 default @@ -222,7 +195,7 @@ def name_inifile(): return "argv[1]" -@preamble +@preamble(section="init") def parse_initree(varname): include_file("dune/common/parametertree.hh", filetag="driver") 
include_file("dune/common/parametertreeparser.hh", filetag="driver") @@ -236,7 +209,7 @@ def name_initree(): return "initree" -@preamble +@preamble(section="init") def define_mpihelper(name): include_file("dune/common/parallel/mpihelper.hh", filetag="driver") return "Dune::MPIHelper& {} = Dune::MPIHelper::instance(argc, argv);".format(name) @@ -248,23 +221,40 @@ def name_mpihelper(): return name -def generate_driver(formdatas, data): - # The driver module uses a global dictionary for storing necessary data - set_driver_data(formdatas, data) +@preamble(section="grid") +def check_parallel_execution(): + from dune.perftool.pdelab.driver.gridfunctionspace import name_leafview + gv = name_leafview() + return ["if ({}.comm().size()==1){{".format(gv), + ' std::cout << "This program should be run in parallel!" << std::endl;', + " return 1;", + "}"] + + +def generate_driver(): + # Guarantee that config.h is the very first include in the generated file + include_file("config.h", filetag="driver") + + # Make sure that the MPI helper is instantiated + name_mpihelper() + + # Add check to c++ file if this program should only be used in parallel mode + if get_option("parallel"): + check_parallel_execution() # Entrypoint for driver generation - if get_option("opcounter") or get_option("time_opcounter"): - if get_option("time_opcounter"): + if get_option("opcounter") or get_option("performance_measuring"): + if get_option("performance_measuring"): assert(not get_option("opcounter")) - assert(any(_driver_data['form'].ufl_cell().cellname() in x for x in - ["vertex", "interval", "quadrilateral", "hexahedron"])) - # In case of operator conunting we only assemble the matrix and evaluate the residual + assert(isQuadrilateral(get_cell())) + # In case of operator counting we only assemble the matrix and evaluate the residual # assemble_matrix_timer() from dune.perftool.pdelab.driver.timings import apply_jacobian_timer, evaluate_residual_timer from dune.perftool.loopy.target import 
type_floatingpoint pre_include("#define HP_TIMER_OPCOUNTER {}".format(type_floatingpoint()), filetag="driver") evaluate_residual_timer() - apply_jacobian_timer() + if get_form_option("matrix_free"): + apply_jacobian_timer() elif is_stationary(): from dune.perftool.pdelab.driver.solve import dune_solve vec = dune_solve() @@ -288,11 +278,33 @@ def generate_driver(formdatas, data): return_statement() from dune.perftool.generation import retrieve_cache_items - from cgen import FunctionDeclaration, FunctionBody, Block, Value - driver_signature = FunctionDeclaration(Value('bool', 'driver'), [Value('int', 'argc'), Value('char**', 'argv')]) - contents = [i for i in retrieve_cache_items("preamble", make_generable=True)] + from cgen import FunctionDeclaration, FunctionBody, Block, Value, LineComment, Line + driver_signature = FunctionDeclaration(Value('int', 'main'), [Value('int', 'argc'), Value('char**', 'argv')]) + + contents = [] + + def add_section(tag, comment): + tagcontents = [i for i in retrieve_cache_items("preamble and {}".format(tag), make_generable=True)] + if tagcontents: + contents.append(LineComment(comment)) + contents.append(Line("\n")) + contents.extend(tagcontents) + contents.append(Line("\n")) + + add_section("init", "Initialize basic stuff...") + add_section("grid", "Setup grid (view)...") + add_section("fem", "Set up finite element maps...") + add_section("gfs", "Set up grid function spaces...") + add_section("constraints", "Set up constraints container...") + add_section("gridoperator", "Set up grid grid operators...") + add_section("vector", "Set up solution vectors...") + add_section("timings", "Maybe take performance measurements...") + add_section("solver", "Set up (non)linear solvers...") + add_section("vtk", "Do visualization...") + add_section("instat", "Set up instationary stuff...") + add_section("printing", "Maybe print residuals and matrices to stdout...") + add_section("error", "Maybe calculate errors for test results...") - from cgen 
import Line if get_option("instrumentation_level") >= 1: from dune.perftool.generation import post_include post_include("HP_DECLARE_TIMER(driver);\n", filetag="driver") @@ -301,12 +313,27 @@ def generate_driver(formdatas, data): contents.insert(len(contents) - 1, Line(text="DUMP_TIMER({}, driver, {}, true);\n".format(get_option("instrumentation_level"), timestream))) contents.insert(0, Line(text="\n")) driver_body = Block(contents) + + # Wrap a try/catch block around the driver body + from dune.perftool.cgen import CatchBlock, TryCatchBlock, Value, Block, Line + catch_blocks = [CatchBlock(Value("Dune::Exception&", "e"), + Block([Line("std::cerr << \"Dune reported error: \" << e << std::endl;\n"), + Line("return 1;\n"), + ]) + ), + CatchBlock(Value("std::exception&", "e"), + Block([Line("std::cerr << \"Unknown exception thrown!\" << std::endl;\n"), + Line("return 1;\n"), + ]) + ) + ] + driver_body = Block([TryCatchBlock(driver_body, catch_blocks)]) driver = FunctionBody(driver_signature, driver_body) filename = get_option("driver_file") from dune.perftool.file import generate_file - generate_file(filename, "driver", [driver]) + generate_file(filename, "driver", [driver], headerguard=False) # Reset the caching data structure from dune.perftool.generation import delete_cache_items diff --git a/python/dune/perftool/pdelab/driver/constraints.py b/python/dune/perftool/pdelab/driver/constraints.py index 9158fc6be52d5c95c976eb08041fc3ad5b4a5da6..e3a96df485dc382761587e0950764372102869a6 100644 --- a/python/dune/perftool/pdelab/driver/constraints.py +++ b/python/dune/perftool/pdelab/driver/constraints.py @@ -3,7 +3,6 @@ from dune.perftool.generation import (global_context, preamble, ) from dune.perftool.pdelab.driver import (FEM_name_mangling, - get_formdata, get_trial_element, ) from dune.perftool.pdelab.driver.gridfunctionspace import (name_gfs, @@ -24,14 +23,26 @@ def name_assembled_constraints(): return name -@preamble +def has_dirichlet_constraints(is_dirichlet): + if 
isinstance(is_dirichlet, (list, tuple)): + return any(bool(d) for d in is_dirichlet) + else: + return bool(is_dirichlet) + + +@preamble(section="constraints") def assemble_constraints(name): element = get_trial_element() gfs = name_trial_gfs() is_dirichlet = preprocess_leaf_data(element, "is_dirichlet") - bctype_function = name_bctype_function(element, is_dirichlet) - return "Dune::PDELab::constraints({}, {}, {});".format(bctype_function, - gfs, + if has_dirichlet_constraints(is_dirichlet): + bctype_function = name_bctype_function(element, is_dirichlet) + return "Dune::PDELab::constraints({}, {}, {});".format(bctype_function, + gfs, + name, + ) + else: + return "Dune::PDELab::constraints({}, {});".format(gfs, name, ) @@ -59,7 +70,7 @@ def name_bctype_function(element, is_dirichlet): return name -@preamble +@preamble(section="constraints") def define_bctype_function(element, is_dirichlet, name): gv = name_leafview() bctype_lambda = name_bctype_lambda(name, is_dirichlet) @@ -70,13 +81,13 @@ def define_bctype_function(element, is_dirichlet, name): ) -@preamble +@preamble(section="constraints") def define_power_bctype_function(element, name, subgfs): include_file('dune/pdelab/constraints/common/constraintsparameters.hh', filetag='driver') return "Dune::PDELab::PowerConstraintsParameters<decltype({}), {}> {}({});".format(subgfs, element.num_sub_elements(), name, subgfs) -@preamble +@preamble(section="constraints") def define_composite_bctype_function(element, is_dirichlet, name, subgfs): include_file('dune/pdelab/constraints/common/constraintsparameters.hh', filetag='driver') return "Dune::PDELab::CompositeConstraintsParameters<{}> {}({});".format(', '.join('decltype({})'.format(c) for c in subgfs), @@ -91,7 +102,7 @@ def name_bctype_lambda(name, func): return name -@preamble +@preamble(section="constraints") def define_intersection_lambda(name, func): from ufl.classes import Expr if func is None: @@ -105,7 +116,7 @@ def define_intersection_lambda(name, func): raise 
ValueError("Expression not understood") -@preamble +@preamble(section="constraints") def typedef_constraintscontainer(name): gfs = type_trial_gfs() r = type_range() @@ -118,7 +129,7 @@ def type_constraintscontainer(): return name -@preamble +@preamble(section="constraints") def define_constraintscontainer(name): cctype = type_constraintscontainer() return ["{} {};".format(cctype, name), "{}.clear();".format(name)] diff --git a/python/dune/perftool/pdelab/driver/error.py b/python/dune/perftool/pdelab/driver/error.py index 6e7fb9128e5437650baf140191311ee4a02eb5f7..a4a7e2d414c7ffffce709c97d80c1b7a329dbef3 100644 --- a/python/dune/perftool/pdelab/driver/error.py +++ b/python/dune/perftool/pdelab/driver/error.py @@ -5,11 +5,12 @@ from dune.perftool.generation import (cached, preamble, ) from dune.perftool.options import get_option -from dune.perftool.pdelab.driver import (get_formdata, +from dune.perftool.pdelab.driver import (get_form_ident, get_trial_element, preprocess_leaf_data, ) -from dune.perftool.pdelab.driver.gridfunctionspace import (name_trial_gfs, +from dune.perftool.pdelab.driver.gridfunctionspace import (name_leafview, + name_trial_gfs, name_trial_subgfs, type_range, ) @@ -23,7 +24,7 @@ from dune.perftool.pdelab.driver.solve import (define_vector, from ufl import MixedElement, TensorElement, VectorElement -@preamble +@preamble(section="error") def define_test_fail_variable(name): return 'bool {}(false);'.format(name) @@ -48,7 +49,7 @@ def type_discrete_grid_function(gfs): return "{}_DGF".format(gfs.upper()) -@preamble +@preamble(section="error") def define_discrete_grid_function(gfs, vector_name, dgf_name): dgf_type = type_discrete_grid_function(gfs) return ["using {} = Dune::PDELab::DiscreteGridFunction<decltype({}),decltype({})>;".format(dgf_type, gfs, vector_name), @@ -61,10 +62,10 @@ def name_discrete_grid_function(gfs, vector_name): return dgf_name -@preamble +@preamble(section="error") def typedef_difference_squared_adapter(name, treepath): sol = 
name_exact_solution_gridfunction(treepath) - vector = name_vector(get_formdata()) + vector = name_vector(get_form_ident()) gfs = name_trial_subgfs(treepath) dgf = name_discrete_grid_function(gfs, vector) @@ -77,11 +78,11 @@ def type_difference_squared_adapter(treepath): return name -@preamble +@preamble(section="error") def define_difference_squared_adapter(name, treepath): t = type_difference_squared_adapter(treepath) sol = name_exact_solution_gridfunction(treepath) - vector = name_vector(get_formdata()) + vector = name_vector(get_form_ident()) gfs = name_trial_subgfs(treepath) dgf = name_discrete_grid_function(gfs, vector) @@ -94,7 +95,7 @@ def name_difference_squared_adapter(treepath): return name -@preamble +@preamble(section="error") def _accumulate_L2_squared(treepath): dsa = name_difference_squared_adapter(treepath) accum_error = name_accumulated_L2_error() @@ -104,14 +105,21 @@ def _accumulate_L2_squared(treepath): strtp = ", ".join(str(t) for t in treepath) + gv = name_leafview() + sum_error_over_ranks = "" + if get_option("parallel"): + sum_error_over_ranks = " err = {}.comm().sum(err);".format(gv) return ["{", " // L2 error squared of difference between numerical", " // solution and the interpolation of exact solution", " // for treepath ({})".format(strtp), " typename decltype({})::Traits::RangeType err(0.0);".format(dsa), " Dune::PDELab::integrateGridFunction({}, err, 10);".format(dsa), + sum_error_over_ranks, " {} += err;".format(accum_error), - " std::cout << \"L2 Error for treepath {}: \" << err << std::endl;".format(strtp), + " if ({}.comm().rank() == 0){{".format(gv), + " std::cout << \"L2 Error for treepath {}: \" << err << std::endl;".format(strtp), + " }" "}", ] @@ -139,13 +147,18 @@ def treepath_to_index(element, treepath, offset=0): def accumulate_L2_squared(): element = get_trial_element() if isinstance(element, MixedElement): - for i in range(element.value_size()): - _accumulate_L2_squared(get_treepath(element, i)) + tree_pathes = (True,) * 
element.value_size() + if get_option("l2error_tree_path") is not None: + tree_pathes = list(map(int, get_option("l2error_tree_path").split(','))) + assert len(tree_pathes) == element.value_size() + for i, path in enumerate(tree_pathes): + if path: + _accumulate_L2_squared(get_treepath(element, i)) else: _accumulate_L2_squared(()) -@preamble +@preamble(section="error") def define_accumulated_L2_error(name): t = type_range() return "{} {}(0.0);".format(t, name) @@ -157,20 +170,23 @@ def name_accumulated_L2_error(): return name -@preamble +@preamble(section="error") def compare_L2_squared(): accumulate_L2_squared() + gv = name_leafview() accum_error = name_accumulated_L2_error() fail = name_test_fail_variable() return ["using std::abs;", "using std::isnan;", - "std::cout << \"\\nl2errorsquared: \" << {} << std::endl << std::endl;".format(accum_error), + "if ({}.comm().rank() == 0){{".format(gv), + " std::cout << \"\\nl2errorsquared: \" << {} << std::endl << std::endl;".format(accum_error), + "}", "if (isnan({0}) or abs({0})>{1})".format(accum_error, get_option("compare_l2errorsquared")), " {} = true;".format(fail)] -@preamble +@preamble(section="error") def return_statement(): from dune.perftool.pdelab.driver.error import name_test_fail_variable fail = name_test_fail_variable() diff --git a/python/dune/perftool/pdelab/driver/gridfunctionspace.py b/python/dune/perftool/pdelab/driver/gridfunctionspace.py index c1b439eae390add27ad96337f9d8851671811b41..a956855ad0ee827d8f64e6b731071db82a3c851b 100644 --- a/python/dune/perftool/pdelab/driver/gridfunctionspace.py +++ b/python/dune/perftool/pdelab/driver/gridfunctionspace.py @@ -1,7 +1,10 @@ from dune.perftool.generation import (include_file, preamble, ) -from dune.perftool.options import get_option, set_option +from dune.perftool.options import (get_form_option, + get_option, + set_option, + ) from dune.perftool.pdelab.driver import (FEM_name_mangling, get_cell, get_dimension, @@ -21,7 +24,7 @@ from 
dune.perftool.loopy.target import type_floatingpoint from ufl import FiniteElement, MixedElement, TensorElement, VectorElement, TensorProductElement -@preamble +@preamble(section="grid") def typedef_domainfield(name): gridt = type_grid() return "using {} = {}::ctype;".format(name, gridt) @@ -32,7 +35,7 @@ def type_domainfield(): return "DF" -@preamble +@preamble(section="init") def typedef_range(name): return "using {} = {};".format(name, type_floatingpoint()) @@ -43,16 +46,12 @@ def type_range(): return name -@preamble +@preamble(section="grid") def typedef_grid(name): dim = get_dimension() if isQuadrilateral(get_trial_element().cell()): - # For Yasp Grids the jacobi of the transformation is diagonal and constant on each cell - set_option('diagonal_transformation_matrix', True) - set_option('constant_transformation_matrix', True) - range_type = type_range() - if get_option("grid_offset"): + if get_option("yaspgrid_offset"): gridt = "Dune::YaspGrid<{0}, Dune::EquidistantOffsetCoordinates<{1}, {0}>>".format(dim, range_type) else: gridt = "Dune::YaspGrid<{0}, Dune::EquidistantCoordinates<{1}, {0}>>".format(dim, range_type) @@ -74,7 +73,7 @@ def type_grid(): return name -@preamble +@preamble(section="grid") def define_grid(name): include_file("dune/testtools/gridconstruction.hh", filetag="driver") ini = name_initree() @@ -89,7 +88,7 @@ def name_grid(): return name -@preamble +@preamble(section="grid") def typedef_leafview(name): grid = type_grid() return "using {} = {}::LeafGridView;".format(name, grid) @@ -101,7 +100,7 @@ def type_leafview(): return name -@preamble +@preamble(section="grid") def define_leafview(name): _type = type_leafview() grid = name_grid() @@ -114,16 +113,16 @@ def name_leafview(): return name -@preamble +@preamble(section="fem") def typedef_fem(element, name): gv = type_leafview() df = type_domainfield() r = type_range() dim = get_dimension() - if get_option("blockstructured"): + if get_form_option("blockstructured"): 
include_file("dune/perftool/blockstructured/blockstructuredqkfem.hh", filetag="driver") - degree = element.degree() * get_option("number_of_blocks") + degree = element.degree() * get_form_option("number_of_blocks") return "using {} = Dune::PDELab::BlockstructuredQkLocalFiniteElementMap<{}, {}, {}, {}>;" \ .format(name, gv, df, r, degree) @@ -177,7 +176,7 @@ def type_fem(element): return name -@preamble +@preamble(section="fem") def define_fem(element, name): femtype = type_fem(element) from dune.perftool.pdelab.driver import isDG @@ -234,6 +233,8 @@ def name_gfs(element, is_dirichlet, treepath=(), root=True): subgfs.append(name_gfs(subel, is_dirichlet[k:k + subel.value_size()], treepath=treepath + (i,), root=False)) k = k + subel.value_size() name = "_".join(subgfs) + if len(subgfs) == 1: + name = "{}_dummy".format(name) name = "{}_{}".format(name, "_".join(str(t) for t in treepath)) define_composite_gfs(element, is_dirichlet, name, tuple(subgfs), root) return name @@ -272,6 +273,8 @@ def type_gfs(element, is_dirichlet, root=True): subgfs.append(type_gfs(subel, is_dirichlet[k:k + subel.value_size()], root=False)) k = k + subel.value_size() name = "_".join(subgfs) + if len(subgfs) == 1: + name = "{}_dummy".format(name) typedef_composite_gfs(element, name, tuple(subgfs), root) return name else: @@ -283,7 +286,7 @@ def type_gfs(element, is_dirichlet, root=True): return name -@preamble +@preamble(section="gfs") def define_gfs(element, is_dirichlet, name, root): gfstype = type_gfs(element, is_dirichlet, root=root) gv = name_leafview() @@ -292,7 +295,7 @@ def define_gfs(element, is_dirichlet, name, root): "{}.name(\"{}\");".format(name, name)] -@preamble +@preamble(section="gfs") def define_power_gfs(element, is_dirichlet, name, subgfs, root): gfstype = type_gfs(element, is_dirichlet, root=root) names = ["using namespace Dune::Indices;"] @@ -300,14 +303,14 @@ def define_power_gfs(element, is_dirichlet, name, subgfs, root): return ["{} {}({});".format(gfstype, name, 
subgfs)] + names -@preamble +@preamble(section="gfs") def define_composite_gfs(element, is_dirichlet, name, subgfs, root): gfstype = type_gfs(element, is_dirichlet, root=root) return ["{} {}({});".format(gfstype, name, ", ".join(subgfs)), "{}.update();".format(name)] -@preamble +@preamble(section="gfs") def typedef_gfs(element, is_dirichlet, name, root): vb = type_vectorbackend(element, root) gv = type_leafview() @@ -316,7 +319,7 @@ def typedef_gfs(element, is_dirichlet, name, root): return "using {} = Dune::PDELab::GridFunctionSpace<{}, {}, {}, {}>;".format(name, gv, fem, cass, vb) -@preamble +@preamble(section="gfs") def typedef_power_gfs(element, is_dirichlet, name, subgfs, root): include_file("dune/pdelab/gridfunctionspace/powergridfunctionspace.hh", filetag="driver") vb = type_vectorbackend(element, root) @@ -325,7 +328,7 @@ def typedef_power_gfs(element, is_dirichlet, name, subgfs, root): return "using {} = Dune::PDELab::PowerGridFunctionSpace<{}, {}, {}, {}>;".format(name, subgfs, element.num_sub_elements(), vb, ot) -@preamble +@preamble(section="gfs") def typedef_composite_gfs(element, name, subgfs, root): vb = type_vectorbackend(element, root) ot = type_orderingtag(isinstance(element, FiniteElement)) @@ -333,10 +336,10 @@ def typedef_composite_gfs(element, name, subgfs, root): return "using {} = Dune::PDELab::CompositeGridFunctionSpace<{}, {}, {}>;".format(name, vb, ot, args) -@preamble +@preamble(section="gfs") def typedef_vectorbackend(name, element, root): include_file("dune/pdelab/backend/istl.hh", filetag="driver") - if get_option("fastdg") and root: + if get_form_option("fastdg") and root: blocking = "Dune::PDELab::ISTL::Blocking::fixed" if isinstance(element, MixedElement): blocksize = "" @@ -357,33 +360,52 @@ def type_vectorbackend(element, root): def type_orderingtag(leaf): - if leaf or not get_option("fastdg"): + if leaf or not get_form_option("fastdg"): return "Dune::PDELab::LexicographicOrderingTag" else: return 
"Dune::PDELab::EntityBlockedOrderingTag" -@preamble +@preamble(section="gfs") +def typedef_overlapping_dirichlet_constraintsassembler(name): + include_file("dune/pdelab/constraints/conforming.hh", filetag="driver") + return "using {} = Dune::PDELab::ConformingDirichletConstraints;".format(name) + + +@preamble(section="gfs") +def typedef_p0parallel_constraintsassembler(name): + include_file("dune/pdelab/constraints/p0.hh", filetag="driver") + return "using {} = Dune::PDELab::P0ParallelConstraints;".format(name) + + +@preamble(section="gfs") def typedef_dirichlet_constraintsassembler(name): include_file("dune/pdelab/constraints/conforming.hh", filetag="driver") return "using {} = Dune::PDELab::ConformingDirichletConstraints;".format(name) -@preamble +@preamble(section="gfs") def typedef_no_constraintsassembler(name): return "using {} = Dune::PDELab::NoConstraints;".format(name) def type_constraintsassembler(is_dirichlet): assert isinstance(is_dirichlet, bool) - if is_dirichlet: + overlapping = get_option("overlapping") + if is_dirichlet and not overlapping: name = "DirichletConstraintsAssember" typedef_dirichlet_constraintsassembler(name) - return name + elif is_dirichlet and overlapping: + name = "OverlappingConformingDirichletConstraints" + typedef_overlapping_dirichlet_constraintsassembler(name) + elif not is_dirichlet and overlapping: + name = "P0ParallelConstraints" + typedef_p0parallel_constraintsassembler(name) else: + assert not is_dirichlet and not overlapping name = "NoConstraintsAssembler" typedef_no_constraintsassembler(name) - return name + return name def name_trial_subgfs(treepath): @@ -400,7 +422,7 @@ def name_subgfs(treepath): return name -@preamble +@preamble(section="vtk") def define_subgfs(name, treepath): t = type_subgfs(treepath) gfs = name_trial_gfs() diff --git a/python/dune/perftool/pdelab/driver/gridoperator.py b/python/dune/perftool/pdelab/driver/gridoperator.py index 
4727f4f281ed6e8c6205cf5613fc778b07c476ac..8b5c8c0223b61f3d8a5464170cb5c2e496151e15 100644 --- a/python/dune/perftool/pdelab/driver/gridoperator.py +++ b/python/dune/perftool/pdelab/driver/gridoperator.py @@ -2,8 +2,7 @@ from dune.perftool.generation import (get_global_context_value, include_file, preamble, ) -from dune.perftool.pdelab.driver import (form_name_suffix, - get_cell, +from dune.perftool.pdelab.driver import (get_cell, get_dimension, get_test_element, get_trial_element, @@ -22,21 +21,20 @@ from dune.perftool.pdelab.driver.gridfunctionspace import (name_test_gfs, type_trial_gfs, ) from dune.perftool.pdelab.localoperator import localoperator_basename -from dune.perftool.pdelab.parameter import parameterclass_basename -from dune.perftool.options import get_option +from dune.perftool.options import get_form_option -@preamble -def typedef_gridoperator(name, formdata): +@preamble(section="gridoperator") +def typedef_gridoperator(name, form_ident): ugfs = type_trial_gfs() vgfs = type_test_gfs() - lop = type_localoperator(formdata) + lop = type_localoperator(form_ident) cc = type_constraintscontainer() mb = type_matrixbackend() df = type_domainfield() r = type_range() - if get_option("fastdg"): - if not get_option("sumfact"): + if get_form_option("fastdg"): + if not get_form_option("sumfact"): raise PerftoolCodegenError("FastDGGridOperator is only implemented for sumfactorization.") include_file("dune/pdelab/gridoperator/fastdg.hh", filetag="driver") return "using {} = Dune::PDELab::FastDGGridOperator<{}, {}, {}, {}, {}, {}, {}, {}, {}>;".format(name, ugfs, vgfs, lop, mb, df, r, r, cc, cc) @@ -45,68 +43,66 @@ def typedef_gridoperator(name, formdata): return "using {} = Dune::PDELab::GridOperator<{}, {}, {}, {}, {}, {}, {}, {}, {}>;".format(name, ugfs, vgfs, lop, mb, df, r, r, cc, cc) -def type_gridoperator(formdata): - name = form_name_suffix("GO", formdata).upper() - typedef_gridoperator(name, formdata) +def type_gridoperator(form_ident): + name = 
"GO_{}".format(form_ident) + typedef_gridoperator(name, form_ident) return name -@preamble -def define_gridoperator(name, formdata): - gotype = type_gridoperator(formdata) +@preamble(section="gridoperator") +def define_gridoperator(name, form_ident): + gotype = type_gridoperator(form_ident) ugfs = name_trial_gfs() vgfs = name_test_gfs() if ugfs != vgfs: raise NotImplementedError("Non-Galerkin methods currently not supported!") cc = name_assembled_constraints() - lop = name_localoperator(formdata) + lop = name_localoperator(form_ident) mb = name_matrixbackend() return ["{} {}({}, {}, {}, {}, {}, {});".format(gotype, name, ugfs, cc, vgfs, cc, lop, mb), "std::cout << \"gfs with \" << {}.size() << \" dofs generated \"<< std::endl;".format(ugfs), "std::cout << \"cc with \" << {}.size() << \" dofs generated \"<< std::endl;".format(cc)] -def name_gridoperator(formdata): - name = form_name_suffix("go", formdata) - define_gridoperator(name, formdata) +def name_gridoperator(form_ident): + name = "go_{}".format(form_ident) + define_gridoperator(name, form_ident) return name -@preamble -def typedef_localoperator(name, formdata): +@preamble(section="gridoperator") +def typedef_localoperator(name, form_ident): ugfs = type_trial_gfs() vgfs = type_test_gfs() - data = get_global_context_value("data") - filename = get_option("operator_file") + filename = get_form_option("filename", form_ident) include_file(filename, filetag="driver") - lopname = localoperator_basename(formdata, data) + lopname = localoperator_basename(form_ident) range_type = type_range() return "using {} = {}<{}, {}, {}>;".format(name, lopname, ugfs, vgfs, range_type) -def type_localoperator(formdata): - name = form_name_suffix("LOP", formdata).upper() - typedef_localoperator(name, formdata) +def type_localoperator(form_ident): + name = "LOP_{}".format(form_ident.upper()) + typedef_localoperator(name, form_ident) return name -@preamble -def define_localoperator(name, formdata): +@preamble(section="gridoperator") 
+def define_localoperator(name, form_ident): trial_gfs = name_trial_gfs() test_gfs = name_test_gfs() - loptype = type_localoperator(formdata) + loptype = type_localoperator(form_ident) ini = name_initree() - params = name_parameters(formdata) - return "{} {}({}, {}, {}, {});".format(loptype, name, trial_gfs, test_gfs, ini, params) + return "{} {}({}, {}, {});".format(loptype, name, trial_gfs, test_gfs, ini) -def name_localoperator(formdata): - name = form_name_suffix("lop", formdata) - define_localoperator(name, formdata) +def name_localoperator(form_ident): + name = "lop_{}".format(form_ident) + define_localoperator(name, form_ident) return name -@preamble +@preamble(section="gridoperator") def define_dofestimate(name): # Provide a worstcase estimate for the number of entries per row based # on the given gridfunction space and cell geometry @@ -133,7 +129,7 @@ def name_dofestimate(): return name -@preamble +@preamble(section="gridoperator") def typedef_matrixbackend(name): include_file("dune/pdelab/backend/istl.hh", filetag="driver") return "using {} = Dune::PDELab::ISTL::BCRSMatrixBackend<>;".format(name) @@ -145,7 +141,7 @@ def type_matrixbackend(): return name -@preamble +@preamble(section="gridoperator") def define_matrixbackend(name): mbtype = type_matrixbackend() dof = name_dofestimate() @@ -156,21 +152,3 @@ def name_matrixbackend(): name = "mb" define_matrixbackend(name) return name - - -def type_parameters(formdata): - data = get_global_context_value("data") - name = parameterclass_basename(formdata, data) - return name - - -@preamble -def define_parameters(name, formdata): - partype = type_parameters(formdata) - return "{} {};".format(partype, name) - - -def name_parameters(formdata): - name = form_name_suffix("params", formdata) - define_parameters(name, formdata) - return name diff --git a/python/dune/perftool/pdelab/driver/instationary.py b/python/dune/perftool/pdelab/driver/instationary.py index 
fe2cbc5a5a819c7e274759ab9e91cc61d1b14e34..c37792896a23837131ca71f83dcc9f7e497476a3 100644 --- a/python/dune/perftool/pdelab/driver/instationary.py +++ b/python/dune/perftool/pdelab/driver/instationary.py @@ -1,8 +1,7 @@ from dune.perftool.generation import (include_file, preamble, ) -from dune.perftool.pdelab.driver import (get_formdata, - get_mass_formdata, +from dune.perftool.pdelab.driver import (get_form_ident, get_trial_element, is_linear, name_initree, @@ -12,9 +11,11 @@ from dune.perftool.pdelab.driver.gridfunctionspace import (name_trial_gfs, type_range, ) from dune.perftool.pdelab.driver.gridoperator import (name_gridoperator, - name_parameters, - type_gridoperator,) -from dune.perftool.pdelab.driver.constraints import (name_bctype_function, + type_gridoperator, + name_localoperator, + ) +from dune.perftool.pdelab.driver.constraints import (has_dirichlet_constraints, + name_bctype_function, name_constraintscontainer, ) from dune.perftool.pdelab.driver.interpolate import (interpolate_dirichlet_data, @@ -31,13 +32,15 @@ from dune.perftool.pdelab.driver.solve import (print_matrix, ) from dune.perftool.pdelab.driver.vtk import (name_vtk_sequence_writer, visualize_initial_condition, - ) -from dune.perftool.options import get_option + name_predicate) +from dune.perftool.options import (get_form_option, + get_option, + ) def solve_instationary(): # Create time loop - if get_option('matrix_free'): + if get_form_option('matrix_free'): raise NotImplementedError("Instationary matrix free not implemented!") else: time_loop() @@ -46,20 +49,27 @@ def solve_instationary(): print_matrix() -@preamble +@preamble(section="instat") def time_loop(): ini = name_initree() - formdata = get_formdata() - params = name_parameters(formdata) + lop = name_localoperator(get_form_ident()) time = name_time() element = get_trial_element() - is_dirichlet = preprocess_leaf_data(element, "is_dirichlet") - bctype = name_bctype_function(element, is_dirichlet) - gfs = name_trial_gfs() - cc = 
name_constraintscontainer() - vector_type = type_vector(formdata) - vector = name_vector(formdata) + vector_type = type_vector(get_form_ident()) + vector = name_vector(get_form_ident()) interpolate_dirichlet_data(vector) + gfs = name_trial_gfs() + + is_dirichlet = preprocess_leaf_data(element, "is_dirichlet") + assemble_new_constraints = "" + if has_dirichlet_constraints(is_dirichlet): + bctype = name_bctype_function(element, is_dirichlet) + cc = name_constraintscontainer() + assemble_new_constraints = (" // Assemble constraints for new time step\n" + " {}.setTime({}+dt);\n" + " Dune::PDELab::constraints({}, {}, {});\n" + "\n".format(lop, time, bctype, gfs, cc) + ) # Choose between explicit and implicit time stepping explicit = get_option('explicit_time_stepping') @@ -67,25 +77,27 @@ def time_loop(): osm = name_explicitonestepmethod() apply_call = "{}.apply(time, dt, {}, {}new);".format(osm, vector, vector) else: - dirichlet = preprocess_leaf_data(element, "dirichlet_expression") - boundary = name_boundary_function(element, dirichlet) osm = name_onestepmethod() + if has_dirichlet_constraints(is_dirichlet): + dirichlet = preprocess_leaf_data(element, "interpolate_expression") + boundary = name_boundary_function(element, dirichlet) apply_call = "{}.apply(time, dt, {}, {}, {}new);".format(osm, vector, boundary, vector) + else: + apply_call = "{}.apply(time, dt, {}, {}new);".format(osm, vector, vector) # Setup visualization visualize_initial_condition() vtk_sequence_writer = name_vtk_sequence_writer() + predicate = name_predicate() + return ["", "double T = {}.get<double>(\"instat.T\", 1.0);".format(ini), "double dt = {}.get<double>(\"instat.dt\", 0.1);".format(ini), "int step_number(0);" "int output_every_nth = {}.get<int>(\"instat.output_every_nth\", 1);".format(ini), "while (time<T-1e-8){", - " // Assemble constraints for new time step", - " {}.setTime({}+dt);".format(params, time), - " Dune::PDELab::constraints({}, {}, {});".format(bctype, gfs, cc), - "", + 
"{}".format(assemble_new_constraints), " // Do time step", " {} {}new({});".format(vector_type, vector, vector), " {}".format(apply_call), @@ -97,13 +109,16 @@ def time_loop(): " step_number += 1;", " if (step_number%output_every_nth == 0){", " // Output to VTK File", + " {}.vtkWriter()->clear();".format(vtk_sequence_writer), + " Dune::PDELab::addSolutionToVTKWriter(vtkSequenceWriter, {}, {},".format(gfs, vector), + " Dune::PDELab::vtk::defaultNameScheme(), {});".format(predicate), " {}.write({}, Dune::VTK::appendedraw);".format(vtk_sequence_writer, time), " }", "}", ""] -@preamble +@preamble(section="init") def define_time(name): return "double {} = 0.0;".format(name) @@ -113,7 +128,7 @@ def name_time(): return "time" -@preamble +@preamble(section="instat") def typedef_timesteppingmethod(name): r_type = type_range() explicit = get_option('explicit_time_stepping') @@ -128,7 +143,7 @@ def type_timesteppingmethod(): return "TSM" -@preamble +@preamble(section="instat") def define_timesteppingmethod(name): tsm_type = type_timesteppingmethod() explicit = get_option('explicit_time_stepping') @@ -144,11 +159,11 @@ def name_timesteppingmethod(): return "tsm" -@preamble +@preamble(section="gridoperator") def typedef_instationarygridoperator(name): include_file("dune/pdelab/gridoperator/onestep.hh", filetag="driver") - go_type = type_gridoperator(get_formdata()) - mass_go_type = type_gridoperator(get_mass_formdata()) + go_type = type_gridoperator(get_form_ident()) + mass_go_type = type_gridoperator("mass") explicit = get_option('explicit_time_stepping') if explicit: return "using {} = Dune::PDELab::OneStepGridOperator<{},{},false>;".format(name, go_type, mass_go_type) @@ -161,11 +176,11 @@ def type_instationarygridoperator(): return "IGO" -@preamble +@preamble(section="gridoperator") def define_instationarygridoperator(name): igo_type = type_instationarygridoperator() - go = name_gridoperator(get_formdata()) - mass_go = name_gridoperator(get_mass_formdata()) + go = 
name_gridoperator(get_form_ident()) + mass_go = name_gridoperator("mass") return "{} {}({}, {});".format(igo_type, name, go, mass_go) @@ -174,12 +189,12 @@ def name_instationarygridoperator(): return "igo" -@preamble +@preamble(section="instat") def typedef_onestepmethod(name): r_type = type_range() igo_type = type_instationarygridoperator() snp_type = type_stationarynonlinearproblemssolver(igo_type) - vector_type = type_vector(get_formdata()) + vector_type = type_vector(get_form_ident()) return "using {} = Dune::PDELab::OneStepMethod<{}, {}, {}, {}, {}>;".format(name, r_type, igo_type, snp_type, vector_type, vector_type) @@ -188,7 +203,7 @@ def type_onestepmethod(): return "OSM" -@preamble +@preamble(section="instat") def define_onestepmethod(name): ilptype = type_onestepmethod() tsm = name_timesteppingmethod() @@ -203,12 +218,12 @@ def name_onestepmethod(): return "osm" -@preamble +@preamble(section="instat") def typedef_explicitonestepmethod(name): r_type = type_range() igo_type = type_instationarygridoperator() ls_type = type_linearsolver() - vector_type = type_vector(get_formdata()) + vector_type = type_vector(get_form_ident()) return "using {} = Dune::PDELab::ExplicitOneStepMethod<{}, {}, {}, {}>;".format(name, r_type, igo_type, ls_type, vector_type) @@ -217,7 +232,7 @@ def type_explicitonestepmethod(): return "EOSM" -@preamble +@preamble(section="instat") def define_explicitonestepmethod(name): eosm_type = type_explicitonestepmethod() tsm = name_timesteppingmethod() diff --git a/python/dune/perftool/pdelab/driver/interpolate.py b/python/dune/perftool/pdelab/driver/interpolate.py index ebbdbfd3d044d276a8692660252d2a7768a9389a..75846a1f57bead0d7ca87c0014a3e8715c2836d6 100644 --- a/python/dune/perftool/pdelab/driver/interpolate.py +++ b/python/dune/perftool/pdelab/driver/interpolate.py @@ -5,7 +5,7 @@ from dune.perftool.generation import (cached, preamble, ) from dune.perftool.pdelab.driver import (FEM_name_mangling, - get_formdata, + get_form_ident, 
get_trial_element, is_stationary, preprocess_leaf_data, @@ -13,29 +13,19 @@ from dune.perftool.pdelab.driver import (FEM_name_mangling, from dune.perftool.pdelab.driver.gridfunctionspace import (name_trial_gfs, name_leafview, ) -from dune.perftool.pdelab.driver.gridoperator import (name_parameters,) - from ufl import FiniteElement, MixedElement, TensorElement, VectorElement, TensorProductElement -def _do_interpolate(dirichlet): - if isinstance(dirichlet, (list, tuple)): - return any(bool(d) for d in dirichlet) - else: - return bool(dirichlet) - - def interpolate_dirichlet_data(name): element = get_trial_element() - is_dirichlet = preprocess_leaf_data(element, "is_dirichlet") - if _do_interpolate(is_dirichlet) or not is_stationary(): + func = preprocess_leaf_data(element, "interpolate_expression", applyZeroDefault=False) + if func is not None: + bf = name_boundary_function(element, func) gfs = name_trial_gfs() - dirichlet = preprocess_leaf_data(element, "dirichlet_expression") - bf = name_boundary_function(element, dirichlet) interpolate_vector(bf, gfs, name) -@preamble +@preamble(section="vector") def interpolate_vector(func, gfs, name): return "Dune::PDELab::interpolate({}, {}, {});".format(func, gfs, @@ -52,6 +42,8 @@ def name_boundary_function(element, func): childs.append(name_boundary_function(subel, func[k:k + subel.value_size()])) k = k + subel.value_size() name = "_".join(childs) + if len(childs) == 1: + name = "{}_dummy".format(name) define_compositegfs_parameterfunction(name, tuple(childs)) return name else: @@ -61,14 +53,14 @@ def name_boundary_function(element, func): return name -@preamble +@preamble(section="vector") def define_compositegfs_parameterfunction(name, children): return "Dune::PDELab::CompositeGridFunction<{}> {}({});".format(', '.join('decltype({})'.format(c) for c in children), name, ', '.join(children)) -@preamble +@preamble(section="vector") def define_boundary_function(name, dirichlet): gv = name_leafview() lambdaname = 
name_boundary_lambda(dirichlet) @@ -79,11 +71,13 @@ def define_boundary_function(name, dirichlet): lambdaname, ) else: - params = name_parameters(get_formdata()) + from dune.perftool.pdelab.driver.gridoperator import name_localoperator + lop = name_localoperator(get_form_ident()) return "auto {} = Dune::PDELab::makeInstationaryGridFunctionFromCallable({}, {}, {});".format(name, gv, lambdaname, - params) + lop, + ) @cached @@ -93,7 +87,7 @@ def name_boundary_lambda(boundary): return name -@preamble +@preamble(section="vector") def define_boundary_lambda(name, boundary): if boundary is None: boundary = 0.0 diff --git a/python/dune/perftool/pdelab/driver/solve.py b/python/dune/perftool/pdelab/driver/solve.py index e877ece672cf79f7db0ed373e96257d9fbfb98a7..0648df399d809fd84d30624711dfc2fb02bbc631 100644 --- a/python/dune/perftool/pdelab/driver/solve.py +++ b/python/dune/perftool/pdelab/driver/solve.py @@ -1,32 +1,38 @@ from dune.perftool.generation import (include_file, preamble, ) -from dune.perftool.options import get_option -from dune.perftool.pdelab.driver import (form_name_suffix, - get_formdata, +from dune.perftool.options import (get_form_option, + get_option, + ) +from dune.perftool.pdelab.driver import (get_form_ident, is_linear, name_initree, ) -from dune.perftool.pdelab.driver.gridfunctionspace import name_trial_gfs +from dune.perftool.pdelab.driver.gridfunctionspace import (name_trial_gfs, + type_domainfield, + type_trial_gfs, + ) +from dune.perftool.pdelab.driver.constraints import (type_constraintscontainer, + name_assembled_constraints, + ) from dune.perftool.pdelab.driver.gridoperator import (name_gridoperator, type_gridoperator, ) from dune.perftool.pdelab.driver.interpolate import interpolate_dirichlet_data -@preamble +@preamble(section="solver") def dune_solve(): + form_ident = get_form_ident() # Test if form is linear in ansatzfunction linear = is_linear() # Test wether we want to do matrix free operator evaluation - matrix_free = 
get_option('matrix_free') - + matrix_free = get_form_option('matrix_free') # Get right solve command if linear and matrix_free: - formdata = get_formdata() - go = name_gridoperator(formdata) - x = name_vector(formdata) + go = name_gridoperator(form_ident) + x = name_vector(form_ident) include_file("dune/perftool/matrixfree.hh", filetag="driver") solve = "solveMatrixFree({},{});".format(go, x) elif linear and not matrix_free: @@ -34,14 +40,13 @@ def dune_solve(): solve = "{}.apply();".format(slp) elif not linear and matrix_free: # TODO copy of linear case and obviously broken, used to generate something ;) - formdata = get_formdata() - go = name_gridoperator(formdata) - x = name_vector(formdata) + go = name_gridoperator(form_ident) + x = name_vector(form_ident) include_file("dune/perftool/matrixfree.hh", filetag="driver") solve = "solveNonlinearMatrixFree({},{});".format(go, x) elif not linear and not matrix_free: - go_type = type_gridoperator(get_formdata()) - go = name_gridoperator(get_formdata()) + go_type = type_gridoperator(form_ident) + go = name_gridoperator(form_ident) snp = name_stationarynonlinearproblemsolver(go_type, go) solve = "{}.apply();".format(snp) @@ -49,62 +54,62 @@ def dune_solve(): print_matrix() if get_option('instrumentation_level') >= 2: - from dune.perftool.pdelab.driver.timings import setup_timer, name_timing_stream + from dune.perftool.pdelab.driver.timings import setup_timer, name_timing_stream, name_timing_identifier + timestream = name_timing_stream() setup_timer() from dune.perftool.generation import post_include post_include("HP_DECLARE_TIMER(solve);", filetag="driver") - # Print times after solving - from dune.perftool.generation import get_global_context_value - formdatas = get_global_context_value("formdatas") - print_times = [] - for formdata in formdatas: - from dune.perftool.pdelab.driver.gridoperator import name_localoperator - lop_name = name_localoperator(formdata) - timestream = name_timing_stream() - 
print_times.append("{}.dump_timers({}, {}, true);".format(lop_name, timestream, name_timing_identifier())) - solve = ["HP_TIMER_START(solve);", "{}".format(solve), "HP_TIMER_STOP(solve);", "DUMP_TIMER({}, solve, {}, true);".format(get_option("instrumentation_level"), timestream), ] + if get_option('instrumentation_level') >= 3: - solve.extend(print_times) + from dune.perftool.pdelab.driver.gridoperator import name_localoperator + lop_name = name_localoperator(form_ident) + solve.append("{}.dump_timers({}, {}, true);".format(lop_name, timestream, name_timing_identifier())) return solve -def name_vector(formdata): - name = form_name_suffix("x", formdata) - define_vector(name, formdata) +def name_vector(form_ident): + name = "x_{}".format(form_ident) + define_vector(name, form_ident) interpolate_dirichlet_data(name) return name -@preamble -def typedef_vector(name, formdata): - go_type = type_gridoperator(formdata) - return "using {} = {}::Traits::Domain;".format(name, go_type) +@preamble(section="vector") +def typedef_vector(name, form_ident): + gfs = type_trial_gfs() + df = type_domainfield() + return "using {} = Dune::PDELab::Backend::Vector<{},{}>;".format(name, gfs, df) -def type_vector(formdata): - name = form_name_suffix("V", formdata).upper() - typedef_vector(name, formdata) +def type_vector(form_ident): + name = "V_{}".format(form_ident.upper()) + typedef_vector(name, form_ident) return name -@preamble -def define_vector(name, formdata): - vtype = type_vector(formdata) +@preamble(section="vector") +def define_vector(name, form_ident): + vtype = type_vector(form_ident) gfs = name_trial_gfs() return ["{} {}({});".format(vtype, name, gfs), "{} = 0.0;".format(name)] -@preamble +@preamble(section="solver") def typedef_linearsolver(name): include_file("dune/pdelab/backend/istl.hh", filetag="driver") - return "using {} = Dune::PDELab::ISTLBackend_SEQ_SuperLU;".format(name) + if get_option('overlapping'): + gfs = type_trial_gfs() + cc = type_constraintscontainer() + 
return "using {} = Dune::PDELab::ISTLBackend_OVLP_BCGS_ILU0<{},{}>;".format(name, gfs, cc) + else: + return "using {} = Dune::PDELab::ISTLBackend_SEQ_SuperLU;".format(name) def type_linearsolver(): @@ -113,10 +118,15 @@ def type_linearsolver(): return name -@preamble +@preamble(section="solver") def define_linearsolver(name): lstype = type_linearsolver() - return "{} {}(false);".format(lstype, name) + if get_option('overlapping'): + gfs = name_trial_gfs() + cc = name_assembled_constraints() + return "{} {}({}, {});".format(lstype, name, gfs, cc) + else: + return "{} {}(false);".format(lstype, name) def name_linearsolver(): @@ -125,7 +135,7 @@ def name_linearsolver(): return name -@preamble +@preamble(section="solver") def define_reduction(name): ini = name_initree() return "double {} = {}.get<double>(\"reduction\", 1e-12);".format(name, ini) @@ -137,12 +147,12 @@ def name_reduction(): return name -@preamble +@preamble(section="solver") def typedef_stationarylinearproblemsolver(name): include_file("dune/pdelab/stationary/linearproblem.hh", filetag="driver") - gotype = type_gridoperator(get_formdata()) + gotype = type_gridoperator(get_form_ident()) lstype = type_linearsolver() - xtype = type_vector(get_formdata()) + xtype = type_vector(get_form_ident()) return "using {} = Dune::PDELab::StationaryLinearProblemSolver<{}, {}, {}>;".format(name, gotype, lstype, xtype) @@ -151,13 +161,12 @@ def type_stationarylinearproblemsolver(): return "SLP" -@preamble +@preamble(section="solver") def define_stationarylinearproblemsolver(name): slptype = type_stationarylinearproblemsolver() - formdata = get_formdata() - go = name_gridoperator(formdata) + go = name_gridoperator(get_form_ident()) ls = name_linearsolver() - x = name_vector(formdata) + x = name_vector(get_form_ident()) red = name_reduction() return "{} {}({}, {}, {}, {});".format(slptype, name, go, ls, x, red) @@ -167,11 +176,11 @@ def name_stationarylinearproblemsolver(): return "slp" -@preamble 
+@preamble(section="solver") def typedef_stationarynonlinearproblemsolver(name, go_type): include_file("dune/pdelab/newton/newton.hh", filetag="driver") ls_type = type_linearsolver() - x_type = type_vector(get_formdata()) + x_type = type_vector(get_form_ident()) return "using {} = Dune::PDELab::Newton<{}, {}, {}>;".format(name, go_type, ls_type, x_type) @@ -181,10 +190,10 @@ def type_stationarynonlinearproblemssolver(go_type): return name -@preamble +@preamble(section="solver") def define_stationarynonlinearproblemsolver(name, go_type, go): snptype = type_stationarynonlinearproblemssolver(go_type) - x = name_vector(get_formdata()) + x = name_vector(get_form_ident()) ls = name_linearsolver() return "{} {}({}, {}, {});".format(snptype, name, go, x, ls) @@ -195,13 +204,12 @@ def name_stationarynonlinearproblemsolver(go_type, go): return name -@preamble +@preamble(section="printing") def print_residual(): ini = name_initree() - formdata = get_formdata() - n_go = name_gridoperator(formdata) - v = name_vector(formdata) - t_v = type_vector(formdata) + n_go = name_gridoperator(get_form_ident()) + v = name_vector(get_form_ident()) + t_v = type_vector(get_form_ident()) include_file("random", system=True, filetag="driver") return ["if ({}.get<bool>(\"printresidual\", false)) {{".format(ini), @@ -219,14 +227,13 @@ def print_residual(): "}"] -@preamble +@preamble(section="printing") def print_matrix(): - formdata = get_formdata() ini = name_initree() - t_go = type_gridoperator(formdata) - n_go = name_gridoperator(formdata) - v = name_vector(formdata) - t_v = type_vector(formdata) + t_go = type_gridoperator(get_form_ident()) + n_go = name_gridoperator(get_form_ident()) + v = name_vector(get_form_ident()) + t_v = type_vector(get_form_ident()) return ["if ({}.get<bool>(\"printmatrix\", false)) {{".format(ini), " // Setup random input", diff --git a/python/dune/perftool/pdelab/driver/timings.py b/python/dune/perftool/pdelab/driver/timings.py index 
d003c1c6226a243788fb0bf4e777bee7364538c2..84f0ce839d7684d165faa6ab07b57dfe340d6b13 100644 --- a/python/dune/perftool/pdelab/driver/timings.py +++ b/python/dune/perftool/pdelab/driver/timings.py @@ -1,13 +1,13 @@ """ Timing related generator functions """ -from dune.perftool.options import get_option, set_option +from dune.perftool.options import get_option from dune.perftool.generation import (cached, include_file, pre_include, post_include, preamble, ) -from dune.perftool.pdelab.driver import (get_formdata, +from dune.perftool.pdelab.driver import (get_form_ident, name_initree, name_mpihelper, ) @@ -24,7 +24,7 @@ from dune.perftool.pdelab.driver.solve import (name_vector, ) -@preamble +@preamble(section="timings") def define_timing_identifier(name): ini = name_initree() return "auto {} = {}.get<std::string>(\"identifier\", std::string(argv[0]));".format(name, ini) @@ -36,7 +36,7 @@ def name_timing_identifier(): return name -@preamble +@preamble(section="timings") def dump_dof_numbers(stream): ident = name_timing_identifier() level = get_option("instrumentation_level") @@ -51,7 +51,7 @@ def dump_dof_numbers(stream): ] -@preamble +@preamble(section="timings") def define_timing_stream(name): include_file('fstream', filetag='driver', system=True) include_file('sstream', filetag='driver', system=True) @@ -81,12 +81,11 @@ def setup_timer(): include_file("dune/perftool/common/timer.hh", filetag="driver") -@preamble +@preamble(section="timings") def evaluate_residual_timer(): - formdata = get_formdata() - n_go = name_gridoperator(formdata) - v = name_vector(formdata) - t_v = type_vector(formdata) + n_go = name_gridoperator(get_form_ident()) + v = name_vector(get_form_ident()) + t_v = type_vector(get_form_ident()) setup_timer() if get_option('instrumentation_level') >= 2: @@ -96,12 +95,9 @@ def evaluate_residual_timer(): timestream = name_timing_stream() print_times = [] - from dune.perftool.generation import get_global_context_value - formdatas = 
get_global_context_value("formdatas") - for formdata in formdatas: - lop_name = name_localoperator(formdata) - if get_option('instrumentation_level') >= 3: - print_times.append("{}.dump_timers({}, {}, true);".format(lop_name, timestream, name_timing_identifier())) + lop_name = name_localoperator(get_form_ident()) + if get_option('instrumentation_level') >= 3: + print_times.append("{}.dump_timers({}, {}, true);".format(lop_name, timestream, name_timing_identifier())) if get_option('instrumentation_level') >= 2: evaluation = ["HP_TIMER_START(residual_evaluation);", @@ -117,15 +113,11 @@ def evaluate_residual_timer(): return evaluation -@preamble +@preamble(section="timings") def apply_jacobian_timer(): - # Set the matrix_free option to True! - set_option("matrix_free", True) - - formdata = get_formdata() - n_go = name_gridoperator(formdata) - v = name_vector(formdata) - t_v = type_vector(formdata) + n_go = name_gridoperator(get_form_ident()) + v = name_vector(get_form_ident()) + t_v = type_vector(get_form_ident()) setup_timer() if get_option('instrumentation_level') >= 2: @@ -135,12 +127,9 @@ def apply_jacobian_timer(): timestream = name_timing_stream() print_times = [] - from dune.perftool.generation import get_global_context_value - formdatas = get_global_context_value("formdatas") - for formdata in formdatas: - lop_name = name_localoperator(formdata) - if get_option('instrumentation_level') >= 3: - print_times.append("{}.dump_timers({}, {}, true);".format(lop_name, timestream, name_timing_identifier())) + lop_name = name_localoperator(get_form_ident()) + if get_option('instrumentation_level') >= 3: + print_times.append("{}.dump_timers({}, {}, true);".format(lop_name, timestream, name_timing_identifier())) if get_option('instrumentation_level') >= 2: evaluation = ["HP_TIMER_START(apply_jacobian);", @@ -156,13 +145,12 @@ def apply_jacobian_timer(): return evaluation -@preamble +@preamble(section="timings") def assemble_matrix_timer(): - formdata = get_formdata() - 
t_go = type_gridoperator(formdata) - n_go = name_gridoperator(formdata) - v = name_vector(formdata) - t_v = type_vector(formdata) + t_go = type_gridoperator(get_form_ident()) + n_go = name_gridoperator(get_form_ident()) + v = name_vector(get_form_ident()) + t_v = type_vector(get_form_ident()) setup_timer() if get_option('instrumentation_level') >= 2: @@ -172,12 +160,9 @@ def assemble_matrix_timer(): timestream = name_timing_stream() print_times = [] - from dune.perftool.generation import get_global_context_value - formdatas = get_global_context_value("formdatas") - for formdata in formdatas: - lop_name = name_localoperator(formdata) - if get_option('instrumentation_level') >= 3: - print_times.append("{}.dump_timers({}, {}, true);".format(lop_name, timestream, name_timing_identifier())) + lop_name = name_localoperator(get_form_ident()) + if get_option('instrumentation_level') >= 3: + print_times.append("{}.dump_timers({}, {}, true);".format(lop_name, timestream, name_timing_identifier())) if get_option('instrumentation_level') >= 2: assembly = ["HP_TIMER_START(matrix_assembly);", diff --git a/python/dune/perftool/pdelab/driver/visitor.py b/python/dune/perftool/pdelab/driver/visitor.py index 8ef37f0d8975fee436fc91c7092a62f5a27dc84f..6549c9a18b8780a983abcfd0cc0be5f07d2e1e3f 100644 --- a/python/dune/perftool/pdelab/driver/visitor.py +++ b/python/dune/perftool/pdelab/driver/visitor.py @@ -28,12 +28,21 @@ class DriverUFL2PymbolicVisitor(UFL2LoopyVisitor): driver_using_statement("std::min") return UFL2LoopyVisitor.min_value(self, o) + def coefficient(self, o): + if o.count() == 2: + from dune.perftool.pdelab.driver import get_form_ident + from dune.perftool.pdelab.driver.gridoperator import name_localoperator + lop = name_localoperator(get_form_ident()) + return prim.Call(prim.Variable("{}.getTime".format(lop)), ()) + else: + return UFL2LoopyVisitor.coefficient(self, o) + def ufl_to_code(expr, boundary=True): # So far, we only considered this code branch on boundaries! 
assert boundary - from dune.perftool.pdelab.driver import get_formdata - with global_context(integral_type="exterior_facet", formdata=get_formdata()): + from dune.perftool.pdelab.driver import get_form_ident + with global_context(integral_type="exterior_facet", form_identifier=get_form_ident()): visitor = DriverUFL2PymbolicVisitor() from pymbolic.mapper.c_code import CCodeMapper ccm = CCodeMapper() diff --git a/python/dune/perftool/pdelab/driver/vtk.py b/python/dune/perftool/pdelab/driver/vtk.py index 6b8b52adc22cf16ec493592dfc79fa5462be8f0b..3004c4892870d078e5611c5f1ac3eabd4da704ee 100644 --- a/python/dune/perftool/pdelab/driver/vtk.py +++ b/python/dune/perftool/pdelab/driver/vtk.py @@ -1,8 +1,8 @@ from dune.perftool.generation import (include_file, preamble, ) -from dune.perftool.options import get_option -from dune.perftool.pdelab.driver import (get_formdata, +from dune.perftool.options import get_form_option +from dune.perftool.pdelab.driver import (get_form_ident, get_trial_element, name_initree, preprocess_leaf_data, @@ -15,7 +15,7 @@ from dune.perftool.pdelab.driver.gridfunctionspace import (name_leafview, from dune.perftool.pdelab.driver.solve import name_vector -@preamble +@preamble(section="vtk") def define_vtkfile(name): ini = name_initree() include_file("string", filetag="driver") @@ -27,7 +27,7 @@ def name_vtkfile(): return "vtkfile" -@preamble +@preamble(section="vtk") def typedef_vtkwriter(name): include_file("dune/grid/io/file/vtk/subsamplingvtkwriter.hh", filetag="driver") gv = type_leafview() @@ -39,14 +39,14 @@ def type_vtkwriter(): return "VTKWriter" -@preamble +@preamble(section="vtk") def define_subsamplinglevel(name): ini = name_initree() degree = get_trial_element().degree() if isinstance(degree, tuple): degree = max(degree) - if get_option("blockstructured"): - degree *= get_option("number_of_blocks") + if get_form_option("blockstructured"): + degree *= get_form_option("number_of_blocks") return "Dune::RefinementIntervals 
{}({}.get<int>(\"vtk.subsamplinglevel\", {}));".format(name, ini, max(degree, 1)) @@ -55,7 +55,7 @@ def name_subsamplingintervals(): return "subint" -@preamble +@preamble(section="vtk") def define_vtkwriter(name): _type = type_vtkwriter() gv = name_leafview() @@ -68,14 +68,14 @@ def name_vtkwriter(): return "vtkwriter" -@preamble +@preamble(section="vtk") def vtkoutput(): include_file("dune/pdelab/gridfunctionspace/vtk.hh", filetag="driver") vtkwriter = name_vtkwriter() gfs = name_trial_gfs() vtkfile = name_vtkfile() predicate = name_predicate() - vec = name_vector(get_formdata()) + vec = name_vector(get_form_ident()) return ["Dune::PDELab::addSolutionToVTKWriter({}, {}, {}, Dune::PDELab::vtk::defaultNameScheme(), {});".format(vtkwriter, gfs, vec, predicate), "{}.write({}, Dune::VTK::ascii);".format(vtkwriter, vtkfile)] @@ -86,7 +86,7 @@ def type_predicate(): return "CuttingPredicate" -@preamble +@preamble(section="vtk") def define_predicate(name): t = type_predicate() return "{} {};".format(t, name) @@ -97,7 +97,7 @@ def name_predicate(): return "predicate" -@preamble +@preamble(section="vtk") def typedef_vtk_sequence_writer(name): include_file("dune/grid/io/file/vtk/vtksequencewriter.hh", filetag="driver") gv_type = type_leafview() @@ -109,7 +109,7 @@ def type_vtk_sequence_writer(): return "VTKSW" -@preamble +@preamble(section="vtk") def define_vtk_sequence_writer(name): vtksw_type = type_vtk_sequence_writer() vtkw_type = type_vtkwriter() @@ -123,13 +123,13 @@ def name_vtk_sequence_writer(): return "vtkSequenceWriter" -@preamble +@preamble(section="vtk") def visualize_initial_condition(): include_file("dune/pdelab/gridfunctionspace/vtk.hh", filetag="driver") vtkwriter = name_vtk_sequence_writer() element = get_trial_element() gfs = name_trial_gfs() - vector = name_vector(get_formdata()) + vector = name_vector(get_form_ident()) predicate = name_predicate() from dune.perftool.pdelab.driver.instationary import name_time time = name_time() diff --git 
a/python/dune/perftool/pdelab/function.py b/python/dune/perftool/pdelab/function.py new file mode 100644 index 0000000000000000000000000000000000000000..c1dadef18fdcb8a60a88d06aff5fac186a80432d --- /dev/null +++ b/python/dune/perftool/pdelab/function.py @@ -0,0 +1,52 @@ +from dune.perftool.generation import (get_backend, + instruction, + kernel_cached, + preamble, + temporary_variable, + ) +from dune.perftool.pdelab.geometry import (name_cell, + world_dimension, + ) +from dune.perftool.pdelab.localoperator import name_gridfunction_member + +import pymbolic.primitives as prim + + +@preamble +def bind_gridfunction_to_element(gf, restriction): + element = name_cell(restriction) + return "{}.bind({});".format(gf, element) + + +def declare_grid_function_range(gridfunction): + def _decl(name, kernel, decl_info): + return "typename decltype({})::Range {};".format(gridfunction, name) + + return _decl + + +@kernel_cached +def pymbolic_evaluate_gridfunction(name, coeff, restriction, grad): + diffOrder = 1 if grad else 0 + + gridfunction = name_gridfunction_member(coeff, restriction, diffOrder) + bind_gridfunction_to_element(gridfunction, restriction) + + temporary_variable(name, + shape=(1,) + (world_dimension(),) * diffOrder, + decl_method=declare_grid_function_range(gridfunction), + managed=False, + ) + + quadpos = get_backend(interface="qp_in_cell")(restriction) + instruction(code="{} = {}({});".format(name, gridfunction, quadpos), + assignees=frozenset({name}), + within_inames=frozenset(get_backend(interface="quad_inames")()), + within_inames_is_final=True, + ) + + +def pymbolic_gridfunction(coeff, restriction, grad): + name = "coeff{}{}".format(coeff.count(), "_grad" if grad else "") + pymbolic_evaluate_gridfunction(name, coeff, restriction, grad) + return prim.Subscript(prim.Variable(name), (0,)) diff --git a/python/dune/perftool/pdelab/geometry.py b/python/dune/perftool/pdelab/geometry.py index 
0bf2f9143134db9e7335943f750fccb8cf4e5819..d308917adbd3391d4b61832938b3a1c4f3355eb5 100644 --- a/python/dune/perftool/pdelab/geometry.py +++ b/python/dune/perftool/pdelab/geometry.py @@ -13,7 +13,7 @@ from dune.perftool.generation import (backend, temporary_variable, valuearg, ) -from dune.perftool.options import (get_option, +from dune.perftool.options import (get_form_option, option_switch, ) from dune.perftool.loopy.target import dtype_floatingpoint, type_floatingpoint @@ -60,8 +60,7 @@ def _component_iname(context, count): if context: context = '_' + context name = 'idim{}{}'.format(context, str(count)) - formdata = get_global_context_value('formdata') - dim = formdata.geometric_dimension + dim = world_dimension() domain(name, dim) return name @@ -113,7 +112,7 @@ def type_geometry_wrapper(): @preamble def define_restricted_cell(name, restriction): ig = name_intersection_geometry_wrapper() - which = "inside" if restriction == Restriction.NEGATIVE else "outside" + which = "inside" if restriction == Restriction.POSITIVE else "outside" return "const auto& {} = {}.{}();".format(name, ig, which, @@ -125,7 +124,7 @@ def name_cell(restriction): eg = name_element_geometry_wrapper() return "{}.entity()".format(eg) else: - which = "inside" if restriction == Restriction.NEGATIVE else "outside" + which = "inside" if restriction == Restriction.POSITIVE else "outside" name = "{}_cell".format(which) define_restricted_cell(name, restriction) return name @@ -187,7 +186,7 @@ def name_geometry(): @preamble def define_in_cell_geometry(restriction, name): ig = name_intersection_geometry_wrapper() - which = "In" if restriction == Restriction.NEGATIVE else "Out" + which = "In" if restriction == Restriction.POSITIVE else "Out" return "auto {} = {}.geometryIn{}side();".format(name, ig, which @@ -197,7 +196,7 @@ def define_in_cell_geometry(restriction, name): def name_in_cell_geometry(restriction): assert restriction is not Restriction.NONE - name = "geo_in_{}side".format("in" if 
restriction is Restriction.NEGATIVE else "out") + name = "geo_in_{}side".format("in" if restriction is Restriction.POSITIVE else "out") define_in_cell_geometry(restriction, name) return name @@ -217,7 +216,7 @@ def apply_in_cell_transformation(name, local, restriction): def pymbolic_in_cell_coordinates(local, restriction): basename = get_pymbolic_basename(local) - name = "{}_in_{}side".format(basename, "in" if restriction is Restriction.NEGATIVE else "out") + name = "{}_in_{}side".format(basename, "in" if restriction is Restriction.POSITIVE else "out") temporary_variable(name, shape=(world_dimension(),), shape_impl=("fv",)) apply_in_cell_transformation(name, local, restriction) return Variable(name) @@ -232,8 +231,11 @@ def to_cell_coordinates(local, restriction): def world_dimension(): - formdata = get_global_context_value('formdata') - return formdata.geometric_dimension + data = get_global_context_value("data") + form = data.object_by_name[get_form_option("form")] + from dune.perftool.ufl.preprocess import preprocess_form + form = preprocess_form(form).preprocessed_form + return form.ufl_cell().geometric_dimension() def intersection_dimension(): @@ -259,14 +261,14 @@ def evaluate_unit_outer_normal(name): @preamble -def declare_normal(name, shape, shape_impl): +def declare_normal(name, kernel, decl_info): ig = name_intersection_geometry_wrapper() return "auto {} = {}.centerUnitOuterNormal();".format(name, ig) def pymbolic_unit_outer_normal(): name = "outer_normal" - if not get_option("diagonal_transformation_matrix"): + if not get_form_option("diagonal_transformation_matrix"): temporary_variable(name, shape=(world_dimension(),), decl_method=declare_normal) evaluate_unit_outer_normal(name) else: @@ -291,19 +293,14 @@ def pymbolic_unit_inner_normal(): def type_jacobian_inverse_transposed(restriction): - if get_option('turn_off_diagonal_jacobian'): - dim = world_dimension() - ftype = type_floatingpoint() - return "typename Dune::FieldMatrix<{},{},{}>".format(ftype, 
dim, dim) - else: - geo = type_cell_geometry(restriction) - return "typename {}::JacobianInverseTransposed".format(geo) + geo = type_cell_geometry(restriction) + return "typename {}::JacobianInverseTransposed".format(geo) @kernel_cached def define_jacobian_inverse_transposed_temporary(restriction): @preamble - def _define_jacobian_inverse_transposed_temporary(name, shape, shape_impl): + def _define_jacobian_inverse_transposed_temporary(name, kernel, decl_info): t = type_jacobian_inverse_transposed(restriction) return "{} {};".format(t, name, @@ -475,7 +472,7 @@ def define_cell_volume(name, restriction): def pymbolic_cell_volume(restriction): - if get_option("constant_transformation_matrix"): + if get_form_option("constant_transformation_matrix"): return pymbolic_jacobian_determinant() else: name = restricted_name("volume", restriction) @@ -491,7 +488,7 @@ def define_facet_area(name): def pymbolic_facet_area(): - if get_option("constant_transformation_matrix"): + if get_form_option("constant_transformation_matrix"): return pymbolic_facet_jacobian_determinant() else: name = "area" diff --git a/python/dune/perftool/pdelab/localoperator.py b/python/dune/perftool/pdelab/localoperator.py index 38a41bb06a70d0a08b47dc3144ffba412bec01d4..06c404ea861fcf5be8cb05c7b24bb0f6938b5fb7 100644 --- a/python/dune/perftool/pdelab/localoperator.py +++ b/python/dune/perftool/pdelab/localoperator.py @@ -3,7 +3,10 @@ from os.path import splitext import logging -from dune.perftool.options import (get_option, +import numpy as np + +from dune.perftool.options import (get_form_option, + get_option, option_switch) from dune.perftool.generation import (backend, base_class, @@ -13,6 +16,7 @@ from dune.perftool.generation import (backend, domain, dump_accumulate_timer, end_of_file, + function_mangler, generator_factory, get_backend, get_global_context_value, @@ -42,31 +46,6 @@ import loopy as lp import cgen -def name_form(formdata, data): - # Check wether the formdata has a name in UFL - try: - 
name = data.object_names[id(formdata.original_form)] - return name - except: - for index, form in enumerate(data.forms): - if formdata.preprocessed_form.equals(form): - name = str(index) - return name - # If the form has no name and can not be found in data.forms something went wrong - assert False - - -def name_localoperator_file(formdata, data): - from dune.perftool.options import get_option - if len(data.forms) == 1: - filename = get_option("operator_file") - else: - suffix = '_' + name_form(formdata, data) - basename, extension = splitext(get_option("operator_file")) - filename = basename + suffix + extension - return filename - - @template_parameter(classtag="operator") def lop_template_ansatz_gfs(): name = "GFSU" @@ -176,9 +155,45 @@ def name_initree_member(): @class_basename(classtag="operator") -def localoperator_basename(formdata, data): - form_name = name_form(formdata, data) - return "LocalOperator" + form_name.capitalize() +def localoperator_basename(form_ident): + return get_form_option("classname", form_ident) + + +def name_gridfunction_member(coeff, restriction, diffOrder=0): + # We reuse the grid function for volume integrals in skeleton integrals + if restriction == Restriction.POSITIVE: + restriction = Restriction.NONE + restr = "_n" if restriction == Restriction.NEGATIVE else "" + name = "local_gridfunction_coeff{}_diff{}{}".format(coeff.count(), diffOrder, restr) + define_gridfunction_member(name, coeff, restriction, diffOrder) + return name + + +def name_gridfunction_constructor_argument(coeff): + _type = type_gridfunction_template_parameter(coeff) + name = "gridfunction_coeff{}_".format(coeff.count()) + constructor_parameter("const {}&".format(_type), name, classtag="operator") + return name + + +@class_member(classtag="operator") +def define_gridfunction_member(name, coeff, restriction, diffOrder): + _type = type_gridfunction_template_parameter(coeff) + param = name_gridfunction_constructor_argument(coeff) + if diffOrder > 0: + other = 
name_gridfunction_member(coeff, restriction, diffOrder - 1) + init = "derivative({})".format(other) + initializer_list(name, [init], classtag="operator") + return "mutable decltype({}) {};".format(init, name) + else: + init = "localFunction({})".format(param) + initializer_list(name, [init], classtag="operator") + return "mutable typename {}::LocalFunction {};".format(_type, name) + + +@template_parameter(classtag="operator") +def type_gridfunction_template_parameter(coeff): + return "GRIDFUNCTION_COEFF{}".format(coeff.count()) def class_type_from_cache(classtag): @@ -244,7 +259,7 @@ def determine_accumulation_space(info, number): from loopy.types import NumpyType valuearg(lfs, dtype=NumpyType("str")) - if get_option("blockstructured"): + if get_form_option("blockstructured"): from dune.perftool.blockstructured.tools import micro_index_to_macro_index from dune.perftool.blockstructured.spaces import lfs_inames lfsi = micro_index_to_macro_index(subel, lfs_inames(subel, info.restriction, count=number)) @@ -276,7 +291,8 @@ def boundary_predicates(expr, measure, subdomain_id): # Get the original form and inspect the present measures from dune.perftool.generation import get_global_context_value - original_form = get_global_context_value("formdata").original_form + data = get_global_context_value("data") + original_form = data.object_by_name[get_form_option("form")] sd = original_form.subdomain_data() assert len(sd) == 1 @@ -295,16 +311,7 @@ def boundary_predicates(expr, measure, subdomain_id): visitor = get_visitor(measure, subdomain_id) cond = visitor(subdomain_data, do_predicates=True) else: - # Determine the name of the parameter function - cond = get_global_context_value("data").object_names[id(subdomain_data)] - - # Trigger the generation of code for this thing in the parameter class - from ufl.checks import is_cellwise_constant - cellwise_constant = is_cellwise_constant(expr) - from dune.perftool.pdelab.parameter import intersection_parameter_function - 
intersection_parameter_function(cond, subdomain_data, cellwise_constant, t='int32') - - cond = prim.Variable(cond) + raise NotImplementedError("Only UFL expressions allowed in subdomain_data right now.") predicates = predicates.union([prim.Comparison(cond, '==', subdomain_id)]) @@ -341,13 +348,12 @@ def _list_infos(expr, number, visitor): return element = ma[0].argexpr.ufl_element() - from dune.perftool.ufl.modified_terminals import Restriction if visitor.measure == "cell": restrictions = (Restriction.NONE,) elif visitor.measure == "exterior_facet": - restrictions = (Restriction.NEGATIVE,) + restrictions = (Restriction.POSITIVE,) elif visitor.measure == "interior_facet": - restrictions = (Restriction.NEGATIVE, Restriction.POSITIVE) + restrictions = (Restriction.POSITIVE, Restriction.NEGATIVE) for res in restrictions: for ei in range(element.value_size()): yield PDELabAccumulationInfo(element_index=ei, restriction=res) @@ -372,8 +378,7 @@ def get_accumulation_info(expr, visitor): restriction = visitor.restriction if visitor.measure == 'exterior_facet': - from dune.perftool.pdelab.restriction import Restriction - restriction = Restriction.NEGATIVE + restriction = Restriction.POSITIVE inames = visitor.interface.lfs_inames(leaf_element, restriction, @@ -425,10 +430,10 @@ def generate_accumulation_instruction(expr, visitor): def get_visitor(measure, subdomain_id): # Get a transformer instance for this kernel - if get_option('sumfact'): + if get_form_option('sumfact'): from dune.perftool.sumfact import SumFactInterface interface = SumFactInterface() - elif get_option('blockstructured'): + elif get_form_option('blockstructured'): from dune.perftool.blockstructured import BlockStructuredInterface interface = BlockStructuredInterface() else: @@ -469,7 +474,11 @@ def generate_kernel(integrals): delete_cache_items("kernel_default") for integral in integrals: visit_integral(integral) - knl = extract_kernel_from_cache("kernel_default") + + from dune.perftool.pdelab.signatures 
import kernel_name, assembly_routine_signature + name = kernel_name() + signature = assembly_routine_signature() + knl = extract_kernel_from_cache("kernel_default", name, signature) delete_cache_items("kernel_default") # Reset the quadrature degree @@ -487,7 +496,7 @@ def generate_kernels_per_integral(integrals): yield generate_kernel(integrals) -def extract_kernel_from_cache(tag, wrap_in_cgen=True): +def extract_kernel_from_cache(tag, name, signature, wrap_in_cgen=True, add_timings=True): # Now extract regular loopy kernel components from dune.perftool.loopy.target import DuneTarget domains = [i for i in retrieve_cache_items("{} and domain".format(tag))] @@ -506,15 +515,9 @@ def extract_kernel_from_cache(tag, wrap_in_cgen=True): from loopy import Options opt = Options(ignore_boostable_into=True, check_dep_resolution=False, + enforce_variable_access_ordered="no_check", ) - # Find a name for the kernel - if wrap_in_cgen: - from dune.perftool.pdelab.signatures import kernel_name - name = kernel_name() - else: - name = "constructor_kernel" - # Create the kernel from loopy import make_kernel, preprocess_kernel kernel = make_kernel(domains, @@ -525,35 +528,21 @@ def extract_kernel_from_cache(tag, wrap_in_cgen=True): options=opt, silenced_warnings=silenced, name=name, + lang_version=(2017, 2, 1), ) from loopy import make_reduction_inames_unique kernel = make_reduction_inames_unique(kernel) - from dune.perftool.loopy.transformations.disjointgroups import make_groups_conflicting - kernel = make_groups_conflicting(kernel) - # Apply the transformations that were gathered during tree traversals for trafo in transformations: - kernel = trafo[0](kernel, *trafo[1]) - - # Precompute all the substrules - for sr in kernel.substitutions: - tmpname = "precompute_{}".format(sr) - kernel = lp.precompute(kernel, - sr, - temporary_name=tmpname, - ) - # Vectorization strategies are actually very likely to eliminate the - # precomputation temporary. 
To avoid the temporary elimination warning - # we need to explicitly disable it. - kernel = kernel.copy(silenced_warnings=kernel.silenced_warnings + ["temp_to_write({})".format(tmpname)]) + kernel = trafo[0](kernel, *trafo[1], **trafo[2]) from dune.perftool.loopy import heuristic_duplication kernel = heuristic_duplication(kernel) # Maybe apply vectorization strategies - if get_option("vectorization_quadloop"): - if get_option("sumfact"): + if get_form_option("vectorization_quadloop"): + if get_form_option("sumfact"): from dune.perftool.loopy.transformations.vectorize_quad import vectorize_quadrature_loop kernel = vectorize_quadrature_loop(kernel) else: @@ -578,11 +567,20 @@ def extract_kernel_from_cache(tag, wrap_in_cgen=True): from dune.perftool.loopy.transformations.matchfma import match_fused_multiply_add kernel = match_fused_multiply_add(kernel) + # Add instrumentation to the kernel + from dune.perftool.loopy.transformations.instrumentation import add_instrumentation + if add_timings and get_form_option("sumfact"): + from dune.perftool.pdelab.signatures import assembler_routine_name + kernel = add_instrumentation(kernel, lp.match.Tagged("sumfact_stage1"), "{}_kernel_stage1".format(assembler_routine_name()), 4) + kernel = add_instrumentation(kernel, lp.match.Tagged("sumfact_stage2"), "{}_kernel_quadratureloop".format(assembler_routine_name()), 4) + kernel = add_instrumentation(kernel, lp.match.Tagged("sumfact_stage3"), "{}_kernel_stage3".format(assembler_routine_name()), 4) + if wrap_in_cgen: # Wrap the kernel in something which can generate code - from dune.perftool.pdelab.signatures import assembly_routine_signature - signature = assembly_routine_signature() - kernel = LoopyKernelMethod(signature, kernel) + if signature is None: + from dune.perftool.pdelab.signatures import assembly_routine_signature + signature = assembly_routine_signature() + kernel = LoopyKernelMethod(signature, kernel, add_timings=add_timings) return kernel @@ -663,12 +661,16 @@ class 
LoopyKernelMethod(ClassMember): content.append(' ' + 'HP_TIMER_STOP({});'.format(timer_name)) content.append('}') - ClassMember.__init__(self, content) + ClassMember.__init__(self, content, name=kernel.name if kernel is not None else "") def cgen_class_from_cache(tag, members=[]): from dune.perftool.generation import retrieve_cache_items + # Sort the given member functions by their name to help debugging by fixing + # the order + members = sorted(members, key=lambda m: m.name) + # Generate the name by concatenating basename and template parameters basename, fullname = class_type_from_cache(tag) @@ -679,12 +681,15 @@ def cgen_class_from_cache(tag, members=[]): tparams = [i for i in retrieve_cache_items('{} and template_param'.format(tag))] # Construct the constructor - constructor_knl = extract_kernel_from_cache(tag, wrap_in_cgen=False) + constructor_knl = extract_kernel_from_cache(tag, "constructor_kernel", None, wrap_in_cgen=False, add_timings=False) from dune.perftool.loopy.target import DuneTarget constructor_knl = constructor_knl.copy(target=DuneTarget(declare_temporaries=False)) signature = "{}({})".format(basename, ", ".join(next(iter(p.generate(with_semicolon=False))) for p in constructor_params)) constructor = LoopyKernelMethod([signature], constructor_knl, add_timings=False, initializer_list=il) + from loopy import get_one_scheduled_kernel + constructor_knl = get_one_scheduled_kernel(constructor_knl) + # Take any temporary declarations from the kernel and make them class members target = DuneTarget() from loopy.codegen import CodeGenerationState @@ -705,16 +710,7 @@ def cgen_class_from_cache(tag, members=[]): return Class(basename, base_classes=base_classes, members=[constructor] + members + pm + decls, tparam_decls=tparams) -def generate_localoperator_kernels(formdata, data): - logger = logging.getLogger(__name__) - - # Extract the relevant attributes of the form data - form = formdata.preprocessed_form - - # Reset the generation cache - from 
dune.perftool.generation import delete_cache_items - delete_cache_items() - +def local_operator_default_settings(operator, form): # Manage includes and base classes that we always need include_file('dune/pdelab/gridfunctionspace/gridfunctionspace.hh', filetag="operatorfile") include_file('dune/pdelab/localoperator/idefault.hh', filetag="operatorfile") @@ -729,18 +725,26 @@ def generate_localoperator_kernels(formdata, data): # Trigger this one once early on to assure that template # parameters are set in the right order - localoperator_basename(formdata, data) + localoperator_basename(operator) lop_template_ansatz_gfs() lop_template_test_gfs() lop_template_range_field() - from dune.perftool.pdelab.parameter import parameterclass_basename - parameterclass_basename(formdata, data) - # Make sure there is always the same constructor arguments (even if parameter class is empty) - from dune.perftool.pdelab.localoperator import name_initree_member + # Make sure there is always the same constructor arguments, even if some of them are + # not strictly needed. Also ensure the order. name_initree_member() - from dune.perftool.pdelab.parameter import name_paramclass - name_paramclass() + + # Iterate over the needed grid functions in correct order + for c in sorted(filter(lambda c: c.count() > 2, form.coefficients()), key=lambda c: c.count()): + name_gridfunction_constructor_argument(c) + + # Set some options! 
+ from dune.perftool.pdelab.driver import isQuadrilateral + if isQuadrilateral(form.arguments()[0].ufl_element().cell()): + from dune.perftool.options import set_form_option + # For Yasp Grids the jacobian of the transformation is diagonal and constant on each cell + set_form_option('diagonal_transformation_matrix', True) + set_form_option('constant_transformation_matrix', True) # Add right base classes for stationary/instationary operators base_class('Dune::PDELab::LocalOperatorDefaultFlags', classtag="operator") @@ -750,18 +754,18 @@ def generate_localoperator_kernels(formdata, data): base_class('Dune::PDELab::InstationaryLocalOperatorDefaultMethods<{}>' .format(rf), classtag="operator") - # Create set time method in parameter class - from dune.perftool.pdelab.parameter import define_set_time_method - define_set_time_method() - # Have a data structure collect the generated kernels - operator_kernels = {} +def generate_residual_kernels(form, original_form): + if not get_form_option("generate_residuals"): + return {} - logger.info("generate_localoperator_kernels: create residual methods") + logger = logging.getLogger(__name__) with global_context(form_type='residual'): + operator_kernels = {} + # Generate the necessary residual methods for measure in set(i.integral_type() for i in form.integrals()): - logger.info("generate_localoperator_kernels: measure {}".format(measure)) + logger.info("generate_residual_kernels: measure {}".format(measure)) with global_context(integral_type=measure): enum_pattern() pattern_baseclass() @@ -772,7 +776,7 @@ def generate_localoperator_kernels(formdata, data): kernel = [k for k in get_backend(interface="generate_kernels_per_integral")(form.integrals_by_type(measure))] # Maybe add numerical differentiation - if get_option("numerical_jacobian"): + if get_form_option("numerical_jacobian"): # Include headers for numerical methods include_file("dune/pdelab/localoperator/defaultimp.hh", filetag="operatorfile") @@ -791,9 +795,9 @@ def 
generate_localoperator_kernels(formdata, data): ) # In the case of matrix free operator evaluation we need jacobian apply methods - if get_option("matrix_free"): + if get_form_option("matrix_free"): from dune.perftool.pdelab.driver import is_linear - if is_linear(formdata.original_form): + if is_linear(original_form): # Numeical jacobian apply base class base_class("Dune::PDELab::NumericalJacobianApply{}<{}>".format(which, loptype), classtag="operator") @@ -812,21 +816,34 @@ def generate_localoperator_kernels(formdata, data): classtag="operator", ) - operator_kernels[(measure, 'residual')] = kernel + operator_kernels[(measure, 'residual')] = kernel + + return operator_kernels + + +def generate_jacobian_kernels(form, original_form): + logger = logging.getLogger(__name__) + + from ufl import derivative + jacform = derivative(original_form, original_form.coefficients()[0]) - # Generate the necessary jacobian methods - if not get_option("numerical_jacobian"): - logger.info("generate_localoperator_kernels: create jacobian methods") - from ufl import derivative - jacform = derivative(formdata.original_form, formdata.original_form.coefficients()[0]) + from dune.perftool.ufl.preprocess import preprocess_form + jacform = preprocess_form(jacform).preprocessed_form - from dune.perftool.ufl.preprocess import preprocess_form - jacform = preprocess_form(jacform).preprocessed_form + if get_form_option("block_preconditioner_diagonal"): + from dune.perftool.ufl.transformations.blockpreconditioner import diagonal_block_jacobian + jacform = diagonal_block_jacobian(jacform) + if get_form_option("block_preconditioner_offdiagonal"): + from dune.perftool.ufl.transformations.blockpreconditioner import offdiagonal_block_jacobian + jacform = offdiagonal_block_jacobian(jacform) - with global_context(form_type="jacobian"): + operator_kernels = {} + with global_context(form_type="jacobian"): + if get_form_option("generate_jacobians"): for measure in set(i.integral_type() for i in 
jacform.integrals()): - logger.info("generate_localoperator_kernels: measure {}".format(measure)) + logger.info("generate_jacobian_kernels: measure {}".format(measure)) with global_context(integral_type=measure): + from dune.perftool.pdelab.signatures import assembler_routine_name with global_context(kernel=assembler_routine_name()): kernel = [k for k in get_backend(interface="generate_kernels_per_integral")(jacform.integrals_by_type(measure))] operator_kernels[(measure, 'jacobian')] = kernel @@ -841,43 +858,207 @@ def generate_localoperator_kernels(formdata, data): from dune.perftool.pdelab.signatures import assembly_routine_signature operator_kernels[(it, 'jacobian')] = [LoopyKernelMethod(assembly_routine_signature(), kernel=None)] - # Jacobian apply methods for matrix-free computations - if get_option("matrix_free"): - # The apply vector has reserved index 1 so we directly use Coefficient class from ufl - from ufl import Coefficient - apply_coefficient = Coefficient(form.coefficients()[0].ufl_element(), 1) - - # Create application of jacobian on vector - from ufl import action - jac_apply_form = action(jacform, apply_coefficient) - - # Create kernel for jacobian application - with global_context(form_type="jacobian_apply"): - for measure in set(i.integral_type() for i in jac_apply_form.integrals()): - with global_context(integral_type=measure): - with global_context(kernel=assembler_routine_name()): - kernel = [k for k in get_backend(interface="generate_kernels_per_integral")(jac_apply_form.integrals_by_type(measure))] - operator_kernels[(measure, 'jacobian_apply')] = kernel - - # Generate dummy functions for those kernels, that vanished in the differentiation process - # We *could* solve this problem by using lambda_* terms but we do not really want that, so - # we use empty jacobian assembly methods instead - alpha_measures = set(i.integral_type() for i in form.integrals()) - jacobian_apply_measures = set(i.integral_type() for i in jac_apply_form.integrals()) 
- for it in alpha_measures - jacobian_apply_measures: - with global_context(integral_type=it): - from dune.perftool.pdelab.signatures import assembly_routine_signature - operator_kernels[(it, 'jacobian_apply')] = [LoopyKernelMethod(assembly_routine_signature(), kernel=None)] + # Jacobian apply methods for matrix-free computations + if get_form_option("matrix_free"): + # The apply vector has reserved index 1 so we directly use Coefficient class from ufl + from ufl import Coefficient + apply_coefficient = Coefficient(form.coefficients()[0].ufl_element(), 1) + + # Create application of jacobian on vector + from ufl import action + jac_apply_form = action(jacform, apply_coefficient) + + # Create kernel for jacobian application + with global_context(form_type="jacobian_apply"): + for measure in set(i.integral_type() for i in jac_apply_form.integrals()): + with global_context(integral_type=measure): + from dune.perftool.pdelab.signatures import assembler_routine_name + with global_context(kernel=assembler_routine_name()): + kernel = [k for k in get_backend(interface="generate_kernels_per_integral")(jac_apply_form.integrals_by_type(measure))] + operator_kernels[(measure, 'jacobian_apply')] = kernel + + # Generate dummy functions for those kernels, that vanished in the differentiation process + # We *could* solve this problem by using lambda_* terms but we do not really want that, so + # we use empty jacobian assembly methods instead + alpha_measures = set(i.integral_type() for i in form.integrals()) + jacobian_apply_measures = set(i.integral_type() for i in jac_apply_form.integrals()) + for it in alpha_measures - jacobian_apply_measures: + with global_context(integral_type=it): + from dune.perftool.pdelab.signatures import assembly_routine_signature + operator_kernels[(it, 'jacobian_apply')] = [LoopyKernelMethod(assembly_routine_signature(), kernel=None)] + + return operator_kernels + + +def generate_control_kernels(forms): + # All forms will we written in the residual 
method and + # accumulation will be done in a class member instead of the + # residual. + logger = logging.getLogger(__name__) + with global_context(form_type='residual'): + operator_kernels = {} + + # Generate the necessary residual methods + for measure in set(i.integral_type() for form in forms for i in form.integrals()): + logger.info("generate_control_kernels: measure {}".format(measure)) + with global_context(integral_type=measure): + enum_pattern() + pattern_baseclass() + enum_alpha() + + from dune.perftool.pdelab.signatures import assembler_routine_name + with global_context(kernel=assembler_routine_name()): + # TODO: Sumfactorization not yet implemented + assert not get_form_option('sumfact') + + from dune.perftool.pdelab.adjoint import control_generate_kernels_per_integral + forms_measure = [form.integrals_by_type(measure) for form in forms] + kernel = [k for k in control_generate_kernels_per_integral(forms_measure)] + + operator_kernels[(measure, 'residual')] = kernel + + return operator_kernels + + +def generate_localoperator_kernels(operator): + logger = logging.getLogger(__name__) + + data = get_global_context_value("data") + original_form = data.object_by_name[get_form_option("form")] + from dune.perftool.ufl.preprocess import preprocess_form + + if get_form_option("adjoint"): + # Generate adjoint operator + # + # The jacobian of the adjoint form is just the jacobian of the + # original form with test and ansazt function swapped. A a + # linear form you have to subtract the derivative of the + # objective function w.r.t the ansatz function to get the + # final residual formulation of the adjoint. + # + # Might not be true in all cases but works for the simple ones. 
+ assert get_form_option("objective_function") is not None + assert get_form_option("control") is False + + from ufl import derivative, adjoint, action, replace + from ufl.classes import Coefficient + + # Jacobian of the adjoint form + jacform = derivative(original_form, original_form.coefficients()[0]) + adjoint_jacform = adjoint(jacform) + + # Derivative of objective function w.r.t. state + objective = data.object_by_name[get_form_option("objective_function")] + objective_jacobian = derivative(objective, objective.coefficients()[0]) + + # Replace coefficient belonging to ansatz function with new coefficient + element = objective.coefficients()[0].ufl_element() + coeff = Coefficient(element, count=3) + objective_jacobian = replace(objective_jacobian, {objective.coefficients()[0]: coeff}) + if len(adjoint_jacform.coefficients()) > 0: + adjoint_jacform = replace(adjoint_jacform, {adjoint_jacform.coefficients()[0]: coeff}) + + # Residual of the adjoint form + adjoint_form = action(adjoint_jacform, original_form.coefficients()[0]) + adjoint_form = adjoint_form + objective_jacobian + + # Update form and original_form + original_form = adjoint_form + form = preprocess_form(adjoint_form).preprocessed_form + + elif get_form_option("control"): + # Generate control operator + # + # This is the normal form derived w.r.t. the control + # variable. We generate a form for every row of: + # + # \nabla \hat{J}(m) = (\nabla R(z,m))^T \lambda + \nabla_m J(z,m) + # + # These forms will not depend on the test function anymore and + # will need special treatment for the accumulation process. + from ufl import action, diff + from ufl.classes import Coefficient + + # Get control variables + assert get_form_option("control_variable") is not None + controls = [data.object_by_name[ctrl.strip()] for ctrl in get_form_option("control_variable").split(",")] + + # Transoform flat index to multiindex. Wrapper around numpy + # unravel since we need to transform numpy ints to native + # ints. 
+ def _unravel(flat_index, shape): + multi_index = np.unravel_index(flat_index, shape) + multi_index = tuple(int(i) for i in multi_index) + return multi_index + + # Will be used to replace ansatz function with adjoint function + element = original_form.coefficients()[0].ufl_element() + coeff = Coefficient(element, count=3) + + # Store a form for every control + forms = [] + for control in controls: + shape = control.ufl_shape + flat_length = int(np.prod(shape)) + for i in range(flat_length): + c = control[_unravel(i, shape)] + control_form = diff(original_form, c) + control_form = action(control_form, coeff) + objective = data.object_by_name[get_form_option("objective_function")] + objective_gradient = diff(objective, c) + control_form = control_form + objective_gradient + forms.append(preprocess_form(control_form).preprocessed_form) + + # Used to create local operator default settings + form = preprocess_form(original_form).preprocessed_form + + else: + form = preprocess_form(original_form).preprocessed_form + + # Reset the generation cache + from dune.perftool.generation import delete_cache_items + delete_cache_items() + + # Have a data structure collect the generated kernels + operator_kernels = {} + + # Generate things needed for all local operator files + local_operator_default_settings(operator, form) + + if get_form_option("control"): + logger.info("generate_localoperator_kernels: create methods for control operator") + operator_kernels.update(generate_control_kernels(forms)) + else: + logger.info("generate_localoperator_kernels: create residual methods") + operator_kernels.update(generate_residual_kernels(form, original_form)) + + # Generate the necessary jacobian methods + if not get_form_option("numerical_jacobian"): + logger.info("generate_localoperator_kernels: create jacobian methods") + operator_kernels.update(generate_jacobian_kernels(form, original_form)) # Return the set of generated kernels return operator_kernels -def 
generate_localoperator_file(formdata, kernels, filename): +def generate_localoperator_file(kernels, filename): + logger = logging.getLogger(__name__) + operator_methods = [] for k in kernels.values(): operator_methods.extend(k) + # Generate all the realizations of sum factorization kernel objects needed in this operator + sfkernels = [sf for sf in retrieve_cache_items("kernelimpl")] + if sfkernels: + logger.info("generate_localoperator_kernels: Create {} sumfact kernel realizations".format(len(sfkernels))) + + from dune.perftool.sumfact.realization import realize_sumfact_kernel_function + for sf, qp in sfkernels: + from dune.perftool.sumfact.tabulation import set_quadrature_points + set_quadrature_points(qp) + operator_methods.append(realize_sumfact_kernel_function(sf)) + if get_option('instrumentation_level') >= 3: include_file('dune/perftool/common/timer.hh', filetag='operatorfile') operator_methods.append(TimerMethod()) @@ -886,17 +1067,6 @@ def generate_localoperator_file(formdata, kernels, filename): # Write the file! 
from dune.perftool.file import generate_file - param = cgen_class_from_cache("parameterclass") # TODO take the name of this thing from the UFL file lop = cgen_class_from_cache("operator", members=operator_methods) - generate_file(filename, "operatorfile", [param, lop]) - - -def generate_localoperator_basefile(formdatas, data): - filename = get_option("operator_file") - for formdata in formdatas: - lop_filename = name_localoperator_file(formdata, data) - include_file(lop_filename, filetag="operatorbasefile") - - from dune.perftool.file import generate_file - generate_file(filename, "operatorbasefile", []) + generate_file(filename, "operatorfile", [lop]) diff --git a/python/dune/perftool/pdelab/parameter.py b/python/dune/perftool/pdelab/parameter.py deleted file mode 100644 index 63d47bf090ea69a4e14663f0ff94e2b42b5fa981..0000000000000000000000000000000000000000 --- a/python/dune/perftool/pdelab/parameter.py +++ /dev/null @@ -1,264 +0,0 @@ -""" Generators for parameter functions """ - -from dune.perftool.generation import (class_basename, - class_member, - constructor_parameter, - generator_factory, - get_backend, - get_global_context_value, - initializer_list, - kernel_cached, - preamble, - temporary_variable - ) -from dune.perftool.pdelab.geometry import (name_cell, - name_intersection, - ) -from dune.perftool.pdelab.quadrature import quadrature_preamble -from dune.perftool.tools import get_pymbolic_basename -from dune.perftool.cgen.clazz import AccessModifier -from dune.perftool.pdelab.localoperator import (class_type_from_cache, - localoperator_basename, - ) -from dune.perftool.loopy.target import type_floatingpoint - -from loopy.match import Writes - - -@class_basename(classtag="parameterclass") -def parameterclass_basename(formdata, data): - lopbase = localoperator_basename(formdata, data) - return "{}Params".format(lopbase) - - -@class_member(classtag="operator") -def define_parameterclass(name): - _, t = class_type_from_cache("parameterclass") - 
constructor_parameter("{}&".format(t), name + "_", classtag="operator") - initializer_list(name, [name + "_"], classtag="operator") - return "{}& {};".format(t, name) - - -def name_paramclass(): - formdata = get_global_context_value("formdata") - from dune.perftool.pdelab.driver.gridoperator import name_parameters - name = name_parameters(formdata) - define_parameterclass(name) - return name - - -@class_member(classtag="parameterclass") -def define_time(name): - initializer_list(name, ["0.0"], classtag="parameterclass") - ftype = type_floatingpoint() - return "{} {};".format(ftype, name) - - -def name_time(): - define_time("t") - return "t" - - -def define_set_time_method(): - define_set_time_method_parameterclass() - define_set_time_method_operator() - - -@class_member(classtag="operator") -def define_set_time_method_operator(): - time_name = name_time() - param = name_paramclass() - ftype = type_floatingpoint() - - result = ["// Set time in instationary case", - "void setTime ({} t_)".format(ftype), - "{", - " Dune::PDELab::InstationaryLocalOperatorDefaultMethods<{}>::setTime(t_);".format(ftype), - " {}.setTime(t_);".format(param), - "}" - ] - - return result - - -@class_member(classtag="parameterclass") -def define_set_time_method_parameterclass(): - time_name = name_time() - ftype = type_floatingpoint() - - result = ["// Set time in instationary case", - "void setTime ({} t_)".format(ftype), - "{", - " {} = t_;".format(time_name), - "}" - ] - - return result - - -def combine_tree_path_argnumber(element, tree_path_int): - # Return string combining tree_path and argnumber. 
- subel = element.extract_subelement_component(tree_path_int) - - def _flatten(x): - if isinstance(x, tuple): - return '_'.join(_flatten(i) for i in x if i != ()) - else: - return str(x) - - return _flatten(subel) - - -@class_member(classtag="parameterclass") -def define_parameter_function_class_member(name, expr, baset, shape, cell): - t = construct_nested_fieldvector(baset, shape) - - geot = "E" if cell else "I" - geo = geot.lower() - result = ["template<typename {}, typename X>".format(geot), - "{} {}(const {}& {}, const X& local) const".format(t, name, geot, geo), - "{", - ] - - # In the case of a non-scalar parameter function, recurse into leafs - if expr.element.value_shape(): - # Check that this is a VectorElement, as I have no idea how a parameter function - # over a non-vector mixed element should be well-defined in PDELab. - from ufl import VectorElement - assert isinstance(expr.element, VectorElement) - - result.append(" {} result(0.0);".format(t)) - - from dune.perftool.ufl.execution import split_expression - for i, subexpr in enumerate(split_expression(expr)): - child_name = "{}_{}".format(name, combine_tree_path_argnumber(expr.element, i)) - result.append(" result[{}] = {}({}, local);".format(i, child_name, geo)) - define_parameter_function_class_member(child_name, subexpr, baset, shape[1:], cell) - - result.append(" return result;") - - else: - # Evaluate a scalar parameter function - if expr.is_global: - result.append(" auto x = {}.geometry().global(local);".format(geo)) - else: - result.append(" auto x = local;") - - result.append(" " + expr.c_expr[0]) - - result.append("}") - - return result - - -@preamble -def evaluate_cellwise_constant_parameter_function(name, restriction): - param = name_paramclass() - entity = name_cell(restriction) - from dune.perftool.pdelab.geometry import name_localcenter - pos = name_localcenter() - - from dune.perftool.generation.loopy import valuearg - import numpy - valuearg(name) - - return 'auto {} = {}.{}({}, 
{});'.format(name, - name_paramclass(), - name, - entity, - pos, - ) - - -@preamble -def evaluate_intersectionwise_constant_parameter_function(name): - # Check that this is not a volume term, as that would not be well-defined - from dune.perftool.generation import get_global_context_value - it = get_global_context_value("integral_type") - assert it is not 'cell' - - param = name_paramclass() - intersection = name_intersection() - pos = name_localcenter() - - from dune.perftool.generation.loopy import valuearg - import numpy - valuearg(name) - - return 'auto {} = {}.{}({}, {});'.format(name, - name_paramclass(), - name, - intersection, - pos, - ) - - -def evaluate_cell_parameter_function(name, restriction): - param = name_paramclass() - entity = name_cell(restriction) - pos = get_backend(interface="qp_in_cell")(restriction) - return quadrature_preamble('{} = {}.{}({}, {});'.format(name, - name_paramclass(), - name, - entity, - str(pos), - ), - assignees=frozenset({name}), - read_variables=frozenset({get_pymbolic_basename(pos)}), - depends_on=frozenset({Writes(get_pymbolic_basename(pos))}), - ) - - -def evaluate_intersection_parameter_function(name): - # Check that this is not a volume term, as that would not be well-defined - from dune.perftool.generation import get_global_context_value - it = get_global_context_value("integral_type") - assert it is not 'cell' - - param = name_paramclass() - intersection = name_intersection() - pos = get_backend("quad_pos")() - return quadrature_preamble('{} = {}.{}({}, {});'.format(name, - name_paramclass(), - name, - intersection, - str(pos), - ), - assignees=frozenset({name}), - read_variables=frozenset({get_pymbolic_basename(pos)}), - depends_on=frozenset({Writes(get_pymbolic_basename(pos))}), - ) - - -def construct_nested_fieldvector(t, shape): - if len(shape) == 0: - return t - return 'Dune::FieldVector<{}, {}>'.format(construct_nested_fieldvector(t, shape[1:]), shape[0]) - - -@kernel_cached -def cell_parameter_function(name, 
expr, restriction, cellwise_constant, t='float64'): - shape = expr.ufl_element().value_shape() - shape_impl = ('fv',) * len(shape) - from dune.perftool.loopy.target import numpy_to_cpp_dtype - t = numpy_to_cpp_dtype(t) - define_parameter_function_class_member(name, expr, t, shape, True) - if cellwise_constant: - evaluate_cellwise_constant_parameter_function(name, restriction) - else: - temporary_variable(name, shape=shape, shape_impl=shape_impl) - evaluate_cell_parameter_function(name, restriction) - - -@kernel_cached -def intersection_parameter_function(name, expr, cellwise_constant, t='float64'): - shape = expr.ufl_element().value_shape() - shape_impl = ('fv',) * len(shape) - from dune.perftool.loopy.target import numpy_to_cpp_dtype - t = numpy_to_cpp_dtype(t) - define_parameter_function_class_member(name, expr, t, shape, False) - if cellwise_constant: - evaluate_intersectionwise_constant_parameter_function(name) - else: - temporary_variable(name, shape=shape, shape_impl=shape_impl) - evaluate_intersection_parameter_function(name) diff --git a/python/dune/perftool/pdelab/quadrature.py b/python/dune/perftool/pdelab/quadrature.py index 02e4a428348d81764ec9672c25370e9be6d14969..031d97b019df0cab8355d4295b5229584e077b9b 100644 --- a/python/dune/perftool/pdelab/quadrature.py +++ b/python/dune/perftool/pdelab/quadrature.py @@ -14,8 +14,7 @@ from dune.perftool.generation import (backend, valuearg, ) from dune.perftool.pdelab.localoperator import lop_template_range_field -from dune.perftool.options import get_option -from dune.perftool.ufl.modified_terminals import Restriction +from dune.perftool.options import get_form_option from pymbolic.primitives import Variable, Subscript @@ -184,7 +183,10 @@ def _estimate_quadrature_order(): """Estimate quadrature order using polynomial degree estimation from UFL""" # According to UFL documentation estimate_total_polynomial_degree # should only be called on preprocessed forms. 
- form = get_global_context_value("formdata").preprocessed_form + data = get_global_context_value("data") + form = data.object_by_name[get_form_option("form")] + from dune.perftool.ufl.preprocess import preprocess_form + form = preprocess_form(form).preprocessed_form # Estimate polynomial degree of integrals of current type (eg 'Cell') integral_type = get_global_context_value("integral_type") @@ -223,8 +225,8 @@ def quadrature_order(): - If you use sum factorization and TensorProductElement it is possible to use a different quadrature_order per direction. """ - if get_option("quadrature_order"): - quadrature_order = tuple(map(int, get_option("quadrature_order").split(','))) + if get_form_option("quadrature_order"): + quadrature_order = tuple(map(int, get_form_option("quadrature_order").split(','))) else: quadrature_order = _estimate_quadrature_order() @@ -235,7 +237,7 @@ def quadrature_order(): if len(quadrature_order) == 1: quadrature_order = quadrature_order[0] if isinstance(quadrature_order, tuple): - if not get_option('sumfact'): + if not get_form_option('sumfact'): raise NotImplementedError("Different quadrature order per direction is only implemented for kernels using sum factorization.") from dune.perftool.pdelab.geometry import world_dimension assert(len(quadrature_order) == world_dimension()) diff --git a/python/dune/perftool/pdelab/restriction.py b/python/dune/perftool/pdelab/restriction.py index 7d77a17b6c7e14107a359f711d110584b1a83cb2..03c55eaec4874c2611d892d74d8a14476cf8435d 100644 --- a/python/dune/perftool/pdelab/restriction.py +++ b/python/dune/perftool/pdelab/restriction.py @@ -2,9 +2,24 @@ from dune.perftool.ufl.modified_terminals import Restriction def restricted_name(name, restriction): + """Adapt name according to the restictrion + + Some remarks: + + - UFL defines the jump the following: jump(v) = v('+') - v('-'). + + - The corresponding outer normal vector is n = + FacetNormal(cell)('+'). 
The user needs to make the right choice + in the UFL file. + + - In the literature this convention is sometimes swapped. In order + to be consistent with UFL we choose ('+') as self and ('-') as + neighbor and choose the outer unit normal vector accordingly. + + """ if restriction == Restriction.NONE: return name if restriction == Restriction.POSITIVE: - return name + '_n' - if restriction == Restriction.NEGATIVE: return name + '_s' + if restriction == Restriction.NEGATIVE: + return name + '_n' diff --git a/python/dune/perftool/pdelab/signatures.py b/python/dune/perftool/pdelab/signatures.py index 97560a248a4aa28f3db489859c64dcb48d54c58c..2990c8b24b9c89bcca243dce78c9144914c26b44 100644 --- a/python/dune/perftool/pdelab/signatures.py +++ b/python/dune/perftool/pdelab/signatures.py @@ -53,7 +53,6 @@ def kernel_name(): def assembly_routine_signature(): integral_type = get_global_context_value("integral_type") form_type = get_global_context_value("form_type") - formdata = get_global_context_value("formdata") templates, args = {('residual', 'cell'): (alpha_volume_templates, alpha_volume_args), ('residual', 'exterior_facet'): (alpha_boundary_templates, alpha_boundary_args), @@ -66,7 +65,7 @@ def assembly_routine_signature(): if templates is None: # Check if form is linear from dune.perftool.pdelab.driver import is_linear - linear = is_linear(formdata.original_form) + linear = is_linear() templates, args = {('jacobian_apply', 'cell', True): (jacobian_apply_volume_templates, jacobian_apply_volume_args), ('jacobian_apply', 'exterior_facet', True): (jacobian_apply_boundary_templates, jacobian_apply_boundary_args), @@ -82,7 +81,6 @@ def assembly_routine_signature(): def assembly_routine_args(): integral_type = get_global_context_value("integral_type") form_type = get_global_context_value("form_type") - formdata = get_global_context_value("formdata") args = {('residual', 'cell'): alpha_volume_args, ('residual', 'exterior_facet'): alpha_boundary_args, @@ -95,7 +93,7 @@ def 
assembly_routine_args(): if args is None: # Check if form is linear from dune.perftool.pdelab.driver import is_linear - linear = is_linear(formdata.original_form) + linear = is_linear() args = {('jacobian_apply', 'cell', True): jacobian_apply_volume_args, ('jacobian_apply', 'exterior_facet', True): jacobian_apply_boundary_args, @@ -143,10 +141,10 @@ def alpha_boundary_templates(): def alpha_boundary_args(): geo = name_geometry_wrapper() - lfsu = name_trialfunctionspace(Restriction.NEGATIVE) - lfsv = name_testfunctionspace(Restriction.NEGATIVE) - cc = name_coefficientcontainer(Restriction.NEGATIVE) - av = name_accumulation_variable((Restriction.NEGATIVE,)) + lfsu = name_trialfunctionspace(Restriction.POSITIVE) + lfsv = name_testfunctionspace(Restriction.POSITIVE) + cc = name_coefficientcontainer(Restriction.POSITIVE) + av = name_accumulation_variable((Restriction.POSITIVE,)) return ((True, geo), (True, lfsu), (True, cc), (True, lfsv), (False, av)) @@ -161,14 +159,14 @@ def alpha_skeleton_templates(): def alpha_skeleton_args(): geo = name_geometry_wrapper() - lfsu_s = name_trialfunctionspace(Restriction.NEGATIVE) - lfsu_n = name_trialfunctionspace(Restriction.POSITIVE) - lfsv_s = name_testfunctionspace(Restriction.NEGATIVE) - lfsv_n = name_testfunctionspace(Restriction.POSITIVE) - cc_s = name_coefficientcontainer(Restriction.NEGATIVE) - cc_n = name_coefficientcontainer(Restriction.POSITIVE) - av_s = name_accumulation_variable((Restriction.NEGATIVE,)) - av_n = name_accumulation_variable((Restriction.POSITIVE,)) + lfsu_s = name_trialfunctionspace(Restriction.POSITIVE) + lfsu_n = name_trialfunctionspace(Restriction.NEGATIVE) + lfsv_s = name_testfunctionspace(Restriction.POSITIVE) + lfsv_n = name_testfunctionspace(Restriction.NEGATIVE) + cc_s = name_coefficientcontainer(Restriction.POSITIVE) + cc_n = name_coefficientcontainer(Restriction.NEGATIVE) + av_s = name_accumulation_variable((Restriction.POSITIVE,)) + av_n = name_accumulation_variable((Restriction.NEGATIVE,)) 
return ((True, geo), (True, lfsu_s), (True, cc_s), (True, lfsv_s), (True, lfsu_n), (True, cc_n), (True, lfsv_n), (False, av_s), (False, av_n)) @@ -201,10 +199,10 @@ def jacobian_boundary_templates(): def jacobian_boundary_args(): geo = name_geometry_wrapper() - lfsu = name_trialfunctionspace(Restriction.NEGATIVE) - lfsv = name_testfunctionspace(Restriction.NEGATIVE) - cc = name_coefficientcontainer(Restriction.NEGATIVE) - av = name_accumulation_variable((Restriction.NEGATIVE, Restriction.NEGATIVE)) + lfsu = name_trialfunctionspace(Restriction.POSITIVE) + lfsv = name_testfunctionspace(Restriction.POSITIVE) + cc = name_coefficientcontainer(Restriction.POSITIVE) + av = name_accumulation_variable((Restriction.POSITIVE, Restriction.POSITIVE)) return ((True, geo), (True, lfsu), (True, cc), (True, lfsv), (False, av)) @@ -219,16 +217,16 @@ def jacobian_skeleton_templates(): def jacobian_skeleton_args(): geo = name_geometry_wrapper() - lfsu_s = name_trialfunctionspace(Restriction.NEGATIVE) - lfsu_n = name_trialfunctionspace(Restriction.POSITIVE) - lfsv_s = name_testfunctionspace(Restriction.NEGATIVE) - lfsv_n = name_testfunctionspace(Restriction.POSITIVE) - cc_s = name_coefficientcontainer(Restriction.NEGATIVE) - cc_n = name_coefficientcontainer(Restriction.POSITIVE) - av_ss = name_accumulation_variable((Restriction.NEGATIVE, Restriction.NEGATIVE)) - av_sn = name_accumulation_variable((Restriction.NEGATIVE, Restriction.POSITIVE)) - av_ns = name_accumulation_variable((Restriction.POSITIVE, Restriction.NEGATIVE)) - av_nn = name_accumulation_variable((Restriction.POSITIVE, Restriction.POSITIVE)) + lfsu_s = name_trialfunctionspace(Restriction.POSITIVE) + lfsu_n = name_trialfunctionspace(Restriction.NEGATIVE) + lfsv_s = name_testfunctionspace(Restriction.POSITIVE) + lfsv_n = name_testfunctionspace(Restriction.NEGATIVE) + cc_s = name_coefficientcontainer(Restriction.POSITIVE) + cc_n = name_coefficientcontainer(Restriction.NEGATIVE) + av_ss = 
name_accumulation_variable((Restriction.POSITIVE, Restriction.POSITIVE)) + av_sn = name_accumulation_variable((Restriction.POSITIVE, Restriction.NEGATIVE)) + av_ns = name_accumulation_variable((Restriction.NEGATIVE, Restriction.POSITIVE)) + av_nn = name_accumulation_variable((Restriction.NEGATIVE, Restriction.NEGATIVE)) return ((True, geo), (True, lfsu_s), (True, cc_s), (True, lfsv_s), (True, lfsu_n), (True, cc_n), (True, lfsv_n), (False, av_ss), (False, av_sn), (False, av_ns), (False, av_nn)) @@ -261,10 +259,10 @@ def jacobian_apply_boundary_templates(): def jacobian_apply_boundary_args(): geo = name_geometry_wrapper() - lfsu = name_trialfunctionspace(Restriction.NEGATIVE) - lfsv = name_testfunctionspace(Restriction.NEGATIVE) - ac = name_applycontainer(Restriction.NEGATIVE) - av = name_accumulation_variable((Restriction.NEGATIVE,)) + lfsu = name_trialfunctionspace(Restriction.POSITIVE) + lfsv = name_testfunctionspace(Restriction.POSITIVE) + ac = name_applycontainer(Restriction.POSITIVE) + av = name_accumulation_variable((Restriction.POSITIVE,)) return ((True, geo), (True, lfsu), (True, ac), (True, lfsv), (False, av)) @@ -279,14 +277,14 @@ def jacobian_apply_skeleton_templates(): def jacobian_apply_skeleton_args(): geo = name_geometry_wrapper() - lfsu_s = name_trialfunctionspace(Restriction.NEGATIVE) - lfsu_n = name_trialfunctionspace(Restriction.POSITIVE) - lfsv_s = name_testfunctionspace(Restriction.NEGATIVE) - lfsv_n = name_testfunctionspace(Restriction.POSITIVE) - ac_s = name_applycontainer(Restriction.NEGATIVE) - ac_n = name_applycontainer(Restriction.POSITIVE) - av_s = name_accumulation_variable((Restriction.NEGATIVE,)) - av_n = name_accumulation_variable((Restriction.POSITIVE,)) + lfsu_s = name_trialfunctionspace(Restriction.POSITIVE) + lfsu_n = name_trialfunctionspace(Restriction.NEGATIVE) + lfsv_s = name_testfunctionspace(Restriction.POSITIVE) + lfsv_n = name_testfunctionspace(Restriction.NEGATIVE) + ac_s = name_applycontainer(Restriction.POSITIVE) + ac_n 
= name_applycontainer(Restriction.NEGATIVE) + av_s = name_accumulation_variable((Restriction.POSITIVE,)) + av_n = name_accumulation_variable((Restriction.NEGATIVE,)) return ((True, geo), (True, lfsu_s), (True, ac_s), (True, lfsv_s), (True, lfsu_n), (True, ac_n), (True, lfsv_n), (False, av_s), (False, av_n)) @@ -320,11 +318,11 @@ def nonlinear_jacobian_apply_boundary_templates(): def nonlinear_jacobian_apply_boundary_args(): geo = name_geometry_wrapper() - lfsu = name_trialfunctionspace(Restriction.NEGATIVE) - lfsv = name_testfunctionspace(Restriction.NEGATIVE) - cc = name_coefficientcontainer(Restriction.NEGATIVE) - ac = name_applycontainer(Restriction.NEGATIVE) - av = name_accumulation_variable((Restriction.NEGATIVE,)) + lfsu = name_trialfunctionspace(Restriction.POSITIVE) + lfsv = name_testfunctionspace(Restriction.POSITIVE) + cc = name_coefficientcontainer(Restriction.POSITIVE) + ac = name_applycontainer(Restriction.POSITIVE) + av = name_accumulation_variable((Restriction.POSITIVE,)) return ((True, geo), (True, lfsu), (True, cc), (True, ac), (True, lfsv), (False, av)) @@ -339,14 +337,14 @@ def nonlinear_jacobian_apply_skeleton_templates(): def nonlinear_jacobian_apply_skeleton_args(): geo = name_geometry_wrapper() - lfsu_s = name_trialfunctionspace(Restriction.NEGATIVE) - lfsu_n = name_trialfunctionspace(Restriction.POSITIVE) - lfsv_s = name_testfunctionspace(Restriction.NEGATIVE) - lfsv_n = name_testfunctionspace(Restriction.POSITIVE) - cc_s = name_coefficientcontainer(Restriction.NEGATIVE) - cc_n = name_coefficientcontainer(Restriction.POSITIVE) - ac_s = name_applycontainer(Restriction.NEGATIVE) - ac_n = name_applycontainer(Restriction.POSITIVE) - av_s = name_accumulation_variable((Restriction.NEGATIVE,)) - av_n = name_accumulation_variable((Restriction.POSITIVE,)) + lfsu_s = name_trialfunctionspace(Restriction.POSITIVE) + lfsu_n = name_trialfunctionspace(Restriction.NEGATIVE) + lfsv_s = name_testfunctionspace(Restriction.POSITIVE) + lfsv_n = 
name_testfunctionspace(Restriction.NEGATIVE) + cc_s = name_coefficientcontainer(Restriction.POSITIVE) + cc_n = name_coefficientcontainer(Restriction.NEGATIVE) + ac_s = name_applycontainer(Restriction.POSITIVE) + ac_n = name_applycontainer(Restriction.NEGATIVE) + av_s = name_accumulation_variable((Restriction.POSITIVE,)) + av_n = name_accumulation_variable((Restriction.NEGATIVE,)) return ((True, geo), (True, lfsu_s), (True, cc_s), (True, ac_s), (True, lfsv_s), (True, lfsu_n), (True, cc_n), (True, ac_n), (True, lfsv_n), (False, av_s), (False, av_n)) diff --git a/python/dune/perftool/pdelab/spaces.py b/python/dune/perftool/pdelab/spaces.py index 706be5b91a186b990719227fe61f1336d01313a0..d5f62735ea0b1d98c3496f49f0d062fc62bd1e5a 100644 --- a/python/dune/perftool/pdelab/spaces.py +++ b/python/dune/perftool/pdelab/spaces.py @@ -125,7 +125,7 @@ type_gfs = partial(_function_space_traversal, defaultname=available_gfs_names, r def initialize_function_spaces(expr, visitor): restriction = visitor.restriction if visitor.measure == 'exterior_facet': - restriction = Restriction.NEGATIVE + restriction = Restriction.POSITIVE index = None from ufl import MixedElement diff --git a/python/dune/perftool/sumfact/accumulation.py b/python/dune/perftool/sumfact/accumulation.py index 3c935161e5d837d2397736a24a996fff4f312384..4e7c2a6991b1c6bda49862de0aa47167c1b1ea2a 100644 --- a/python/dune/perftool/sumfact/accumulation.py +++ b/python/dune/perftool/sumfact/accumulation.py @@ -9,34 +9,41 @@ from dune.perftool.generation import (backend, generator_factory, get_counted_variable, get_counter, + get_global_context_value, + globalarg, iname, instruction, post_include, kernel_cached, temporary_variable, transform, + valuearg ) -from dune.perftool.options import get_option +from dune.perftool.options import (get_form_option, + get_option, + ) from dune.perftool.loopy.flatten import flatten_index -from dune.perftool.loopy.buffer import get_buffer_temporary +from dune.perftool.loopy.target import 
type_floatingpoint from dune.perftool.sumfact.quadrature import nest_quadrature_loops +from dune.perftool.pdelab.driver import FEM_name_mangling from dune.perftool.pdelab.localoperator import determine_accumulation_space from dune.perftool.pdelab.restriction import restricted_name from dune.perftool.pdelab.signatures import assembler_routine_name from dune.perftool.pdelab.geometry import world_dimension +from dune.perftool.pdelab.spaces import name_lfs from dune.perftool.sumfact.tabulation import (basis_functions_per_direction, construct_basis_matrix_sequence, ) from dune.perftool.sumfact.switch import (get_facedir, get_facemod, ) -from dune.perftool.sumfact.symbolic import SumfactKernel, SumfactKernelInputBase +from dune.perftool.sumfact.symbolic import SumfactKernel, SumfactKernelInterfaceBase from dune.perftool.ufl.modified_terminals import extract_modified_arguments -from dune.perftool.tools import get_pymbolic_basename +from dune.perftool.tools import get_pymbolic_basename, get_leaf from dune.perftool.error import PerftoolError from dune.perftool.sumfact.quadrature import quadrature_inames -from pytools import ImmutableRecord +from pytools import ImmutableRecord, product import loopy as lp import numpy as np @@ -79,21 +86,182 @@ def accum_iname(element, bound, i): return sumfact_iname(bound, "accum{}".format(suffix)) -class AlreadyAssembledInput(SumfactKernelInputBase): - def __init__(self, index): - self.index = index - - def __eq__(self, other): - return type(self) == type(other) and self.index == other.index +class AccumulationOutput(SumfactKernelInterfaceBase, ImmutableRecord): + def __init__(self, + accumvar=None, + restriction=None, + test_element=None, + test_element_index=None, + trial_element=None, + trial_element_index=None, + ): + # TODO: Isnt accumvar superfluous in the presence of all the other infos? 
+ ImmutableRecord.__init__(self, + accumvar=accumvar, + restriction=restriction, + test_element=test_element, + test_element_index=test_element_index, + trial_element=trial_element, + trial_element_index=trial_element_index, + ) def __repr__(self): - return "AlreadyAssembledInput({})".format(self.index) + return ImmutableRecord.__repr__(self) + + @property + def stage(self): + return 3 + + @property + def direct_is_possible(self): + return get_form_option("fastdg") + + @property + def within_inames(self): + if self.trial_element is None: + return () + else: + from dune.perftool.sumfact.basis import lfs_inames + return lfs_inames(get_leaf(self.trial_element, self.trial_element_index), self.restriction) + + def realize(self, sf, result, insn_dep, inames=None, additional_inames=()): + trial_leaf_element = get_leaf(self.trial_element, self.trial_element_index) if self.trial_element is not None else None + + basis_size = tuple(mat.basis_size for mat in sf.matrix_sequence) + + if inames is None: + inames = tuple(accum_iname(trial_leaf_element, mat.rows, i) + for i, mat in enumerate(sf.matrix_sequence)) + + # Determine the expression to accumulate with. This depends on the vectorization strategy! + from dune.perftool.tools import maybe_wrap_subscript + result = maybe_wrap_subscript(result, tuple(prim.Variable(i) for i in inames)) + + # Collect the lfs and lfs indices for the accumulate call + restriction = (0, 0) if self.restriction is None else self.restriction + test_lfs = name_lfs(self.test_element, restriction[0], self.test_element_index) + valuearg(test_lfs, dtype=lp.types.NumpyType("str")) + test_lfs_index = flatten_index(tuple(prim.Variable(i) for i in inames), + basis_size, + order="f" + ) + + accum_args = [prim.Variable(test_lfs), test_lfs_index] + + # In the jacobian case, also determine the space for the ansatz space + if sf.within_inames: + # TODO the next line should get its inames from + # elsewhere. 
This is *NOT* robust (but works right now) + ansatz_lfs = name_lfs(self.trial_element, restriction[1], self.trial_element_index) + valuearg(ansatz_lfs, dtype=lp.types.NumpyType("str")) + from dune.perftool.sumfact.basis import _basis_functions_per_direction + ansatz_lfs_index = flatten_index(tuple(prim.Variable(sf.within_inames[i]) + for i in range(world_dimension())), + _basis_functions_per_direction(trial_leaf_element), + order="f" + ) + + accum_args.append(prim.Variable(ansatz_lfs)) + accum_args.append(ansatz_lfs_index) + + accum_args.append(result) + + if not get_form_option("fastdg"): + rank = 2 if self.within_inames else 1 + expr = prim.Call(PDELabAccumulationFunction(self.accumvar, rank), + tuple(accum_args) + ) + dep = instruction(assignees=(), + expression=expr, + forced_iname_deps=frozenset(inames + additional_inames + self.within_inames), + forced_iname_deps_is_final=True, + depends_on=insn_dep, + predicates=sf.predicates, + tags=frozenset({"sumfact_stage3"}), + ) + + return frozenset({dep}) + + def realize_direct(self, result, inames, shape, which=0, **args): + direct_output = "fastdg{}".format(which) + ftags = ",".join(["f"] * len(shape)) + + if self.trial_element is None: + globalarg(direct_output, + shape=shape, + dim_tags=ftags, + offset=_dof_offset(self.test_element, self.test_element_index), + ) + lhs = prim.Subscript(prim.Variable(direct_output), inames) + else: + rowsize = sum(tuple(s for s in _local_sizes(self.trial_element))) + manual_strides = tuple("stride:{}".format(rowsize * product(shape[:i])) for i in range(len(shape))) + offset = "jacobian_offset{}".format(which) + valuearg(offset) + globalarg(direct_output, + shape=shape, + offset=prim.Variable(offset) + rowsize * _dof_offset(self.test_element, self.test_element_index) + _dof_offset(self.trial_element, self.trial_element_index), + dim_tags=manual_strides, + ) + lhs = prim.Subscript(prim.Variable(direct_output), inames) + + result = prim.Sum((lhs, result)) + return 
frozenset({instruction(assignee=lhs, + expression=result, + tags=frozenset({"sumfact_stage3"}), + **args)}) + + @property + def function_name_suffix(self): + if get_form_option("fastdg"): + suffix = "_fastdg1_{}comp{}".format(FEM_name_mangling(self.test_element), self.test_element_index) + if self.within_inames: + suffix = "{}x{}comp{}".format(suffix, FEM_name_mangling(self.trial_element), self.trial_element_index) + return suffix + else: + return "" + + @property + def function_args(self): + if get_form_option("fastdg"): + ret = ("{}.data()".format(self.accumvar),) + if get_form_option("fastdg") and self.within_inames: + element = get_leaf(self.trial_element, self.trial_element_index) + shape = tuple(element.degree() + 1 for e in range(element.cell().geometric_dimension())) + jacobian_index = flatten_index(tuple(prim.Variable(i) for i in self.within_inames), shape, order="f") + ret = ret + (str(jacobian_index),) + return ret + else: + return () + + @property + def signature_args(self): + if get_form_option('fastdg'): + ret = ("{}* fastdg0".format(type_floatingpoint()),) + if self.within_inames: + ret = ret + ("unsigned int jacobian_offset0",) + return ret + else: + return () + + +def _local_sizes(element): + from ufl import FiniteElement, MixedElement + if isinstance(element, MixedElement): + for subel in element.sub_elements(): + for s in _local_sizes(subel): + yield s + else: + assert isinstance(element, FiniteElement) + yield (element.degree() + 1)**element.cell().geometric_dimension() - def __hash__(self): - return hash(self.index) - def __str__(self): - return "Input{}".format(self.index[0]) +def _dof_offset(element, component): + if component is None: + return 0 + else: + sizes = tuple(s for s in _local_sizes(element)) + return sum(sizes[0:component]) class SumfactAccumulationInfo(ImmutableRecord): @@ -131,7 +299,7 @@ def get_accumulation_info(expr, visitor): restriction = visitor.restriction if visitor.measure == 'exterior_facet': from 
dune.perftool.pdelab.restriction import Restriction - restriction = Restriction.NEGATIVE + restriction = Restriction.POSITIVE inames = visitor.interface.lfs_inames(leaf_element, restriction, @@ -173,9 +341,9 @@ def _test_generator(expr, visitor): if visitor.measure == "cell": restrictions = (Restriction.NONE,) elif visitor.measure == "exterior_facet": - restrictions = (Restriction.NEGATIVE,) + restrictions = (Restriction.POSITIVE,) elif visitor.measure == "interior_facet": - restrictions = (Restriction.NEGATIVE, Restriction.POSITIVE) + restrictions = (Restriction.POSITIVE, Restriction.NEGATIVE) for res in restrictions: for ei, e in _get_childs(element): for grad in (None,) + tuple(range(dim)): @@ -196,9 +364,9 @@ def _trial_generator(expr, visitor): if visitor.measure == "cell": restrictions = (Restriction.NONE,) elif visitor.measure == "exterior_facet": - restrictions = (Restriction.NEGATIVE,) + restrictions = (Restriction.POSITIVE,) elif visitor.measure == "interior_facet": - restrictions = (Restriction.NEGATIVE, Restriction.POSITIVE) + restrictions = (Restriction.POSITIVE, Restriction.NEGATIVE) for res in restrictions: for ei, e in _get_childs(element): yield SumfactAccumulationInfo(element_index=ei, restriction=res, element=e) @@ -264,16 +432,17 @@ def generate_accumulation_instruction(expr, visitor): if priority is None: priority = 3 + output = AccumulationOutput(accumvar=accumvar, + restriction=(test_info.restriction, trial_info.restriction), + test_element=test_info.element, + test_element_index=test_info.element_index, + trial_element=trial_info.element, + trial_element_index=trial_info.element_index, + ) + sf = SumfactKernel(matrix_sequence=matrix_sequence, - restriction=(test_info.restriction, trial_info.restriction), - stage=3, position_priority=priority, - accumvar=accumvar, - test_element=test_info.element, - test_element_index=test_info.element_index, - trial_element=trial_info.element, - trial_element_index=trial_info.element_index, - 
input=AlreadyAssembledInput(index=(test_info.element_index,)), + interface=output, predicates=predicates, ) @@ -287,11 +456,14 @@ def generate_accumulation_instruction(expr, visitor): vectag = frozenset({"gradvec"}) if vsf.vectorized else frozenset() - temp = get_buffer_temporary(buffer, - shape=vsf.quadrature_shape, - dim_tags=vsf.quadrature_dimtags, - name="input_{}".format(buffer), - ) + from dune.perftool.sumfact.realization import name_buffer_storage + temp = "input_{}".format(buffer) + temporary_variable(temp, + shape=vsf.quadrature_shape, + dim_tags=vsf.quadrature_dimtags, + custom_base_storage=name_buffer_storage(buffer, 0), + managed=True, + ) # Those input fields, that are padded need to be set to zero # in order to do a horizontal_add later on @@ -304,16 +476,6 @@ def generate_accumulation_instruction(expr, visitor): tags=frozenset(["quadvec", "gradvec"]), ) - # Write timing stuff for jacobian (for alpha methods it is done at the end of stage 1) - timer_dep = frozenset() - if get_option("instrumentation_level") >= 4: - timer_name = assembler_routine_name() + '_kernel' + '_quadratureloop' - post_include('HP_DECLARE_TIMER({});'.format(timer_name), filetag='operatorfile') - dump_accumulate_timer(timer_name) - if(jacobian_inames): - timer_dep = frozenset({instruction(code="HP_TIMER_START({});".format(timer_name), - within_inames=frozenset(jacobian_inames))}) - # Determine dependencies from loopy.match import Or, Writes from loopy.symbolic import DependencyMapper @@ -328,68 +490,17 @@ def generate_accumulation_instruction(expr, visitor): expression=expr, forced_iname_deps=frozenset(quadrature_inames(trial_leaf_element) + jacobian_inames), forced_iname_deps_is_final=True, - tags=frozenset({"quadvec"}).union(vectag), - depends_on=frozenset({deps}).union(timer_dep).union(frozenset({lp.match.Tagged("sumfact_stage1")})), + tags=frozenset({"quadvec", "sumfact_stage2"}).union(vectag), + 
depends_on=frozenset({deps}).union(frozenset({lp.match.Tagged("sumfact_stage1")})), ) if insn_dep is None: insn_dep = frozenset({contrib_dep}) - if get_option("instrumentation_level") >= 4: - insn_dep = frozenset({instruction(code="HP_TIMER_STOP({});".format(timer_name), - depends_on=insn_dep, - within_inames=frozenset(jacobian_inames))}) - - inames = tuple(accum_iname(trial_leaf_element, mat.rows, i) - for i, mat in enumerate(vsf.matrix_sequence)) - - # Collect the lfs and lfs indices for the accumulate call - test_lfs.index = flatten_index(tuple(prim.Variable(i) for i in inames), - basis_size, - order="f" - ) - - # In the jacobian case, also determine the space for the ansatz space - if jacobian_inames: - # TODO the next line should get its inames from - # elsewhere. This is *NOT* robust (but works right now) - from dune.perftool.sumfact.basis import _basis_functions_per_direction - ansatz_lfs.index = flatten_index(tuple(prim.Variable(jacobian_inames[i]) - for i in range(world_dimension())), - _basis_functions_per_direction(trial_leaf_element), - order="f" - ) - # Add a sum factorization kernel that implements the multiplication # with the test function (stage 3) from dune.perftool.sumfact.realization import realize_sum_factorization_kernel result, insn_dep = realize_sum_factorization_kernel(vsf.copy(insn_dep=vsf.insn_dep.union(insn_dep))) - # Determine the expression to accumulate with. This depends on the vectorization strategy! 
- result = prim.Subscript(result, tuple(prim.Variable(i) for i in inames)) - vecinames = () - - if vsf.vectorized: - iname = accum_iname(trial_leaf_element, vsf.vector_width, "vec") - vecinames = (iname,) - transform(lp.tag_inames, [(iname, "vec")]) - from dune.perftool.tools import maybe_wrap_subscript - result = prim.Call(prim.Variable("horizontal_add"), - (maybe_wrap_subscript(result, prim.Variable(iname)),), - ) - - if not get_option("fastdg"): - rank = 2 if jacobian_inames else 1 - expr = prim.Call(PDELabAccumulationFunction(accumvar, rank), - (test_lfs.get_args() + - ansatz_lfs.get_args() + - (result,) - ) - ) - instruction(assignees=(), - expression=expr, - forced_iname_deps=frozenset(inames + vecinames + jacobian_inames), - forced_iname_deps_is_final=True, - depends_on=insn_dep, - predicates=predicates - ) + if not get_form_option("fastdg"): + insn_dep = vsf.interface.realize(vsf, result, insn_dep) diff --git a/python/dune/perftool/sumfact/basis.py b/python/dune/perftool/sumfact/basis.py index 46563ee1f6805037b13b13136e16f2fcc1d23ff0..39bba49e67fe66a4541ac0ae633e6e403be32c52 100644 --- a/python/dune/perftool/sumfact/basis.py +++ b/python/dune/perftool/sumfact/basis.py @@ -11,11 +11,13 @@ from dune.perftool.generation import (backend, get_counted_variable, get_counter, get_global_context_value, + globalarg, iname, instruction, kernel_cached, temporary_variable, ) +from dune.perftool.loopy.target import type_floatingpoint from dune.perftool.sumfact.tabulation import (basis_functions_per_direction, construct_basis_matrix_sequence, BasisTabulationMatrix, @@ -31,9 +33,8 @@ from dune.perftool.pdelab.argument import name_coefficientcontainer from dune.perftool.pdelab.geometry import (local_dimension, world_dimension, ) -from dune.perftool.loopy.buffer import initialize_buffer, get_buffer_temporary -from dune.perftool.sumfact.symbolic import SumfactKernel, SumfactKernelInputBase -from dune.perftool.options import get_option +from dune.perftool.sumfact.symbolic 
import SumfactKernel, SumfactKernelInterfaceBase +from dune.perftool.options import get_form_option from dune.perftool.pdelab.driver import FEM_name_mangling from dune.perftool.pdelab.restriction import restricted_name from dune.perftool.pdelab.spaces import name_lfs, name_lfs_bound, name_leaf_lfs @@ -50,7 +51,7 @@ from loopy.match import Writes import pymbolic.primitives as prim -class LFSSumfactKernelInput(SumfactKernelInputBase, ImmutableRecord): +class LFSSumfactKernelInput(SumfactKernelInterfaceBase, ImmutableRecord): def __init__(self, coeff_func=None, element=None, @@ -64,10 +65,21 @@ class LFSSumfactKernelInput(SumfactKernelInputBase, ImmutableRecord): restriction=restriction, ) - def __str__(self): + def __repr__(self): return "{}_{}".format(self.coeff_func(self.restriction), self.element_index) - def realize(self, sf, index, insn_dep): + def __str__(self): + return repr(self) + + @property + def stage(self): + return 1 + + @property + def direct_is_possible(self): + return get_form_option("fastdg") + + def realize(self, sf, insn_dep, index=0): lfs = name_lfs(self.element, self.restriction, self.element_index) basisiname = sumfact_iname(name_lfs_bound(lfs), "basis") container = self.coeff_func(self.restriction) @@ -75,25 +87,57 @@ class LFSSumfactKernelInput(SumfactKernelInputBase, ImmutableRecord): coeff = pc(container, lfs, basisiname) # Get the input temporary! 
- name = get_buffer_temporary(sf.buffer, - shape=(product(mat.basis_size for mat in sf.matrix_sequence), sf.vector_width), - name="input_{}".format(sf.buffer) - ) + from dune.perftool.sumfact.realization import name_buffer_storage + name = "input_{}".format(sf.buffer) + temporary_variable(name, + shape=(product(mat.basis_size for mat in sf.matrix_sequence), sf.vector_width), + custom_base_storage=name_buffer_storage(sf.buffer, 0), + managed=True, + ) assignee = prim.Subscript(prim.Variable(name), (prim.Variable(basisiname),) + (index,)) - instruction(assignee=assignee, - expression=coeff, - depends_on=sf.insn_dep.union(insn_dep), - tags=frozenset({"sumfact_stage{}".format(sf.stage)}), - ) + insn = instruction(assignee=assignee, + expression=coeff, + depends_on=sf.insn_dep.union(insn_dep), + tags=frozenset({"sumfact_stage{}".format(sf.stage)}), + ) + + return insn_dep.union(frozenset({insn})) + + def realize_direct(self, shape, inames, which=0): + arg = "fastdg{}".format(which) + + from dune.perftool.sumfact.accumulation import _dof_offset + globalarg(arg, + shape=shape, + dim_tags=",".join("f" * len(shape)), + offset=_dof_offset(self.element, self.element_index), + ) + + return prim.Subscript(prim.Variable(arg), inames) + + @property + def function_name_suffix(self): + if get_form_option("fastdg"): + return "_fastdg1_{}comp{}".format(FEM_name_mangling(self.element), self.element_index) + else: + return "" + + @property + def function_args(self): + if get_form_option("fastdg"): + func = self.coeff_func(self.restriction) + return ("{}.data()".format(func),) + else: + return () @property - def direct_input(self): - if get_option("fastdg"): - return self.coeff_func(self.restriction) + def signature_args(self): + if get_form_option("fastdg"): + return ("const {}* fastdg0".format(type_floatingpoint()),) else: - return None + return () def _basis_functions_per_direction(element): @@ -141,7 +185,7 @@ def pymbolic_coefficient_gradient(element, restriction, index, 
coeff_func, visit # The sum factorization kernel object gathering all relevant information sf = SumfactKernel(matrix_sequence=matrix_sequence, position_priority=grad_index, - input=inp, + interface=inp, ) from dune.perftool.sumfact.vectorization import attach_vectorization_info @@ -182,7 +226,7 @@ def pymbolic_coefficient(element, restriction, index, coeff_func, visitor): ) sf = SumfactKernel(matrix_sequence=matrix_sequence, - input=inp, + interface=inp, position_priority=3, ) diff --git a/python/dune/perftool/sumfact/geometry.py b/python/dune/perftool/sumfact/geometry.py index e17b7aac660718cb424cae73fe2c7c60452fb5f2..7b78de412d2e2c5a893e2d1d1d7e32315bd6aafd 100644 --- a/python/dune/perftool/sumfact/geometry.py +++ b/python/dune/perftool/sumfact/geometry.py @@ -12,15 +12,14 @@ from dune.perftool.generation import (backend, temporary_variable, globalarg, ) -from dune.perftool.loopy.buffer import get_buffer_temporary from dune.perftool.pdelab.geometry import (local_dimension, world_dimension, name_geometry, ) from dune.perftool.sumfact.switch import get_facedir -from dune.perftool.sumfact.symbolic import SumfactKernelInputBase +from dune.perftool.sumfact.symbolic import SumfactKernelInterfaceBase from dune.perftool.sumfact.vectorization import attach_vectorization_info -from dune.perftool.options import get_option, option_switch +from dune.perftool.options import get_form_option, option_switch from dune.perftool.ufl.modified_terminals import Restriction from pytools import ImmutableRecord @@ -36,15 +35,18 @@ def corner_iname(): return name -class GeoCornersInput(SumfactKernelInputBase, ImmutableRecord): +class GeoCornersInput(SumfactKernelInterfaceBase, ImmutableRecord): def __init__(self, dir): ImmutableRecord.__init__(self, dir=dir) def realize(self, sf, index, insn_dep): - name = get_buffer_temporary(sf.buffer, - shape=(2 ** local_dimension(), sf.vector_width), - name="input_{}".format(sf.buffer) - ) + from dune.perftool.sumfact.realization import 
name_buffer_storage + name = "input_{}".format(sf.buffer) + temporary_variable(name, + shape=(2 ** local_dimension(), sf.vector_width), + custom_base_storage=name_buffer_storage(sf.buffer, 0), + managed=True, + ) ciname = corner_iname() geo = name_geometry() @@ -152,7 +154,7 @@ def pymbolic_spatial_coordinate_axiparallel(do_predicates, visitor): restriction = Restriction.NONE from dune.perftool.generation import get_global_context_value if get_global_context_value("integral_type") == "interior_facet": - restriction = Restriction.NEGATIVE + restriction = Restriction.POSITIVE from dune.perftool.sumfact.switch import get_facedir face = get_facedir(restriction) @@ -181,10 +183,10 @@ def pymbolic_spatial_coordinate_axiparallel(do_predicates, visitor): def pymbolic_unit_outer_normal(visitor_indices): index, = visitor_indices assert isinstance(index, int) - if get_option("diagonal_transformation_matrix"): + if get_form_option("diagonal_transformation_matrix"): from dune.perftool.sumfact.switch import get_facedir, get_facemod - if index == get_facedir(Restriction.NEGATIVE): - if get_facemod(Restriction.NEGATIVE): + if index == get_facedir(Restriction.POSITIVE): + if get_facemod(Restriction.POSITIVE): return 1, None else: return -1, None @@ -198,10 +200,10 @@ def pymbolic_unit_outer_normal(visitor_indices): def pymbolic_unit_inner_normal(visitor_indices): index, = visitor_indices assert isinstance(index, int) - if get_option("diagonal_transformation_matrix"): + if get_form_option("diagonal_transformation_matrix"): from dune.perftool.sumfact.switch import get_facedir, get_facemod - if index == get_facedir(Restriction.NEGATIVE): - if get_facemod(Restriction.NEGATIVE): + if index == get_facedir(Restriction.POSITIVE): + if get_facemod(Restriction.POSITIVE): return -1, None else: return 1, None @@ -213,7 +215,7 @@ def pymbolic_unit_inner_normal(visitor_indices): def pymbolic_facet_jacobian_determinant(): - if get_option("constant_transformation_matrix"): + if 
get_form_option("constant_transformation_matrix"): return pymbolic_constant_facet_jacobian_determinant() else: from dune.perftool.pdelab.geometry import pymbolic_facet_jacobian_determinant as _norm @@ -221,7 +223,7 @@ def pymbolic_facet_jacobian_determinant(): def pymbolic_constant_facet_jacobian_determinant(): - facedir = get_facedir(Restriction.NEGATIVE) + facedir = get_facedir(Restriction.POSITIVE) assert isinstance(facedir, int) name = "fdetjac" @@ -256,7 +258,7 @@ def define_constant_facet_jacobian_determinant_eval(name): def pymbolic_facet_area(): - if get_option("constant_transformation_matrix"): + if get_form_option("constant_transformation_matrix"): return pymbolic_facet_jacobian_determinant() else: from dune.perftool.pdelab.geometry import pymbolic_facet_area as _norm diff --git a/python/dune/perftool/sumfact/quadrature.py b/python/dune/perftool/sumfact/quadrature.py index 8209a41f2890078aba8ffb25b708937fda2c7d01..2b709293a7f2c556c43a3812654c6e178e7d971f 100644 --- a/python/dune/perftool/sumfact/quadrature.py +++ b/python/dune/perftool/sumfact/quadrature.py @@ -18,7 +18,7 @@ from dune.perftool.pdelab.argument import name_accumulation_variable from dune.perftool.pdelab.geometry import (local_dimension, world_dimension, ) -from dune.perftool.options import get_option +from dune.perftool.options import get_form_option from dune.perftool.sumfact.switch import get_facedir from dune.perftool.loopy.target import dtype_floatingpoint @@ -142,7 +142,7 @@ def recursive_quadrature_weight(visitor, direction=0): def quadrature_weight(visitor): # Return non-precomputed version - if not get_option("precompute_quadrature_info"): + if not get_form_option("precompute_quadrature_info"): return recursive_quadrature_weight(visitor) # Quadrature points per (local) direction @@ -195,7 +195,7 @@ def define_quadrature_position(name, index): @backend(interface="quad_pos", name="sumfact") def pymbolic_quadrature_position(index, visitor): # Return the non-precomputed version - if not 
get_option("precompute_quadrature_info"): + if not get_form_option("precompute_quadrature_info"): name = 'pos' temporary_variable(name, shape=(local_dimension(),), shape_impl=("fv",)) define_quadrature_position(name, index) diff --git a/python/dune/perftool/sumfact/realization.py b/python/dune/perftool/sumfact/realization.py index 703e3e062b72a5615d57d205033a861bb5388ba3..777f8dab972edba7ee55398f7a2d41c37d3c03b4 100644 --- a/python/dune/perftool/sumfact/realization.py +++ b/python/dune/perftool/sumfact/realization.py @@ -3,34 +3,42 @@ The code that triggers the creation of the necessary code constructs to realize a sum factorization kernel """ from dune.perftool.generation import (barrier, + delete_cache_items, dump_accumulate_timer, generator_factory, get_global_context_value, globalarg, instruction, + kernel_cached, post_include, preamble, silenced_warning, temporary_variable, transform, ) -from dune.perftool.loopy.buffer import (get_buffer_temporary, - switch_base_storage, - ) +from dune.perftool.loopy.flatten import flatten_index from dune.perftool.pdelab.argument import pymbolic_coefficient from dune.perftool.pdelab.basis import shape_as_pymbolic from dune.perftool.pdelab.geometry import world_dimension -from dune.perftool.options import get_option +from dune.perftool.options import (get_form_option, + get_option, + ) from dune.perftool.pdelab.signatures import assembler_routine_name from dune.perftool.sumfact.permutation import (sumfact_permutation_strategy, permute_backward, permute_forward, ) +from dune.perftool.sumfact.quadrature import quadrature_points_per_direction +from dune.perftool.sumfact.symbolic import (SumfactKernel, + VectorizedSumfactKernel, + ) from dune.perftool.sumfact.vectorization import attach_vectorization_info from dune.perftool.sumfact.accumulation import sumfact_iname from dune.perftool.loopy.target import dtype_floatingpoint from dune.perftool.loopy.vcl import ExplicitVCLCast +from dune.perftool.tools import get_leaf, 
remove_duplicates +from pytools import product from ufl import MixedElement import loopy as lp @@ -38,6 +46,11 @@ import numpy as np import pymbolic.primitives as prim +# Have a generator function store the necessary sum factorization kernel implementations +# This way then can easily be extracted at the end of the form visiting process +necessary_kernel_implementations = generator_factory(item_tags=("kernelimpl",), cache_key_generator=lambda a: a[0].function_name, no_deco=True) + + def realize_sum_factorization_kernel(sf, **kwargs): if get_global_context_value("dry_run", False): return sf, sf.insn_dep @@ -45,50 +58,88 @@ def realize_sum_factorization_kernel(sf, **kwargs): return _realize_sum_factorization_kernel(sf, **kwargs) -@preamble -def alias_data_array(name, data): - return "auto {} = {}.data();".format(name, data) +def name_buffer_storage(buff, which): + name = "{}_{}".format(buff, which) + return name -@generator_factory(item_tags=("sumfactkernel",), - context_tags=("kernel",), - cache_key_generator=lambda s, **kw: s.cache_key) +@kernel_cached def _realize_sum_factorization_kernel(sf): insn_dep = sf.insn_dep - # Measure times and count operations in c++ code - if get_option("instrumentation_level") >= 4: - if sf.stage == 1: - setuptimer = '{}_kernel_setup'.format(assembler_routine_name()) - insn_dep = insn_dep.union(frozenset({instruction(code='HP_TIMER_STOP({});'.format(setuptimer), - within_inames=frozenset(sf.within_inames), - depends_on=insn_dep)})) - - timer_name = assembler_routine_name() + '_kernel' + '_stage{}'.format(sf.stage) - post_include('HP_DECLARE_TIMER({});'.format(timer_name), filetag='operatorfile') - dump_accumulate_timer(timer_name) - insn_dep = insn_dep.union(frozenset({instruction(code="HP_TIMER_START({});".format(timer_name), - within_inames=frozenset(sf.within_inames), - depends_on=insn_dep, - ), - })) - - direct_input = sf.input.direct_input - - # Set up the input for stage 1 - if direct_input is None: - if sf.vectorized: - for i, 
inputsf in enumerate(sf.kernels): - inputsf.input.realize(sf, i, inputsf.insn_dep.union(insn_dep)) - else: - sf.input.realize(sf, 0, insn_dep) + # Get all the necessary pieces for a function call + buffers = tuple(name_buffer_storage(sf.buffer, i) for i in range(2)) + + # Make sure that the storage is allocated and has a certain minimum size + # This is necessary to allocate buffers that will be passed to sumfact kernel + # functions. Loopy has no knowledge of what happens with those... + for buf in buffers: + # Determine the necessary size of the buffer. We assume that we do not + # underintegrate the form!!! + size = max(product(m.quadrature_size for m in sf.matrix_sequence) * sf.vector_width, + product(m.basis_size for m in sf.matrix_sequence) * sf.vector_width) + temporary_variable("{}_dummy".format(buf), + shape=(size,), + custom_base_storage=buf, + decl_method=lambda n, k, di: None, + ) + + # Realize the input if it is not direct + if sf.stage == 1 and not sf.interface.direct_is_possible: + insn_dep = insn_dep.union(sf.interface.realize(sf, insn_dep)) + + # Trigger generation of the sum factorization kernel function + qp = quadrature_points_per_direction() + necessary_kernel_implementations((sf, qp)) + + # Call the function + code = "{}({});".format(sf.function_name, ", ".join(buffers + sf.interface.function_args)) + tag = "sumfact_stage{}".format(sf.stage) + insn_dep = frozenset({instruction(code=code, + depends_on=insn_dep, + within_inames=frozenset(sf.within_inames), + tags=frozenset({tag}), + predicates=sf.predicates, + ) + }) + + # Interpret the output as a temporary of correct shape + out = "{}_output".format(sf.buffer) + temporary_variable(out, + shape=sf.output_shape, + dim_tags=sf.output_dimtags, + custom_base_storage=buffers[sf.length % 2], + managed=True, + ) + silenced_warning("read_no_write({})".format(out)) - insn_dep = insn_dep.union(frozenset({lp.match.Writes("input_{}".format(sf.buffer))})) - else: - if sf.input.element_index is None: - 
direct_input_arg = "{}_access".format(direct_input) - else: - direct_input_arg = "{}_access_comp{}".format(direct_input, sf.input.element_index) + return lp.TaggedVariable(out, sf.tag), insn_dep + + +class BufferSwitcher(object): + def __init__(self): + self.current = 0 + + def get_temporary(self, name=None, **kwargs): + assert name + bs = "buffer{}".format(self.current) + globalarg(bs) + temporary_variable(name, + managed=True, + custom_base_storage=bs, + **kwargs + ) + + return name + + def switch(self): + self.current = (self.current + 1) % 2 + + +def realize_sumfact_kernel_function(sf): + # Get a buffer switcher instance + buffer = BufferSwitcher() + insn_dep = frozenset() # Prepare some dim_tags/shapes for later use ftags = ",".join(["f"] * sf.length) @@ -141,24 +192,12 @@ def _realize_sum_factorization_kernel(sf): # * a global data structure (if FastDGGridOperator is in use) # * a value from a global data structure, broadcasted to a vector type (vectorized + FastDGGridOperator) input_inames = (k_expr,) + tuple(prim.Variable(j) for j in out_inames[1:]) - if l == 0 and direct_input is not None: + if l == 0 and sf.stage == 1 and sf.interface.direct_is_possible: # See comment below input_inames = permute_backward(input_inames, perm) inp_shape = permute_backward(inp_shape, perm) - globalarg(direct_input_arg, - shape=inp_shape, - dim_tags=novec_ftags, - offset=_dof_offset(sf.input.element, sf.input.element_index), - ) - alias_data_array(direct_input_arg, direct_input) - if matrix.vectorized: - input_summand = prim.Call(ExplicitVCLCast(dtype_floatingpoint(), vector_width=sf.vector_width), - (prim.Subscript(prim.Variable(direct_input_arg), - input_inames),)) - else: - input_summand = prim.Subscript(prim.Variable(direct_input_arg), - input_inames + vec_iname) + input_summand = sf.interface.realize_direct(inp_shape, input_inames) else: # If we did permute the order of a matrices above we also # permuted the order of out_inames. 
Unfortunately the @@ -171,9 +210,10 @@ def _realize_sum_factorization_kernel(sf): # Get a temporary that interprets the base storage of the input # as a column-major matrix. In later iteration of the matrix loop # this reinterprets the output of the previous iteration. - inp = get_buffer_temporary(sf.buffer, + inp = buffer.get_temporary("buff_step{}_in".format(l), shape=inp_shape + vec_shape, - dim_tags=ftags) + dim_tags=ftags, + ) # The input temporary will only be read from, so we need to silence the loopy warning silenced_warning('read_no_write({})'.format(inp)) @@ -181,7 +221,7 @@ def _realize_sum_factorization_kernel(sf): input_summand = prim.Subscript(prim.Variable(inp), input_inames + vec_iname) - switch_base_storage(sf.buffer) + buffer.switch() # Get a temporary that interprets the base storage of the output. # @@ -195,9 +235,10 @@ def _realize_sum_factorization_kernel(sf): output_shape = tuple(out_shape[1:]) + (out_shape[0],) if l == len(matrix_sequence) - 1: output_shape = permute_backward(output_shape, perm) - out = get_buffer_temporary(sf.buffer, + out = buffer.get_temporary("buff_step{}_out".format(l), shape=output_shape + vec_shape, - dim_tags=ftags) + dim_tags=ftags, + ) # Write the matrix-matrix multiplication expression matprod = prim.Product((matrix.pymbolic((prim.Variable(out_inames[0]), k_expr) + vec_iname), @@ -213,108 +254,28 @@ def _realize_sum_factorization_kernel(sf): if l == len(matrix_sequence) - 1: output_inames = permute_backward(output_inames, perm) + # Collect the key word arguments for the loopy instruction + insn_args = {"depends_on": insn_dep} + # In case of direct output we directly accumulate the result # of the Sumfactorization into some global data structure. 
- if l == len(matrix_sequence) - 1 and get_option('fastdg') and sf.stage == 3: - ft = get_global_context_value("form_type") - if sf.test_element_index is None: - direct_output = "{}_access".format(sf.accumvar) - else: - direct_output = "{}_access_comp{}".format(sf.accumvar, sf.test_element_index) - if ft == 'residual' or ft == 'jacobian_apply': - globalarg(direct_output, - shape=output_shape, - dim_tags=novec_ftags, - offset=_dof_offset(sf.test_element, sf.test_element_index), - ) - alias_data_array(direct_output, sf.accumvar) - - assignee = prim.Subscript(prim.Variable(direct_output), output_inames) - else: - assert ft == 'jacobian' - - direct_output = "{}x{}".format(direct_output, sf.trial_element_index) - rowsize = sum(tuple(s for s in _local_sizes(sf.trial_element))) - element = sf.trial_element - if isinstance(element, MixedElement): - element = element.extract_component(sf.trial_element_index)[1] - other_shape = tuple(element.degree() + 1 for e in range(sf.length)) - from pytools import product - manual_strides = tuple("stride:{}".format(rowsize * product(output_shape[:i])) for i in range(sf.length)) - dim_tags = "{},{}".format(novec_ftags, ",".join(manual_strides)) - globalarg(direct_output, - shape=other_shape + output_shape, - offset=rowsize * _dof_offset(sf.test_element, sf.test_element_index) + _dof_offset(sf.trial_element, sf.trial_element_index), - dim_tags=dim_tags, - ) - alias_data_array(direct_output, sf.accumvar) - # TODO: It is at least questionnable, whether using the *order* of the inames in here - # for indexing is a good idea. Then again, it is hard to find an alternative. 
- _ansatz_inames = tuple(prim.Variable(i) for i in sf.within_inames) - assignee = prim.Subscript(prim.Variable(direct_output), _ansatz_inames + output_inames) - - # In case of vectorization we need to apply a horizontal add - if matrix.vectorized: - matprod = prim.Call(prim.Variable("horizontal_add"), - (matprod,)) - - # We need to accumulate - matprod = prim.Sum((assignee, matprod)) + if l == len(matrix_sequence) - 1 and get_form_option('fastdg') and sf.stage == 3: + if sf.vectorized: + insn_args["forced_iname_deps"] = frozenset({vec_iname[0].name}) + insn_dep = sf.interface.realize_direct(matprod, output_inames, out_shape, **insn_args) else: - assignee = prim.Subscript(prim.Variable(out), output_inames + vec_iname) - - tag = "sumfact_stage{}".format(sf.stage) - if sf.stage == 3: - tag = "{}_{}".format(tag, "_".join(sf.within_inames)) - - # Issue the reduction instruction that implements the multiplication - # at the same time store the instruction ID for the next instruction to depend on - insn_dep = frozenset({instruction(assignee=assignee, - expression=matprod, - forced_iname_deps=frozenset([iname for iname in out_inames]).union(frozenset(sf.within_inames)), - forced_iname_deps_is_final=True, - depends_on=insn_dep, - tags=frozenset({tag}), - predicates=sf.predicates, - groups=frozenset({sf.group_name}), - ) - }) - - # Measure times and count operations in c++ code - if get_option("instrumentation_level") >= 4: - stop_insn = frozenset({instruction(code="HP_TIMER_STOP({});".format(timer_name), - depends_on=frozenset({lp.match.Tagged(tag)}), - within_inames=frozenset(sf.within_inames))}) - if sf.stage == 1: - qp_timer_name = assembler_routine_name() + '_kernel' + '_quadratureloop' - post_include('HP_DECLARE_TIMER({});'.format(timer_name), filetag='operatorfile') - dump_accumulate_timer(timer_name) - frozenset({instruction(code="HP_TIMER_START({});".format(qp_timer_name), - depends_on=stop_insn)}) - - out = get_buffer_temporary(sf.buffer, - shape=sf.output_shape, - 
dim_tags=sf.output_dimtags, - ) - silenced_warning('read_no_write({})'.format(out)) - - return lp.TaggedVariable(out, sf.tag), insn_dep - - -def _local_sizes(element): - from ufl import FiniteElement, MixedElement - if isinstance(element, MixedElement): - for subel in element.sub_elements(): - for s in _local_sizes(subel): - yield s - else: - assert isinstance(element, FiniteElement) - yield (element.degree() + 1)**element.cell().geometric_dimension() - - -def _dof_offset(element, component): - if component is None: - return 0 - else: - sizes = tuple(s for s in _local_sizes(element)) - return sum(sizes[0:component]) + # Issue the reduction instruction that implements the multiplication + # at the same time store the instruction ID for the next instruction to depend on + insn_dep = frozenset({instruction(assignee=prim.Subscript(prim.Variable(out), output_inames + vec_iname), + expression=matprod, + **insn_args + ) + }) + + # Construct a loopy kernel object + from dune.perftool.pdelab.localoperator import extract_kernel_from_cache + args = ("const char* buffer0", "const char* buffer1") + sf.interface.signature_args + signature = "void {}({}) const".format(sf.function_name, ", ".join(args)) + kernel = extract_kernel_from_cache("kernel_default", sf.function_name, [signature], add_timings=False) + delete_cache_items("kernel_default") + return kernel diff --git a/python/dune/perftool/sumfact/switch.py b/python/dune/perftool/sumfact/switch.py index a420850d6bdecb4e479688f7be8c3e1039572351..8c6a0f13ace27b6e3081030607149b7b003aa82c 100644 --- a/python/dune/perftool/sumfact/switch.py +++ b/python/dune/perftool/sumfact/switch.py @@ -10,13 +10,13 @@ from dune.perftool.pdelab.signatures import (assembly_routine_args, assembly_routine_signature, kernel_name, ) -from dune.perftool.options import get_option +from dune.perftool.options import get_form_option from dune.perftool.cgen.clazz import ClassMember @backend(interface="generate_kernels_per_integral", name="sumfact") def 
generate_kernels_per_integral(integrals): - dim = get_global_context_value("formdata").geometric_dimension + dim = world_dimension() measure = get_global_context_value("integral_type") if measure == "cell": @@ -53,7 +53,7 @@ def get_kernel_name(facedir_s=None, facemod_s=None, facedir_n=None, facemod_n=No def decide_if_kernel_is_necessary(facedir_s, facemod_s, facedir_n, facemod_n): # If we are not using YaspGrid, all variants need to be realized - if not get_option("diagonal_transformation_matrix"): + if not get_form_option("diagonal_transformation_matrix"): return True # The PDELab machineries visit-once policy combined with Yasp avoids any visits @@ -138,9 +138,9 @@ def generate_interior_facet_switch(): def get_facedir(restriction): from dune.perftool.pdelab.restriction import Restriction - if restriction == Restriction.NEGATIVE or get_global_context_value("integral_type") == "exterior_facet": + if restriction == Restriction.POSITIVE or get_global_context_value("integral_type") == "exterior_facet": return get_global_context_value("facedir_s") - if restriction == Restriction.POSITIVE: + if restriction == Restriction.NEGATIVE: return get_global_context_value("facedir_n") if restriction == Restriction.NONE: return None @@ -149,9 +149,9 @@ def get_facedir(restriction): def get_facemod(restriction): from dune.perftool.pdelab.restriction import Restriction - if restriction == Restriction.NEGATIVE or get_global_context_value("integral_type") == "exterior_facet": + if restriction == Restriction.POSITIVE or get_global_context_value("integral_type") == "exterior_facet": return get_global_context_value("facemod_s") - if restriction == Restriction.POSITIVE: + if restriction == Restriction.NEGATIVE: return get_global_context_value("facemod_n") if restriction == Restriction.NONE: return None diff --git a/python/dune/perftool/sumfact/symbolic.py b/python/dune/perftool/sumfact/symbolic.py index babb432bc324f6ce08cec9f931df7ba8191181fa..fb283a0536318c1585a0963a33ca76105571cb84 
100644 --- a/python/dune/perftool/sumfact/symbolic.py +++ b/python/dune/perftool/sumfact/symbolic.py @@ -1,10 +1,16 @@ """ A pymbolic node representing a sum factorization kernel """ -from dune.perftool.options import get_option -from dune.perftool.generation import get_counted_variable +from dune.perftool.options import get_form_option, get_option +from dune.perftool.generation import (get_counted_variable, + subst_rule, + transform, + ) from dune.perftool.pdelab.geometry import local_dimension, world_dimension from dune.perftool.sumfact.quadrature import quadrature_inames from dune.perftool.sumfact.tabulation import BasisTabulationMatrixBase, BasisTabulationMatrixArray +from dune.perftool.loopy.target import dtype_floatingpoint, type_floatingpoint +from dune.perftool.loopy.vcl import ExplicitVCLCast, VCLLowerUpperLoad +from dune.perftool.tools import get_leaf, maybe_wrap_subscript, remove_duplicates from pytools import ImmutableRecord, product @@ -16,13 +22,194 @@ import frozendict import inspect -class SumfactKernelInputBase(object): +class SumfactKernelInterfaceBase(object): + """ A base class for the input/output of a sum factorization kernel + In stage 1, this represents the input object, in stage 3 the output object. 
+ """ + def realize(self, *a, **kw): + raise NotImplementedError + + def realize_direct(self, *a, **kw): + raise NotImplementedError + + @property + def within_inames(self): + return () + + @property + def direct_is_possible(self): + return False + + @property + def stage(self): + raise NotImplementedError + + @property + def function_args(self): + return () + + @property + def signature_args(self): + return () + + @property + def function_name_suffix(self): + return "" + + def __repr__(self): + return "SumfactKernelInterfaceBase()" + + +class VectorSumfactKernelInput(SumfactKernelInterfaceBase): + def __init__(self, interfaces): + assert(isinstance(interfaces, tuple)) + self.interfaces = interfaces + + def __repr__(self): + return "_".join(repr(i) for i in self.interfaces) + + @property + def stage(self): + return 1 + + @property + def direct_is_possible(self): + return all(i.direct_is_possible for i in self.interfaces) + + def realize(self, sf, dep): + for i, inp in enumerate(self.interfaces): + dep = dep.union(inp.realize(sf, dep, index=i)) + return dep + + def realize_direct(self, shape, inames): + # Check whether the input exhibits a favorable structure + # (whether we can broadcast scalar values into SIMD registers) + total = set(self.interfaces) + lower = set(self.interfaces[:len(self.interfaces) // 2]) + upper = set(self.interfaces[len(self.interfaces) // 2:]) + + if len(total) == 1: + # All input coefficients use the exact same input coefficient. 
+ # We implement this by broadcasting it into a SIMD register + return prim.Call(ExplicitVCLCast(dtype_floatingpoint()), + (self.interfaces[0].realize_direct(shape, inames),) + ) + elif len(total) == 2 and len(lower) == 1 and len(upper) == 1: + # The lower and the upper part of the SIMD register use + # the same input coefficient, we combine the SIMD register + # from two shorter SIMD types + return prim.Call(VCLLowerUpperLoad(dtype_floatingpoint()), + (self.interfaces[0].realize_direct(shape, inames), + self.interfaces[len(self.interfaces) // 2].realize_direct(shape, inames, which=1), + ) + ) + else: + # The input does not exhibit a broadcastable structure, we + # need to load scalars into the SIMD vector. + raise NotImplementedError("SIMD loads from scalars not implemented!") + + @property + def function_args(self): + return sum((i.function_args for i in remove_duplicates(self.interfaces)), ()) + + @property + def signature_args(self): + if get_form_option("fastdg"): + return tuple("const {}* fastdg{}".format(type_floatingpoint(), i) for i, _ in enumerate(remove_duplicates(self.interfaces))) + else: + return () + + @property + def function_name_suffix(self): + return "".join(i.function_name_suffix for i in remove_duplicates(self.interfaces)) + + +class VectorSumfactKernelOutput(SumfactKernelInterfaceBase): + def __init__(self, interfaces): + self.interfaces = interfaces + + def __repr__(self): + return "_".join(repr(o) for o in self.interfaces) + @property - def direct_input(self): - return None + def stage(self): + return 3 + + @property + def within_inames(self): + return self.interfaces[0].within_inames + + def _add_hadd(self, o, result): + hadd_function = "horizontal_add" + if len(set(self.interfaces)) > 1: + pos = self.interfaces.index(o) + if pos == 0: + hadd_function = "horizontal_add_lower" + else: + hadd_function = "horizontal_add_upper" + + return prim.Call(prim.Variable(hadd_function), (result,)) + + def realize(self, sf, result, insn_dep): + outputs = 
set(self.interfaces) - def realize(self, sf, i, dep): - pass + trial_element, = set(o.trial_element for o in self.interfaces) + trial_element_index, = set(o.trial_element_index for o in self.interfaces) + from dune.perftool.sumfact.accumulation import accum_iname + element = get_leaf(trial_element, trial_element_index) if trial_element is not None else None + inames = tuple(accum_iname(element, mat.rows, i) + for i, mat in enumerate(sf.matrix_sequence)) + veciname = accum_iname(element, sf.vector_width // len(outputs), "vec") + transform(lp.tag_inames, [(veciname, "vec")]) + + deps = frozenset() + for o in outputs: + hadd_result = self._add_hadd(o, maybe_wrap_subscript(result, tuple(prim.Variable(iname) for iname in inames + (veciname,)))) + deps = deps.union(o.realize(sf, hadd_result, insn_dep, inames=inames, additional_inames=(veciname,))) + + return deps + + def realize_direct(self, result, inames, shape, **args): + outputs = set(self.interfaces) + + # If multiple horizontal_add's are to be performed with 'result' + # we need to precompute the result! 
+ if len(outputs) > 1: + substname = "haddsubst_{}".format("_".join([i.name for i in inames])) + subst_rule(substname, (), result) + result = prim.Call(prim.Variable(substname), ()) + transform(lp.precompute, substname) + + deps = frozenset() + for o in outputs: + hadd_result = self._add_hadd(o, result) + which = tuple(remove_duplicates(self.interfaces)).index(o) + deps = deps.union(o.realize_direct(hadd_result, inames, shape, which=which, **args)) + + return deps + + @property + def function_args(self): + if get_form_option("fastdg"): + return sum((i.function_args for i in remove_duplicates(self.interfaces)), ()) + else: + return() + + @property + def signature_args(self): + if get_form_option("fastdg"): + def _get_pair(i): + ret = ("{}* fastdg{}".format(type_floatingpoint(), i),) + if self.within_inames: + ret = ret + ("unsigned int jacobian_offset{}".format(i),) + return ret + return sum((_get_pair(i) for i, _ in enumerate(remove_duplicates(self.interfaces))), ()) + else: + return () + + @property + def function_name_suffix(self): + return "".join(i.function_name_suffix for i in remove_duplicates(self.interfaces)) class SumfactKernelBase(object): @@ -33,16 +220,9 @@ class SumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable): def __init__(self, matrix_sequence=None, buffer=None, - stage=1, position_priority=None, - restriction=None, insn_dep=frozenset(), - input=None, - accumvar=None, - test_element=None, - test_element_index=None, - trial_element=None, - trial_element_index=None, + interface=SumfactKernelInterfaceBase(), predicates=frozenset(), ): """Create a sum factorization kernel @@ -96,34 +276,18 @@ class SumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable): for intermediate results. The memory is expected to be pre-initialized with the input or you have to provide direct_input (FastDGGridOperator). - stage: 1 or 3 position_priority: Will be used in the dry run to order kernels when doing vectorization e.g. (dx u,dy u,dz u, u). 
- restriction: Restriction for faces values. insn_dep: An instruction ID that the first issued instruction should depend upon. All following ones will depend on each other. - input: An SumfactKernelInputBase instance describing the input of the kernel - accumvar: The accumulation variable to accumulate into - trial_element: The leaf element of the trial function space. - Used to correctly nest stage 3 in the jacobian case. - test_element: The leaf element of the test function space - Used to compute offsets in the fastdg case. - test_element_index: the component of the test_element - trial_element_index: the component of the trial_element + interface: An SumfactKernelInterfaceBase instance describing the input + (stage 1) or output (stage 3) of the kernel """ # Assert the inputs! assert isinstance(matrix_sequence, tuple) assert all(isinstance(m, BasisTabulationMatrixBase) for m in matrix_sequence) - - assert stage in (1, 3) - - if stage == 1: - assert isinstance(input, SumfactKernelInputBase) - - if stage == 3: - assert isinstance(restriction, tuple) - + assert isinstance(interface, SumfactKernelInterfaceBase) assert isinstance(insn_dep, frozenset) # The following construction is a bit weird: Dict comprehensions do not have @@ -149,7 +313,7 @@ class SumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable): def __str__(self): # Above stringifier just calls back into this return "SF{}:[{}]->[{}]".format(self.stage, - str(self.input), + str(self.interface), ", ".join(str(m) for m in self.matrix_sequence)) mapper_method = "map_sumfact_kernel" @@ -158,32 +322,56 @@ class SumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable): # Some cache key definitions # Watch out for the documentation to see which key is used unter what circumstances # + @property + def function_name(self): + """ The name of the function that implements this kernel """ + return "sfimpl_{}{}".format("_".join(str(m) for m in self.matrix_sequence), + 
self.interface.function_name_suffix) + + @property + def parallel_key(self): + """ A key that identifies parallellizable kernels. """ + return tuple(m.basis_size for m in self.matrix_sequence) + (self.stage, self.buffer) @property def cache_key(self): """ The cache key that can be used in generation magic Any two sum factorization kernels having the same cache_key - are realized simulatenously! + are realized simultaneously! """ - return (self.matrix_sequence, self.restriction, self.stage, self.buffer, self.test_element_index) + if self.buffer is None: + # During dry run, we return something unique to this kernel + return repr(self) + else: + # Later we identify parallely implemented kernels by the assigned buffer + return self.buffer @property - def input_key(self): + def inout_key(self): """ A cache key for the input coefficients Any two sum factorization kernels having the same input_key - work on the same input coefficient (and are suitable for simultaneous - treatment because of that) + work on the same input coefficient (stage 1) or accumulate + into the same thing (stage 3) """ - return (self.input, self.restriction, self.accumvar, self.trial_element_index) - - @property - def group_name(self): - return "sfgroup_{}_{}_{}_{}".format(self.input, self.restriction, self.accumvar, self.trial_element_index) + return repr(self.interface) # # Some convenience methods to extract information about the sum factorization kernel # + def __lt__(self, other): + if self.parallel_key != other.parallel_key: + return self.parallel_key < other.parallel_key + if self.inout_key != other.inout_key: + return self.inout_key < other.inout_key + if self.position_priority == other.position_priority: + return repr(self) < repr(other) + if self.position_priority is None: + return False + if other.position_priority is None: + return True + return self.position_priority < other.position_priority + @property def length(self): """ The number of matrices to apply """ @@ -199,14 +387,7 @@ 
class SumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable): @property def within_inames(self): - if self.trial_element is None: - return () - else: - from dune.perftool.sumfact.basis import lfs_inames - element = self.trial_element - if isinstance(element, MixedElement): - element = element.extract_component(self.trial_element_index)[1] - return lfs_inames(element, self.restriction) + return self.interface.within_inames def vec_index(self, sf): """ Map an unvectorized sumfact kernel object to its position @@ -292,6 +473,10 @@ class SumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable): def tag(self): return "sumfac" + @property + def stage(self): + return self.interface.stage + # # Define properties for conformity with the interface of VectorizedSumfactKernel # @@ -358,7 +543,6 @@ class VectorizedSumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable) # Assert all the properties that need to be the same across all subkernels assert len(set(k.stage for k in kernels)) == 1 assert len(set(k.length for k in kernels)) == 1 - assert len(set(k.restriction for k in kernels)) == 1 assert len(set(k.within_inames for k in kernels)) == 1 assert len(set(k.predicates for k in kernels)) == 1 @@ -366,7 +550,7 @@ class VectorizedSumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable) for i in range(kernels[0].length): assert len(set(tuple(k.matrix_sequence[i].rows for k in kernels))) == 1 assert len(set(tuple(k.matrix_sequence[i].cols for k in kernels))) == 1 - assert len(set(tuple(k.matrix_sequence[i].face for k in kernels))) == 1 + assert len(set(tuple(k.matrix_sequence[i].direction for k in kernels))) == 1 assert len(set(tuple(k.matrix_sequence[i].transpose for k in kernels))) == 1 # Join the instruction dependencies of all subkernels @@ -394,7 +578,7 @@ class VectorizedSumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable) def __str__(self): # Above stringifier just calls back into this return 
"VSF{}:[{}]->[{}]".format(self.stage, - ", ".join(str(k.input) for k in self.kernels), + ", ".join(str(k.interface) for k in self.kernels), ", ".join(str(mat) for mat in self.matrix_sequence)) mapper_method = "map_vectorized_sumfact_kernel" @@ -405,6 +589,10 @@ class VectorizedSumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable) # Some cache key definitions # Watch out for the documentation to see which key is used unter what circumstances # + @property + def function_name(self): + return "sfimpl_{}{}".format("_".join(str(m) for m in self.matrix_sequence), + self.interface.function_name_suffix) @property def cache_key(self): @@ -437,36 +625,10 @@ class VectorizedSumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable) def within_inames(self): return self.kernels[0].within_inames - @property - def test_element(self): - return self.kernels[0].test_element - - @property - def test_element_index(self): - return self.kernels[0].test_element_index - - @property - def trial_element(self): - return self.kernels[0].trial_element - - @property - def trial_element_index(self): - return self.kernels[0].trial_element_index - @property def predicates(self): return self.kernels[0].predicates - @property - def input(self): - assert len(set(k.input for k in self.kernels)) == 1 - return self.kernels[0].input - - @property - def accumvar(self): - assert len(set(k.accumvar for k in self.kernels)) == 1 - return self.kernels[0].accumvar - @property def transposed(self): return self.kernels[0].transposed @@ -487,16 +649,23 @@ class VectorizedSumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable) # @property - def cache_key(self): - return (tuple(k.cache_key for k in self.kernels), self.buffer) + def stage(self): + return self.kernels[0].stage + + @property + def interface(self): + if self.stage == 1: + return VectorSumfactKernelInput(tuple(k.interface for k in self.kernels)) + else: + return VectorSumfactKernelOutput(tuple(k.interface for k in self.kernels)) 
@property - def input_key(self): - return tuple(k.input_key for k in self.kernels) + def cache_key(self): + return (tuple(k.cache_key for k in self.kernels), self.buffer) @property - def group_name(self): - return "_".join(k.group_name for k in self.kernels) + def inout_key(self): + return tuple(k.inout_key for k in self.kernels) @property def length(self): @@ -507,10 +676,11 @@ class VectorizedSumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable) return True def horizontal_index(self, sf): - key = tuple(mat.derivative for mat in sf.matrix_sequence) for i, k in enumerate(self.kernels): - if tuple(mat.derivative for mat in k.matrix_sequence) == key: - return i + if sf.inout_key == k.inout_key: + if tuple(mat.derivative for mat in sf.matrix_sequence) == tuple(mat.derivative for mat in k.matrix_sequence): + return i + return 0 def _quadrature_index(self, sf, visitor): diff --git a/python/dune/perftool/sumfact/tabulation.py b/python/dune/perftool/sumfact/tabulation.py index cbe8fbbc374c6ada8e3c63a1a81adb5a3c742ac6..99107c9cd8f6a4429965985fb8fbbdcbd3e898e9 100644 --- a/python/dune/perftool/sumfact/tabulation.py +++ b/python/dune/perftool/sumfact/tabulation.py @@ -1,7 +1,5 @@ from dune.perftool.ufl.modified_terminals import Restriction -from dune.perftool.options import get_option - from dune.perftool.pdelab.argument import name_coefficientcontainer from dune.perftool.pdelab.geometry import world_dimension, local_dimension from dune.perftool.generation import (class_member, @@ -20,9 +18,8 @@ from dune.perftool.generation import (class_member, transform, valuearg ) -from dune.perftool.loopy.buffer import get_buffer_temporary from dune.perftool.loopy.target import dtype_floatingpoint -from dune.perftool.loopy.vcl import ExplicitVCLCast +from dune.perftool.loopy.vcl import ExplicitVCLCast, get_vcl_type_size from dune.perftool.pdelab.localoperator import (name_domain_field, lop_template_range_field, ) @@ -64,14 +61,27 @@ class 
BasisTabulationMatrix(BasisTabulationMatrixBase, ImmutableRecord): slice_index=slice_index, ) + @property + def _shortname(self): + infos = ["d{}".format(self.basis_size), + "q{}".format(self.quadrature_size)] + + if self.transpose: + infos.append("T") + + if self.derivative: + infos.append("dx") + + if self.face is not None: + infos.append("f{}".format(self.face)) + + if self.slice_size is not None: + infos.append("s{}".format(self.slice_index)) + + return "".join(infos) + def __str__(self): - return "{}{}A{}{}{}" \ - .format("face{}_".format(self.face) if self.face is not None else "", - "d" if self.derivative else "", - self.basis_size, - "T" if self.transpose else "", - "_slice{}".format(self.slice_index) if self.slice_size is not None else "", - ) + return "Theta_{}".format(self._shortname) @property def rows(self): @@ -98,14 +108,7 @@ class BasisTabulationMatrix(BasisTabulationMatrixBase, ImmutableRecord): return size def pymbolic(self, indices): - name = "{}{}Theta{}{}_qp{}_dof{}" \ - .format("face{}_".format(self.face) if self.face is not None else "", - "d" if self.derivative else "", - "T" if self.transpose else "", - "_slice{}".format(self.slice_index) if self.slice_size is not None else "", - self.quadrature_size, - self.basis_size, - ) + name = str(self) define_theta(name, self) return prim.Subscript(prim.Variable(name), indices) @@ -133,7 +136,7 @@ class BasisTabulationMatrixArray(BasisTabulationMatrixBase): assert len(set(t.quadrature_size for t in tabs)) == 1 assert len(set(t.basis_size for t in tabs)) == 1 assert len(set(t.transpose for t in tabs)) == 1 - assert len(set(t.face for t in tabs)) == 1 + assert len(set(t.direction for t in tabs)) == 1 assert len(set(t.slice_size for t in tabs)) == 1 self.tabs = tabs @@ -142,11 +145,7 @@ class BasisTabulationMatrixArray(BasisTabulationMatrixBase): self.width = width def __str__(self): - abbrevs = tuple("{}A{}{}".format("d" if t.derivative else "", - self.basis_size, - "s{}".format(t.slice_index) if 
t.slice_size is not None else "") - for t in self.tabs) - return "_".join(abbrevs) + return "Theta{}".format("_".join((t._shortname for t in self.tabs))) @property def quadrature_size(self): @@ -196,17 +195,10 @@ class BasisTabulationMatrixArray(BasisTabulationMatrixBase): # Check whether we can realize this by broadcasting the values of a simple tabulation if len(set(self.tabs)) == 1: theta = self.tabs[0].pymbolic(indices[:-1]) - return prim.Call(ExplicitVCLCast(dtype_floatingpoint(), vector_width=len(self.tabs)), (theta,)) - - abbrevs = tuple("{}x{}".format("d" if t.derivative else "", - "s{}".format(t.slice_index) if t.slice_size is not None else "") - for t in self.tabs) - name = "ThetaLarge{}{}_{}_qp{}_dof{}".format("face{}_".format(self.face) if self.face is not None else "", - "T" if self.transpose else "", - "_".join(abbrevs), - self.tabs[0].quadrature_size, - self.tabs[0].basis_size, - ) + return prim.Call(ExplicitVCLCast(dtype_floatingpoint(), vector_width=get_vcl_type_size(dtype_floatingpoint())), (theta,)) + + name = str(self) + for i, tab in enumerate(self.tabs): define_theta(name, tab, additional_indices=(i,), width=self.width) @@ -288,7 +280,10 @@ def local_quadrature_points_per_direction(): def polynomial_degree(): - form = get_global_context_value("formdata").preprocessed_form + data = get_global_context_value("data") + form = data.object_by_name[get_form_option("form")] + from dune.perftool.ufl.preprocess import preprocess_form + form = preprocess_form(form).preprocessed_form degree = form.coefficients()[0].ufl_element().degree() if isinstance(degree, int): degree = (degree,) * world_dimension() @@ -407,9 +402,7 @@ def define_theta(name, tabmat, additional_indices=(), width=None): bound = tabmat.quadrature_size if tabmat.slice_size is not None: bound *= tabmat.slice_size - qp = name_oned_quadrature_points(bound) - qw = name_oned_quadrature_weights(bound) - sort_quadrature_points_weights(qp, qw, bound) + degree = tabmat.basis_size - 1 polynomials = 
name_polynomials(degree) @@ -438,9 +431,15 @@ def define_theta(name, tabmat, additional_indices=(), width=None): if tabmat.slice_size is not None: inames[0] = tabmat.slice_size * inames[0] + tabmat.slice_index - args = [inames[1], prim.Subscript(prim.Variable(qp), (inames[0],))] - if tabmat.face is not None: - args[1] = tabmat.face + args = [inames[1]] + + if tabmat.face is None: + qp = name_oned_quadrature_points(bound) + qw = name_oned_quadrature_weights(bound) + sort_quadrature_points_weights(qp, qw, bound) + args.append(prim.Subscript(prim.Variable(qp), (inames[0],))) + else: + args.append(tabmat.face) instruction(assignee=prim.Subscript(prim.Variable(name), (i, j) + additional_indices), expression=prim.Call(PolynomialLookup(polynomials, tabmat.derivative), tuple(args)), diff --git a/python/dune/perftool/sumfact/vectorization.py b/python/dune/perftool/sumfact/vectorization.py index f3fb96b99b0d7cb0a633e6d499f6e362db5da207..4f7abe67048b3016db2277153844757116996164 100644 --- a/python/dune/perftool/sumfact/vectorization.py +++ b/python/dune/perftool/sumfact/vectorization.py @@ -18,9 +18,9 @@ from dune.perftool.sumfact.tabulation import (BasisTabulationMatrixArray, quadrature_points_per_direction, set_quadrature_points, ) -from dune.perftool.error import PerftoolError -from dune.perftool.options import get_option -from dune.perftool.tools import add_to_frozendict, round_to_multiple +from dune.perftool.error import PerftoolVectorizationError +from dune.perftool.options import get_form_option +from dune.perftool.tools import add_to_frozendict, round_to_multiple, list_diff from pytools import product from frozendict import frozendict @@ -33,7 +33,7 @@ import math @generator_factory(item_tags=("vecinfo", "dryrundata"), cache_key_generator=lambda o, n: o) def _cache_vectorization_info(old, new): if new is None: - raise PerftoolError("Vectorization info for sum factorization kernel was not gathered correctly!") + raise PerftoolVectorizationError("Vectorization info for 
sum factorization kernel was not gathered correctly!") return new @@ -48,13 +48,6 @@ def attach_vectorization_info(sf): return _cache_vectorization_info(sf, None) -def position_penalty_factor(sf): - if isinstance(sf, SumfactKernel) or sf.vertical_width > 1: - return 1 - else: - return 1 + sum(abs(sf.kernels[i].position_priority - i) if sf.kernels[i].position_priority is not None else 0 for i in range(sf.length)) - - @backend(interface="vectorization_strategy", name="model") def costmodel(sf): # Penalize vertical vectorization @@ -66,17 +59,17 @@ def costmodel(sf): scalar_penalty = get_vcl_type_size(dtype_floatingpoint()) # Return total operations - return sf.operations * position_penalty_factor(sf) * vertical_penalty * scalar_penalty + return sf.operations * vertical_penalty * scalar_penalty @backend(interface="vectorization_strategy", name="explicit") def explicit_costfunction(sf): # Read the explicitly set values for horizontal and vertical vectorization width = get_vcl_type_size(dtype_floatingpoint()) - horizontal = get_option("vectorization_horizontal") + horizontal = get_form_option("vectorization_horizontal") if horizontal is None: horizontal = width - vertical = get_option("vectorization_vertical") + vertical = get_form_option("vectorization_vertical") if vertical is None: vertical = 1 horizontal = int(horizontal) @@ -84,15 +77,17 @@ def explicit_costfunction(sf): if sf.horizontal_width == horizontal and sf.vertical_width == vertical: # Penalize position mapping - return position_penalty_factor(sf) + return sf.operations else: return 1000000000000 -def strategy_cost(strategy): +def strategy_cost(strat_tuple): + qp, strategy = strat_tuple func = get_backend(interface="vectorization_strategy", - selector=lambda: get_option("vectorization_strategy")) + selector=lambda: get_form_option("vectorization_strategy")) keys = set(sf.cache_key for sf in strategy.values()) + set_quadrature_points(qp) # Sum over all the sum factorization kernels in the realization score = 
0.0 @@ -104,6 +99,13 @@ def strategy_cost(strategy): return score +def fixedqp_strategy_costfunction(qp): + def _cost(strategy): + return strategy_cost((qp, strategy)) + + return _cost + + def stringify_vectorization_strategy(strategy): result = [] qp, strategy = strategy @@ -139,7 +141,7 @@ def decide_vectorization_strategy(): from dune.perftool.generation import retrieve_cache_items all_sumfacts = [i for i in retrieve_cache_items("kernel_default and sumfactnodes")] - # Stage 1 sumfactorizations that were actually used + # Stage 1 sum factorizations that were actually used basis_sumfacts = [i for i in retrieve_cache_items('kernel_default and basis_sf_kernels')] # This means we can have sum factorizations that will not get used @@ -149,7 +151,7 @@ def decide_vectorization_strategy(): active_sumfacts = [i for i in all_sumfacts if i.stage == 3 or i in basis_sumfacts] # If no vectorization is needed, abort now - if get_option("vectorization_strategy") == "none": + if get_form_option("vectorization_strategy") == "none": for sf in all_sumfacts: _cache_vectorization_info(sf, sf.copy(buffer=get_counted_variable("buffer"))) return @@ -157,29 +159,22 @@ def decide_vectorization_strategy(): logger.debug("decide_vectorization_strategy: Found {} active sum factorization nodes" .format(len(active_sumfacts))) - # Find the best vectorization strategy by using a costmodel - width = get_vcl_type_size(dtype_floatingpoint()) - # - # Optimize over all the possible quadrature point tuples + # Find the best vectorization strategy by using a costmodel # - quad_points = [quadrature_points_per_direction()] - - if get_option("vectorization_allow_quadrature_changes"): - sf = next(iter(active_sumfacts)) - depth = 1 - while depth <= width: - i = 0 if sf.matrix_sequence[0].face is None else 1 - quad = list(quadrature_points_per_direction()) - quad[i] = round_to_multiple(quad[i], depth) - quad_points.append(tuple(quad)) - depth = depth * 2 - quad_points = list(set(quad_points)) + # Note that 
this optimization procedure uses a hierarchic approach to bypass + # the problems of unfavorable complexity of the set of all possible vectorization + # opportunities. Optimizations are performed at different levels (you find these + # levels in the function names implementing them), where optimal solutions at a + # higher level are combined into lower level solutions or optima of optimal solutions + # at higher level are calculated: + # * Level 1: Finding an optimal quadrature tuple (by finding optimum of level 2 optima) + # * Level 2: Split by parallelizability and combine optima into optimal solution + # * Level 3: Optimize number of different inputs to consider + # * Level 4: Optimize horizontal/vertical/hybrid strategy + width = get_vcl_type_size(dtype_floatingpoint()) + qp, sfdict = level1_optimal_vectorization_strategy(active_sumfacts, width) - # Find the minimum cost strategy between all the quadrature point tuples - optimal_strategies = {qp: fixed_quadrature_optimal_vectorization(active_sumfacts, width, qp) for qp in quad_points} - qp = min(optimal_strategies, key=lambda qp: strategy_cost(optimal_strategies[qp])) - sfdict = optimal_strategies[qp] set_quadrature_points(qp) logger.debug("decide_vectorization_strategy: Decided for the following strategy:" @@ -193,85 +188,104 @@ def decide_vectorization_strategy(): _cache_vectorization_info(sf, sfdict[sf]) -def fixed_quadrature_optimal_vectorization(sumfacts, width, qp): - """ For a given quadrature point tuple, find the optimal strategy! 
+def level1_optimal_vectorization_strategy(sumfacts, width): + # Gather a list of possible quadrature point tuples + quad_points = [quadrature_points_per_direction()] + if get_form_option("vectorization_allow_quadrature_changes"): + sf = next(iter(sumfacts)) + depth = 1 + while depth <= width: + i = 0 if sf.matrix_sequence[0].face is None else 1 + quad = list(quadrature_points_per_direction()) + quad[i] = round_to_multiple(quad[i], depth) + quad_points.append(tuple(quad)) + depth = depth * 2 + quad_points = list(set(quad_points)) + + # Find the minimum cost strategy between all the quadrature point tuples + optimal_strategies = {qp: level2_optimal_vectorization_strategy(sumfacts, width, qp) for qp in quad_points} + qp = min(optimal_strategies, key=lambda qp: strategy_cost((qp, optimal_strategies[qp]))) + + return qp, optimal_strategies[qp] - In order to have this scale sufficiently, we cannot simply list all vectorization - opportunities and score them individually, but we need to do a divide and conquer - approach. 
- """ - set_quadrature_points(qp) - # Find the sets of simultaneously realizable kernels (thats an equivalence relation) - keys = frozenset(sf.input_key for sf in sumfacts) +def level2_optimal_vectorization_strategy(sumfacts, width, qp): + # Find the sets of simultaneously realizable kernels + keys = frozenset(sf.parallel_key for sf in sumfacts) # Find minimums for each of these sets sfdict = frozendict() + for key in keys: - key_sumfacts = frozenset(sf for sf in sumfacts if sf.input_key == key) - minimum = min(fixed_quad_vectorization_opportunity_generator(key_sumfacts, width, qp), - key=strategy_cost) - sfdict = add_to_frozendict(sfdict, minimum) + key_sumfacts = frozenset(sf for sf in sumfacts if sf.parallel_key == key) + key_strategy = min(level2_optimal_vectorization_strategy_generator(key_sumfacts, width, qp), + key=fixedqp_strategy_costfunction(qp)) + sfdict = add_to_frozendict(sfdict, key_strategy) return sfdict -def fixed_quad_vectorization_opportunity_generator(sumfacts, width, qp, already=frozendict()): +def level2_optimal_vectorization_strategy_generator(sumfacts, width, qp): + for opp in _level2_optimal_vectorization_strategy_generator(sumfacts, width, qp): + # Add non-vectorized implementation information to all kernels that are not present in + # the optimal strategy + yield add_to_frozendict(opp, + {sf: sf.copy(buffer=get_counted_variable("buffer")) for sf in sumfacts if sf not in opp}) + + +def _level2_optimal_vectorization_strategy_generator(sumfacts, width, qp, already=frozendict()): if len(sumfacts) == 0: - # We have gone into recursion deep enough to have all sum factorization nodes - # assigned their vectorized counterpart. We can yield the result now! yield already return - # Otherwise we pick a random sum factorization kernel and construct all the vectorization - # opportunities realizing this particular kernel and go into recursion. 
- sf_to_decide = next(iter(sumfacts)) - - # Have "unvectorized" as an option, although it is not good - for opp in fixed_quad_vectorization_opportunity_generator(sumfacts.difference({sf_to_decide}), - width, - qp, - add_to_frozendict(already, - {sf_to_decide: sf_to_decide.copy(buffer=get_counted_variable("buffer"))} - ), - ): - yield opp - - horizontal = 1 - while horizontal <= width: - # Iterate over the possible combinations of sum factorization kernels - # taking into account all the permutations of kernels. This also includes - # combinations which use a padding of 1 - but only for pure horizontality. - generators = [it.permutations(sumfacts, horizontal)] - if horizontal >= 4: - generators.append(it.permutations(sumfacts, horizontal - 1)) - for combo in it.chain(*generators): - # The chosen kernels must be part of the kernels for recursion - # to work correctly - if sf_to_decide not in combo: - continue + # We store the information whether a vectorization opportunity has been yielded from this + # generator to yield an incomplete strategy if not (which is then completed with unvectorized + # kernel implementations) + yielded = False + + # Find the number of input coefficients we can work on + keys = frozenset(sf.inout_key for sf in sumfacts) + + inoutkey_sumfacts = [tuple(sorted(filter(lambda sf: sf.inout_key == key, sumfacts))) for key in sorted(keys)] + + for parallel in (1, 2): + if parallel > len(keys): + continue + + horizontal = 1 + while horizontal <= width // parallel: + combo = sum((inoutkey_sumfacts[part][:horizontal] for part in range(parallel)), ()) + + vecdict = get_vectorization_dict(combo, width // (horizontal * parallel), horizontal * parallel, qp) + horizontal *= 2 - # Set up the vectorization dict for this combo - vecdict = get_vectorization_dict(combo, width // horizontal, horizontal, qp) if vecdict is None: # This particular choice was rejected for some reason. 
# Possible reasons: # * the quadrature point tuple not being suitable # for this vectorization strategy + # * there are not enough horizontal kernels continue # Go into recursion to also vectorize all kernels not in this combo - for opp in fixed_quad_vectorization_opportunity_generator(sumfacts.difference(combo), - width, - qp, - add_to_frozendict(already, vecdict), - ): + for opp in _level2_optimal_vectorization_strategy_generator(list_diff(sumfacts, combo), + width, + qp, + add_to_frozendict(already, vecdict), + ): + yielded = True yield opp - horizontal = horizontal * 2 + # If we did not yield on this recursion level, yield what we got so far + if not yielded: + yield already def get_vectorization_dict(sumfacts, vertical, horizontal, qp): + # Discard opportunities that do not contain enough horizontal kernels + if len(sumfacts) not in (horizontal, horizontal - 1): + return None + # Enhance the list of sumfact nodes by adding vertical splittings kernels = [] for sf in sumfacts: diff --git a/python/dune/perftool/tools.py b/python/dune/perftool/tools.py index 8f41f3357374f4d9791edc898dd4a0274a0141fe..d5c0a18ebe8f7e32316aa459c959ab1eb9ba75f8 100644 --- a/python/dune/perftool/tools.py +++ b/python/dune/perftool/tools.py @@ -76,3 +76,31 @@ def add_to_frozendict(fd, valdict): t = dict(fd) t.update(valdict) return frozendict.frozendict(t) + + +def list_diff(l1, l2): + l = [] + for item in l1: + if item not in l2: + l.append(item) + return l + + +def get_leaf(element, index): + """ return a leaf element if the given element is a MixedElement """ + leaf_element = element + from ufl import MixedElement + if isinstance(element, MixedElement): + assert isinstance(index, int) + leaf_element = element.extract_component(index)[1] + + return leaf_element + + +def remove_duplicates(iterable): + """ Remove duplicates from an iterable while preserving the order """ + seen = set() + for i in iterable: + if i not in seen: + yield i + seen.add(i) diff --git 
a/python/dune/perftool/ufl/modified_terminals.py b/python/dune/perftool/ufl/modified_terminals.py index ac372d7fde8ba533fc52232f663a292026c2b7b4..bf3a6df939ddf787967b16c316cd2aaab308ce07 100644 --- a/python/dune/perftool/ufl/modified_terminals.py +++ b/python/dune/perftool/ufl/modified_terminals.py @@ -9,8 +9,8 @@ import ufl.classes as uc class Restriction: NONE = 0 - NEGATIVE = 1 - POSITIVE = 2 + POSITIVE = 1 + NEGATIVE = 2 class ModifiedArgument(Record): diff --git a/python/dune/perftool/ufl/preprocess.py b/python/dune/perftool/ufl/preprocess.py index 4564fee060d339c93a1f362ccdc7523d258e6856..19ca10359de05dec5154dc194d18a9233645664b 100644 --- a/python/dune/perftool/ufl/preprocess.py +++ b/python/dune/perftool/ufl/preprocess.py @@ -1,8 +1,27 @@ """ Preprocessing algorithms for UFL forms """ import ufl.classes as uc +import ufl.algorithms.apply_function_pullbacks as afp +from pytools import memoize + +class FunctionPullbackApplier(afp.FunctionPullbackApplier): + def argument(self, o): + return afp.apply_single_function_pullbacks(o) + + def coefficient(self, o): + if o.count() in (0, 1): + return afp.apply_single_function_pullbacks(o) + else: + return o + + +# Monkey patch the pullback applier from UFL +afp.FunctionPullbackApplier = FunctionPullbackApplier + + +@memoize def preprocess_form(form): from ufl.algorithms import compute_form_data formdata = compute_form_data(form, diff --git a/python/dune/perftool/ufl/transformations/__init__.py b/python/dune/perftool/ufl/transformations/__init__.py index dde8a965177b657a0ed9554302fa3e95c8bf7825..de66173a2a04bf3d5f5b7873d8444d230e665ca0 100644 --- a/python/dune/perftool/ufl/transformations/__init__.py +++ b/python/dune/perftool/ufl/transformations/__init__.py @@ -19,10 +19,10 @@ class UFLTransformationWrapper(object): return # Write out a dot file - from dune.perftool.options import get_option - if get_option("print_transformations"): + from dune.perftool.options import get_form_option + if 
get_form_option("print_transformations"): import os - dir = get_option("print_transformations_dir") + dir = get_form_option("print_transformations_dir") for i, exprtowrite in enumerate(expr): filename = "trafo_{}_{}_{}{}.dot".format(self.name, str(self.counter).zfill(4), "in" if before else "out", "_{}".format(i) if len(expr) > 1 else "") diff --git a/python/dune/perftool/ufl/transformations/blockpreconditioner.py b/python/dune/perftool/ufl/transformations/blockpreconditioner.py new file mode 100644 index 0000000000000000000000000000000000000000..c16c11520ccef27a5cf23d22ba387846b434b4a0 --- /dev/null +++ b/python/dune/perftool/ufl/transformations/blockpreconditioner.py @@ -0,0 +1,82 @@ +""" Derive block preconditioners from residual forms """ + +from dune.perftool.ufl.modified_terminals import Restriction + +from ufl.algorithms import MultiFunction +from ufl.algorithms.map_integrands import map_integrands + +import ufl.classes as uc +import itertools + + +class OffDiagonalBlockSwitcher(MultiFunction): + def __init__(self, restrictions): + self.restrictions = restrictions + self.res = Restriction.NONE + MultiFunction.__init__(self) + + def expr(self, o): + return self.reuse_if_untouched(o, *tuple(self(op) for op in o.ufl_operands)) + + def positive_restricted(self, o): + self.res = Restriction.POSITIVE + ret = self(o.ufl_operands[0]) + self.rest = Restriction.NONE + if isinstance(ret, uc.Zero): + return ret + else: + return o + + def negative_restricted(self, o): + self.res = Restriction.NEGATIVE + ret = self(o.ufl_operands[0]) + self.res = Restriction.NONE + if isinstance(ret, uc.Zero): + return ret + else: + return o + + def reference_value(self, o): + ret = self(o.ufl_operands[0]) + if isinstance(ret, uc.Zero): + return ret + else: + return o + + def argument(self, o): + if self.res == self.restrictions[o.number()]: + return o + else: + return uc.Zero(shape=o.ufl_shape, + free_indices=o.ufl_free_indices, + index_dimensions=o.ufl_index_dimensions) + + +def 
list_restriction_tuples(diagonal): + if diagonal: + yield (Restriction.NONE, Restriction.NONE) + + res = (Restriction.POSITIVE, Restriction.NEGATIVE) + amount = 1 if diagonal else 2 + + for rtup in itertools.product(res, res): + if len(set(rtup)) == amount: + yield rtup + + +def _block_jacobian(form, diagonal=True): + assert(len(form.arguments()) == 2) + + forms = [] + for rtup in list_restriction_tuples(diagonal): + forms.append(map_integrands(OffDiagonalBlockSwitcher(rtup), form)) + + return sum(forms) + + +def diagonal_block_jacobian(form): + return _block_jacobian(form) + + +def offdiagonal_block_jacobian(form): + return _block_jacobian(form, False) diff --git a/python/dune/perftool/ufl/transformations/indexpushdown.py b/python/dune/perftool/ufl/transformations/indexpushdown.py index 1dd7139d880d947cb8da33a26630cc008ce514f8..73b8d73a670f66517060ac61338a683da275d90e 100644 --- a/python/dune/perftool/ufl/transformations/indexpushdown.py +++ b/python/dune/perftool/ufl/transformations/indexpushdown.py @@ -16,9 +16,9 @@ class IndexPushDown(MultiFunction): terms = [uc.Indexed(self(term), idx) for term in get_operands(expr)] return construct_binary_operator(terms, uc.Sum) elif isinstance(expr, uc.Conditional): - return uc.Conditional(expr.ufl_operands[0], - uc.Indexed(self(expr.ufl_operands[1]), idx), - uc.Indexed(self(expr.ufl_operands[2]), idx) + return uc.Conditional(self(expr.ufl_operands[0]), + self(uc.Indexed(expr.ufl_operands[1], idx)), + self(uc.Indexed(expr.ufl_operands[2], idx)) ) else: # This is a normal indexed, we treat it as any other. 
diff --git a/python/dune/perftool/ufl/visitor.py b/python/dune/perftool/ufl/visitor.py index fcf2ee7728e96a57ebd94ada15966c992ff8120f..cbede6c30fa7d6754b58b57b662ba126a1dec6b5 100644 --- a/python/dune/perftool/ufl/visitor.py +++ b/python/dune/perftool/ufl/visitor.py @@ -13,8 +13,7 @@ from dune.perftool.ufl.modified_terminals import (ModifiedTerminalTracker, Restriction, ) from dune.perftool.tools import maybe_wrap_subscript -from dune.perftool.options import get_option -from dune.perftool.pdelab.parameter import name_paramclass, name_time +from dune.perftool.options import get_form_option from loopy import Reduction from pymbolic.primitives import (Call, @@ -61,7 +60,7 @@ class UFL2LoopyVisitor(ModifiedTerminalTracker): self.current_info = info expr = self._call(o, False) if expr != 0: - if get_option("simplify"): + if get_form_option("simplify"): from dune.perftool.sympy import simplify_pymbolic_expression expr = simplify_pymbolic_expression(expr) self.interface.generate_accumulation_instruction(expr, self) @@ -105,7 +104,7 @@ class UFL2LoopyVisitor(ModifiedTerminalTracker): # Correct the restriction on boundary integrals restriction = self.restriction if self.measure == 'exterior_facet': - restriction = Restriction.NEGATIVE + restriction = Restriction.POSITIVE leaf_element = o.ufl_element() # Select the correct leaf element in the case of this being a mixed finite element @@ -128,13 +127,13 @@ class UFL2LoopyVisitor(ModifiedTerminalTracker): return self.interface.pymbolic_basis(leaf_element, restriction, o.number()) def coefficient(self, o): + # Correct the restriction on boundary integrals + restriction = self.restriction + if self.measure == 'exterior_facet': + restriction = Restriction.POSITIVE + # Do something different for trial function and coefficients from jacobian apply if o.count() == 0 or o.count() == 1: - # Correct the restriction on boundary integrals - restriction = self.restriction - if self.measure == 'exterior_facet': - restriction = 
Restriction.NEGATIVE - self.interface.initialize_function_spaces(o, self) index = None @@ -161,31 +160,19 @@ class UFL2LoopyVisitor(ModifiedTerminalTracker): # In this case it represents the time variable elif o.count() == 2: - param = name_paramclass() - time = name_time() - name = param + "." + time - valuearg(name) - return Variable(name) - - # Check if this is a parameter function + # The base class 'InstationaryLocalOperatorDefaultMethods' stores the time + # and exports it through a getter method 'getTime' + return prim.Call(prim.Variable("getTime"), ()) else: - raise NotImplementedError("Handling non-symbolic parameter functions is currently reevaluated!") - # We expect all coefficients to be of type Expression! - assert isinstance(o, Expression) - - # Determine the name of the parameter function - name = get_global_context_value("data").object_names[id(o)] - - cellwise_constant = is_cellwise_constant(o) + if self.reference_grad: + raise PerftoolUFLError("Coefficient gradients should not be transformed to reference element") - # Trigger the generation of code for this thing in the parameter class - if o.on_intersection: - self.interface.intersection_parameter_function(name, o, cellwise_constant) - else: - self.interface.cell_parameter_function(name, o, self.restriction, cellwise_constant) + return self.interface.pymbolic_gridfunction(o, restriction, self.grad) - # And return a symbol - return Variable(name) + def variable(self, o): + # Right now only scalar varibables are supported + assert o.ufl_shape is () + return o.expression().value() # # Handlers for all indexing related stuff @@ -248,6 +235,8 @@ class UFL2LoopyVisitor(ModifiedTerminalTracker): if all(isinstance(i, int) for i in self.indices): index = self.indices[0] self.indices = self.indices[1:] + if len(self.indices) == 0: + self.indices = None return self.call(o.ufl_operands[index]) else: return self.interface.pymbolic_list_tensor(o) @@ -354,6 +343,22 @@ class 
UFL2LoopyVisitor(ModifiedTerminalTracker): def min_value(self, o): return self._minmax_impl(min, "min", tuple(self.call(op) for op in o.ufl_operands)) + def math_function(self, o): + # MathFunction is a base class for unary functions. We use this to provide + # custom functions. Such a custom functions inherits from it and defines the + # following methods: + # * visit: This function is called from here to delegate the visiting process + # to the user code. The only argument is this visitor instance. + # * derivative: It is called from UFL AD code to determine the derivative. + # Upstream documentation indicates that FEniCS allows the same + # (ab)use of the MathFunction node. + # Note that if the __init__ method of your function differs from MathFunction, + # you also need to implement the method _ufl_expr_reconstruct_ + if hasattr(o, "visit"): + return o.visit(self) + else: + raise NotImplementedError("Function {} is not known to dune-perftool.".format(o._name)) + # # Handler for conditionals, use pymbolic base implementation # @@ -365,7 +370,7 @@ class UFL2LoopyVisitor(ModifiedTerminalTracker): try: evaluated = eval(str(cond)) except: - return prim.If(self.call(o.ufl_operands[0]), + return prim.If(cond, self.call(o.ufl_operands[1]), self.call(o.ufl_operands[2])) @@ -425,6 +430,11 @@ class UFL2LoopyVisitor(ModifiedTerminalTracker): # The normal must be restricted to be well-defined assert self.restriction is not Restriction.NONE + # Note: In UFL the jump is defined as: jump(v) = v('+') - + # v('-'). The corresponding outer unit normal is + # n=FacetNormal(cell)('+'). In order to be consisten with UFL + # we need to create the outer unit normal if the restriction + # is positive. 
if self.restriction == Restriction.POSITIVE: return self.interface.pymbolic_unit_outer_normal() if self.restriction == Restriction.NEGATIVE: @@ -443,14 +453,14 @@ class UFL2LoopyVisitor(ModifiedTerminalTracker): def jacobian_inverse(self, o): restriction = self.restriction if self.measure == 'exterior_facet': - restriction = Restriction.NEGATIVE + restriction = Restriction.POSITIVE assert(len(self.indices) == 2) i, j = self.indices self.indices = None # Implement diagonal jacobians for unrolled matrices! - if get_option("diagonal_transformation_matrix"): + if get_form_option("diagonal_transformation_matrix"): if isinstance(i, int) and isinstance(j, int) and i != j: return 0 @@ -465,7 +475,7 @@ class UFL2LoopyVisitor(ModifiedTerminalTracker): def cell_volume(self, o): restriction = self.restriction if self.measure == 'exterior_facet': - restriction = Restriction.NEGATIVE + restriction = Restriction.POSITIVE return self.interface.pymbolic_cell_volume(restriction) diff --git a/python/loopy b/python/loopy index e4a05746af70ed6e6b7e5b91984f7303fe96f1f4..dedb956bd72a204a685e7aeb7788d1fa55969899 160000 --- a/python/loopy +++ b/python/loopy @@ -1 +1 @@ -Subproject commit e4a05746af70ed6e6b7e5b91984f7303fe96f1f4 +Subproject commit dedb956bd72a204a685e7aeb7788d1fa55969899 diff --git a/python/pymbolic b/python/pymbolic index 915ecb96c1eb60b82973e8cf695e4ffcb622c90a..ffecfaebf21dc8799cd5d007a969e659b255a1e3 160000 --- a/python/pymbolic +++ b/python/pymbolic @@ -1 +1 @@ -Subproject commit 915ecb96c1eb60b82973e8cf695e4ffcb622c90a +Subproject commit ffecfaebf21dc8799cd5d007a969e659b255a1e3 diff --git a/python/pytools b/python/pytools index e4dd13899c9161ce641c29c55973bfce3df52972..747a1c1fac3fb4f2067f00c1a670f5a7b963b396 160000 --- a/python/pytools +++ b/python/pytools @@ -1 +1 @@ -Subproject commit e4dd13899c9161ce641c29c55973bfce3df52972 +Subproject commit 747a1c1fac3fb4f2067f00c1a670f5a7b963b396 diff --git a/python/setup.py b/python/setup.py index 
ccbe9e56658fa87a0c574767f6c7f4282612a750..f193748a5e814895f23fee033c74c850aaf39573 100644 --- a/python/setup.py +++ b/python/setup.py @@ -44,6 +44,7 @@ setup(name='dune.perftool', cmdclass={'test': PyTest}, entry_points = { "console_scripts": [ - "ufl2pdelab = dune.perftool.compile:compile_form", + "generate_operators = dune.perftool.compile:entry_generate_operators", + "generate_driver = dune.perftool.compile:entry_generate_driver", ] }) diff --git a/python/ufl b/python/ufl index 962d56f65821fb9c50ca4a5a858882c472243431..5a9593c956fc843eee6ce3a2ae2b9cbc4aec62bf 160000 --- a/python/ufl +++ b/python/ufl @@ -1 +1 @@ -Subproject commit 962d56f65821fb9c50ca4a5a858882c472243431 +Subproject commit 5a9593c956fc843eee6ce3a2ae2b9cbc4aec62bf diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index d0e1df820ce14b12a2bada056ae67e8bce81c318..4be79371bc51e40c4334f0e91343c522d00d2147 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1,10 +1,11 @@ add_subdirectory(hyperbolic) add_subdirectory(heatequation) -add_subdirectory(laplace) +add_subdirectory(navier-stokes) add_subdirectory(nonlinear) add_subdirectory(poisson) add_subdirectory(stokes) add_subdirectory(sumfact) - -add_subdirectory(blockstructured) \ No newline at end of file +add_subdirectory(coeffeval) +add_subdirectory(blockstructured) +add_subdirectory(adjoint) diff --git a/test/adjoint/CMakeLists.txt b/test/adjoint/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..056264ebf8792544fc98b2fd4106a1493dfac54a --- /dev/null +++ b/test/adjoint/CMakeLists.txt @@ -0,0 +1,5 @@ +dune_add_formcompiler_system_test(UFLFILE poisson_mc.ufl + BASENAME adjoint_poisson_mc + INIFILE poisson_mc.ini + SOURCE poisson_mc_main.cc + ) diff --git a/test/adjoint/poisson_mc.ini b/test/adjoint/poisson_mc.ini new file mode 100644 index 0000000000000000000000000000000000000000..fe36a516f8cdd2a57a16e756a5cbfd770e3a5e79 --- /dev/null +++ b/test/adjoint/poisson_mc.ini @@ -0,0 +1,34 @@ +__name = 
adjoint_poisson_mc + +lowerleft = 0.0 0.0 +upperright = 1.0 1.0 +elements = 32 32 +elementType = simplical + +[wrapper.vtkcompare] +name = poisson_mc +extension = vtu + +[formcompiler] +operators = r, r_adjoint, r_control + +[formcompiler.r] +form = r +filename = poisson_mc_operator_r.hh +classname = ROperator + +[formcompiler.r_adjoint] +form = r +adjoint = 1 +objective_function = J +filename = poisson_mc_operator_r_adjoint.hh +classname = RAdjointOperator + +[formcompiler.r_control] +form = r +control = 1 +objective_function = J +control_variable = A, b, f +generate_jacobians = 0 +filename = poisson_mc_operator_r_control.hh +classname = RControlOperator diff --git a/test/adjoint/poisson_mc.ufl b/test/adjoint/poisson_mc.ufl new file mode 100644 index 0000000000000000000000000000000000000000..80fb03d0ef548788fd7417d8cd9402f5839ef98a --- /dev/null +++ b/test/adjoint/poisson_mc.ufl @@ -0,0 +1,20 @@ +cell = triangle + +x = SpatialCoordinate(cell) + +V = FiniteElement("CG", cell, 1) +u = TrialFunction(V) +v = TestFunction(V) + +g = x[0]*x[0] + x[1]*x[1] +A = as_matrix([[variable(1.0), variable(0.5)],[variable(1.2), variable(3.0)]]) +b = as_vector([variable(2.0), variable(4.2)]) +c = as_vector([-2.8, 1.7]) +f = variable(-4.0) + +r = (inner(A*grad(u), grad(v)) + inner(c,b)*u*v - f*v)*dx +forms = [r] + +J = inner(u,u)*dx +interpolate_expression = g +is_dirichlet = 1 \ No newline at end of file diff --git a/test/adjoint/poisson_mc_driver.hh b/test/adjoint/poisson_mc_driver.hh new file mode 100644 index 0000000000000000000000000000000000000000..d75f8b056ae11c39f299d243f04de10f866b00ea --- /dev/null +++ b/test/adjoint/poisson_mc_driver.hh @@ -0,0 +1,229 @@ +#ifndef POISSON_MC_DRIVER_HH +#define POISSON_MC_DRIVER_HH + + +#include "dune/pdelab/gridfunctionspace/vtk.hh" +#include "dune/pdelab/backend/istl.hh" +#include "dune/common/parametertreeparser.hh" +#include "dune/pdelab/stationary/linearproblem.hh" +#include "dune/testtools/gridconstruction.hh" +#include <random> 
+#include "dune/pdelab/function/callableadapter.hh" +#include "dune/alugrid/grid.hh" +#include "string" +#include "dune/perftool/vtkpredicate.hh" +#include "dune/pdelab/gridfunctionspace/gridfunctionadapter.hh" +#include "dune/common/parametertree.hh" +#include "dune/pdelab/gridoperator/gridoperator.hh" +#include "dune/grid/io/file/vtk/subsamplingvtkwriter.hh" +#include "dune/pdelab/common/functionutilities.hh" +#include "dune/pdelab/finiteelementmap/pkfem.hh" +#include "dune/pdelab/constraints/conforming.hh" +#include "dune/pdelab/function/discretegridviewfunction.hh" + +#include "poisson_mc_operator_r.hh" +#include "poisson_mc_operator_r_adjoint.hh" +#include "poisson_mc_operator_r_control.hh" + + +bool driver(int argc, char** argv){ + // Initialize basic stuff... + using RangeType = double; + Dune::ParameterTree initree; + Dune::ParameterTreeParser::readINITree(argv[1], initree); + + // Setup grid (view)... + using Grid = Dune::ALUGrid<2, 2, Dune::simplex, Dune::conforming>; + using GV = Grid::LeafGridView; + using DF = Grid::ctype; + IniGridFactory<Grid> factory(initree); + std::shared_ptr<Grid> grid = factory.getGrid(); + GV gv = grid->leafGridView(); + + // Set up finite element maps... + using P1_FEM = Dune::PDELab::PkLocalFiniteElementMap<GV, DF, RangeType, 1>; + P1_FEM p1_fem(gv); + + // Set up grid function spaces... + using VectorBackendP1 = Dune::PDELab::ISTL::VectorBackend<Dune::PDELab::ISTL::Blocking::none>; + using DirichletConstraintsAssember = Dune::PDELab::ConformingDirichletConstraints; + using P1_dirichlet_GFS = Dune::PDELab::GridFunctionSpace<GV, P1_FEM, DirichletConstraintsAssember, VectorBackendP1>; + P1_dirichlet_GFS p1_dirichlet_gfs_(gv, p1_fem); + p1_dirichlet_gfs_.name("p1_dirichlet_gfs_"); + + // Set up constraints container... 
+ using P1_dirichlet_GFS_CC = P1_dirichlet_GFS::ConstraintsContainer<RangeType>::Type; + P1_dirichlet_GFS_CC p1_dirichlet_gfs__cc; + p1_dirichlet_gfs__cc.clear(); + auto p1_bctype_lambda = [&](const auto& x){ return 1.0; }; + auto p1_bctype = Dune::PDELab::makeBoundaryConditionFromCallable(gv, p1_bctype_lambda); + Dune::PDELab::constraints(p1_bctype, p1_dirichlet_gfs_, p1_dirichlet_gfs__cc); + + // Set up grid grid operators... + using LOP_R = ROperator<P1_dirichlet_GFS, P1_dirichlet_GFS, RangeType>; + using MatrixBackend = Dune::PDELab::ISTL::BCRSMatrixBackend<>; + using GO_r = Dune::PDELab::GridOperator<P1_dirichlet_GFS, P1_dirichlet_GFS, LOP_R, MatrixBackend, DF, RangeType, RangeType, P1_dirichlet_GFS_CC, P1_dirichlet_GFS_CC>; + LOP_R lop_r(p1_dirichlet_gfs_, p1_dirichlet_gfs_, initree); + p1_dirichlet_gfs_.update(); + int generic_dof_estimate = 6 * p1_dirichlet_gfs_.maxLocalSize(); + int dofestimate = initree.get<int>("istl.number_of_nnz", generic_dof_estimate); + MatrixBackend mb(dofestimate); + GO_r go_r(p1_dirichlet_gfs_, p1_dirichlet_gfs__cc, p1_dirichlet_gfs_, p1_dirichlet_gfs__cc, lop_r, mb); + std::cout << "gfs with " << p1_dirichlet_gfs_.size() << " dofs generated "<< std::endl; + std::cout << "cc with " << p1_dirichlet_gfs__cc.size() << " dofs generated "<< std::endl; + + // Set up solution vectors... + using V_R = Dune::PDELab::Backend::Vector<P1_dirichlet_GFS,DF>; + V_R x_r(p1_dirichlet_gfs_); + x_r = 0.0; + auto lambda_0000 = [&](const auto& x){ return (double)x[1] * x[1] + x[0] * x[0]; }; + auto func_0000 = Dune::PDELab::makeGridFunctionFromCallable(gv, lambda_0000); + Dune::PDELab::interpolate(func_0000, p1_dirichlet_gfs_, x_r); + auto lambda_0001 = [&](const auto& x){ return 0.0; }; + auto func_0001 = Dune::PDELab::makeGridFunctionFromCallable(gv, lambda_0001); + + // Set up (non)linear solvers... 
+ using LinearSolver = Dune::PDELab::ISTLBackend_SEQ_SuperLU; + using SLP = Dune::PDELab::StationaryLinearProblemSolver<GO_r, LinearSolver, V_R>; + LinearSolver ls(false); + double reduction = initree.get<double>("reduction", 1e-12); + SLP slp(go_r, ls, x_r, reduction); + slp.apply(); + + // Do visualization... + using VTKWriter = Dune::SubsamplingVTKWriter<GV>; + Dune::RefinementIntervals subint(initree.get<int>("vtk.subsamplinglevel", 1)); + VTKWriter vtkwriter(gv, subint); + std::string vtkfile = initree.get<std::string>("wrapper.vtkcompare.name", "output"); + CuttingPredicate predicate; + Dune::PDELab::addSolutionToVTKWriter(vtkwriter, p1_dirichlet_gfs_, x_r, Dune::PDELab::vtk::defaultNameScheme(), predicate); + vtkwriter.write(vtkfile, Dune::VTK::ascii); + + + //===============================================================// + // ___ _ _ _ _ _____ _ __ __ // + // / _ \ | (_) (_) | | / ___| | / _|/ _| // + // / /_\ \ __| |_ ___ _ _ __ | |_ \ `--.| |_ _ _| |_| |_ // + // | _ |/ _` | |/ _ \| | '_ \| __| `--. 
\ __| | | | _| _| // + // | | | | (_| | | (_) | | | | | |_ /\__/ / |_| |_| | | | | // + // \_| |_/\__,_| |\___/|_|_| |_|\__| \____/ \__|\__,_|_| |_| // + // _/ | // + // |__/ // + //===============================================================// + + std::cout << std::endl << "Adjoint Stuff" << std::endl << std::endl; + + //=========// + // Adjoint // + //=========// + + // The adjoint needs the solution of the forward problem as DiscreteGridViewFunction + using GF_X = Dune::PDELab::DiscreteGridViewFunction<P1_dirichlet_GFS, V_R>; + GF_X x_gf(p1_dirichlet_gfs_, x_r); + + // Local operator for adjoint problem + using LOP_Adjoint = RAdjointOperator<P1_dirichlet_GFS, P1_dirichlet_GFS, RangeType, GF_X>; + LOP_Adjoint lop_adjoint(p1_dirichlet_gfs_, p1_dirichlet_gfs_, initree, x_gf); + + // Grid operator for adjoint problem + using GO_Adjoint = Dune::PDELab::GridOperator<P1_dirichlet_GFS, P1_dirichlet_GFS, LOP_Adjoint, MatrixBackend, DF, RangeType, RangeType, P1_dirichlet_GFS_CC, P1_dirichlet_GFS_CC>; + GO_Adjoint go_adjoint(p1_dirichlet_gfs_, p1_dirichlet_gfs__cc, p1_dirichlet_gfs_, p1_dirichlet_gfs__cc, lop_adjoint, mb); + + // Boundary condition + using V_Adjoint = GO_Adjoint::Traits::Domain; + V_Adjoint x_adjoint(p1_dirichlet_gfs_); + x_adjoint = 0.0; + + // Solve problem + using SLP_Adjoint = Dune::PDELab::StationaryLinearProblemSolver<GO_Adjoint, LinearSolver, V_Adjoint>; + SLP_Adjoint slp_adjoint(go_adjoint, ls, x_adjoint, reduction); + slp_adjoint.apply(); + + // print_l2_norm(p1_dirichlet_gfs_, x_adjoint, gv); + using Dune::PDELab::Backend::native; + std::cout << "Norm of adjoint vector: " << native(x_adjoint).two_norm() << std::endl; + + //=========// + // Control // + //=========// + + // The control problem needs the solution of the adjoint problem as DiscreteGridViewFunction + using GF_Adjoint = Dune::PDELab::DiscreteGridViewFunction<P1_dirichlet_GFS, V_Adjoint>; + GF_Adjoint gf_adjoint(p1_dirichlet_gfs_, x_adjoint); + + // Derivative of objective function 
w.r.t. the control + using DJDM = std::vector<RangeType>; + DJDM dJdm(7,0.0); + + // Local operator for control problem + using LOP_Control = RControlOperator<P1_dirichlet_GFS, P1_dirichlet_GFS, RangeType, GF_Adjoint, DJDM>; + LOP_Control lop_control(p1_dirichlet_gfs_, p1_dirichlet_gfs_, initree, gf_adjoint, dJdm); + + // Grid operator for control problem + // + // Note: Create without constraints container. We don't want to + // apply any Dirichlet constraints here (this would mean setting the + // corresponding values of the residual vector to zero). + // + // Note: Having a GFS that was constructed with Dirichlet + // constraints and then creating a GO without constraints works. + using GO_Control = Dune::PDELab::GridOperator<P1_dirichlet_GFS, P1_dirichlet_GFS, LOP_Control, MatrixBackend, DF, RangeType, RangeType>; + GO_Control go_control(p1_dirichlet_gfs_, p1_dirichlet_gfs_, lop_control, mb); + + // Calculate dJdm + using V_Control = GO_Control::Traits::Domain; + V_Control r_control(p1_dirichlet_gfs_); + r_control = 0.0; + go_control.residual(x_r, r_control); + + //========================================// + // Print derivative of objective function // + //========================================// + + std::cout << std::endl; + std::cout << "Derivatives of objective function: " << std::setprecision(20) + << dJdm[0] << " " + << dJdm[1] << " " + << dJdm[2] << " " + << dJdm[3] << " " + << dJdm[4] << " " + << dJdm[5] << " " + << dJdm[6] << " " + << std::endl; + std::cout << std::endl; + + //==================================================================================// + // _____ _ ___ _ _ _ _ _____ _ __ __ // + // | ___| | | / _ \ | (_) (_) | | / ___| | / _|/ _| // + // | |__ _ __ __| | / /_\ \ __| |_ ___ _ _ __ | |_ \ `--.| |_ _ _| |_| |_ // + // | __| '_ \ / _` | | _ |/ _` | |/ _ \| | '_ \| __| `--.
\ __| | | | _| _| // + // | |__| | | | (_| | | | | | (_| | | (_) | | | | | |_ /\__/ / |_| |_| | | | | // + // \____/_| |_|\__,_| \_| |_/\__,_| |\___/|_|_| |_|\__| \____/ \__|\__,_|_| |_| // + // _/ | // + // |__/ // + //==================================================================================// + + // Compare with results from dolfin-adjoint: + using std::abs; + bool fail = false; + if (abs(dJdm[0]- 0.02895684)>1e-3) + fail = true; + if (abs(dJdm[1]- 0.00173435)>1e-3) + fail = true; + if (abs(dJdm[2]- 0.00173435)>1e-3) + fail = true; + if (abs(dJdm[3]- 0.03019001)>1e-3) + fail = true; + if (abs(dJdm[4]- 0.05060596)>1e-3) + fail = true; + if (abs(dJdm[5]- -0.03072505)>1e-3) + fail = true; + if (abs(dJdm[6]- 0.0236605)>1e-3) + fail = true; + + return fail; + +} + + +#endif // POISSON_MC_DRIVER_HH diff --git a/cmake/modules/StandardMain.cmake b/test/adjoint/poisson_mc_main.cc similarity index 96% rename from cmake/modules/StandardMain.cmake rename to test/adjoint/poisson_mc_main.cc index 028c2efc0b208705845326fc9a621697c72ec408..00ff73821febe039cb4508b4d578426c83332ce9 100644 --- a/cmake/modules/StandardMain.cmake +++ b/test/adjoint/poisson_mc_main.cc @@ -5,7 +5,7 @@ #include <dune/common/parallel/mpihelper.hh> #include <dune/common/exceptions.hh> -#include"@GEN_DRIVER@" +#include"poisson_mc_driver.hh" int main(int argc, char** argv) { diff --git a/test/blockstructured/nonlinear/nonlinear.mini b/test/blockstructured/nonlinear/nonlinear.mini index 18b3e9adc9328b7073d87c851582847eea4d2fef..5e9835a0311a98e815e7c54e51a02714b3bef9ee 100644 --- a/test/blockstructured/nonlinear/nonlinear.mini +++ b/test/blockstructured/nonlinear/nonlinear.mini @@ -10,5 +10,7 @@ extension = vtu [formcompiler] compare_l2errorsquared = 6e-4 + +[formcompiler.r] blockstructured = 1 -number_of_blocks = 5 \ No newline at end of file +number_of_blocks = 5 diff --git a/test/blockstructured/nonlinear/nonlinear.ufl b/test/blockstructured/nonlinear/nonlinear.ufl index
6187bbeb86d8390f578d337727089c254ff4ff06..d43cc0205dfeab043b3ca843b83fab4af435699f 100644 --- a/test/blockstructured/nonlinear/nonlinear.ufl +++ b/test/blockstructured/nonlinear/nonlinear.ufl @@ -9,7 +9,7 @@ V = FiniteElement("CG", cell, 1) u = TrialFunction(V) v = TestFunction(V) -forms = [(inner(grad(u), grad(v)) + u*u*v - f*v)*dx] -dirichlet_expression = g +r = (inner(grad(u), grad(v)) + u*u*v - f*v)*dx +interpolate_expression = g exact_solution = g is_dirichlet = 1 diff --git a/test/blockstructured/poisson/3d/poisson.mini b/test/blockstructured/poisson/3d/poisson.mini index 441fbe43ec06fa9095af4f28e310d53c2777bdea..e9e34187a447198666d8de5a11e77e54aaa4ebb1 100644 --- a/test/blockstructured/poisson/3d/poisson.mini +++ b/test/blockstructured/poisson/3d/poisson.mini @@ -10,9 +10,11 @@ reference = poisson_ref extension = vtu [formcompiler] -numerical_jacobian = 1, 0 | expand num exact_solution_expression = g compare_l2errorsquared = 1e-7 + +[formcompiler.r] +numerical_jacobian = 1, 0 | expand num blockstructured = 1 number_of_blocks = 3 diff --git a/test/blockstructured/poisson/3d/poisson.ufl b/test/blockstructured/poisson/3d/poisson.ufl index 562606820cf377a80cad2a0826f5207d4f5a2a5d..d0ebc7125e26ead1170cee0891dd628706a7ab11 100644 --- a/test/blockstructured/poisson/3d/poisson.ufl +++ b/test/blockstructured/poisson/3d/poisson.ufl @@ -10,7 +10,7 @@ u = TrialFunction(V) v = TestFunction(V) -forms = [(inner(grad(u), grad(v)) - f*v)*dx] -dirichlet_expression = g +r = (inner(grad(u), grad(v)) - f*v)*dx +interpolate_expression = g exact_solution = g is_dirichlet = 1 diff --git a/test/blockstructured/poisson/poisson.mini b/test/blockstructured/poisson/poisson.mini index 98d4bd2636150eea063bc0259349f069d88473f9..3e7cdca908f73171b35e7b8cd120ee57e08ca485 100644 --- a/test/blockstructured/poisson/poisson.mini +++ b/test/blockstructured/poisson/poisson.mini @@ -10,7 +10,9 @@ reference = poisson_ref extension = vtu [formcompiler] -numerical_jacobian = 1, 0 | expand num 
compare_l2errorsquared = 1e-7 + +[formcompiler.r] +numerical_jacobian = 1, 0 | expand num blockstructured = 1 number_of_blocks = 3 diff --git a/test/blockstructured/poisson/poisson.ufl b/test/blockstructured/poisson/poisson.ufl index d8a7ef0ce23d9cc7cf96445509530af750199743..f4f7145ca85de1f39952ee090a51b9b207aa29be 100644 --- a/test/blockstructured/poisson/poisson.ufl +++ b/test/blockstructured/poisson/poisson.ufl @@ -9,7 +9,7 @@ V = FiniteElement("CG", cell, 2) u = TrialFunction(V) v = TestFunction(V) -forms = [(inner(grad(u), grad(v)) - f*v)*dx] -dirichlet_expression = g +r = (inner(grad(u), grad(v)) - f*v)*dx +interpolate_expression = g exact_solution = g is_dirichlet = 1 diff --git a/test/blockstructured/poisson/poisson_matrix_free.mini b/test/blockstructured/poisson/poisson_matrix_free.mini index a5f2fecc732804cec7b9e64f2186033b8aeeba1e..e6b9af12a01c46fe8d6361050a341b7769a3e484 100644 --- a/test/blockstructured/poisson/poisson_matrix_free.mini +++ b/test/blockstructured/poisson/poisson_matrix_free.mini @@ -9,7 +9,9 @@ reference = poisson_ref extension = vtu [formcompiler] -matrix_free = 1 compare_l2errorsquared = 1e-7 + +[formcompiler.r] +matrix_free = 1 blockstructured = 1 number_of_blocks = 4 \ No newline at end of file diff --git a/test/blockstructured/poisson/poisson_neumann.mini b/test/blockstructured/poisson/poisson_neumann.mini index 93272e18a46bb9b6e4c3e1ee19894846fd330784..1512f88bc1ba25141e121de948da3aed06933b97 100644 --- a/test/blockstructured/poisson/poisson_neumann.mini +++ b/test/blockstructured/poisson/poisson_neumann.mini @@ -10,7 +10,9 @@ reference = poisson_ref extension = vtu [formcompiler] -numerical_jacobian = 1, 0 | expand num compare_l2errorsquared = 1e-8 + +[formcompiler.r] +numerical_jacobian = 1, 0 | expand num blockstructured = 1 number_of_blocks = 4 diff --git a/test/blockstructured/poisson/poisson_neumann.ufl b/test/blockstructured/poisson/poisson_neumann.ufl index 
867403d8920c6869eee6721a943c3cd6199c57b4..9fd7f4c625284a9fe3276f80881dbdc873b31fad 100644 --- a/test/blockstructured/poisson/poisson_neumann.ufl +++ b/test/blockstructured/poisson/poisson_neumann.ufl @@ -16,7 +16,7 @@ v = TestFunction(V) # Define the boundary measure that knows where we are... ds = ds(subdomain_data=bctype) -forms = [(inner(grad(u), grad(v)) - f*v)*dx - j*v*ds(0)] -dirichlet_expression = g +r = (inner(grad(u), grad(v)) - f*v)*dx - j*v*ds(0) +interpolate_expression = g exact_solution = g is_dirichlet = bctype diff --git a/test/blockstructured/poisson/poisson_tensor.mini b/test/blockstructured/poisson/poisson_tensor.mini index 44ad633559f6d7809db7b42298a3e5613161017a..ff6426b9129d3d0028047f7d1413a4f17761d6df 100644 --- a/test/blockstructured/poisson/poisson_tensor.mini +++ b/test/blockstructured/poisson/poisson_tensor.mini @@ -10,7 +10,9 @@ reference = poisson_ref extension = vtu [formcompiler] -numerical_jacobian = 1, 0 | expand num compare_l2errorsquared = 1e-7 + +[formcompiler.r] +numerical_jacobian = 1, 0 | expand num blockstructured = 1 number_of_blocks = 4 diff --git a/test/blockstructured/poisson/poisson_tensor.ufl b/test/blockstructured/poisson/poisson_tensor.ufl index df8bcbab312335e8e0c8e1160a145288187318fc..239e14fca9550031cd53994d9528cb0c41e424a6 100644 --- a/test/blockstructured/poisson/poisson_tensor.ufl +++ b/test/blockstructured/poisson/poisson_tensor.ufl @@ -12,7 +12,7 @@ V = FiniteElement("CG", cell, 1) u = TrialFunction(V) v = TestFunction(V) -forms = [(inner(A*grad(u), grad(v)) + c*u*v -f*v)*dx] -dirichlet_expression = g +r = (inner(A*grad(u), grad(v)) + c*u*v -f*v)*dx +interpolate_expression = g exact_solution = g is_dirichlet = 1 diff --git a/test/blockstructured/stokes/stokes.mini b/test/blockstructured/stokes/stokes.mini index ae59e3ef3ca1c12f74065e9c6c2ec361b495feb2..532a4159b6b6019525acdea50da3c18f14e22f9a 100644 --- a/test/blockstructured/stokes/stokes.mini +++ b/test/blockstructured/stokes/stokes.mini @@ -1,7 +1,7 @@ 
__name = blockstructured_stokes_{__exec_suffix} __exec_suffix = symdiff, numdiff | expand num -cells = 5 5 +cells = 20 20 extension = 1. 1. [wrapper.vtkcompare] @@ -10,7 +10,9 @@ reference = hagenpoiseuille_ref extension = vtu [formcompiler] -numerical_jacobian = 0, 1 | expand num compare_l2errorsquared = 1e-9 + +[formcompiler.r] +numerical_jacobian = 0, 1 | expand num blockstructured = 1 -number_of_blocks = 3 \ No newline at end of file +number_of_blocks = 3 diff --git a/test/blockstructured/stokes/stokes.ufl b/test/blockstructured/stokes/stokes.ufl index c8f630b84aae43a68221ace670fe9fcb027af47d..4411e791138cf85824f24fc6549d52cf6ef1af0d 100644 --- a/test/blockstructured/stokes/stokes.ufl +++ b/test/blockstructured/stokes/stokes.ufl @@ -13,7 +13,6 @@ u, p = TrialFunctions(TH) r = (inner(grad(v), grad(u)) - div(v)*p - q*div(u))*dx -forms = [r] is_dirichlet = v_bctype, v_bctype, 0 -dirichlet_expression = g_v, None +interpolate_expression = g_v, None exact_solution = g_v, 8.*(1.-x[0]) diff --git a/test/coeffeval/CMakeLists.txt b/test/coeffeval/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..7ca549c996a2411ddd2746474fef1c7732f6566d --- /dev/null +++ b/test/coeffeval/CMakeLists.txt @@ -0,0 +1,5 @@ +dune_add_formcompiler_system_test(UFLFILE poisson.ufl + SOURCE coeffeval_poisson.cc + INIFILE coeffeval_poisson.mini + BASENAME coeffeval_poisson + ) diff --git a/test/coeffeval/coeffeval_poisson.cc b/test/coeffeval/coeffeval_poisson.cc new file mode 100644 index 0000000000000000000000000000000000000000..d4cd8e0d89850879394681162710f54e458a2bb3 --- /dev/null +++ b/test/coeffeval/coeffeval_poisson.cc @@ -0,0 +1,153 @@ +#include"config.h" + +#include "dune/common/parametertreeparser.hh" +#include "dune/pdelab/gridfunctionspace/gridfunctionadapter.hh" +#include "dune/pdelab/constraints/conforming.hh" +#include "dune/pdelab/backend/istl.hh" +#include "dune/pdelab/gridfunctionspace/vtk.hh" +#include "dune/common/parametertree.hh" +#include 
"dune/testtools/gridconstruction.hh" +#include "dune/pdelab/finiteelementmap/pkfem.hh" +#include <random> +#include "dune/pdelab/function/callableadapter.hh" +#include "dune/perftool/vtkpredicate.hh" +#include <string> +#include "dune/alugrid/grid.hh" +#include "dune/pdelab/common/functionutilities.hh" +#include "dune/pdelab/gridoperator/gridoperator.hh" +#include "dune/pdelab/stationary/linearproblem.hh" +#include "dune/grid/io/file/vtk/subsamplingvtkwriter.hh" +#include "dune/pdelab/function/discretegridviewfunction.hh" + +#if OPERATOR == 1 +#include "poisson_grad_localoperator.hh" +#endif + +#if OPERATOR == 0 +#include "poisson_nongrad_localoperator.hh" +#endif + +int main(int argc, char** argv) +{ + // MPI helper stuff + Dune::MPIHelper& helper = Dune::MPIHelper::instance(argc, argv); + + // Parse the ini file + Dune::ParameterTree initree; + Dune::ParameterTreeParser::readINITree(argv[1], initree); + + // Build a grid + using Grid = Dune::ALUGrid<2, 2, Dune::simplex, Dune::conforming>; + using GV = Grid::LeafGridView; + IniGridFactory<Grid> factory(initree); + std::shared_ptr<Grid> grid = factory.getGrid(); + GV gv = grid->leafGridView(); + + // General types and stuff + using DF = Grid::ctype; + using RangeType = double; + + // Finite Element Maps + using P1_FEM = Dune::PDELab::PkLocalFiniteElementMap<GV, DF, RangeType, 1>; + using P2_FEM = Dune::PDELab::PkLocalFiniteElementMap<GV, DF, RangeType, 2>; + P1_FEM p1_fem(gv); + P2_FEM p2_fem(gv); + + // Grid Function Spaces + using VectorBackend = Dune::PDELab::ISTL::VectorBackend<Dune::PDELab::ISTL::Blocking::none>; + using DirichletConstraintsAssember = Dune::PDELab::ConformingDirichletConstraints; + using P1_dirichlet_GFS = Dune::PDELab::GridFunctionSpace<GV, P1_FEM, DirichletConstraintsAssember, VectorBackend>; + using P2_GFS = Dune::PDELab::GridFunctionSpace<GV, P2_FEM, DirichletConstraintsAssember, VectorBackend>; + P1_dirichlet_GFS p1_dirichlet_gfs_(gv, p1_fem); + P2_GFS p2_gfs(gv, p2_fem); + 
p1_dirichlet_gfs_.name("p1_dirichlet_gfs_"); + p1_dirichlet_gfs_.update(); + std::cout << "gfs with " << p1_dirichlet_gfs_.size() << " dofs generated "<< std::endl; + + // Solution vectors / Grid Functions + using V_R = Dune::PDELab::Backend::Vector<P1_dirichlet_GFS,DF>; + using V2 = Dune::PDELab::Backend::Vector<P2_GFS,DF>; + V_R x_r(p1_dirichlet_gfs_); + V2 c(p2_gfs); + + using GF = Dune::PDELab::DiscreteGridViewFunction<P2_GFS, V2>; + GF c_gf(p2_gfs, c); + + // Local Operator + using LOP_R = PoissonLocalOperator<P1_dirichlet_GFS, P1_dirichlet_GFS, RangeType, GF>; + LOP_R lop_r(p1_dirichlet_gfs_, p1_dirichlet_gfs_, initree, c_gf); + + // Constraints stuff + using P1_dirichlet_GFS_CC = P1_dirichlet_GFS::ConstraintsContainer<RangeType>::Type; + P1_dirichlet_GFS_CC p1_dirichlet_gfs__cc; + p1_dirichlet_gfs__cc.clear(); + auto p1_bctype_lambda = [&](const auto& x){ return 1.0; }; + auto p1_bctype = Dune::PDELab::makeBoundaryConditionFromCallable(gv, p1_bctype_lambda); + Dune::PDELab::constraints(p1_bctype, p1_dirichlet_gfs_, p1_dirichlet_gfs__cc); + std::cout << "cc with " << p1_dirichlet_gfs__cc.size() << " dofs generated "<< std::endl; + + // Matrix Backend + using MatrixBackend = Dune::PDELab::ISTL::BCRSMatrixBackend<>; + int generic_dof_estimate = 6 * p1_dirichlet_gfs_.maxLocalSize(); + int dofestimate = initree.get<int>("istl.number_of_nnz", generic_dof_estimate); + MatrixBackend mb(dofestimate); + + // Grid Operator + using GO_r = Dune::PDELab::GridOperator<P1_dirichlet_GFS, P1_dirichlet_GFS, LOP_R, MatrixBackend, DF, RangeType, RangeType, P1_dirichlet_GFS_CC, P1_dirichlet_GFS_CC>; + GO_r go_r(p1_dirichlet_gfs_, p1_dirichlet_gfs__cc, p1_dirichlet_gfs_, p1_dirichlet_gfs__cc, lop_r, mb); + + // Solver + using LinearSolver = Dune::PDELab::ISTLBackend_SEQ_SuperLU; + LinearSolver ls(false); + using SLP = Dune::PDELab::StationaryLinearProblemSolver<GO_r, LinearSolver, V_R>; + + // Interpolation + auto lambda_0000 = [&](const auto& x){ return (double)exp((-1.0) * ((0.5 
- x[1]) * (0.5 - x[1]) + (0.5 - x[0]) * (0.5 - x[0]))); }; + auto func_0000 = Dune::PDELab::makeGridFunctionFromCallable(gv, lambda_0000); + Dune::PDELab::interpolate(func_0000, p1_dirichlet_gfs_, x_r); + + auto lambda_0001 = [&](const auto& x){ return (0.5-x[0])*(0.5-x[0]) + (0.5-x[1])*(0.5-x[1]); }; + auto func_0001 = Dune::PDELab::makeGridFunctionFromCallable(gv, lambda_0001); + Dune::PDELab::interpolate(func_0001, p2_gfs, c); + + // Solving + double reduction = initree.get<double>("reduction", 1e-12); + SLP slp(go_r, ls, x_r, reduction); + slp.apply(); + + // VTK visualization + using VTKWriter = Dune::SubsamplingVTKWriter<GV>; + Dune::RefinementIntervals subint(initree.get<int>("vtk.subsamplinglevel", 1)); + VTKWriter vtkwriter(gv, subint); + std::string vtkfile = initree.get<std::string>("wrapper.vtkcompare.name", "output"); + CuttingPredicate predicate; + Dune::PDELab::addSolutionToVTKWriter(vtkwriter, p1_dirichlet_gfs_, x_r, Dune::PDELab::vtk::defaultNameScheme(), predicate); + vtkwriter.write(vtkfile, Dune::VTK::ascii); + + // Error calculation + using P1_DIRICHLET_GFS__DGF = Dune::PDELab::DiscreteGridFunction<decltype(p1_dirichlet_gfs_),decltype(x_r)>; + P1_DIRICHLET_GFS__DGF p1_dirichlet_gfs__dgf(p1_dirichlet_gfs_,x_r); + using DifferenceSquaredAdapter_ = Dune::PDELab::DifferenceSquaredAdapter<decltype(func_0000), decltype(p1_dirichlet_gfs__dgf)>; + DifferenceSquaredAdapter_ dsa_(func_0000, p1_dirichlet_gfs__dgf); + RangeType l2error(0.0); + { + // L2 error squared of difference between numerical + // solution and the interpolation of exact solution + // for treepath () + typename P1_DIRICHLET_GFS__DGF::Traits::RangeType err(0.0); + Dune::PDELab::integrateGridFunction(dsa_, err, 10); + + l2error += err; + if (gv.comm().rank() == 0){ + std::cout << "L2 Error for treepath : " << err << std::endl; + }} + bool testfail(false); + using std::abs; + using std::isnan; + if (gv.comm().rank() == 0){ + std::cout << "\nl2errorsquared: " << l2error << std::endl << 
std::endl; + } + if (isnan(l2error) or abs(l2error)>1e-7) + testfail = true; + return testfail; +} diff --git a/test/coeffeval/coeffeval_poisson.mini b/test/coeffeval/coeffeval_poisson.mini new file mode 100644 index 0000000000000000000000000000000000000000..e2ec630c49cc25886022aac1a652a81d761a0513 --- /dev/null +++ b/test/coeffeval/coeffeval_poisson.mini @@ -0,0 +1,22 @@ +__name = coeffeval_poisson_{__exec_suffix} +__exec_suffix = {grad_suffix} + +grad_suffix = grad, nongrad | expand grad + +lowerleft = 0.0 0.0 +upperright = 1.0 1.0 +elements = 32 32 +elementType = simplical + +[formcompiler] +compare_l2errorsquared = 1e-7 + +[formcompiler.r] +classname = PoissonLocalOperator +filename = poisson_{grad_suffix}_localoperator.hh + +[formcompiler.ufl_variants] +use_grad = 1, 0 | expand grad + +[__static] +OPERATOR = 1, 0 | expand grad diff --git a/test/coeffeval/poisson.ufl b/test/coeffeval/poisson.ufl new file mode 100644 index 0000000000000000000000000000000000000000..378d9267fadebc0e6d5f63ba6cb7fef7ad5a2b40 --- /dev/null +++ b/test/coeffeval/poisson.ufl @@ -0,0 +1,21 @@ +cell = triangle + +x = SpatialCoordinate(cell) + +V = FiniteElement("CG", cell, 1) +u = TrialFunction(V) +v = TestFunction(V) + +P2 = FiniteElement("CG", cell, 2) +c = Coefficient(P2) + +if use_grad: + # This is a simple trick to test gradients of coefficients in an easy setting. + # The driver interpolates c = (0.5-x0)^2 + (0.5-x1)^2, for which the equation + # below holds exactly. That means we test evaluation of c in terms of evaluation + # of its gradient!
+ c = 0.25 * (grad(c)[0] * grad(c)[0] + grad(c)[1] * grad(c)[1]) + +f = 4*(1.-c)*exp(-1.*c) + +r = (inner(grad(u), grad(v)) - f*v)*dx diff --git a/test/heatequation/heatequation.mini b/test/heatequation/heatequation.mini index 6059854ec5b6c09a97d06712b928d1e7044ef3af..24a699673927de379260c45e50ea6b707dde95be 100644 --- a/test/heatequation/heatequation.mini +++ b/test/heatequation/heatequation.mini @@ -14,6 +14,7 @@ extension = vtu [formcompiler] explicit_time_stepping = 0, 1 | expand scheme compare_l2errorsquared = 1e-7 +operators = mass, poisson # Disable explicit tests for now {__exec_suffix} == explicit | exclude diff --git a/test/heatequation/heatequation.ufl b/test/heatequation/heatequation.ufl index 9fe2e20bff8bead3b823bf86c9cb4b4372f155c9..8a4ef977b3bd956c86df1f2aff8acb43d7ebd45b 100644 --- a/test/heatequation/heatequation.ufl +++ b/test/heatequation/heatequation.ufl @@ -13,7 +13,6 @@ v = TestFunction(V) mass = (u*v)*dx poisson = (inner(grad(u), grad(v)) - f*v)*dx -forms = [mass, poisson] -dirichlet_expression = g +interpolate_expression = g is_dirichlet = 1 exact_solution = g \ No newline at end of file diff --git a/test/heatequation/heatequation_dg.mini b/test/heatequation/heatequation_dg.mini index 3c21abe6c6a6f4ef80563db46ff49d83fdfdc17f..169be57b4a3d485505cc47ff132aa3cabecee212 100644 --- a/test/heatequation/heatequation_dg.mini +++ b/test/heatequation/heatequation_dg.mini @@ -14,6 +14,7 @@ extension = vtu [formcompiler] explicit_time_stepping = 0, 1 | expand scheme compare_l2errorsquared = 1e-7 +operators = mass, poisson # Disable explicit tests for now {__exec_suffix} == explicit | exclude diff --git a/test/heatequation/heatequation_dg.ufl b/test/heatequation/heatequation_dg.ufl index 8dfd74302e57c53f3672d4a0e25592dd0c4cd34c..1d113a88f35b8404d7129a2fe5a2494f060e394b 100644 --- a/test/heatequation/heatequation_dg.ufl +++ b/test/heatequation/heatequation_dg.ufl @@ -1,12 +1,13 @@ cell = triangle +degree = 1 +dim = 2 x = SpatialCoordinate(cell) - c = 
(0.5-x[0])**2 + (0.5-x[1])**2 g = exp(-1.*c) f = 4*(1.-c)*g -V = FiniteElement("DG", cell, 1) +V = FiniteElement("DG", cell, degree) u = TrialFunction(V) v = TestFunction(V) @@ -14,25 +15,28 @@ v = TestFunction(V) n = FacetNormal(cell)('+') # penalty factor -gamma = 1.0 +alpha = 1.0 +h_ext = CellVolume(cell) / FacetArea(cell) +gamma_ext = (alpha * degree * (degree + dim - 1)) / h_ext +h_int = Min(CellVolume(cell)('+'), CellVolume(cell)('-')) / FacetArea(cell) +gamma_int = (alpha * degree * (degree + dim - 1)) / h_int # SIPG: -1.0, IIPG: 0.0, NIPG: 1.0 theta = 1.0 poisson = inner(grad(u), grad(v))*dx \ - + inner(n, avg(grad(u)))*jump(v)*dS \ - + gamma*jump(u)*jump(v)*dS \ - - theta*jump(u)*inner(avg(grad(v)), n)*dS \ + - f*v*dx \ + - inner(n, avg(grad(u)))*jump(v)*dS \ + + gamma_int*jump(u)*jump(v)*dS \ + + theta*jump(u)*inner(avg(grad(v)), n)*dS \ - inner(n, grad(u))*v*ds \ - + gamma*u*v*ds \ + + gamma_ext*u*v*ds \ + theta*u*inner(grad(v), n)*ds \ - - f*v*dx \ - - theta*g*inner(grad(v), n)*ds \ - - gamma*g*v*ds + - gamma_ext*g*v*ds \ + - theta*g*inner(grad(v), n)*ds mass = (u*v)*dx -forms = [mass, poisson] -dirichlet_expression = g +interpolate_expression = g is_dirichlet = 1 exact_solution = g \ No newline at end of file diff --git a/test/heatequation/heatequation_time_dependent_bc.mini b/test/heatequation/heatequation_time_dependent_bc.mini index 191cc52f1c9ff806496affb5eb6a7d8ebbdb25d9..762951ac66010cdc35f7c37c59873d22a72c7eaa 100644 --- a/test/heatequation/heatequation_time_dependent_bc.mini +++ b/test/heatequation/heatequation_time_dependent_bc.mini @@ -14,6 +14,7 @@ extension = vtu [formcompiler] explicit_time_stepping = 0, 1 | expand scheme compare_l2errorsquared = 2e-4 +operators = mass, poisson [instat] T = 1.0 diff --git a/test/heatequation/heatequation_time_dependent_bc.ufl b/test/heatequation/heatequation_time_dependent_bc.ufl index ac0079253cc170c757da9ae772eec8e4fd1a6087..6b8443c5712df1463c84576ba23702cb42c5c45b 100644 --- 
a/test/heatequation/heatequation_time_dependent_bc.ufl +++ b/test/heatequation/heatequation_time_dependent_bc.ufl @@ -16,7 +16,6 @@ v = TestFunction(V) mass = (u*v)*dx poisson = (inner(grad(u), grad(v)) - f*v)*dx -forms = [mass, poisson] -dirichlet_expression = g +interpolate_expression = g is_dirichlet = 1 exact_solution = g diff --git a/test/hyperbolic/linearacoustics.mini b/test/hyperbolic/linearacoustics.mini index 5ccd60388b75b190883de796ce1bd9f4ea2394e4..ad1cc95b405ebd4a7624ef8057b99834fb52ccbc 100644 --- a/test/hyperbolic/linearacoustics.mini +++ b/test/hyperbolic/linearacoustics.mini @@ -13,5 +13,11 @@ name = {__name} extension = vtu [formcompiler] +explicit_time_stepping = 1 +operators = mass, r + +[formcompiler.mass] +numerical_jacobian = 1 + +[formcompiler.r] numerical_jacobian = 1 -explicit_time_stepping = 1 \ No newline at end of file diff --git a/test/hyperbolic/linearacoustics.ufl b/test/hyperbolic/linearacoustics.ufl index 8b9d48c4433f72395c054c88ea6c4eaeedb9fcb0..5a78e7848578053dbb8d7f75a94c2f901f5831d3 100644 --- a/test/hyperbolic/linearacoustics.ufl +++ b/test/hyperbolic/linearacoustics.ufl @@ -21,12 +21,11 @@ flux = as_matrix([[q0, q1], [0., rho]]) # Define numerical fluxes to choose from -llf_flux = dot(avg(flux), n) - 0.5*jump(u) +llf_flux = dot(avg(flux), n) + 0.5*jump(u) numerical_flux = llf_flux r = -1. 
* inner(flux, grad(v))*dx \ - - inner(numerical_flux, jump(v))*dS \ + + inner(numerical_flux, jump(v))*dS \ + inner(u, v)*ds -forms = [mass, r] -dirichlet_expression = f, 0.0, 0.0 +interpolate_expression = f, 0.0, 0.0 diff --git a/test/hyperbolic/lineartransport.mini b/test/hyperbolic/lineartransport.mini index 1ca4dedeb11ae71cc2d9f3f99e55306ebd9580ed..60a465d670b1acc664c514afe6371a3c7038b51b 100644 --- a/test/hyperbolic/lineartransport.mini +++ b/test/hyperbolic/lineartransport.mini @@ -18,6 +18,12 @@ name = {__name} extension = vtu [formcompiler] -numerical_jacobian = 1, 0 | expand diff explicit_time_stepping = 0, 1 | expand scheme -compare_l2errorsquared = 1e-10 \ No newline at end of file +compare_l2errorsquared = 1e-10 +operators = mass, r + +[formcompiler.mass] +numerical_jacobian = 1, 0 | expand diff + +[formcompiler.r] +numerical_jacobian = 1, 0 | expand diff diff --git a/test/hyperbolic/lineartransport.ufl b/test/hyperbolic/lineartransport.ufl index 858a18bd32537eec046de5b382a95d95401e9aae..eaf8d33efe67a07a9a688efd3949e709a0a8122b 100644 --- a/test/hyperbolic/lineartransport.ufl +++ b/test/hyperbolic/lineartransport.ufl @@ -15,17 +15,16 @@ v = TestFunction(V) beta = as_vector((1., 1.)) n = FacetNormal(cell)('+') -def numerical_flux(normal, outside, inside): +def numerical_flux(normal, inside, outside): return conditional(inner(beta, n) > 0, inside, outside)*inner(beta, n) mass = u*v*dx r = -1.*u*inner(beta, grad(v))*dx \ - - numerical_flux(n, u('+'), u('-'))*jump(v)*dS \ + + numerical_flux(n, u('+'), u('-'))*jump(v)*dS \ + inner(beta, n)*u*v*dso \ - + numerical_flux(n, 0.0, u('-'))*v*dsd + + numerical_flux(n, u('+'), 0.0)*v*dsd -forms = [mass, r] is_dirichlet = dirichlet -dirichlet_expression = initial +interpolate_expression = initial exact_solution = 0 \ No newline at end of file diff --git a/test/hyperbolic/shallowwater.mini b/test/hyperbolic/shallowwater.mini index f72b422b2fb19fa39969fc0febae5cb39d647c88..39408b491829daadb409652db25a4c41e8606c31 
100644 --- a/test/hyperbolic/shallowwater.mini +++ b/test/hyperbolic/shallowwater.mini @@ -14,5 +14,11 @@ name = {__name} extension = vtu [formcompiler] -numerical_jacobian = 1 +operators = mass, r explicit_time_stepping = 1 + +[formcompiler.mass] +numerical_jacobian = 1 + +[formcompiler.r] +numerical_jacobian = 1 diff --git a/test/hyperbolic/shallowwater.ufl b/test/hyperbolic/shallowwater.ufl index 8737b2851cce2fb093a0f6000c8cc649356d3ff3..c58429633b4ec11f5a69fc87fdbf0ed82c034a1b 100644 --- a/test/hyperbolic/shallowwater.ufl +++ b/test/hyperbolic/shallowwater.ufl @@ -23,14 +23,13 @@ b_flux = as_matrix([[-1.* q], [q*q/h + 0.5*g*h*h]]) # Define numerical fluxes to choose from alpha = Max(abs(n[0]*q('+')) / h('+') + sqrt(g*h('+')), abs(n[0]*q('-')) / h('-') + sqrt(g*h('-'))) -llf_flux = dot(avg(flux), n) - 0.5*alpha*jump(u) +llf_flux = dot(avg(flux), n) + 0.5*alpha*jump(u) alpha_b = abs(n[0]*q) / h + sqrt(g*h) boundary_flux = 0.5*dot(flux + b_flux, n) + alpha_b * as_vector([0., q]) numerical_flux = llf_flux r = -1. 
* inner(flux, grad(v))*dx \ - - inner(numerical_flux, jump(v))*dS \ + + inner(numerical_flux, jump(v))*dS \ + inner(boundary_flux, v)*ds -forms = [mass, r] -dirichlet_expression = f, 0.0 +interpolate_expression = f, 0.0 diff --git a/test/laplace/CMakeLists.txt b/test/laplace/CMakeLists.txt deleted file mode 100644 index 5ae3a8576c632975f3f33be32aa17be96b6d4597..0000000000000000000000000000000000000000 --- a/test/laplace/CMakeLists.txt +++ /dev/null @@ -1,10 +0,0 @@ -dune_add_formcompiler_system_test(UFLFILE laplace.ufl - BASENAME laplace - INIFILE laplace.mini) - -dune_add_formcompiler_system_test(UFLFILE laplace_dg.ufl - BASENAME laplace_dg - INIFILE laplace_dg.mini) - -add_executable(laplace_dg_ref reference_main.cc) -set_target_properties(laplace_dg_ref PROPERTIES EXCLUDE_FROM_ALL 1) diff --git a/test/laplace/laplace.mini b/test/laplace/laplace.mini deleted file mode 100644 index 1db0ffd82475b75b830f085586bb2d991af2c514..0000000000000000000000000000000000000000 --- a/test/laplace/laplace.mini +++ /dev/null @@ -1,11 +0,0 @@ -__name = laplace_{__exec_suffix} -__exec_suffix = symdiff, numdiff | expand num - -lowerleft = 0.0 0.0 -upperright = 1.0 1.0 -elements = 4 4 -elementType = simplical -printmatrix = true - -[formcompiler] -numerical_jacobian = 0, 1 | expand num diff --git a/test/laplace/laplace.ufl b/test/laplace/laplace.ufl deleted file mode 100644 index 29b6a4bd3d6da839ca622098a222079d716a4f9e..0000000000000000000000000000000000000000 --- a/test/laplace/laplace.ufl +++ /dev/null @@ -1,5 +0,0 @@ -V = FiniteElement("CG", "triangle", 1) -u = TrialFunction(V) -v = TestFunction(V) - -forms = [inner(grad(u), grad(v))*dx] diff --git a/test/laplace/laplace_dg.mini b/test/laplace/laplace_dg.mini deleted file mode 100644 index 04a3c3dd0b0c3a5ee4f7d01c073b00f03d498b72..0000000000000000000000000000000000000000 --- a/test/laplace/laplace_dg.mini +++ /dev/null @@ -1,11 +0,0 @@ -__name = laplace_dg_{__exec_suffix} -__exec_suffix = numdiff, symdiff | expand num - -lowerleft 
= 0.0 0.0 -upperright = 1.0 1.0 -elements = 2 2 -elementType = simplical -printmatrix = true - -[formcompiler] -numerical_jacobian = 1, 0 | expand num diff --git a/test/laplace/laplace_dg.ufl b/test/laplace/laplace_dg.ufl deleted file mode 100644 index 1a30bcea2f224b8e7a81ec1bf1a60eb39f58ae16..0000000000000000000000000000000000000000 --- a/test/laplace/laplace_dg.ufl +++ /dev/null @@ -1,23 +0,0 @@ -cell = triangle -V = FiniteElement("DG", cell, 1) - -u = TrialFunction(V) -v = TestFunction(V) - -n = FacetNormal(cell)('+') - -# penalty factor -gamma = 1.0 - -# SIPG: -1.0, IIPG: 0.0, NIPG: 1.0 -theta = 1.0 - -r = inner(grad(u), grad(v))*dx \ - + inner(n, avg(grad(u)))*jump(v)*dS \ - + gamma*jump(u)*jump(v)*dS \ - - theta*jump(u)*inner(avg(grad(v)), n)*dS \ - - inner(n, grad(u))*v*ds \ - + gamma*u*v*ds \ - + theta*u*inner(grad(v), n)*ds - -forms = [r] diff --git a/test/laplace/reference_driver.hh b/test/laplace/reference_driver.hh deleted file mode 100644 index 02791b7dcb785463376e4dca0c15407f496d0c74..0000000000000000000000000000000000000000 --- a/test/laplace/reference_driver.hh +++ /dev/null @@ -1,144 +0,0 @@ -#ifndef _HOME_DOMINIC_DUNE_BUILD_DUNE_PERFTOOL_TEST_LAPLACE_LAPLACE_DG_SYMDIFF_DRIVER_HH -#define _HOME_DOMINIC_DUNE_BUILD_DUNE_PERFTOOL_TEST_LAPLACE_LAPLACE_DG_SYMDIFF_DRIVER_HH - -#include <dune/pdelab/gridoperator/gridoperator.hh> -#include <dune/pdelab/backend/istl.hh> -#include <dune/pdelab/backend/istl.hh> -#include <dune/pdelab/gridfunctionspace/vtk.hh> -#include <dune/grid/uggrid.hh> -#include <dune/pdelab/backend/istl.hh> -#include <string> -#include <dune/common/parametertree.hh> -#include <dune/pdelab/finiteelementmap/opbfem.hh> -#include <dune/grid/io/file/vtk/subsamplingvtkwriter.hh> -#include <dune/pdelab/stationary/linearproblem.hh> -#include <dune/common/parametertreeparser.hh> -#include <dune/testtools/gridconstruction.hh> -#include <dune/pdelab/localoperator/convectiondiffusiondg.hh> -#include 
<dune/pdelab/localoperator/convectiondiffusionparameter.hh> - -template<typename GV, typename RF> -class CDProb -{ - typedef Dune::PDELab::ConvectionDiffusionBoundaryConditions::Type BCType; - - public: - typedef Dune::PDELab::ConvectionDiffusionParameterTraits<GV,RF> Traits; - - //! tensor diffusion coefficient - typename Traits::PermTensorType - A (const typename Traits::ElementType& e, const typename Traits::DomainType& x) const - { - typename Traits::PermTensorType I; - for (std::size_t i=0; i<Traits::dimDomain; i++) - for (std::size_t j=0; j<Traits::dimDomain; j++) - I[i][j] = (i==j) ? 1 : 0; - return I; - } - - //! velocity field - typename Traits::RangeType - b (const typename Traits::ElementType& e, const typename Traits::DomainType& x) const - { - typename Traits::RangeType v(0.0); - return v; - } - - //! sink term - typename Traits::RangeFieldType - c (const typename Traits::ElementType& e, const typename Traits::DomainType& x) const - { - return 0.0; - } - - //! source term - typename Traits::RangeFieldType - f (const typename Traits::ElementType& e, const typename Traits::DomainType& x) const - { - return 0.0; - } - - //! boundary condition type function - BCType - bctype (const typename Traits::IntersectionType& is, const typename Traits::IntersectionDomainType& x) const - { - return Dune::PDELab::ConvectionDiffusionBoundaryConditions::Dirichlet; - } - - //! Dirichlet boundary condition value - typename Traits::RangeFieldType - g (const typename Traits::ElementType& e, const typename Traits::DomainType& x) const - { - return 0.0; - } - - //! Neumann boundary condition - typename Traits::RangeFieldType - j (const typename Traits::IntersectionType& is, const typename Traits::IntersectionDomainType& x) const - { - return 0.0; - } - - //! 
outflow boundary condition - typename Traits::RangeFieldType - o (const typename Traits::IntersectionType& is, const typename Traits::IntersectionDomainType& x) const - { - return 0.0; - } -}; - - -void driver(int argc, char** argv){ typedef Dune::PDELab::ISTL::VectorBackend<Dune::PDELab::ISTL::Blocking::none, 1> VectorBackend; - static const int dim = 2; - typedef Dune::UGGrid<dim> Grid; - typedef Grid::LeafGridView GV; - typedef Grid::ctype DF; - typedef double R; - typedef Dune::PDELab::OPBLocalFiniteElementMap<DF, R, 1, dim, Dune::GeometryType::simplex> DG1_FEM; - typedef Dune::PDELab::NoConstraints NoConstraintsAssembler; - typedef Dune::PDELab::GridFunctionSpace<GV, DG1_FEM, NoConstraintsAssembler, VectorBackend> DG1_DIRICHLET_GFS; - Dune::ParameterTree initree; - Dune::ParameterTreeParser::readINITree(argv[1], initree); - IniGridFactory<Grid> factory(initree); - std::shared_ptr<Grid> grid = factory.getGrid(); - GV gv = grid->leafGridView(); - DG1_FEM dg1_fem; - DG1_DIRICHLET_GFS dg1_dirichlet_gfs(gv, dg1_fem); - dg1_dirichlet_gfs.name("bla"); - typedef Dune::SubsamplingVTKWriter<GV> VTKWriter; - int sublevel = initree.get<int>("vtk.subsamplinglevel", 0); - VTKWriter vtkwriter(gv, sublevel); - using LOP = Dune::PDELab::ConvectionDiffusionDG<CDProb<GV, R>, DG1_FEM>; - typedef DG1_DIRICHLET_GFS::ConstraintsContainer<R>::Type DG1_CC; - typedef Dune::PDELab::ISTL::BCRSMatrixBackend<> MatrixBackend; - typedef Dune::PDELab::GridOperator<DG1_DIRICHLET_GFS, DG1_DIRICHLET_GFS, LOP, MatrixBackend, DF, R, R, DG1_CC, DG1_CC> GO; - typedef GO::Traits::Domain V; - V x(dg1_dirichlet_gfs); - x = 0.0; - std::string vtkfile = initree.get<std::string>("wrapper.vtkcompare.name", "output"); - typedef Dune::PDELab::ISTLBackend_SEQ_UMFPack LinearSolver; - typedef Dune::PDELab::StationaryLinearProblemSolver<GO, LinearSolver, V> SLP; - DG1_CC dg1_cc; - dg1_cc.clear(); - CDProb<GV, R> params; - LOP lop(params, Dune::PDELab::ConvectionDiffusionDGMethod::SIPG); - int 
generic_dof_estimate = 6 * dg1_dirichlet_gfs.maxLocalSize(); - int dofestimate = initree.get<int>("istl.number_of_nnz", generic_dof_estimate); - MatrixBackend mb(dofestimate); - GO go(dg1_dirichlet_gfs, dg1_cc, dg1_dirichlet_gfs, dg1_cc, lop, mb); - std::cout << "gfs with " << dg1_dirichlet_gfs.size() << " dofs generated "<< std::endl; - std::cout << "cc with " << dg1_cc.size() << " dofs generated "<< std::endl; - LinearSolver ls(false); - double reduction = initree.get<double>("reduction", 1e-12); - SLP slp(go, ls, x, reduction); - slp.apply(); - typedef typename GO::Traits::Jacobian M; - M m(go); - go.jacobian(x,m); - using Dune::PDELab::Backend::native; - Dune::printmatrix(std::cout, native(m),"global stiffness matrix","row",9,1); - Dune::PDELab::addSolutionToVTKWriter(vtkwriter, dg1_dirichlet_gfs, x); - vtkwriter.write(vtkfile, Dune::VTK::ascii); -} - -#endif //_HOME_DOMINIC_DUNE_BUILD_DUNE_PERFTOOL_TEST_LAPLACE_LAPLACE_DG_SYMDIFF_DRIVER_HH diff --git a/test/laplace/reference_main.cc b/test/laplace/reference_main.cc deleted file mode 100644 index ff00d5c060de6a23fafc79ffb7aeabb8f5d1ac9d..0000000000000000000000000000000000000000 --- a/test/laplace/reference_main.cc +++ /dev/null @@ -1,33 +0,0 @@ -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include <dune/common/parallel/mpihelper.hh> -#include <dune/common/exceptions.hh> - -#include"/home/dominic/dune/dune-perftool/test/laplace/reference_driver.hh" - -int main(int argc, char** argv) -{ - try{ - //Maybe initialize Mpi - Dune::MPIHelper& helper = Dune::MPIHelper::instance(argc, argv); - if(Dune::MPIHelper::isFake) - std::cout<< "This is a sequential program." << std::endl; - else - std::cout<<"I am rank "<<helper.rank()<<" of "<<helper.size() - <<" processes!"<<std::endl; - - driver(argc, argv); - - return 0; - } - catch (Dune::Exception &e){ - std::cerr << "Dune reported error: " << e << std::endl; - return 1; - } - catch (...){ - std::cerr << "Unknown exception thrown!" 
<< std::endl; - return 1; - } -} diff --git a/test/navier-stokes/CMakeLists.txt b/test/navier-stokes/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..b87827a05984dbdded01684d7b328d80d48895b1 --- /dev/null +++ b/test/navier-stokes/CMakeLists.txt @@ -0,0 +1,12 @@ +add_subdirectory(reference_program) + +dune_add_formcompiler_system_test(UFLFILE navierstokes_2d_dg_quadrilateral.ufl + BASENAME navierstokes_2d_dg_quadrilateral + INIFILE navierstokes_2d_dg_quadrilateral.mini + SCRIPT dune_execute_parallel.py + ) + +dune_add_formcompiler_system_test(UFLFILE navierstokes_3d_dg_quadrilateral.ufl + BASENAME navierstokes_3d_dg_quadrilateral + INIFILE navierstokes_3d_dg_quadrilateral.mini + ) diff --git a/test/navier-stokes/navierstokes_2d_dg_quadrilateral.mini b/test/navier-stokes/navierstokes_2d_dg_quadrilateral.mini new file mode 100644 index 0000000000000000000000000000000000000000..5700107d03d62655e87b50b129aabc5b534134d2 --- /dev/null +++ b/test/navier-stokes/navierstokes_2d_dg_quadrilateral.mini @@ -0,0 +1,39 @@ +__name = navierstokes_2d_dg_quadrilateral_{__exec_suffix} +__exec_suffix = symdiff, numdiff | expand num + +cells = 16 16 +lowerleft = -1. -1. +extension = 2. 2. 
+periodic = true true + +printmatrix = false + +[wrapper.execute_parallel] +numprocessors = 4 + +[wrapper.vtkcompare] +name = {__name} +extension = vtu + +[formcompiler] +operators = mass, r +compare_l2errorsquared = 5e-5 +# Only calculate error for the velocity part +l2error_tree_path = 1, 1, 0 +explicit_time_stepping = 0 +yaspgrid_offset = 1 +overlapping = 1 + +[formcompiler.mass] +numerical_jacobian = 0, 1 | expand num + +[formcompiler.r] +numerical_jacobian = 0, 1 | expand num + +[instat] +T = 1e-2 +dt = 1e-3 +nth = 1 + +# Disable numdiff tests +{__exec_suffix} == numdiff | exclude diff --git a/test/navier-stokes/navierstokes_2d_dg_quadrilateral.ufl b/test/navier-stokes/navierstokes_2d_dg_quadrilateral.ufl new file mode 100644 index 0000000000000000000000000000000000000000..957d715e82acf50a936b9977957b3ae16c1d3e3b --- /dev/null +++ b/test/navier-stokes/navierstokes_2d_dg_quadrilateral.ufl @@ -0,0 +1,47 @@ +# Taylor-Green vortex + +cell = quadrilateral +degree = 2 +dim = 2 + +x = SpatialCoordinate(cell) +time = get_time(cell) + +P2 = VectorElement("DG", cell, degree) +P1 = FiniteElement("DG", cell, degree-1) +TH = P2 * P1 + +v, q = TestFunctions(TH) +u, p = TrialFunctions(TH) + +n = FacetNormal(cell)('+') + +rho = 1.0 +mu = 1.0/100.0 + +g_v = as_vector((-exp(-2*pi*mu/rho*time)*cos(pi*x[0])*sin(pi*x[1]), + exp(-2*pi*mu/rho*time)*sin(pi*x[0])*cos(pi*x[1]))) +g_p = -0.25*rho*exp(-4*pi*pi*mu/rho*time)*(cos(2*pi*x[0])+cos(2*pi*x[1])) + +# SIPG: -1.0, IIPG: 0.0, NIPG: 1.0 +theta = -1.0 + +# penalty factor +alpha = 1.0 +h_int = Min(CellVolume(cell)('+'), CellVolume(cell)('-')) / FacetArea(cell) +gamma_int = (alpha * degree * (degree + dim - 1)) / h_int + +mass = rho*inner(u,v)*dx + +r = mu * inner(grad(u), grad(v))*dx \ + - p*div(v)*dx \ + - q*div(u)*dx \ + + rho * inner(grad(u)*u,v)*dx \ + - mu * inner(avg(grad(u))*n, jump(v))*dS \ + + mu * gamma_int * inner(jump(u), jump(v))*dS \ + + mu * theta * inner(avg(grad(v))*n, jump(u))*dS \ + + avg(p)*inner(jump(v), n)*dS \ + 
+ avg(q)*inner(jump(u), n)*dS \ + +interpolate_expression = g_v, g_p +exact_solution = g_v, g_p diff --git a/test/navier-stokes/navierstokes_3d_dg_quadrilateral.mini b/test/navier-stokes/navierstokes_3d_dg_quadrilateral.mini new file mode 100644 index 0000000000000000000000000000000000000000..a329eaaff7b46954009490c63b672f8830c3a73e --- /dev/null +++ b/test/navier-stokes/navierstokes_3d_dg_quadrilateral.mini @@ -0,0 +1,33 @@ +__name = navierstokes_3d_dg_quadrilateral_{__exec_suffix} +__exec_suffix = symdiff, numdiff | expand num + +cells = 4 4 4 +lowerleft = -1. -1. -1. +extension = 2. 2. 2. + +printmatrix = false + +[wrapper.vtkcompare] +name = {__name} +extension = vtu + +[formcompiler] +explicit_time_stepping = 0 +yaspgrid_offset = 1 +compare_l2errorsquared = 5e-4 +# Only calculate error for the velocity part +l2error_tree_path = 1, 1, 1, 0 +operators = mass, r + +[formcompiler.mass] +numerical_jacobian = 0, 1 | expand num + +[formcompiler.r] +numerical_jacobian = 0, 1 | expand num + +[instat] +T = 1e-1 +dt = 5e-2 + +# Disable numdiff tests +{__exec_suffix} == numdiff | exclude diff --git a/test/navier-stokes/navierstokes_3d_dg_quadrilateral.ufl b/test/navier-stokes/navierstokes_3d_dg_quadrilateral.ufl new file mode 100644 index 0000000000000000000000000000000000000000..96038a8b4ad4cc0b2ec8254f5a0bfcbcd4922373 --- /dev/null +++ b/test/navier-stokes/navierstokes_3d_dg_quadrilateral.ufl @@ -0,0 +1,58 @@ +# Beltrami flow + +cell = hexahedron +degree = 2 +dim = 3 + +x = SpatialCoordinate(cell) +time = get_time(cell) + +P2 = VectorElement("DG", cell, degree) +P1 = FiniteElement("DG", cell, degree-1) +TH = P2 * P1 + +v, q = TestFunctions(TH) +u, p = TrialFunctions(TH) + +n = FacetNormal(cell)('+') + +rho = 1.0 +mu = 1.0 + +a = pi/4 +d = pi/2 +g_v = as_vector((-a*exp(-d*d*time)*(exp(a*x[0])*sin(d*x[2]+a*x[1])+cos(d*x[1]+a*x[0])*exp(a*x[2])), + -a*exp(-d*d*time)*(exp(a*x[0])*cos(d*x[2]+a*x[1])+exp(a*x[1])*sin(a*x[2]+d*x[0])), + 
-a*exp(-d*d*time)*(exp(a*x[1])*cos(a*x[2]+d*x[0])+sin(d*x[1]+a*x[0])*exp(a*x[2])))) +g_p = -0.5*a*a*rho*exp(-d*d*time)* ( 2*cos(d*x[1]+a*x[0])*exp(a*(x[2]+x[0]) )*sin(d*x[2]+a*x[1]) + 2*exp(a*(x[1]+x[0]))*sin(a*x[2]+d*x[0])*cos(d*x[2]+a*x[1]) + 2*sin(d*x[1]+a*x[0])*exp(a*(x[2]+x[1]))*cos(a*x[2]+d*x[0]) + exp(2*a*x[2]) + exp(2*a*x[1]) + exp(2*a*x[0]) ) + + +# SIPG: -1.0, IIPG: 0.0, NIPG: 1.0 +theta = -1.0 + +# penalty factor +alpha = 1.0 +h_ext = CellVolume(cell) / FacetArea(cell) +gamma_ext = (alpha * degree * (degree + dim - 1)) / h_ext +h_int = Min(CellVolume(cell)('+'), CellVolume(cell)('-')) / FacetArea(cell) +gamma_int = (alpha * degree * (degree + dim - 1)) / h_int + +mass = rho*inner(u,v)*dx + +r = mu * inner(grad(u), grad(v))*dx \ + - p*div(v)*dx \ + - q*div(u)*dx \ + + rho * inner(grad(u)*u,v)*dx \ + - mu * inner(avg(grad(u))*n, jump(v))*dS \ + + mu * gamma_int * inner(jump(u), jump(v))*dS \ + + mu * theta * inner(avg(grad(v))*n, jump(u))*dS \ + + avg(p)*inner(jump(v), n)*dS \ + + avg(q)*inner(jump(u), n)*dS \ + - mu * inner(grad(u)*n, v)*ds \ + + mu * gamma_ext * inner(u-g_v, v)*ds \ + + mu * theta * inner(grad(v)*n, u-g_v)*ds \ + + p*inner(v, n)*ds \ + + q*inner(u-g_v, n)*ds + +interpolate_expression = g_v, g_p +exact_solution = g_v, g_p \ No newline at end of file diff --git a/test/navier-stokes/reference_program/CMakeLists.txt b/test/navier-stokes/reference_program/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..21af3e83f8d1fdec5377291b521f8712987a494d --- /dev/null +++ b/test/navier-stokes/reference_program/CMakeLists.txt @@ -0,0 +1,3 @@ +add_executable(taylor-green taylor-green.cc) +dune_symlink_to_source_files(FILES taylor-green.ini) +set_target_properties(taylor-green PROPERTIES EXCLUDE_FROM_ALL 1) diff --git a/test/navier-stokes/reference_program/taylor-green.cc b/test/navier-stokes/reference_program/taylor-green.cc new file mode 100644 index 
0000000000000000000000000000000000000000..e48e04296122dd89b615166f33d21219324d0726 --- /dev/null +++ b/test/navier-stokes/reference_program/taylor-green.cc @@ -0,0 +1,310 @@ +// -*- tab-width: 2; indent-tabs-mode: nil -*- +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif +#include <iostream> +#include <vector> +#include <map> +#include <string> +#include <random> +#include <dune/common/parallel/mpihelper.hh> +#include <dune/common/exceptions.hh> +#include <dune/common/fvector.hh> +#include <dune/grid/yaspgrid.hh> +#include <dune/grid/io/file/vtk/subsamplingvtkwriter.hh> +#include <dune/istl/bvector.hh> +#include <dune/istl/operators.hh> +#include <dune/istl/solvers.hh> +#include <dune/istl/preconditioners.hh> +#include <dune/istl/io.hh> + +#include <dune/pdelab/common/function.hh> +#include <dune/pdelab/common/functionutilities.hh> +#include <dune/pdelab/finiteelementmap/qkdg.hh> +#include <dune/pdelab/gridfunctionspace/gridfunctionspaceutilities.hh> +#include <dune/pdelab/gridfunctionspace/subspace.hh> +#include <dune/pdelab/gridfunctionspace/vectorgridfunctionspace.hh> +#include <dune/pdelab/gridfunctionspace/vtk.hh> +#include <dune/pdelab/gridoperator/gridoperator.hh> +#include <dune/pdelab/gridfunctionspace/interpolate.hh> +#include <dune/pdelab/localoperator/dgnavierstokes.hh> +#include <dune/pdelab/backend/istl.hh> +#include <dune/pdelab/finiteelementmap/monomfem.hh> +#include <dune/pdelab/common/function.hh> +#include <dune/pdelab/common/vtkexport.hh> +#include <dune/pdelab/constraints/p0.hh> +#include<dune/pdelab/gridoperator/onestep.hh> +#include<dune/pdelab/newton/newton.hh> +#include "dune/perftool/vtkpredicate.hh" +#include "dune/grid/io/file/vtk/vtksequencewriter.hh" + +#include "taylor-green.hh" + + +#define PERIODIC +// #define NORMALIZE_PRESSURE + +//=============================================================== +// Problem setup and solution +//=============================================================== +template<typename GV, typename RF, int 
vOrder, int pOrder> +void taylor_green (const GV& gv, const Dune::ParameterTree& configuration, std::string filename) +{ + // Some types + using ES = Dune::PDELab::AllEntitySet<GV>; + ES es(gv); + using DF = typename ES::Grid::ctype; + static const unsigned int dim = ES::dimension; + Dune::Timer timer; + + // Create finite element maps + const int velocity_degree = 2; + const int pressure_degree = 1; + using FEM_V = Dune::PDELab::QkDGLocalFiniteElementMap<DF, RF, velocity_degree, dim>; + using FEM_P = Dune::PDELab::QkDGLocalFiniteElementMap<DF, RF, pressure_degree, dim>; + FEM_V fem_v; + FEM_P fem_p; + + // Do not block anything and order it lexicographic + using VectorBackend_V = Dune::PDELab::istl::VectorBackend<Dune::PDELab::istl::Blocking::none>; + using VectorBackend_P = Dune::PDELab::istl::VectorBackend<Dune::PDELab::istl::Blocking::none>; + using VectorBackend = Dune::PDELab::istl::VectorBackend<Dune::PDELab::istl::Blocking::none>; + using OrderingTag_V = Dune::PDELab::LexicographicOrderingTag; + + // For periodic boundary conditions in Yasp grid we need an + // overlap. 
Therefore we run our program in parallel and need these + // constraints +#ifdef PERIODIC + using Con = Dune::PDELab::P0ParallelConstraints; +#else + using Con = Dune::PDELab::NoConstraints; +#endif + + // Velocity GFS + using GFS_V = Dune::PDELab::VectorGridFunctionSpace< + ES,FEM_V,dim, + VectorBackend, + VectorBackend_V, + Con, + OrderingTag_V + >; + GFS_V gfs_v(es,fem_v); + gfs_v.name("v"); + + // Pressure GFS + using GFS_P = Dune::PDELab::GridFunctionSpace< + ES, + FEM_P, + Con, + VectorBackend_P>; + GFS_P gfs_p(es,fem_p); + gfs_p.name("p"); + + + // GFS + using OrderingTag = Dune::PDELab::LexicographicOrderingTag; + using GFS = Dune::PDELab::CompositeGridFunctionSpace<VectorBackend,OrderingTag,GFS_V,GFS_P>; + GFS gfs(gfs_v, gfs_p); + using namespace Dune::Indices; + gfs_v.child(_0).name("velocity_0"); + gfs_v.child(_1).name("velocity_1"); + gfs_p.name("pressure"); + gfs.name("test"); + gfs.update(); + using CC = typename GFS::template ConstraintsContainer<double>::Type; + CC cc; + cc.clear(); +#ifdef PERIODIC + Dune::PDELab::constraints(gfs,cc); +#endif + std::cout << "gfs with " << gfs.size() << " dofs generated "<< std::endl; + std::cout << "cc with " << cc.size() << " dofs generated "<< std::endl; + + // Parameter functions + using FType = ZeroVectorFunction<ES,RF,dim>; + FType f(es); + using BType = BCTypeParamGlobalDirichlet; + BType b; + using VType = TaylorGreenVelocity<ES,RF,dim>; + VType v(es, configuration.sub("parameters")); + using PType = TaylorGreenPressure<ES,RF>; + PType p(es, configuration.sub("parameters")); + using PenaltyTerm = Dune::PDELab::DefaultInteriorPenalty<RF>; + + // Local operator + using LOP_Parameters = + Dune::PDELab::DGNavierStokesParameters<ES,RF,FType,BType,VType,PType,true,false,PenaltyTerm>; + LOP_Parameters lop_parameters(configuration.sub("parameters"),f,b,v,p); + using LOP = Dune::PDELab::DGNavierStokes<LOP_Parameters>; + const int superintegration_order = 0; + LOP lop(lop_parameters,superintegration_order); + using 
LOP_M = Dune::PDELab::NavierStokesMass<LOP_Parameters>; + LOP_M lop_m(lop_parameters,1); + + // Grid operator + using MBE = Dune::PDELab::istl::BCRSMatrixBackend<>; + MBE mbe(75); // Maximal number of nonzeroes per row can be cross-checked by patternStatistics(). + using GO_R = Dune::PDELab::GridOperator<GFS,GFS,LOP,MBE,RF,RF,RF,CC,CC>; + GO_R go_r(gfs,cc,gfs,cc,lop,mbe); + using GO_M = Dune::PDELab::GridOperator<GFS,GFS,LOP_M,MBE,RF,RF,RF,CC,CC>; + GO_M go_m(gfs,cc,gfs,cc,lop_m,mbe); + using IGO = Dune::PDELab::OneStepGridOperator<GO_R,GO_M>; + IGO igo(go_r,go_m); + + // Create initial solution + using InitialVelocity = TaylorGreenVelocity<GV,RF,2>; + InitialVelocity initial_velocity(gv, configuration.sub("parameters")); + using InitialPressure = TaylorGreenPressure<GV,RF>; + InitialPressure initial_pressure(gv, configuration.sub("parameters")); + using InitialSolution = Dune::PDELab::CompositeGridFunction<InitialVelocity,InitialPressure>; + InitialSolution initial_solution(initial_velocity, initial_pressure); + + // Make coefficent vector and initialize it from a function + using V = typename IGO::Traits::Domain; + V xold(gfs); + xold = 0.0; + Dune::PDELab::interpolate(initial_solution,gfs,xold); + + // Solver +#ifdef PERIODIC + using LinearSolver = Dune::PDELab::ISTLBackend_OVLP_BCGS_ILU0<GFS,CC>; + LinearSolver ls(gfs,cc); +#else + using LinearSolver = Dune::PDELab::ISTLBackend_SEQ_BCGS_ILU0; + LinearSolver ls; + // using LinearSolver = Dune::PDELab::ISTLBackend_SEQ_UMFPack; + // LinearSolver ls(false); +#endif + using PDESolver = Dune::PDELab::Newton<IGO,LinearSolver,V>; + PDESolver newton(igo,xold,ls); + // newton.setReassembleThreshold(0.0); + // newton.setVerbosityLevel(2); + // newton.setMaxIterations(50); + // newton.setLineSearchMaxIterations(30); + + // Time stepping + // using TSM = Dune::PDELab::OneStepThetaParameter<RF>; + // TSM tsm(1.0); + using TSM = Dune::PDELab::Alexander2Parameter<RF>; + TSM tsm; + 
Dune::PDELab::OneStepMethod<RF,IGO,PDESolver,V,V> osm(tsm,igo,newton); + // osm.setVerbosityLevel(2); + + // Set time + RF time = 0.0; + RF time_end = configuration.get<RF>("driver.time_end"); + RF dt = configuration.get<RF>("driver.dt"); + RF dt_min = 1e-8; + + // Visualize initial condition + using VTKSW = Dune::VTKSequenceWriter<GV>; + using VTKWriter = Dune::SubsamplingVTKWriter<GV>; + VTKWriter vtkwriter(gv, 2); + VTKSW vtkSequenceWriter(std::make_shared<VTKWriter>(vtkwriter), filename); + CuttingPredicate predicate; + Dune::PDELab::addSolutionToVTKWriter(vtkSequenceWriter, gfs, xold, Dune::PDELab::vtk::defaultNameScheme(), predicate); + vtkSequenceWriter.write(time, Dune::VTK::appendedraw); + + V x(gfs,0.0); + +#ifdef NORMALIZE_PRESSURE + // Pressure normalization + using PressureSubGFS = typename Dune::PDELab::GridFunctionSubSpace <GFS,Dune::TypeTree::TreePath<1> >; + PressureSubGFS pressureSubGfs(gfs); + using PDGF = Dune::PDELab::DiscreteGridFunction<PressureSubGFS,V>; + PDGF pdgf(pressureSubGfs,x); + typename PDGF::Traits::RangeType pressure_integral(0); + + int elements = int(sqrt(gv.size(0))); + int pressure_index = elements * elements * dim * pow((velocity_degree + 1), dim); + using Dune::PDELab::Backend::native; + std::cout << std::endl; + std::cout << "info elements: " << elements << std::endl; + std::cout << "info pressure_index: " << pressure_index << std::endl; + std::cout << "info gfs.size(): " << gfs.size() << std::endl; + std::cout << "info native(x).size(): " << native(x).size() << std::endl; + std::cout << std::endl; +#endif + + // Time loop + int step = 0; + const int nth = configuration.get<RF>("driver.nth"); + while (time < time_end - dt_min*0.5){ + osm.apply(time,dt,xold,x); + +#ifdef NORMALIZE_PRESSURE + // Correct pressure after each step. Without this pressure + // correction the velocity will be ok but the pressure will be + // shifted by a constant. 
+ Dune::PDELab::integrateGridFunction(pdgf,pressure_integral,2); + pressure_integral = gv.comm().sum(pressure_integral); + std::cout << gv.comm().rank() << " pressure_integral before normalization: " << pressure_integral << std::endl; + + // Scale integral + pressure_integral = pressure_integral/4; + for (int i=pressure_index; i<gfs.size(); ++i){ + native(x)[i] -= pressure_integral; + } + Dune::PDELab::integrateGridFunction(pdgf,pressure_integral,2); + pressure_integral = gv.comm().sum(pressure_integral); + std::cout << "pressure_integral after normalization: " << pressure_integral << std::endl; +#endif + + xold = x; + time += dt; + step++; + + if(step%nth==0){ + vtkSequenceWriter.write(time, Dune::VTK::appendedraw); + } + } +} + +//=============================================================== +// Main program with grid setup +//=============================================================== +int main(int argc, char** argv) +{ + try{ + // Maybe initialize Mpi + Dune::MPIHelper::instance(argc, argv); + + // Read ini file + Dune::ParameterTree configuration; + const std::string config_filename("taylor-green.ini"); + std::cout << "Reading ini-file \""<< config_filename + << "\"" << std::endl; + Dune::ParameterTreeParser::readINITree(config_filename, configuration); + + // Create grid + const int dim = 2; + const int cells_per_dir = configuration.get<double>("driver.cells_per_dir"); + Dune::FieldVector<double,dim> lowerleft(-1.0); + Dune::FieldVector<double,dim> upperright(1.0); + std::array<int, dim> cells(Dune::fill_array<int, dim>(cells_per_dir)); + std::bitset<dim> periodic(false); + int overlap = 0; +#ifdef PERIODIC + periodic[0] = true; + periodic[1] = true; + overlap = 1; +#endif + using Grid = Dune::YaspGrid<dim, Dune::EquidistantOffsetCoordinates<double, dim> >; + Grid grid(lowerleft, upperright, cells, periodic, overlap); + + // Solve problem + using GV = Grid::LeafGridView; + const GV gv=grid.leafGridView(); + Dune::dinfo.push(false); + 
taylor_green<GV,double,2,1>(gv,configuration,"taylor-green"); + return 0; + } + catch (Dune::Exception &e){ + std::cerr << "Dune reported error: " << e << std::endl; + return 1; + } + catch (...){ + std::cerr << "Unknown exception thrown!" << std::endl; + return 1; + } +} diff --git a/test/navier-stokes/reference_program/taylor-green.hh b/test/navier-stokes/reference_program/taylor-green.hh new file mode 100644 index 0000000000000000000000000000000000000000..fc30069f6507ac9e10f36c147fa9188ce8f86293 --- /dev/null +++ b/test/navier-stokes/reference_program/taylor-green.hh @@ -0,0 +1,149 @@ +#ifndef TAYLOR_GREEN_HH +#define TAYLOR_GREEN_HH + +//=============================================================== +// Define parameter functions f,g,j and \partial\Omega_D/N +//=============================================================== + +// constraints parameter class for selecting boundary condition type +class BCTypeParamGlobalDirichlet +{ +public : + typedef Dune::PDELab::StokesBoundaryCondition BC; + + struct Traits + { + typedef BC::Type RangeType; + }; + + BCTypeParamGlobalDirichlet() {} + + template<typename I> + inline void evaluate (const I & intersection, /*@\label{bcp:name}@*/ + const Dune::FieldVector<typename I::ctype, I::dimension-1> & coord, + BC::Type& y) const + { + y = BC::VelocityDirichlet; + } + + template<typename T> + void setTime(T t){ + } +}; + + +template<typename GV, typename RF, int dim> +class TaylorGreenVelocity : + public Dune::PDELab::AnalyticGridFunctionBase< + Dune::PDELab::AnalyticGridFunctionTraits<GV,RF,dim>, + TaylorGreenVelocity<GV,RF,dim> > +{ +public: + typedef Dune::PDELab::AnalyticGridFunctionTraits<GV,RF,dim> Traits; + typedef Dune::PDELab::AnalyticGridFunctionBase<Traits,TaylorGreenVelocity<GV,RF,dim> > BaseT; + + typedef typename Traits::DomainType DomainType; + typedef typename Traits::RangeType RangeType; + + TaylorGreenVelocity(const GV & gv, const Dune::ParameterTree& params) : BaseT(gv) + { + mu = params.get<RF>("mu"); + 
rho = params.get<RF>("rho"); + time = 0.0; + } + + inline void evaluateGlobal(const DomainType & x, RangeType & y) const + { + // TODO Get mu and rho from somewhere else! + RF pi = 3.14159265358979323846; + RF nu = mu/rho; + y[0] = -exp(-2.0*pi*pi*nu*time)*cos(pi*x[0])*sin(pi*x[1]); + y[1] = exp(-2.0*pi*pi*nu*time)*sin(pi*x[0])*cos(pi*x[1]); + } + + template <typename T> + void setTime(T t){ + time = t; + } + +private: + RF rho; + RF mu; + RF time; +}; + + +template<typename GV, typename RF> +class TaylorGreenPressure + : public Dune::PDELab::AnalyticGridFunctionBase< + Dune::PDELab::AnalyticGridFunctionTraits<GV,RF,1>, + TaylorGreenPressure<GV,RF> > +{ +public: + typedef Dune::PDELab::AnalyticGridFunctionTraits<GV,RF,1> Traits; + typedef Dune::PDELab::AnalyticGridFunctionBase<Traits,TaylorGreenPressure<GV,RF> > BaseT; + + typedef typename Traits::DomainType DomainType; + typedef typename Traits::RangeType RangeType; + + TaylorGreenPressure (const GV& gv, const Dune::ParameterTree& params) : BaseT(gv) + { + mu = params.get<RF>("mu"); + rho = params.get<RF>("rho"); + time = 0.0; + } + + inline void evaluateGlobal (const typename Traits::DomainType& x, + typename Traits::RangeType& y) const + { + RF pi = 3.14159265358979323846; + RF nu = mu/rho; + y = -0.25*rho*exp(-4.0*pi*pi*nu*time)*(cos(2.0*pi*x[0])+cos(2.0*pi*x[1])); + } + + template<typename T> + void setTime(T t){ + time = t; + } + +private: + RF rho; + RF mu; + RF time; +}; + + + +template<typename GV, typename RF, std::size_t dim_range> +class ZeroVectorFunction : + public Dune::PDELab::AnalyticGridFunctionBase< + Dune::PDELab::AnalyticGridFunctionTraits<GV,RF,dim_range>, + ZeroVectorFunction<GV,RF,dim_range> >, + public Dune::PDELab::InstationaryFunctionDefaults +{ +public: + typedef Dune::PDELab::AnalyticGridFunctionTraits<GV,RF,dim_range> Traits; + typedef Dune::PDELab::AnalyticGridFunctionBase<Traits, ZeroVectorFunction> BaseT; + + typedef typename Traits::DomainType DomainType; + typedef typename 
Traits::RangeType RangeType; + + ZeroVectorFunction(const GV & gv) : BaseT(gv) {} + + inline void evaluateGlobal(const DomainType & x, RangeType & y) const + { + y=0; + } +}; + +template<typename GV, typename RF> +class ZeroScalarFunction + : public ZeroVectorFunction<GV,RF,1> +{ +public: + + ZeroScalarFunction(const GV & gv) : ZeroVectorFunction<GV,RF,1>(gv) {} + +}; + +#endif diff --git a/test/navier-stokes/reference_program/taylor-green.ini b/test/navier-stokes/reference_program/taylor-green.ini new file mode 100644 index 0000000000000000000000000000000000000000..b7e94815a97507b65f0d85838c63882e349b860d --- /dev/null +++ b/test/navier-stokes/reference_program/taylor-green.ini @@ -0,0 +1,14 @@ +[parameters] +rho = 1.0 +mu = 0.01 + +[parameters.dg] +epsilon = -1 +sigma = 6.0 +beta = 1.0 + +[driver] +time_end = 1.0 +dt = 1e-3 +nth = 20 +cells_per_dir = 16 diff --git a/test/navier-stokes/reference_program/taylor_green_solution.py b/test/navier-stokes/reference_program/taylor_green_solution.py new file mode 100644 index 0000000000000000000000000000000000000000..0def653962e95367a9b24ee5f5a80773cbb0340e --- /dev/null +++ b/test/navier-stokes/reference_program/taylor_green_solution.py @@ -0,0 +1,99 @@ +import numpy as np +import matplotlib.pyplot as plt + +def pressure(t, x, y): + rho = 1.0 + mu = 1.0/100 + nu = mu/rho + pi = np.pi + return -0.25*rho*np.exp(-4.0*pi**2*nu*t)*(np.cos(2.0*pi*x) + np.cos(2.0*pi*y)) + + +def velocity(t, x, y): + rho = 1.0 + mu = 1.0/100 + nu = mu/rho + pi = np.pi + v = np.empty(2) + v[0] = -np.exp(-2.0*pi*mu/rho*t)*np.cos(pi*x)*np.sin(pi*y) + v[1] = np.exp(-2.0*pi*mu/rho*t)*np.sin(pi*x)*cos(pi*y) + return v + + +def v_0(t, x, y): + rho = 1.0 + mu = 1.0/100 + nu = mu/rho + pi = np.pi + return -np.exp(-2.0*pi*mu/rho*t)*np.cos(pi*x)*np.sin(pi*y) + + +def v_1(t, x, y): + rho = 1.0 + mu = 1.0/100 + nu = mu/rho + pi = np.pi + return np.exp(-2.0*pi*mu/rho*t)*np.sin(pi*x)*cos(pi*y) + + +def velocity_norm(t, x, y): + rho = 1.0 + mu = 1.0/100 + nu = 
mu/rho + pi = np.pi + return np.sqrt((-np.exp(-2.0*pi*mu/rho*t)*np.cos(pi*x)*np.sin(pi*y))**2 + (np.exp(-2.0*pi*mu/rho*t)*np.sin(pi*x)*np.cos(pi*y))**2) + + +def plot_pressure(t, n): + h = 2.0/n + x = np.arange(-1,1,h) + y = np.arange(-1,1,h) + xx, yy = np.meshgrid(x, y, sparse=True) + z = pressure(t, xx, yy) + CS = plt.contourf(x,y,z) + cbar = plt.colorbar(CS) + plt.show() + + +def minmax_pressure(t, n): + h = 2.0/n + x = np.arange(-1,1,h) + y = np.arange(-1,1,h) + xx, yy = np.meshgrid(x, y, sparse=True) + z = pressure(t, xx, yy) + return np.min(z), np.max(z) + + +def plot_velocity(t, n): + h = 2.0/n + x = np.arange(-1,1,h) + y = np.arange(-1,1,h) + xx, yy = np.meshgrid(x, y, sparse=True) + z = velocity_norm(t, xx, yy) + CS = plt.contourf(x,y,z) + cbar = plt.colorbar(CS) + plt.show() + + +def minmax_velocity_norm(t, n): + h = 2.0/n + x = np.arange(-1,1,h) + y = np.arange(-1,1,h) + xx, yy = np.meshgrid(x, y, sparse=True) + z = velocity_norm(t, xx, yy) + return np.min(z), np.max(z) + + +print(minmax_velocity_norm(1.0, 64)) + +# plot_pressure(1.0, 100) +# plot_velocity(1.0, 100) + +# dt = 1.0e-4 +# n = 1000 +# t = 0.0 + +# for i in range(20): +# minimum, maximum = minmax_pressure(t, n) +# print("t: {}, n: {}, minumum: {}, maximum: {}".format(t,n,minimum,maximum)) +# t = t + dt diff --git a/test/nonlinear/diffusivewave.mini b/test/nonlinear/diffusivewave.mini index 2f877983394b41d54bb1592276d71d052776ca7e..c82f8ed23330982d6caf40f21d71dbb5d96f0588 100644 --- a/test/nonlinear/diffusivewave.mini +++ b/test/nonlinear/diffusivewave.mini @@ -14,5 +14,12 @@ dt = 0.001 T = 0.01 [formcompiler] +operators = mass, poisson + +[formcompiler.mass] +sumfact = 0, 1 | expand sf +fastdg = 0, 0 | expand sf + +[formcompiler.operator] sumfact = 0, 1 | expand sf -fastdg = 0, 0 | expand sf \ No newline at end of file +fastdg = 0, 0 | expand sf diff --git a/test/nonlinear/diffusivewave.ufl b/test/nonlinear/diffusivewave.ufl index 
66b2524332f317ac05bcdcbd19404315931c974e..5dddfe3cb719343a82fd74f9a109460a97ecb71e 100644 --- a/test/nonlinear/diffusivewave.ufl +++ b/test/nonlinear/diffusivewave.ufl @@ -1,8 +1,10 @@ cell = quadrilateral +degree = 1 +dim = 2 x = SpatialCoordinate(cell) -V = FiniteElement("DG", cell, 1) +V = FiniteElement("DG", cell, degree) u = TrialFunction(V) v = TestFunction(V) @@ -10,7 +12,11 @@ v = TestFunction(V) n = FacetNormal(cell)('+') # penalty factor -gamma = 1.0 +alpha = 1.0 +# h_ext = CellVolume(cell) / FacetArea(cell) +# gamma_ext = (alpha * degree * (degree + dim - 1)) / h_ext +h_int = Min(CellVolume(cell)('+'), CellVolume(cell)('-')) / FacetArea(cell) +gamma_int = (alpha * degree * (degree + dim - 1)) / h_int # SIPG: -1.0, IIPG: 0.0, NIPG: 1.0 theta = 1.0 @@ -20,16 +26,16 @@ K = u**(5./3.) # / Max(1e-8, norm) poisson = inner(K*grad(u), grad(v))*dx \ - + inner(n, avg(K*grad(u)))*jump(v)*dS \ - + gamma*jump(u)*jump(v)*dS \ - - theta*jump(u)*inner(avg(K*grad(v)), n)*dS + - inner(n, avg(K*grad(u)))*jump(v)*dS \ + + gamma_int*jump(u)*jump(v)*dS \ + + theta*jump(u)*inner(avg(K*grad(v)), n)*dS # - inner(n, K*grad(u))*v*ds \ -# + gamma*u*v*ds \ +# + gamma_ext*u*v*ds \ # + theta*u*inner(K*grad(v), n)*ds \ -# - theta*g*inner(K*grad(v), n)*ds \ -# - gamma*g*v*ds +# - gamma_ext*g*v*ds \ +# - theta*g*inner(K*grad(v), n)*ds mass = (u*v)*dx forms = [mass, poisson] -dirichlet_expression = sin(pi*x[0]) +interpolate_expression = sin(pi*x[0]) diff --git a/test/nonlinear/nonlinear.ufl b/test/nonlinear/nonlinear.ufl index f81b287e504909b1381a4ac71f3a84af5e34a01c..425c6cee834c2e940ef2a6b698ac5eb22eaf4bd9 100644 --- a/test/nonlinear/nonlinear.ufl +++ b/test/nonlinear/nonlinear.ufl @@ -10,7 +10,6 @@ v = TestFunction(V) r = (inner(grad(u), grad(v)) + u*u*v - f*v)*dx -forms = [r] exact_solution = g -dirichlet_expression = g +interpolate_expression = g is_dirichlet = 1 \ No newline at end of file diff --git a/test/nonlinear/nonlinear_dg.ufl b/test/nonlinear/nonlinear_dg.ufl index 
5fba927e4256b2c32d1b4a5766c78046dffbddc1..41dfb430c1048e011c04843f496b35947695877f 100644 --- a/test/nonlinear/nonlinear_dg.ufl +++ b/test/nonlinear/nonlinear_dg.ufl @@ -1,10 +1,12 @@ cell = "triangle" -x = SpatialCoordinate(cell) +degree = 1 +dim = 2 +x = SpatialCoordinate(cell) f = -4. g = x[0]*x[0] + x[1]*x[1] -V = FiniteElement("DG", cell, 1) +V = FiniteElement("DG", cell, degree) u = TrialFunction(V) v = TestFunction(V) @@ -15,22 +17,25 @@ def q(u): return u*u # penalty factor -gamma = 1.0 +alpha = 1.0 +h_ext = CellVolume(cell) / FacetArea(cell) +gamma_ext = (alpha * degree * (degree + dim - 1)) / h_ext +h_int = Min(CellVolume(cell)('+'), CellVolume(cell)('-')) / FacetArea(cell) +gamma_int = (alpha * degree * (degree + dim - 1)) / h_int # SIPG: -1.0, IIPG: 0.0, NIPG: 1.0 theta = 1.0 r = inner(grad(u), grad(v))*dx \ - + inner(n, avg(grad(u)))*jump(v)*dS \ - + gamma*jump(u)*jump(v)*dS \ - - theta*jump(u)*inner(avg(grad(v)), n)*dS \ - - inner(n, grad(u))*v*ds \ - + gamma*u*v*ds \ - + theta*u*inner(grad(v), n)*ds \ - f*v*dx \ + q(u)*v*dx \ - - theta*g*inner(grad(v), n)*ds \ - - gamma*g*v*ds + - inner(n, avg(grad(u)))*jump(v)*dS \ + + gamma_int*jump(u)*jump(v)*dS \ + + theta*jump(u)*inner(avg(grad(v)), n)*dS \ + - inner(n, grad(u))*v*ds \ + + gamma_ext*u*v*ds \ + + theta*u*inner(grad(v), n)*ds \ + - gamma_ext*g*v*ds \ + - theta*g*inner(grad(v), n)*ds -forms = [r] exact_solution = g diff --git a/test/nonlinear/nonlinear_dg_matrix_free.mini b/test/nonlinear/nonlinear_dg_matrix_free.mini index 8a1de68470a4375d05253337686105bcb35e6fea..dfd4035d77c067a4678a4c834759fe63ef750f1f 100644 --- a/test/nonlinear/nonlinear_dg_matrix_free.mini +++ b/test/nonlinear/nonlinear_dg_matrix_free.mini @@ -11,9 +11,11 @@ name = {__name} extension = vtu [formcompiler] +compare_l2errorsquared = 5e-3 + +[formcompiler.r] numerical_jacobian = 1, 0 | expand num matrix_free = 1 -compare_l2errorsquared = 5e-3 # Disable numerical methods (not working in PDELab?) 
{__exec_suffix} == numdiff | exclude diff --git a/test/nonlinear/nonlinear_matrix_free.mini b/test/nonlinear/nonlinear_matrix_free.mini index 644297ab361203ec049c27529904d8b09dd2261a..0f28479de167747bebb646b4ad3c4852a417192a 100644 --- a/test/nonlinear/nonlinear_matrix_free.mini +++ b/test/nonlinear/nonlinear_matrix_free.mini @@ -11,6 +11,8 @@ name = {__name} extension = vtu [formcompiler] +compare_l2errorsquared = 6e-4 + +[formcompiler.r] numerical_jacobian = 1, 0 | expand num matrix_free = 1 -compare_l2errorsquared = 6e-4 diff --git a/test/poisson/CMakeLists.txt b/test/poisson/CMakeLists.txt index 68c635bc79eef0818780ca09d36c808c1eb01954..a05cbe8c3650c9e33f8f5c4f1bbce696b2df9ba8 100644 --- a/test/poisson/CMakeLists.txt +++ b/test/poisson/CMakeLists.txt @@ -67,6 +67,12 @@ dune_add_formcompiler_system_test(UFLFILE poisson_dg_tensor.ufl INIFILE poisson_dg_tensor.mini ) +# 12. Poisson Test Case with a custom function +dune_add_formcompiler_system_test(UFLFILE poisson_customfunction.ufl + BASENAME poisson_customfunction + INIFILE poisson_customfunction.mini + ) + # the reference vtk file add_executable(poisson_dg_ref reference_main.cc) set_target_properties(poisson_dg_ref PROPERTIES EXCLUDE_FROM_ALL 1) diff --git a/test/poisson/dimension-grid-variations/poisson_1d_cg_interval.ufl b/test/poisson/dimension-grid-variations/poisson_1d_cg_interval.ufl index 5eefb2d7c12d0e18f1638949abb4bd0524512fd8..d584ea4b577deeec3a893e2ca636d0371c36fb5f 100644 --- a/test/poisson/dimension-grid-variations/poisson_1d_cg_interval.ufl +++ b/test/poisson/dimension-grid-variations/poisson_1d_cg_interval.ufl @@ -9,7 +9,7 @@ V = FiniteElement("CG", cell, 1) u = TrialFunction(V) v = TestFunction(V) -forms = [(inner(grad(u), grad(v)) - f*v)*dx] +r = (inner(grad(u), grad(v)) - f*v)*dx exact_solution = g is_dirichlet = 1 -dirichlet_expression = g +interpolate_expression = g diff --git a/test/poisson/dimension-grid-variations/poisson_1d_dg_interval.ufl 
b/test/poisson/dimension-grid-variations/poisson_1d_dg_interval.ufl index 535d102440fa6c5f5405f61ab0ef82285dfeaa7a..51776b3acb57f87519bb2d493a1e833034c29b3d 100644 --- a/test/poisson/dimension-grid-variations/poisson_1d_dg_interval.ufl +++ b/test/poisson/dimension-grid-variations/poisson_1d_dg_interval.ufl @@ -1,11 +1,13 @@ cell = "interval" +degree = 1 +dim = 1 x = SpatialCoordinate(cell) c = (0.5-x[0])**2 g = exp(-1.*c) f = 2*(1.-2*c)*g -V = FiniteElement("DG", cell, 1) +V = FiniteElement("DG", cell, degree) u = TrialFunction(V) v = TestFunction(V) @@ -13,21 +15,24 @@ v = TestFunction(V) n = FacetNormal(cell)('+') # penalty factor -gamma = 1.0 +alpha = 1.0 +h_ext = CellVolume(cell) / FacetArea(cell) +gamma_ext = (alpha * degree * (degree + dim - 1)) / h_ext +h_int = Min(CellVolume(cell)('+'), CellVolume(cell)('-')) / FacetArea(cell) +gamma_int = (alpha * degree * (degree + dim - 1)) / h_int # SIPG: -1.0, IIPG: 0.0, NIPG: 1.0 theta = 1.0 r = inner(grad(u), grad(v))*dx \ - + inner(n, avg(grad(u)))*jump(v)*dS \ - + gamma*jump(u)*jump(v)*dS \ - - theta*jump(u)*inner(avg(grad(v)), n)*dS \ + - f*v*dx \ + - inner(n, avg(grad(u)))*jump(v)*dS \ + + gamma_int*jump(u)*jump(v)*dS \ + + theta*jump(u)*inner(avg(grad(v)), n)*dS \ - inner(n, grad(u))*v*ds \ - + gamma*u*v*ds \ + + gamma_ext*u*v*ds \ + theta*u*inner(grad(v), n)*ds \ - - f*v*dx \ - - theta*g*inner(grad(v), n)*ds \ - - gamma*g*v*ds + - gamma_ext*g*v*ds \ + - theta*g*inner(grad(v), n)*ds -forms = [r] exact_solution = g diff --git a/test/poisson/dimension-grid-variations/poisson_2d_cg_quadrilateral.ufl b/test/poisson/dimension-grid-variations/poisson_2d_cg_quadrilateral.ufl index d3771aacfe8f806cb3fd1e31d9ba581106885247..5d1921828127bbe2024d36c8674bbe4d9c868190 100644 --- a/test/poisson/dimension-grid-variations/poisson_2d_cg_quadrilateral.ufl +++ b/test/poisson/dimension-grid-variations/poisson_2d_cg_quadrilateral.ufl @@ -9,7 +9,7 @@ V = FiniteElement("CG", cell, 1) u = TrialFunction(V) v = TestFunction(V) -forms = 
[(inner(grad(u), grad(v)) - f*v)*dx] +r = (inner(grad(u), grad(v)) - f*v)*dx exact_solution = g is_dirichlet = 1 -dirichlet_expression = g +interpolate_expression = g diff --git a/test/poisson/dimension-grid-variations/poisson_2d_cg_triangle.ufl b/test/poisson/dimension-grid-variations/poisson_2d_cg_triangle.ufl index e95064dfb1e0d947c51a1c021ac4f0f7a9f7adc1..a720e454218bd38347e62246a84e61fb38d5a0ac 100644 --- a/test/poisson/dimension-grid-variations/poisson_2d_cg_triangle.ufl +++ b/test/poisson/dimension-grid-variations/poisson_2d_cg_triangle.ufl @@ -8,7 +8,7 @@ V = FiniteElement("CG", "triangle", 1) u = TrialFunction(V) v = TestFunction(V) -forms = [(inner(grad(u), grad(v)) - f*v)*dx] +r = (inner(grad(u), grad(v)) - f*v)*dx exact_solution = g is_dirichlet = 1 -dirichlet_expression = g +interpolate_expression = g diff --git a/test/poisson/dimension-grid-variations/poisson_2d_dg_quadrilateral.ufl b/test/poisson/dimension-grid-variations/poisson_2d_dg_quadrilateral.ufl index 84019c1e87a0a368429d49a9c787344dc6bcdaf2..87fd9b591b7d6ce406b655d52926d7117218a90d 100644 --- a/test/poisson/dimension-grid-variations/poisson_2d_dg_quadrilateral.ufl +++ b/test/poisson/dimension-grid-variations/poisson_2d_dg_quadrilateral.ufl @@ -1,11 +1,13 @@ cell = "quadrilateral" +degree = 1 +dim = 2 x = SpatialCoordinate(cell) c = (0.5-x[0])**2 + (0.5-x[1])**2 g = exp(-1.*c) f = 2*(2.-2*c)*g -V = FiniteElement("DG", cell, 1) +V = FiniteElement("DG", cell, degree) u = TrialFunction(V) v = TestFunction(V) @@ -13,21 +15,24 @@ v = TestFunction(V) n = FacetNormal(cell)('+') # penalty factor -gamma = 1.0 +alpha = 1.0 +h_ext = CellVolume(cell) / FacetArea(cell) +gamma_ext = (alpha * degree * (degree + dim - 1)) / h_ext +h_int = Min(CellVolume(cell)('+'), CellVolume(cell)('-')) / FacetArea(cell) +gamma_int = (alpha * degree * (degree + dim - 1)) / h_int # SIPG: -1.0, IIPG: 0.0, NIPG: 1.0 theta = 1.0 r = inner(grad(u), grad(v))*dx \ - + inner(n, avg(grad(u)))*jump(v)*dS \ - + 
gamma*jump(u)*jump(v)*dS \ - - theta*jump(u)*inner(avg(grad(v)), n)*dS \ + - f*v*dx \ + - inner(n, avg(grad(u)))*jump(v)*dS \ + + gamma_int*jump(u)*jump(v)*dS \ + + theta*jump(u)*inner(avg(grad(v)), n)*dS \ - inner(n, grad(u))*v*ds \ - + gamma*u*v*ds \ + + gamma_ext*u*v*ds \ + theta*u*inner(grad(v), n)*ds \ - - f*v*dx \ - - theta*g*inner(grad(v), n)*ds \ - - gamma*g*v*ds + - gamma_ext*g*v*ds \ + - theta*g*inner(grad(v), n)*ds -forms = [r] exact_solution = g diff --git a/test/poisson/dimension-grid-variations/poisson_2d_dg_triangle.ufl b/test/poisson/dimension-grid-variations/poisson_2d_dg_triangle.ufl index d518b11815bc40279f08f2a8802f41c02d2f5703..250b6900d28fa305534334c37d6e07563ad36aa5 100644 --- a/test/poisson/dimension-grid-variations/poisson_2d_dg_triangle.ufl +++ b/test/poisson/dimension-grid-variations/poisson_2d_dg_triangle.ufl @@ -1,10 +1,12 @@ cell = triangle +degree = 1 +dim = 2 x = SpatialCoordinate(cell) f = -4. g = x[0]*x[0] + x[1]*x[1] -V = FiniteElement("DG", cell, 1) +V = FiniteElement("DG", cell, degree) u = TrialFunction(V) v = TestFunction(V) @@ -12,21 +14,24 @@ v = TestFunction(V) n = FacetNormal(cell)('+') # penalty factor -gamma = 1.0 +alpha = 1.0 +h_ext = CellVolume(cell) / FacetArea(cell) +gamma_ext = (alpha * degree * (degree + dim - 1)) / h_ext +h_int = Min(CellVolume(cell)('+'), CellVolume(cell)('-')) / FacetArea(cell) +gamma_int = (alpha * degree * (degree + dim - 1)) / h_int # SIPG: -1.0, IIPG: 0.0, NIPG: 1.0 theta = 1.0 r = inner(grad(u), grad(v))*dx \ - + inner(n, avg(grad(u)))*jump(v)*dS \ - + gamma*jump(u)*jump(v)*dS \ - - theta*jump(u)*inner(avg(grad(v)), n)*dS \ + - f*v*dx \ + - inner(n, avg(grad(u)))*jump(v)*dS \ + + gamma_int*jump(u)*jump(v)*dS \ + + theta*jump(u)*inner(avg(grad(v)), n)*dS \ - inner(n, grad(u))*v*ds \ - + gamma*u*v*ds \ + + gamma_ext*u*v*ds \ + theta*u*inner(grad(v), n)*ds \ - - f*v*dx \ - - theta*g*inner(grad(v), n)*ds \ - - gamma*g*v*ds + - gamma_ext*g*v*ds \ + - theta*g*inner(grad(v), n)*ds -forms = [r] 
exact_solution = g diff --git a/test/poisson/dimension-grid-variations/poisson_3d_cg_hexahedron.ufl b/test/poisson/dimension-grid-variations/poisson_3d_cg_hexahedron.ufl index 93e880daec3964f358b303d02dcb45bca0314743..9a9b16fc17df5b1d9df5d96b7e7e9e49853a09c5 100644 --- a/test/poisson/dimension-grid-variations/poisson_3d_cg_hexahedron.ufl +++ b/test/poisson/dimension-grid-variations/poisson_3d_cg_hexahedron.ufl @@ -8,7 +8,7 @@ V = FiniteElement("CG", cell, 1) u = TrialFunction(V) v = TestFunction(V) -forms = [(inner(grad(u), grad(v)) - f*v)*dx] +r = (inner(grad(u), grad(v)) - f*v)*dx exact_solution = g is_dirichlet = 1 -dirichlet_expression = g +interpolate_expression = g diff --git a/test/poisson/dimension-grid-variations/poisson_3d_cg_tetrahedron.ufl b/test/poisson/dimension-grid-variations/poisson_3d_cg_tetrahedron.ufl index 61ebabe881158548f770b69b999759285520439e..b9df144c0534c800675cdeaf95343cb504b82bfe 100644 --- a/test/poisson/dimension-grid-variations/poisson_3d_cg_tetrahedron.ufl +++ b/test/poisson/dimension-grid-variations/poisson_3d_cg_tetrahedron.ufl @@ -8,7 +8,7 @@ V = FiniteElement("CG", "tetrahedron", 1) u = TrialFunction(V) v = TestFunction(V) -forms = [(inner(grad(u), grad(v)) - f*v)*dx] +r = (inner(grad(u), grad(v)) - f*v)*dx exact_solution = g is_dirichlet = 1 -dirichlet_expression = g +interpolate_expression = g diff --git a/test/poisson/dimension-grid-variations/poisson_3d_dg_hexahedron.ufl b/test/poisson/dimension-grid-variations/poisson_3d_dg_hexahedron.ufl index 8abb5fb0d42197f3b87f38389f222efafeccb30b..810bfa46234b7a3951173cb36e9825a3dd7338c0 100644 --- a/test/poisson/dimension-grid-variations/poisson_3d_dg_hexahedron.ufl +++ b/test/poisson/dimension-grid-variations/poisson_3d_dg_hexahedron.ufl @@ -1,11 +1,13 @@ cell = "hexahedron" +degree = 1 +dim = 3 x = SpatialCoordinate(cell) c = (0.5 - x[0])**2 + (0.5 - x[1])**2 + (0.5 - x[2])**2 g = exp(-1.*c) f = 2*(3.-2*c)*g -V = FiniteElement("DG", cell, 1) +V = FiniteElement("DG", cell, degree) u 
= TrialFunction(V) v = TestFunction(V) @@ -13,21 +15,24 @@ v = TestFunction(V) n = FacetNormal(cell)('+') # penalty factor -gamma = 1.0 +alpha = 1.0 +h_ext = CellVolume(cell) / FacetArea(cell) +gamma_ext = (alpha * degree * (degree + dim - 1)) / h_ext +h_int = Min(CellVolume(cell)('+'), CellVolume(cell)('-')) / FacetArea(cell) +gamma_int = (alpha * degree * (degree + dim - 1)) / h_int # SIPG: -1.0, IIPG: 0.0, NIPG: 1.0 theta = 1.0 r = inner(grad(u), grad(v))*dx \ - + inner(n, avg(grad(u)))*jump(v)*dS \ - + gamma*jump(u)*jump(v)*dS \ - - theta*jump(u)*inner(avg(grad(v)), n)*dS \ + - f*v*dx \ + - inner(n, avg(grad(u)))*jump(v)*dS \ + + gamma_int*jump(u)*jump(v)*dS \ + + theta*jump(u)*inner(avg(grad(v)), n)*dS \ - inner(n, grad(u))*v*ds \ - + gamma*u*v*ds \ + + gamma_ext*u*v*ds \ + theta*u*inner(grad(v), n)*ds \ - - f*v*dx \ - - theta*g*inner(grad(v), n)*ds \ - - gamma*g*v*ds + - gamma_ext*g*v*ds \ + - theta*g*inner(grad(v), n)*ds -forms = [r] -exact_solution = g +exact_solution = g \ No newline at end of file diff --git a/test/poisson/dimension-grid-variations/poisson_3d_dg_tetrahedron.ufl b/test/poisson/dimension-grid-variations/poisson_3d_dg_tetrahedron.ufl index 384d582e45cd0d820e0d57ef40abb2d07ada88e2..f3af6ada266ea8b3099e37ebc544688bf6bfca18 100644 --- a/test/poisson/dimension-grid-variations/poisson_3d_dg_tetrahedron.ufl +++ b/test/poisson/dimension-grid-variations/poisson_3d_dg_tetrahedron.ufl @@ -1,10 +1,12 @@ cell = "tetrahedron" +degree = 1 +dim = 3 x = SpatialCoordinate(cell) f = -6. 
g = x[0]*x[0] + x[1]*x[1] + x[2]*x[2] -V = FiniteElement("DG", cell, 1) +V = FiniteElement("DG", cell, degree) u = TrialFunction(V) v = TestFunction(V) @@ -12,21 +14,24 @@ v = TestFunction(V) n = FacetNormal(cell)('+') # penalty factor -gamma = 1.0 +alpha = 1.0 +h_ext = CellVolume(cell) / FacetArea(cell) +gamma_ext = (alpha * degree * (degree + dim - 1)) / h_ext +h_int = Min(CellVolume(cell)('+'), CellVolume(cell)('-')) / FacetArea(cell) +gamma_int = (alpha * degree * (degree + dim - 1)) / h_int # SIPG: -1.0, IIPG: 0.0, NIPG: 1.0 theta = 1.0 r = inner(grad(u), grad(v))*dx \ - + inner(n, avg(grad(u)))*jump(v)*dS \ - + gamma*jump(u)*jump(v)*dS \ - - theta*jump(u)*inner(avg(grad(v)), n)*dS \ + - f*v*dx \ + - inner(n, avg(grad(u)))*jump(v)*dS \ + + gamma_int*jump(u)*jump(v)*dS \ + + theta*jump(u)*inner(avg(grad(v)), n)*dS \ - inner(n, grad(u))*v*ds \ - + gamma*u*v*ds \ + + gamma_ext*u*v*ds \ + theta*u*inner(grad(v), n)*ds \ - - f*v*dx \ - - theta*g*inner(grad(v), n)*ds \ - - gamma*g*v*ds + - gamma_ext*g*v*ds \ + - theta*g*inner(grad(v), n)*ds -forms = [r] -exact_solution = g +exact_solution = g \ No newline at end of file diff --git a/test/poisson/opcount_poisson_dg.ufl b/test/poisson/opcount_poisson_dg.ufl index 1748e05796f0a0c82073c5b7fb24db9631891c71..4962ce0cefce2cd5ed901994eb8a3b8b75f187a9 100644 --- a/test/poisson/opcount_poisson_dg.ufl +++ b/test/poisson/opcount_poisson_dg.ufl @@ -1,9 +1,9 @@ -degree = 1 cell = "quadrilateral" +degree = 1 dim = 2 -x = SpatialCoordinate(cell) +x = SpatialCoordinate(cell) f = -4. 
g = x[0]*x[0] + x[1]*x[1] @@ -25,15 +25,14 @@ gamma_int = (alpha * degree * (degree + dim - 1)) / h_int theta = 1.0 r = inner(grad(u), grad(v))*dx \ - + inner(n, avg(grad(u)))*jump(v)*dS \ + - f*v*dx \ + - inner(n, avg(grad(u)))*jump(v)*dS \ + gamma_int*jump(u)*jump(v)*dS \ - - theta*jump(u)*inner(avg(grad(v)), n)*dS \ + + theta*jump(u)*inner(avg(grad(v)), n)*dS \ - inner(n, grad(u))*v*ds \ + gamma_ext*u*v*ds \ + theta*u*inner(grad(v), n)*ds \ - - f*v*dx \ - - theta*g*inner(grad(v), n)*ds \ - - gamma_ext*g*v*ds + - gamma_ext*g*v*ds \ + - theta*g*inner(grad(v), n)*ds -forms = [r] -exact_solution = g +exact_solution = g \ No newline at end of file diff --git a/test/poisson/poisson.mini b/test/poisson/poisson.mini index 3597ac0536fddfa5dcb83f5e6f4fdafe09419360..6fac6a11ae0e1f70024452f5c61eb7850ce55450 100644 --- a/test/poisson/poisson.mini +++ b/test/poisson/poisson.mini @@ -12,5 +12,7 @@ reference = poisson_ref extension = vtu [formcompiler] -numerical_jacobian = 1, 0 | expand num compare_l2errorsquared = 1e-7 + +[formcompiler.r] +numerical_jacobian = 1, 0 | expand num diff --git a/test/poisson/poisson.ufl b/test/poisson/poisson.ufl index 2bfe33131e8fc3456a1d68781874a1bcae0f87af..5c6cf421ebc94b7419a8aa1d8d6df29f28cf0bb2 100644 --- a/test/poisson/poisson.ufl +++ b/test/poisson/poisson.ufl @@ -11,7 +11,7 @@ u = TrialFunction(V) v = TestFunction(V) -forms = [(inner(grad(u), grad(v)) - f*v)*dx] +r = (inner(grad(u), grad(v)) - f*v)*dx exact_solution = g -dirichlet_expression = g -is_dirichlet = 1 \ No newline at end of file +interpolate_expression = g +is_dirichlet = 1 diff --git a/test/poisson/poisson_customfunction.mini b/test/poisson/poisson_customfunction.mini new file mode 100644 index 0000000000000000000000000000000000000000..c45e50fabd2b2b7743a0a6d94a3eb88e6926800f --- /dev/null +++ b/test/poisson/poisson_customfunction.mini @@ -0,0 +1,18 @@ +__name = poisson_customfunction_{__exec_suffix} +__exec_suffix = numdiff, symdiff | expand num + +lowerleft = 0.0 0.0 
+upperright = 1.0 1.0 +elements = 32 32 +elementType = simplical + +[wrapper.vtkcompare] +name = {__name} +reference = poisson_ref +extension = vtu + +[formcompiler] +compare_l2errorsquared = 1e-7 + +[formcompiler.r] +numerical_jacobian = 1, 0 | expand num diff --git a/test/poisson/poisson_customfunction.ufl b/test/poisson/poisson_customfunction.ufl new file mode 100644 index 0000000000000000000000000000000000000000..026e069d697e8bfe05e8393abaf1118169f884a3 --- /dev/null +++ b/test/poisson/poisson_customfunction.ufl @@ -0,0 +1,33 @@ +import ufl + +cell = triangle + +x = SpatialCoordinate(cell) + +class SquareFct(ufl.classes.MathFunction): + def __init__(self, arg): + ufl.classes.MathFunction.__init__(self, 'square', arg) + + def _ufl_expr_reconstruct_(self, *operands): + return SquareFct(*operands) + + def derivative(self): + return 2 * self.ufl_operands[0] + + def visit(self, visitor): + op = visitor.call(self.ufl_operands[0]) + return op * op + + +c = SquareFct(0.5-x[0]) + SquareFct(0.5-x[1]) +g = exp(-1.*c) +f = 4*(1.-c)*g + +V = FiniteElement("CG", cell, 1) +u = TrialFunction(V) +v = TestFunction(V) + +r = (inner(grad(u), grad(v)) - f*v)*dx +exact_solution = g +interpolate_expression = g +is_dirichlet = 1 \ No newline at end of file diff --git a/test/poisson/poisson_dg.mini b/test/poisson/poisson_dg.mini index fd859d6045eb496ab40e5a67a69d14e289bd9d19..bb806382e6b6fe110e1149bd58821bba2820a33e 100644 --- a/test/poisson/poisson_dg.mini +++ b/test/poisson/poisson_dg.mini @@ -12,5 +12,7 @@ reference = poisson_dg_ref extension = vtu [formcompiler] -numerical_jacobian = 1, 0 | expand num compare_l2errorsquared = 9e-8 + +[formcompiler.r] +numerical_jacobian = 1, 0 | expand num \ No newline at end of file diff --git a/test/poisson/poisson_dg.ufl b/test/poisson/poisson_dg.ufl index 9ac02bdf061feb9788488ba188e9d3b864ed59e0..75b536b851aa6752ea3c7505087f61f7f85177bd 100644 --- a/test/poisson/poisson_dg.ufl +++ b/test/poisson/poisson_dg.ufl @@ -25,15 +25,14 @@ gamma_int = 
(alpha * degree * (degree + dim - 1)) / h_int theta = 1.0 r = inner(grad(u), grad(v))*dx \ - + inner(n, avg(grad(u)))*jump(v)*dS \ + - f*v*dx \ + - inner(n, avg(grad(u)))*jump(v)*dS \ + gamma_int*jump(u)*jump(v)*dS \ - - theta*jump(u)*inner(avg(grad(v)), n)*dS \ + + theta*jump(u)*inner(avg(grad(v)), n)*dS \ - inner(n, grad(u))*v*ds \ + gamma_ext*u*v*ds \ + theta*u*inner(grad(v), n)*ds \ - - f*v*dx \ - - theta*g*inner(grad(v), n)*ds \ - - gamma_ext*g*v*ds + - gamma_ext*g*v*ds \ + - theta*g*inner(grad(v), n)*ds -forms = [r] exact_solution = g \ No newline at end of file diff --git a/test/poisson/poisson_dg_matrix_free.mini b/test/poisson/poisson_dg_matrix_free.mini index 4fa26853c64f1075cc0de65ba1937890066bf54b..0d8d4cd6734673b944009f8377847588879d4a37 100644 --- a/test/poisson/poisson_dg_matrix_free.mini +++ b/test/poisson/poisson_dg_matrix_free.mini @@ -12,6 +12,8 @@ reference = poisson_dg_ref extension = vtu [formcompiler] -numerical_jacobian = 1, 0 | expand num -matrix_free = 1 compare_l2errorsquared = 1e-6 + +[formcompiler.r] +numerical_jacobian = 1, 0 | expand num +matrix_free = 1 \ No newline at end of file diff --git a/test/poisson/poisson_dg_neumann.mini b/test/poisson/poisson_dg_neumann.mini index 43157de9980380b50d1a5f8a29df2997309ef7cd..7d930fb92e7813ff37c9c4ea49bdc7c2ae34d80b 100644 --- a/test/poisson/poisson_dg_neumann.mini +++ b/test/poisson/poisson_dg_neumann.mini @@ -12,5 +12,7 @@ reference = poisson_dg_ref extension = vtu [formcompiler] -numerical_jacobian = 1, 0 | expand num compare_l2errorsquared = 9e-8 + +[formcompiler.r] +numerical_jacobian = 1, 0 | expand num diff --git a/test/poisson/poisson_dg_neumann.ufl b/test/poisson/poisson_dg_neumann.ufl index 3cedfc0877164e86e6edef343de11784a99d1d0e..bdcea093ef8d0d0b53f4a965513d5d8ae1b058e2 100644 --- a/test/poisson/poisson_dg_neumann.ufl +++ b/test/poisson/poisson_dg_neumann.ufl @@ -1,6 +1,6 @@ -dim = 3 -degree = 1 cell = triangle +degree = 1 +dim = 2 x = SpatialCoordinate(cell) c = (0.5-x[0])**2 + 
(0.5-x[1])**2 @@ -30,16 +30,15 @@ gamma_int = (alpha * degree * (degree + dim - 1)) / h_int theta = 1.0 r = inner(grad(u), grad(v))*dx \ - + inner(n, avg(grad(u)))*jump(v)*dS \ + - f*v*dx \ + - inner(n, avg(grad(u)))*jump(v)*dS \ + gamma_int*jump(u)*jump(v)*dS \ - - theta*jump(u)*inner(avg(grad(v)), n)*dS \ + + theta*jump(u)*inner(avg(grad(v)), n)*dS \ - inner(n, grad(u))*v*ds(1) \ + gamma_ext*u*v*ds(1) \ + theta*u*inner(grad(v), n)*ds(1) \ - - f*v*dx \ - - theta*g*inner(grad(v), n)*ds(1) \ - gamma_ext*g*v*ds(1) \ + - theta*g*inner(grad(v), n)*ds(1) \ - j*v*ds(0) -forms = [r] exact_solution = g diff --git a/test/poisson/poisson_dg_quadrilateral.mini b/test/poisson/poisson_dg_quadrilateral.mini index 05da536acc9dd1864dd59f5d817f397e131ed541..dde2e495ab819ed22be6b848518ed71d41cfe30f 100644 --- a/test/poisson/poisson_dg_quadrilateral.mini +++ b/test/poisson/poisson_dg_quadrilateral.mini @@ -10,5 +10,7 @@ name = {__name} extension = vtu [formcompiler] -numerical_jacobian = 1, 0 | expand num compare_l2errorsquared = 7e-7 + +[formcompiler.r] +numerical_jacobian = 1, 0 | expand num diff --git a/test/poisson/poisson_dg_quadrilateral.ufl b/test/poisson/poisson_dg_quadrilateral.ufl index 30b1eb8bcd225600c9f78801f3498077388444f3..7387894341f25221d412ae967717a3bfca8788f4 100644 --- a/test/poisson/poisson_dg_quadrilateral.ufl +++ b/test/poisson/poisson_dg_quadrilateral.ufl @@ -1,11 +1,13 @@ cell = "quadrilateral" +degree = 1 +dim = 2 x = SpatialCoordinate(cell) c = (0.5-x[0])**2 + (0.5-x[1])**2 g = exp(-1.*c) f = 4*(1.-c)*g -V = FiniteElement("DG", cell, 1) +V = FiniteElement("DG", cell, degree) u = TrialFunction(V) v = TestFunction(V) @@ -13,21 +15,24 @@ v = TestFunction(V) n = FacetNormal(cell)('+') # penalty factor -gamma = 1.0 +alpha = 1.0 +h_ext = CellVolume(cell) / FacetArea(cell) +gamma_ext = (alpha * degree * (degree + dim - 1)) / h_ext +h_int = Min(CellVolume(cell)('+'), CellVolume(cell)('-')) / FacetArea(cell) +gamma_int = (alpha * degree * (degree + dim - 1)) / h_int 
# SIPG: -1.0, IIPG: 0.0, NIPG: 1.0 theta = 1.0 r = inner(grad(u), grad(v))*dx \ - + inner(n, avg(grad(u)))*jump(v)*dS \ - + gamma*jump(u)*jump(v)*dS \ - - theta*jump(u)*inner(avg(grad(v)), n)*dS \ + - f*v*dx \ + - inner(n, avg(grad(u)))*jump(v)*dS \ + + gamma_int*jump(u)*jump(v)*dS \ + + theta*jump(u)*inner(avg(grad(v)), n)*dS \ - inner(n, grad(u))*v*ds \ - + gamma*u*v*ds \ + + gamma_ext*u*v*ds \ + theta*u*inner(grad(v), n)*ds \ - - f*v*dx \ - - theta*g*inner(grad(v), n)*ds \ - - gamma*g*v*ds + - gamma_ext*g*v*ds \ + - theta*g*inner(grad(v), n)*ds -forms = [r] exact_solution = g diff --git a/test/poisson/poisson_dg_tensor.mini b/test/poisson/poisson_dg_tensor.mini index 52df8e1da0f9cadae3d553e00092c053dafac4c1..d696cebc5196bd711ef4e8e2e1371a977be3a31f 100644 --- a/test/poisson/poisson_dg_tensor.mini +++ b/test/poisson/poisson_dg_tensor.mini @@ -12,5 +12,7 @@ reference = poisson_dg_ref extension = vtu [formcompiler] -numerical_jacobian = 1, 0 | expand num compare_l2errorsquared = 4e-6 + +[formcompiler.r] +numerical_jacobian = 1, 0 | expand num \ No newline at end of file diff --git a/test/poisson/poisson_dg_tensor.ufl b/test/poisson/poisson_dg_tensor.ufl index 9409ece67ae08270896caf5adf87ced67ebced01..ba8eb74d084f7e4b3efc9e940beb616e305b609c 100644 --- a/test/poisson/poisson_dg_tensor.ufl +++ b/test/poisson/poisson_dg_tensor.ufl @@ -1,6 +1,6 @@ -dim = 2 -degree = 1 cell = quadrilateral +degree = 1 +dim = 2 x = SpatialCoordinate(cell) @@ -28,15 +28,14 @@ gamma_int = (alpha * degree * (degree + dim - 1)) / h_int theta = 1.0 r = (inner(A*grad(u), grad(v)) + c*u*v)*dx \ - + inner(n, A*avg(grad(u)))*jump(v)*dS \ + - f*v*dx \ + - inner(n, A*avg(grad(u)))*jump(v)*dS \ + gamma_int*jump(u)*jump(v)*dS \ - - theta*jump(u)*inner(A*avg(grad(v)), n)*dS \ + + theta*jump(u)*inner(A*avg(grad(v)), n)*dS \ - inner(n, A*grad(u))*v*ds \ + gamma_ext*u*v*ds \ + theta*u*inner(A*grad(v), n)*ds \ - - f*v*dx \ - - theta*g*inner(A*grad(v), n)*ds \ - - gamma_ext*g*v*ds + - gamma_ext*g*v*ds \ + 
- theta*g*inner(A*grad(v), n)*ds -forms = [r] exact_solution = g diff --git a/test/poisson/poisson_matrix_free.mini b/test/poisson/poisson_matrix_free.mini index 5709ef999b6447810847c80948fb010994fd07fc..3372c8ac453f91d001c3178110e1d6caacec6b48 100644 --- a/test/poisson/poisson_matrix_free.mini +++ b/test/poisson/poisson_matrix_free.mini @@ -11,5 +11,7 @@ reference = poisson_ref extension = vtu [formcompiler] -matrix_free = 1 compare_l2errorsquared = 1e-7 + +[formcompiler.r] +matrix_free = 1 \ No newline at end of file diff --git a/test/poisson/poisson_neumann.mini b/test/poisson/poisson_neumann.mini index 0c4aa9c7ff2f7470f9f39e146c933dbb4f452ee0..76a1fa9e12af16426692c197b80a14599aaee74b 100644 --- a/test/poisson/poisson_neumann.mini +++ b/test/poisson/poisson_neumann.mini @@ -12,5 +12,7 @@ reference = poisson_ref extension = vtu [formcompiler] -numerical_jacobian = 1, 0 | expand num compare_l2errorsquared = 8e-8 + +[formcompiler.r] +numerical_jacobian = 1, 0 | expand num diff --git a/test/poisson/poisson_neumann.ufl b/test/poisson/poisson_neumann.ufl index d951842134b2859689629189bed631c28caf2442..16eea674b5fd40003af4817ee2c43d461b45e15e 100644 --- a/test/poisson/poisson_neumann.ufl +++ b/test/poisson/poisson_neumann.ufl @@ -16,7 +16,7 @@ v = TestFunction(V) # Define the boundary measure that knows where we are... 
ds = ds(subdomain_data=bctype) -forms = [(inner(grad(u), grad(v)) - f*v)*dx - j*v*ds(0)] +r = (inner(grad(u), grad(v)) - f*v)*dx - j*v*ds(0) exact_solution = g is_dirichlet = bctype -dirichlet_expression = g \ No newline at end of file +interpolate_expression = g \ No newline at end of file diff --git a/test/poisson/poisson_tensor.mini b/test/poisson/poisson_tensor.mini index 8711de545698818505fc63e8701206bce8acdf54..ec4d2c310bf6ba32e9d17aca53aeba00f6cbc4ee 100644 --- a/test/poisson/poisson_tensor.mini +++ b/test/poisson/poisson_tensor.mini @@ -12,5 +12,7 @@ reference = poisson_ref extension = vtu [formcompiler] -numerical_jacobian = 1, 0 | expand num -compare_l2errorsquared = 1e-7 \ No newline at end of file +compare_l2errorsquared = 1e-7 + +[formcompiler.r] +numerical_jacobian = 1, 0 | expand num \ No newline at end of file diff --git a/test/poisson/poisson_tensor.ufl b/test/poisson/poisson_tensor.ufl index 7208c1e0271e91d6ece79ee388510dc2f64a7a25..b527d05258667dae629f608a1a630e5f11f947b8 100644 --- a/test/poisson/poisson_tensor.ufl +++ b/test/poisson/poisson_tensor.ufl @@ -12,7 +12,7 @@ V = FiniteElement("CG", cell, 1) u = TrialFunction(V) v = TestFunction(V) -forms = [(inner(A*grad(u), grad(v)) + c*u*v -f*v)*dx] +r= (inner(A*grad(u), grad(v)) + c*u*v -f*v)*dx exact_solution = g is_dirichlet = 1 -dirichlet_expression = g \ No newline at end of file +interpolate_expression = g \ No newline at end of file diff --git a/test/stokes/stokes.mini b/test/stokes/stokes.mini index c236f3a87ddec3867557ef6056f3096c04f4be1e..b281b6bbe7755097c091007d1c9ed0b6840afad2 100644 --- a/test/stokes/stokes.mini +++ b/test/stokes/stokes.mini @@ -13,5 +13,7 @@ reference = hagenpoiseuille_ref extension = vtu [formcompiler] -numerical_jacobian = 0, 1 | expand num compare_l2errorsquared = 1e-11 + +[formcompiler.r] +numerical_jacobian = 0, 1 | expand num diff --git a/test/stokes/stokes.ufl b/test/stokes/stokes.ufl index 
4307fbeee64a03e4ace5cb63931458ae5d9edbcf..99f21bbc943fd51fa1e514b7438e776e8a91e1f0 100644 --- a/test/stokes/stokes.ufl +++ b/test/stokes/stokes.ufl @@ -13,7 +13,6 @@ u, p = TrialFunctions(TH) r = (inner(grad(v), grad(u)) - div(v)*p - q*div(u))*dx -forms = [r] is_dirichlet = v_bctype, v_bctype, 0 -dirichlet_expression = g_v, None +interpolate_expression = g_v, None exact_solution = g_v, 8.*(1.-x[0]) \ No newline at end of file diff --git a/test/stokes/stokes_3d_dg_quadrilateral.mini b/test/stokes/stokes_3d_dg_quadrilateral.mini index d7c82422aab9a3f4522d9ba28c41514da2345f8f..59396277ede15f6a563d04fb448c4d3b5a445b3b 100644 --- a/test/stokes/stokes_3d_dg_quadrilateral.mini +++ b/test/stokes/stokes_3d_dg_quadrilateral.mini @@ -10,5 +10,7 @@ name = {__name} extension = vtu [formcompiler] +compare_l2errorsquared = 6e-8 + +[formcompiler.r] numerical_jacobian = 0, 1 | expand num -compare_l2errorsquared = 6e-8 \ No newline at end of file diff --git a/test/stokes/stokes_3d_dg_quadrilateral.ufl b/test/stokes/stokes_3d_dg_quadrilateral.ufl index 84d1003e16d7f4f36dc0630434d98eb3d633cd3a..c47773f5821ce09ecb6182e81eeaff6bf17a7fc7 100644 --- a/test/stokes/stokes_3d_dg_quadrilateral.ufl +++ b/test/stokes/stokes_3d_dg_quadrilateral.ufl @@ -1,11 +1,13 @@ cell = hexahedron +degree = 2 +dim = 3 x = SpatialCoordinate(cell) g_v = as_vector((4.*x[1]*(1.-x[1]), 0.0, 0.0)) bctype = conditional(x[0] < 1. 
- 1e-8, 1, 0) -P2 = VectorElement("DG", cell, 2) -P1 = FiniteElement("DG", cell, 1) +P2 = VectorElement("DG", cell, degree) +P1 = FiniteElement("DG", cell, degree-1) TH = P2 * P1 v, q = TestFunctions(TH) @@ -14,23 +16,29 @@ u, p = TrialFunctions(TH) ds = ds(subdomain_id=1, subdomain_data=bctype) n = FacetNormal(cell)('+') -eps = -1.0 -sigma = 1.0 -h_e = Min(CellVolume(cell)('+'), CellVolume(cell)('-')) / FacetArea(cell) + +# SIPG: -1.0, IIPG: 0.0, NIPG: 1.0 +theta = -1.0 + +# penalty factor +alpha = 1.0 +h_ext = CellVolume(cell) / FacetArea(cell) +gamma_ext = (alpha * degree * (degree + dim - 1)) / h_ext +h_int = Min(CellVolume(cell)('+'), CellVolume(cell)('-')) / FacetArea(cell) +gamma_int = (alpha * degree * (degree + dim - 1)) / h_int r = inner(grad(u), grad(v))*dx \ - p*div(v)*dx \ - q*div(u)*dx \ - + inner(avg(grad(u))*n, jump(v))*dS \ - + sigma / h_e * inner(jump(u), jump(v))*dS \ - - eps * inner(avg(grad(v))*n, jump(u))*dS \ - - avg(p)*inner(jump(v), n)*dS \ - - avg(q)*inner(jump(u), n)*dS \ + - inner(avg(grad(u))*n, jump(v))*dS \ + + gamma_int * inner(jump(u), jump(v))*dS \ + + theta * inner(avg(grad(v))*n, jump(u))*dS \ + + avg(p)*inner(jump(v), n)*dS \ + + avg(q)*inner(jump(u), n)*dS \ - inner(grad(u)*n, v)*ds \ - + sigma / h_e * inner(u-g_v, v)*ds \ - + eps * inner(grad(v)*n, u-g_v)*ds \ + + gamma_ext * inner(u-g_v, v)*ds \ + + theta * inner(grad(v)*n, u-g_v)*ds \ + p*inner(v, n)*ds \ + q*inner(u-g_v, n)*ds -forms = [r] exact_solution = g_v, 8.*(1.-x[0]) diff --git a/test/stokes/stokes_3d_quadrilateral.mini b/test/stokes/stokes_3d_quadrilateral.mini index 89c4796da75f3212ca59f44cace5a53229b1259a..17f3d9f510054ff9014830e258edd7840180248e 100644 --- a/test/stokes/stokes_3d_quadrilateral.mini +++ b/test/stokes/stokes_3d_quadrilateral.mini @@ -11,5 +11,7 @@ name = {__name} extension = vtu [formcompiler] +compare_l2errorsquared = 1e-10 + +[formcompiler.r] numerical_jacobian = 1, 0 | expand num -compare_l2errorsquared = 1e-10 \ No newline at end of file diff 
--git a/test/stokes/stokes_3d_quadrilateral.ufl b/test/stokes/stokes_3d_quadrilateral.ufl index 0888298844ccf8e7ebda072c26c4908e96565072..f39cdcd42803cc94fd03ca98059707bafc1a844a 100644 --- a/test/stokes/stokes_3d_quadrilateral.ufl +++ b/test/stokes/stokes_3d_quadrilateral.ufl @@ -13,7 +13,6 @@ u, p = TrialFunctions(TH) r = (inner(grad(v), grad(u)) - div(v)*p - q*div(u))*dx -forms = [r] exact_solution = g_v, 8.*(1.-x[0]) is_dirichlet = v_bctype, v_bctype, v_bctype, 0 -dirichlet_expression = g_v, None +interpolate_expression = g_v, None diff --git a/test/stokes/stokes_dg.mini b/test/stokes/stokes_dg.mini index 253a347941a128a1f2b38d736a055c6f827a3088..2fa0e00a83866ef7d1e3fa5036181e91973a92fa 100644 --- a/test/stokes/stokes_dg.mini +++ b/test/stokes/stokes_dg.mini @@ -15,5 +15,7 @@ zeroThreshold.data_0 = 1e-6 zeroThreshold.data_1 = 1e-6 [formcompiler] -numerical_jacobian = 0, 1 | expand num compare_l2errorsquared = 1e-9 + +[formcompiler.r] +numerical_jacobian = 0, 1 | expand num diff --git a/test/stokes/stokes_dg.ufl b/test/stokes/stokes_dg.ufl index e7176b264efaf2bbb20146d5c19cd8d4b72a2c45..d4f4225b939d228fc4436d482a3cc989a97ad78f 100644 --- a/test/stokes/stokes_dg.ufl +++ b/test/stokes/stokes_dg.ufl @@ -1,11 +1,13 @@ cell = triangle +degree = 2 +dim = 2 x = SpatialCoordinate(cell) g_v = as_vector((4*x[1]*(1.-x[1]), 0.0)) bctype = conditional(x[0] < 1. 
- 1e-8, 1, 0) -P2 = VectorElement("DG", cell, 2) -P1 = FiniteElement("DG", cell, 1) +P2 = VectorElement("DG", cell, degree) +P1 = FiniteElement("DG", cell, degree-1) TH = P2 * P1 v, q = TestFunctions(TH) @@ -14,23 +16,29 @@ u, p = TrialFunctions(TH) ds = ds(subdomain_id=1, subdomain_data=bctype) n = FacetNormal(cell)('+') -eps = -1.0 -sigma = 1.0 -h_e = Min(CellVolume(cell)('+'), CellVolume(cell)('-')) / FacetArea(cell) + +# SIPG: -1.0, IIPG: 0.0, NIPG: 1.0 +theta = -1.0 + +# penalty factor +alpha = 1.0 +h_ext = CellVolume(cell) / FacetArea(cell) +gamma_ext = (alpha * degree * (degree + dim - 1)) / h_ext +h_int = Min(CellVolume(cell)('+'), CellVolume(cell)('-')) / FacetArea(cell) +gamma_int = (alpha * degree * (degree + dim - 1)) / h_int r = inner(grad(u), grad(v))*dx \ - p*div(v)*dx \ - q*div(u)*dx \ - + inner(avg(grad(u))*n, jump(v))*dS \ - + sigma / h_e * inner(jump(u), jump(v))*dS \ - - eps * inner(avg(grad(v))*n, jump(u))*dS \ - - avg(p)*inner(jump(v), n)*dS \ - - avg(q)*inner(jump(u), n)*dS \ + - inner(avg(grad(u))*n, jump(v))*dS \ + + gamma_int * inner(jump(u), jump(v))*dS \ + + theta * inner(avg(grad(v))*n, jump(u))*dS \ + + avg(p)*inner(jump(v), n)*dS \ + + avg(q)*inner(jump(u), n)*dS \ - inner(grad(u)*n, v)*ds \ - + sigma / h_e * inner(u-g_v, v)*ds \ - + eps * inner(grad(v)*n, u-g_v)*ds \ + + gamma_ext * inner(u-g_v, v)*ds \ + + theta * inner(grad(v)*n, u-g_v)*ds \ + p*inner(v, n)*ds \ + q*inner(u-g_v, n)*ds -forms = [r] exact_solution = g_v, 8*(1.-x[0]) \ No newline at end of file diff --git a/test/stokes/stokes_dg_quadrilateral.mini b/test/stokes/stokes_dg_quadrilateral.mini index 7f25099677036a8bfe1f715984ee500d9dc4d015..78954b12873569589c2874d15858c6121a242eb5 100644 --- a/test/stokes/stokes_dg_quadrilateral.mini +++ b/test/stokes/stokes_dg_quadrilateral.mini @@ -10,5 +10,7 @@ name = {__name} extension = vtu [formcompiler] -numerical_jacobian = 0, 1 | expand num compare_l2errorsquared = 1e-8 + +[formcompiler.r] +numerical_jacobian = 0, 1 | expand num 
diff --git a/test/stokes/stokes_dg_quadrilateral.ufl b/test/stokes/stokes_dg_quadrilateral.ufl index 8f4415a7a3c30635e9add35faf58336995a820e4..0b37429b54cbe4e77fad3eb0abd88d9e345b7dcf 100644 --- a/test/stokes/stokes_dg_quadrilateral.ufl +++ b/test/stokes/stokes_dg_quadrilateral.ufl @@ -1,11 +1,13 @@ cell = quadrilateral +degree = 2 +dim = 2 x = SpatialCoordinate(cell) g_v = as_vector((4*x[1]*(1.-x[1]), 0.0)) bctype = conditional(x[0] < 1. - 1e-8, 1, 0) -P2 = VectorElement("DG", cell, 2) -P1 = FiniteElement("DG", cell, 1) +P2 = VectorElement("DG", cell, degree) +P1 = FiniteElement("DG", cell, degree-1) TH = P2 * P1 v, q = TestFunctions(TH) @@ -14,23 +16,29 @@ u, p = TrialFunctions(TH) ds = ds(subdomain_id=1, subdomain_data=bctype) n = FacetNormal(cell)('+') -eps = -1.0 -sigma = 1.0 -h_e = Min(CellVolume(cell)('+'), CellVolume(cell)('-')) / FacetArea(cell) + +# SIPG: -1.0, IIPG: 0.0, NIPG: 1.0 +theta = -1.0 + +# penalty factor +alpha = 1.0 +h_ext = CellVolume(cell) / FacetArea(cell) +gamma_ext = (alpha * degree * (degree + dim - 1)) / h_ext +h_int = Min(CellVolume(cell)('+'), CellVolume(cell)('-')) / FacetArea(cell) +gamma_int = (alpha * degree * (degree + dim - 1)) / h_int r = inner(grad(u), grad(v))*dx \ - p*div(v)*dx \ - q*div(u)*dx \ - + inner(avg(grad(u))*n, jump(v))*dS \ - + sigma / h_e * inner(jump(u), jump(v))*dS \ - - eps * inner(avg(grad(v))*n, jump(u))*dS \ - - avg(p)*inner(jump(v), n)*dS \ - - avg(q)*inner(jump(u), n)*dS \ + - inner(avg(grad(u))*n, jump(v))*dS \ + + gamma_int * inner(jump(u), jump(v))*dS \ + + theta * inner(avg(grad(v))*n, jump(u))*dS \ + + avg(p)*inner(jump(v), n)*dS \ + + avg(q)*inner(jump(u), n)*dS \ - inner(grad(u)*n, v)*ds \ - + sigma / h_e * inner(u-g_v, v)*ds \ - + eps * inner(grad(v)*n, u-g_v)*ds \ + + gamma_ext * inner(u-g_v, v)*ds \ + + theta * inner(grad(v)*n, u-g_v)*ds \ + p*inner(v, n)*ds \ + q*inner(u-g_v, n)*ds -forms = [r] exact_solution = g_v, 8*(1.-x[0]) diff --git a/test/stokes/stokes_quadrilateral.mini 
b/test/stokes/stokes_quadrilateral.mini index e9440771716292bda9664a26b2c8911e00a65a34..6ee36e8220463cbb75764a91b0ae1d2970f28eb3 100644 --- a/test/stokes/stokes_quadrilateral.mini +++ b/test/stokes/stokes_quadrilateral.mini @@ -11,5 +11,7 @@ name = {__name} extension = vtu [formcompiler] -numerical_jacobian = 1, 0 | expand num compare_l2errorsquared = 1e-10 + +[formcompiler.r] +numerical_jacobian = 1, 0 | expand num diff --git a/test/stokes/stokes_quadrilateral.ufl b/test/stokes/stokes_quadrilateral.ufl index c8f630b84aae43a68221ace670fe9fcb027af47d..4411e791138cf85824f24fc6549d52cf6ef1af0d 100644 --- a/test/stokes/stokes_quadrilateral.ufl +++ b/test/stokes/stokes_quadrilateral.ufl @@ -13,7 +13,6 @@ u, p = TrialFunctions(TH) r = (inner(grad(v), grad(u)) - div(v)*p - q*div(u))*dx -forms = [r] is_dirichlet = v_bctype, v_bctype, 0 -dirichlet_expression = g_v, None +interpolate_expression = g_v, None exact_solution = g_v, 8.*(1.-x[0]) diff --git a/test/stokes/stokes_stress.mini b/test/stokes/stokes_stress.mini index af72867f443e85aa8b824f66a0c155b9b9f4e9a6..9663f5f1f9d329c02e568a169aeacb66ef4c2e63 100644 --- a/test/stokes/stokes_stress.mini +++ b/test/stokes/stokes_stress.mini @@ -15,6 +15,7 @@ reference = hagenpoiseuille_ref extension = vtu [formcompiler] -# numerical_jacobian = 0, 1 | expand num -numerical_jacobian = 1 compare_l2errorsquared = 1e-11 + +[formcompiler.r] +numerical_jacobian = 1 diff --git a/test/stokes/stokes_stress.ufl b/test/stokes/stokes_stress.ufl index a25a73adba1baa7fc141864320f7c639ff9808df..787e5a232383ad9e0767a6fb41e08cedc988943f 100644 --- a/test/stokes/stokes_stress.ufl +++ b/test/stokes/stokes_stress.ufl @@ -14,7 +14,6 @@ u, p, S = TrialFunctions(TH) r = (inner(grad(v), S) + inner(grad(u) - S, T) - div(v)*p - q*div(u))*dx -forms = [r] is_dirichlet = v_bctype, v_bctype, 0, 0, 0, 0, 0 -dirichlet_expression = 4*x[1]*(1.-x[1]), 0.0, None, None, None, None, None +interpolate_expression = 4*x[1]*(1.-x[1]), 0.0, None, None, None, None, None 
exact_solution = 4*x[1]*(1.-x[1]), 0.0, 8*(1.-x[0]), 0.0, 0.0, -1.*8*x[1] + 4., 0.0 \ No newline at end of file diff --git a/test/stokes/stokes_stress_sym.mini b/test/stokes/stokes_stress_sym.mini index 1aa3d6f087cae99310dd2c7b3a50721f3fd447f6..9646ec0840f422c924e53ef289a87a6cebc6c3cb 100644 --- a/test/stokes/stokes_stress_sym.mini +++ b/test/stokes/stokes_stress_sym.mini @@ -13,5 +13,7 @@ reference = hagenpoiseuille_ref extension = vtu [formcompiler] -numerical_jacobian = 1 compare_l2errorsquared = 1e-6 + +[formcompiler.r] +numerical_jacobian = 1 diff --git a/test/stokes/stokes_stress_sym.ufl b/test/stokes/stokes_stress_sym.ufl index f5dc520c07202ee05b4294b153d90f3dcd5b4ab0..8e2d55dd4cfca7335932b751d5ae2c2bb71aad59 100644 --- a/test/stokes/stokes_stress_sym.ufl +++ b/test/stokes/stokes_stress_sym.ufl @@ -20,7 +20,6 @@ r = (inner(grad(v), S) + inner(2*sym(grad(u)) - S, T) - div(v)*p - q*div(u))*dx # \ # + inner(S.T*n, v)*ds -forms = [r] is_dirichlet = v_bctype, v_bctype, 0, 0, 0, 0 -dirichlet_expression = 4*x[1]*(1.-x[1]), 0.0, None, None, None, None +interpolate_expression = 4*x[1]*(1.-x[1]), 0.0, None, None, None, None exact_solution = 4*x[1]*(1.-x[1]), 0.0, 8*(1.-x[0]), 0.0, 0.0, -1.*8*x[1] + 4. 
\ No newline at end of file diff --git a/test/stokes/stokes_sym.mini b/test/stokes/stokes_sym.mini index 26cc91467a701e8643b040453943d83a2ece3e96..89dcee74944f7445b78176aafc61a7105d4c5f99 100644 --- a/test/stokes/stokes_sym.mini +++ b/test/stokes/stokes_sym.mini @@ -13,5 +13,7 @@ reference = hagenpoiseuille_ref extension = vtu [formcompiler] -numerical_jacobian = 0, 1 | expand num compare_l2errorsquared = 1e-10 + +[formcompiler.r] +numerical_jacobian = 0, 1 | expand num diff --git a/test/stokes/stokes_sym.ufl b/test/stokes/stokes_sym.ufl index 1ae13db697976a046b461096a530ef315a2d7417..c7fe07ceafa660906bb33e21ad9603758d23ed3a 100644 --- a/test/stokes/stokes_sym.ufl +++ b/test/stokes/stokes_sym.ufl @@ -16,7 +16,6 @@ n = FacetNormal(triangle)('+') r = (inner(2*sym(grad(u)), grad(v)) - div(v)*p - q*div(u))*dx - inner(grad(u).T*n,v)*ds -forms = [r] is_dirichlet = v_bctype, v_bctype, 0 -dirichlet_expression = g_v, None +interpolate_expression = g_v, None exact_solution = g_v, 8.*(1.-x[0]) \ No newline at end of file diff --git a/test/sumfact/hyperbolic/linearacoustics.mini b/test/sumfact/hyperbolic/linearacoustics.mini index 5d113e2b9737a8de3a2859ff99b44b6ba2d284b4..1f7b1fa542172fd28e45321d36ab82dc3cbdc1ef 100644 --- a/test/sumfact/hyperbolic/linearacoustics.mini +++ b/test/sumfact/hyperbolic/linearacoustics.mini @@ -13,6 +13,13 @@ name = {__name} extension = vtu [formcompiler] +explicit_time_stepping = 1 +operators = mass, r + +[formcompiler.mass] +numerical_jacobian = 1 +sumfact = 1 + +[formcompiler.r] numerical_jacobian = 1 sumfact = 1 -explicit_time_stepping = 1 \ No newline at end of file diff --git a/test/sumfact/hyperbolic/linearacoustics.ufl b/test/sumfact/hyperbolic/linearacoustics.ufl index 8b9d48c4433f72395c054c88ea6c4eaeedb9fcb0..5a78e7848578053dbb8d7f75a94c2f901f5831d3 100644 --- a/test/sumfact/hyperbolic/linearacoustics.ufl +++ b/test/sumfact/hyperbolic/linearacoustics.ufl @@ -21,12 +21,11 @@ flux = as_matrix([[q0, q1], [0., rho]]) # Define numerical fluxes 
to choose from -llf_flux = dot(avg(flux), n) - 0.5*jump(u) +llf_flux = dot(avg(flux), n) + 0.5*jump(u) numerical_flux = llf_flux r = -1. * inner(flux, grad(v))*dx \ - - inner(numerical_flux, jump(v))*dS \ + + inner(numerical_flux, jump(v))*dS \ + inner(u, v)*ds -forms = [mass, r] -dirichlet_expression = f, 0.0, 0.0 +interpolate_expression = f, 0.0, 0.0 diff --git a/test/sumfact/hyperbolic/lineartransport.mini b/test/sumfact/hyperbolic/lineartransport.mini index f4d694b2186ad9a3c764c04fcaf898ac566d55af..4ea0034f57922c41dda48dbac96d8ef89ec426b0 100644 --- a/test/sumfact/hyperbolic/lineartransport.mini +++ b/test/sumfact/hyperbolic/lineartransport.mini @@ -13,10 +13,16 @@ name = {__name} extension = vtu [formcompiler] -sumfact = 1 -#fastdg = 1 # This tests that all mass is transported out of the domain. # While this is not the best of tests, it is something easily checked for. -numerical_jacobian = 1 explicit_time_stepping = 1 compare_l2errorsquared = 1e-10 +operators = mass, r + +[formcompiler.mass] +sumfact = 1 +numerical_jacobian = 1 + +[formcompiler.r] +sumfact = 1 +numerical_jacobian = 1 diff --git a/test/sumfact/hyperbolic/lineartransport.ufl b/test/sumfact/hyperbolic/lineartransport.ufl index 8fa698d935c924f4c79fb9653c1d43e959909ccb..15a8021854222931b8dbef707082e90b406a47dc 100644 --- a/test/sumfact/hyperbolic/lineartransport.ufl +++ b/test/sumfact/hyperbolic/lineartransport.ufl @@ -15,17 +15,16 @@ v = TestFunction(V) beta = as_vector((1., 1.)) n = FacetNormal(cell)('+') -def numerical_flux(normal, outside, inside): +def numerical_flux(normal, inside, outside): return conditional(inner(beta, n) > 0, inside, outside)*inner(beta, n) mass = u*v*dx r = -1.*u*inner(beta, grad(v))*dx \ - - numerical_flux(n, u('+'), u('-'))*jump(v)*dS \ + + numerical_flux(n, u('+'), u('-'))*jump(v)*dS \ + inner(beta, n)*u*v*dso \ - + numerical_flux(n, 0.0, u('-'))*v*dsd + + numerical_flux(n, u('+'), 0.0)*v*dsd -forms = [mass, r] exact_solution = 0.0 is_dirichlet = dirichlet 
-dirichlet_expression = initial \ No newline at end of file +interpolate_expression = initial \ No newline at end of file diff --git a/test/sumfact/hyperbolic/shallowwater.mini b/test/sumfact/hyperbolic/shallowwater.mini index 8346ebf34a742fcd8afa4c19dec08701c4f81fbe..5f63f48bd0bd55537d1055ff685d0998ac28ad15 100644 --- a/test/sumfact/hyperbolic/shallowwater.mini +++ b/test/sumfact/hyperbolic/shallowwater.mini @@ -14,4 +14,10 @@ extension = vtu [formcompiler] explicit_time_stepping = 1 +operators = mass, r + +[formcompiler.mass] +sumfact = 1 + +[formcompiler.r] sumfact = 1 diff --git a/test/sumfact/hyperbolic/shallowwater.ufl b/test/sumfact/hyperbolic/shallowwater.ufl index 9fca496e9fab29d58e91e79f15fb6fed3b4265af..d0bfc147cf83062855836606edaf4a337f5d21c4 100644 --- a/test/sumfact/hyperbolic/shallowwater.ufl +++ b/test/sumfact/hyperbolic/shallowwater.ufl @@ -27,13 +27,12 @@ bflux = as_matrix([[-q0, -q1], # Define numerical fluxes to choose from -llf_flux = dot(avg(flux), n) - 0.5*jump(u) +llf_flux = dot(avg(flux), n) + 0.5*jump(u) boundary_flux = 0.5*dot(flux + bflux, n) + as_vector([0., q0, q1]) numerical_flux = llf_flux r = -1. 
* inner(flux, grad(v))*dx \ - - inner(numerical_flux, jump(v))*dS \ + + inner(numerical_flux, jump(v))*dS \ + inner(boundary_flux, v)*ds -forms = [mass, r] -dirichlet_expression = f, 0.0, 0.0 +interpolate_expression = f, 0.0, 0.0 diff --git a/test/sumfact/mass/mass.mini b/test/sumfact/mass/mass.mini index 44870439291a6111c18353f2c80e9be2116d3b28..6b0e9db8144fe18f7cf5e89f016d228d74ae9173 100644 --- a/test/sumfact/mass/mass.mini +++ b/test/sumfact/mass/mass.mini @@ -12,7 +12,7 @@ printmatrix = 1 name = {__name} extension = vtu -[formcompiler] +[formcompiler.r] numerical_jacobian = 1, 0 | expand num vectorization_quadloop = 1, 0 | expand vec sumfact = 1 diff --git a/test/sumfact/mass/mass.ufl b/test/sumfact/mass/mass.ufl index 6434e09f62b28ca9ea003c936997233d31449431..c11e65676418c9472cb77800dd9c8d477ebf7024 100644 --- a/test/sumfact/mass/mass.ufl +++ b/test/sumfact/mass/mass.ufl @@ -6,5 +6,3 @@ u = TrialFunction(V) v = TestFunction(V) r = u * v * dx - -forms = [r] diff --git a/test/sumfact/mass/mass_3d.mini b/test/sumfact/mass/mass_3d.mini index aba93533768a7b5463052c6585f6e45648380b70..fff87d11bb272dc5d5bd57e8f12569581ddc2485 100644 --- a/test/sumfact/mass/mass_3d.mini +++ b/test/sumfact/mass/mass_3d.mini @@ -13,7 +13,7 @@ printmatrix = true name = {__name} extension = vtu -[formcompiler] +[formcompiler.r] numerical_jacobian = 1, 0 | expand num vectorization_quadloop = 1, 0 | expand vec sumfact = 1 diff --git a/test/sumfact/mass/mass_3d.ufl b/test/sumfact/mass/mass_3d.ufl index 5f55103e52f0b84c550e38f68c0acc8d77465793..1336a91db188e0626353294281f109e0d30ad8ae 100644 --- a/test/sumfact/mass/mass_3d.ufl +++ b/test/sumfact/mass/mass_3d.ufl @@ -6,5 +6,3 @@ u = TrialFunction(V) v = TestFunction(V) r = u * v * dx - -forms = [r] diff --git a/test/sumfact/mass/sliced.mini b/test/sumfact/mass/sliced.mini index 90dab43e70b8ddc38830c37afb2dd83b4116f5e7..17d331901a999c5d2d90a0453dfc524183f6e132 100644 --- a/test/sumfact/mass/sliced.mini +++ b/test/sumfact/mass/sliced.mini @@ 
-9,7 +9,7 @@ printmatrix = true name = {__name} extension = vtu -[formcompiler] +[formcompiler.r] numerical_jacobian = 1 vectorization_strategy = explicit vectorization_horizontal = 1 diff --git a/test/sumfact/poisson/diagonal.mini b/test/sumfact/poisson/diagonal.mini index d3744184c52abd1320aa796bc16249c478afe9a7..298fadba9554771a7bdf4810b15ad7e925861cbf 100644 --- a/test/sumfact/poisson/diagonal.mini +++ b/test/sumfact/poisson/diagonal.mini @@ -8,8 +8,10 @@ name = {__name} extension = vtu [formcompiler] -sumfact = 1 compare_l2errorsquared = 1e-5 + +[formcompiler.r] +sumfact = 1 vectorization_quadloop = 1 vectorization_strategy = explicit vectorization_horizontal = 2 diff --git a/test/sumfact/poisson/opcount_poisson_2d_order2.mini b/test/sumfact/poisson/opcount_poisson_2d_order2.mini index 2350f1137da3df811df6bfa56bb6b373c0b422ab..538189b2e5c14bd745c748fc43e3161c4013cc77 100644 --- a/test/sumfact/poisson/opcount_poisson_2d_order2.mini +++ b/test/sumfact/poisson/opcount_poisson_2d_order2.mini @@ -12,11 +12,12 @@ reference = poisson_ref extension = vtu [formcompiler] -numerical_jacobian = 0 compare_l2errorsquared = 1e-8 -sumfact = 1 opcounter = 1 instrumentation_level = 4 +[formcompiler.r] +sumfact = 1 + [formcompiler.ufl_variants] degree = 2 diff --git a/test/sumfact/poisson/opcount_sumfact_poisson_dg_2d_vec.mini b/test/sumfact/poisson/opcount_sumfact_poisson_dg_2d_vec.mini index 063987b96cfb04af6223b426258e412f839582e7..b657c1a2c731e0606093ab3c0e00044afaadd3ae 100644 --- a/test/sumfact/poisson/opcount_sumfact_poisson_dg_2d_vec.mini +++ b/test/sumfact/poisson/opcount_sumfact_poisson_dg_2d_vec.mini @@ -10,11 +10,11 @@ reference = poisson_ref extension = vtu [formcompiler] -numerical_jacobian = 0 -sumfact = 1 opcounter = 1 instrumentation_level = 4 +[formcompiler.r] +sumfact = 1 [formcompiler.ufl_variants] degree = 1 diff --git a/test/sumfact/poisson/poisson_2d.mini b/test/sumfact/poisson/poisson_2d.mini index 
9fab490cf7b12a362767520975173927971e2382..d9ce1773e575fab9ec9e05057207093c8ea8b747 100644 --- a/test/sumfact/poisson/poisson_2d.mini +++ b/test/sumfact/poisson/poisson_2d.mini @@ -14,8 +14,10 @@ reference = poisson_ref extension = vtu [formcompiler] -numerical_jacobian = 1, 0 | expand num compare_l2errorsquared = 4e-5, 4e-9 | expand deg + +[formcompiler.r] +numerical_jacobian = 1, 0 | expand num sumfact = 1 vectorization_strategy = explicit, none | expand grad quadrature_order = 2, 4 diff --git a/test/sumfact/poisson/poisson_2d.ufl b/test/sumfact/poisson/poisson_2d.ufl index d2c78a8d2a1a928ae1f941db4e1a337c8d308bbb..f0cecc18e42eb5ee9b7bc4b488bb8e926f34ea8b 100644 --- a/test/sumfact/poisson/poisson_2d.ufl +++ b/test/sumfact/poisson/poisson_2d.ufl @@ -12,7 +12,7 @@ V = TensorProductElement(V_0, V_1, cell=cell) u = TrialFunction(V) v = TestFunction(V) -forms = [(inner(grad(u), grad(v)) - f*v)*dx] +r = (inner(grad(u), grad(v)) - f*v)*dx exact_solution = g is_dirichlet = 1 -dirichlet_expression = g +interpolate_expression = g diff --git a/test/sumfact/poisson/poisson_3d.mini b/test/sumfact/poisson/poisson_3d.mini index 2ddbd626bac79456c3b138e0386a8ac94ee9aa15..e3e6da7d29475cedd70db44a2b667d6171ae5c80 100644 --- a/test/sumfact/poisson/poisson_3d.mini +++ b/test/sumfact/poisson/poisson_3d.mini @@ -15,8 +15,10 @@ reference = poisson_ref extension = vtu [formcompiler] -numerical_jacobian = 1, 0 | expand num compare_l2errorsquared = 1e-4, 1e-8 | expand deg + +[formcompiler.r] +numerical_jacobian = 1, 0 | expand num sumfact = 1 vectorization_quadloop = 1, 0 | expand quad vectorization_strategy = explicit, none | expand grad diff --git a/test/sumfact/poisson/poisson_3d.ufl b/test/sumfact/poisson/poisson_3d.ufl index 529db2a042c01c43ffb7c71c390c323dd3c3d4ac..313cec8ec572013d5604d3a9dde332c13f359b3e 100644 --- a/test/sumfact/poisson/poisson_3d.ufl +++ b/test/sumfact/poisson/poisson_3d.ufl @@ -9,7 +9,7 @@ V = FiniteElement("CG", cell, degree) u = TrialFunction(V) v = 
TestFunction(V) -forms = [(inner(grad(u), grad(v)) - f*v)*dx] +r = (inner(grad(u), grad(v)) - f*v)*dx exact_solution = g is_dirichlet = 1 -dirichlet_expression = g +interpolate_expression = g diff --git a/test/sumfact/poisson/poisson_dg_2d.mini b/test/sumfact/poisson/poisson_dg_2d.mini index 99adc0e31563c6a85ecad18e349ed74adcaf21a0..d6799eac4600300f86303ea63853365698efdf1f 100644 --- a/test/sumfact/poisson/poisson_dg_2d.mini +++ b/test/sumfact/poisson/poisson_dg_2d.mini @@ -14,9 +14,11 @@ name = {__name} extension = vtu [formcompiler] +compare_l2errorsquared = 5e-5, 5e-7 | expand deg + +[formcompiler.r] numerical_jacobian = 1, 0 | expand num sumfact = 1 -compare_l2errorsquared = 5e-5, 5e-7 | expand deg vectorization_quadloop = 1, 0 | expand quad vectorization_strategy = explicit, none | expand grad diff --git a/test/sumfact/poisson/poisson_dg_2d.ufl b/test/sumfact/poisson/poisson_dg_2d.ufl index fefc67d64c9a0b1a6dac9c34423933d149e8c88e..3c2cf8767cdc395643366936529fe44c2a39be5b 100644 --- a/test/sumfact/poisson/poisson_dg_2d.ufl +++ b/test/sumfact/poisson/poisson_dg_2d.ufl @@ -1,4 +1,5 @@ cell = "quadrilateral" +dim = 2 x = SpatialCoordinate(cell) c = (0.5-x[0])**2 + (0.5-x[1])**2 @@ -13,21 +14,24 @@ v = TestFunction(V) n = FacetNormal(cell)('+') # penalty factor -gamma = 1.0 +alpha = 1.0 +h_ext = CellVolume(cell) / FacetArea(cell) +gamma_ext = (alpha * degree * (degree + dim - 1)) / h_ext +h_int = Min(CellVolume(cell)('+'), CellVolume(cell)('-')) / FacetArea(cell) +gamma_int = (alpha * degree * (degree + dim - 1)) / h_int # SIPG: -1.0, IIPG: 0.0, NIPG: 1.0 theta = 1.0 r = inner(grad(u), grad(v))*dx \ - + inner(n, avg(grad(u)))*jump(v)*dS \ - + gamma*jump(u)*jump(v)*dS \ - - theta*jump(u)*inner(avg(grad(v)), n)*dS \ + - f*v*dx \ + - inner(n, avg(grad(u)))*jump(v)*dS \ + + gamma_int*jump(u)*jump(v)*dS \ + + theta*jump(u)*inner(avg(grad(v)), n)*dS \ - inner(n, grad(u))*v*ds \ - + gamma*u*v*ds \ + + gamma_ext*u*v*ds \ + theta*u*inner(grad(v), n)*ds \ - - f*v*dx \ - - 
theta*g*inner(grad(v), n)*ds \ - - gamma*g*v*ds + - gamma_ext*g*v*ds \ + - theta*g*inner(grad(v), n)*ds -forms = [r] exact_solution = g diff --git a/test/sumfact/poisson/poisson_dg_3d.mini b/test/sumfact/poisson/poisson_dg_3d.mini index b23fda0eba605025076e6a92dfb295ea06525d00..f0b4ef26f73509e9dea47a1cca366c83a51bfb93 100644 --- a/test/sumfact/poisson/poisson_dg_3d.mini +++ b/test/sumfact/poisson/poisson_dg_3d.mini @@ -14,9 +14,11 @@ name = {__name} extension = vtu [formcompiler] +compare_l2errorsquared = 1e-4, 5e-6 | expand deg + +[formcompiler.r] numerical_jacobian = 1, 0 | expand num sumfact = 1 -compare_l2errorsquared = 1e-4, 5e-6 | expand deg vectorization_quadloop = 1, 0 | expand quad vectorization_strategy = explicit, none | expand grad diff --git a/test/sumfact/poisson/poisson_dg_3d.ufl b/test/sumfact/poisson/poisson_dg_3d.ufl index 80d78c363b27b6e30476a1189aed521b33b65fa8..0f7f0399b8e07ad85e45a20071304e469b1b6133 100644 --- a/test/sumfact/poisson/poisson_dg_3d.ufl +++ b/test/sumfact/poisson/poisson_dg_3d.ufl @@ -1,4 +1,5 @@ -cell = "hexahedron" +cell = hexahedron +dim = 3 x = SpatialCoordinate(cell) c = (0.5-x[0])**2 + (0.5-x[1])**2 + (0.5-x[2])**2 @@ -13,21 +14,24 @@ v = TestFunction(V) n = FacetNormal(cell)('+') # penalty factor -gamma = 1.0 +alpha = 1.0 +h_ext = CellVolume(cell) / FacetArea(cell) +gamma_ext = (alpha * degree * (degree + dim - 1)) / h_ext +h_int = Min(CellVolume(cell)('+'), CellVolume(cell)('-')) / FacetArea(cell) +gamma_int = (alpha * degree * (degree + dim - 1)) / h_int # SIPG: -1.0, IIPG: 0.0, NIPG: 1.0 theta = 1.0 r = inner(grad(u), grad(v))*dx \ - + inner(n, avg(grad(u)))*jump(v)*dS \ - + gamma*jump(u)*jump(v)*dS \ - - theta*jump(u)*inner(avg(grad(v)), n)*dS \ + - f*v*dx \ + - inner(n, avg(grad(u)))*jump(v)*dS \ + + gamma_int*jump(u)*jump(v)*dS \ + + theta*jump(u)*inner(avg(grad(v)), n)*dS \ - inner(n, grad(u))*v*ds \ - + gamma*u*v*ds \ + + gamma_ext*u*v*ds \ + theta*u*inner(grad(v), n)*ds \ - - f*v*dx \ - - theta*g*inner(grad(v), 
n)*ds \ - - gamma*g*v*ds + - gamma_ext*g*v*ds \ + - theta*g*inner(grad(v), n)*ds -forms = [r] exact_solution = g diff --git a/test/sumfact/poisson/poisson_dg_tensor.mini b/test/sumfact/poisson/poisson_dg_tensor.mini index 4a45e4a1fa469c6fc2753ad230b486bc55bd1b55..f6884f965eac0a47416af97b26aa849a6be688ad 100644 --- a/test/sumfact/poisson/poisson_dg_tensor.mini +++ b/test/sumfact/poisson/poisson_dg_tensor.mini @@ -12,8 +12,10 @@ name = {__name} extension = vtu [formcompiler] -sumfact = 1 compare_l2errorsquared = 3e-4 + +[formcompiler.r] +sumfact = 1 vectorization_quadloop = 1, 0 | expand quad vectorization_strategy = explicit, none | expand grad diff --git a/test/sumfact/poisson/poisson_dg_tensor.ufl b/test/sumfact/poisson/poisson_dg_tensor.ufl index 0d4b7a79ee8e2b7183b8664bb639990960205e0a..2734383f71d3afddc39c6c72f41ba5545def3b54 100644 --- a/test/sumfact/poisson/poisson_dg_tensor.ufl +++ b/test/sumfact/poisson/poisson_dg_tensor.ufl @@ -1,14 +1,14 @@ cell = hexahedron +dim = 3 x = SpatialCoordinate(cell) - I = Identity(3) A = as_matrix([[x[i]*x[j] + I[i,j] for j in range(3)] for i in range(3)]) g = x[0]**2 + x[1]**2 + x[2]**2 c = 10. f = -6. 
-V = FiniteElement("DG", cell, 1) +V = FiniteElement("DG", cell, degree) u = TrialFunction(V) v = TestFunction(V) @@ -16,21 +16,24 @@ v = TestFunction(V) n = FacetNormal(cell)('+') # penalty factor -gamma = 1.0 +alpha = 1.0 +h_ext = CellVolume(cell) / FacetArea(cell) +gamma_ext = (alpha * degree * (degree + dim - 1)) / h_ext +h_int = Min(CellVolume(cell)('+'), CellVolume(cell)('-')) / FacetArea(cell) +gamma_int = (alpha * degree * (degree + dim - 1)) / h_int # SIPG: -1.0, IIPG: 0.0, NIPG: 1.0 theta = 1.0 -r = (inner(A*grad(u), grad(v)) + c*u*v)*dx \ - + inner(n, A*avg(grad(u)))*jump(v)*dS \ - + gamma*jump(u)*jump(v)*dS \ - - theta*jump(u)*inner(A*avg(grad(v)), n)*dS \ - - inner(n, A*grad(u))*v*ds \ - + gamma*u*v*ds \ - + theta*u*inner(A*grad(v), n)*ds \ +r = inner(grad(u), grad(v))*dx \ - f*v*dx \ - - theta*g*inner(A*grad(v), n)*ds \ - - gamma*g*v*ds + - inner(n, avg(grad(u)))*jump(v)*dS \ + + gamma_int*jump(u)*jump(v)*dS \ + + theta*jump(u)*inner(avg(grad(v)), n)*dS \ + - inner(n, grad(u))*v*ds \ + + gamma_ext*u*v*ds \ + + theta*u*inner(grad(v), n)*ds \ + - gamma_ext*g*v*ds \ + - theta*g*inner(grad(v), n)*ds -forms = [r] exact_solution = g diff --git a/test/sumfact/poisson/poisson_fastdg_2d.mini b/test/sumfact/poisson/poisson_fastdg_2d.mini index 541de8712b627f6327076d87586b6d378fa54e78..53012e325a57387003dc42f2af4268f48cbdaa98 100644 --- a/test/sumfact/poisson/poisson_fastdg_2d.mini +++ b/test/sumfact/poisson/poisson_fastdg_2d.mini @@ -12,9 +12,11 @@ name = {__name} extension = vtu [formcompiler] +compare_l2errorsquared = 1e-4 + +[formcompiler.r] numerical_jacobian = 0 sumfact = 1 -compare_l2errorsquared = 1e-4 vectorization_quadloop = 1, 0 | expand quadvec vectorization_strategy = explicit, none | expand gradvec fastdg = 1 diff --git a/test/sumfact/poisson/poisson_fastdg_3d.mini b/test/sumfact/poisson/poisson_fastdg_3d.mini index b5974a4fe62609ca038d315656bf3c9f333f3e32..46552ce9e960e8a53715016e3de0bd6e770b5425 100644 --- 
a/test/sumfact/poisson/poisson_fastdg_3d.mini +++ b/test/sumfact/poisson/poisson_fastdg_3d.mini @@ -12,9 +12,11 @@ name = {__name} extension = vtu [formcompiler] +compare_l2errorsquared = 1e-4 + +[formcompiler.r] numerical_jacobian = 0 sumfact = 1 -compare_l2errorsquared = 1e-4 vectorization_quadloop = 1, 0 | expand quadvec vectorization_strategy = explicit, none | expand gradvec fastdg = 1 diff --git a/test/sumfact/poisson/sliced.mini b/test/sumfact/poisson/sliced.mini index 858b8c6b6b8804f3cede25236ff29ce66bae010b..55b6fcf7df3b0ee33f06bb97da0e8b555104c161 100644 --- a/test/sumfact/poisson/sliced.mini +++ b/test/sumfact/poisson/sliced.mini @@ -8,8 +8,10 @@ name = {__name} extension = vtu [formcompiler] -sumfact = 1 compare_l2errorsquared = 1e-5 + +[formcompiler.r] +sumfact = 1 vectorization_quadloop = 1 vectorization_strategy = explicit vectorization_horizontal = 1 diff --git a/test/sumfact/stokes/stokes.mini b/test/sumfact/stokes/stokes.mini index 10dca54704f30369881095216a6f5f94c25585e0..cfb89ec57d5504fca215ddc08d862e5e2484fc19 100644 --- a/test/sumfact/stokes/stokes.mini +++ b/test/sumfact/stokes/stokes.mini @@ -12,7 +12,9 @@ name = {__name} extension = vtu [formcompiler] +compare_l2errorsquared = 1e-12 + +[formcompiler.r] numerical_jacobian = 1, 0 | expand num vectorization_quadloop = 1, 0 | expand quad -compare_l2errorsquared = 1e-12 sumfact = 1 diff --git a/test/sumfact/stokes/stokes.ufl b/test/sumfact/stokes/stokes.ufl index fafe0714ccee659ee18ef550cc2a17fba01077e6..9c5cb27a59d7662402a0bc6d65f9818dc4761577 100644 --- a/test/sumfact/stokes/stokes.ufl +++ b/test/sumfact/stokes/stokes.ufl @@ -14,7 +14,6 @@ u, p = TrialFunctions(TH) r = (inner(grad(v), grad(u)) - div(v)*p - q*div(u))*dx -forms = [r] exact_solution = g_v, 8.*(1.-x[0]) -dirichlet_expression = g_v, None +interpolate_expression = g_v, None is_dirichlet = v_bctype, v_bctype, None \ No newline at end of file diff --git a/test/sumfact/stokes/stokes_3d_dg.mini b/test/sumfact/stokes/stokes_3d_dg.mini 
index b7ec60614d3159d8792b42ab1c416c4926150120..7fb1b22ed1de823afff15f519beb4aa474a3ec11 100644 --- a/test/sumfact/stokes/stokes_3d_dg.mini +++ b/test/sumfact/stokes/stokes_3d_dg.mini @@ -12,7 +12,9 @@ name = {__name} extension = vtu [formcompiler] +compare_l2errorsquared = 1e-10 + +[formcompiler.r] numerical_jacobian = 0 sumfact = 1 -fastdg = 1, 0 | expand fastdg -compare_l2errorsquared = 1e-10 \ No newline at end of file +fastdg = 1, 0 | expand fastdg \ No newline at end of file diff --git a/test/sumfact/stokes/stokes_3d_dg.ufl b/test/sumfact/stokes/stokes_3d_dg.ufl index 84d1003e16d7f4f36dc0630434d98eb3d633cd3a..8193a8933e9a3f16722737802e93199b2739ec30 100644 --- a/test/sumfact/stokes/stokes_3d_dg.ufl +++ b/test/sumfact/stokes/stokes_3d_dg.ufl @@ -21,16 +21,15 @@ h_e = Min(CellVolume(cell)('+'), CellVolume(cell)('-')) / FacetArea(cell) r = inner(grad(u), grad(v))*dx \ - p*div(v)*dx \ - q*div(u)*dx \ - + inner(avg(grad(u))*n, jump(v))*dS \ + - inner(avg(grad(u))*n, jump(v))*dS \ + sigma / h_e * inner(jump(u), jump(v))*dS \ - - eps * inner(avg(grad(v))*n, jump(u))*dS \ - - avg(p)*inner(jump(v), n)*dS \ - - avg(q)*inner(jump(u), n)*dS \ + + eps * inner(avg(grad(v))*n, jump(u))*dS \ + + avg(p)*inner(jump(v), n)*dS \ + + avg(q)*inner(jump(u), n)*dS \ - inner(grad(u)*n, v)*ds \ + sigma / h_e * inner(u-g_v, v)*ds \ + eps * inner(grad(v)*n, u-g_v)*ds \ + p*inner(v, n)*ds \ + q*inner(u-g_v, n)*ds -forms = [r] exact_solution = g_v, 8.*(1.-x[0]) diff --git a/test/sumfact/stokes/stokes_dg.mini b/test/sumfact/stokes/stokes_dg.mini index e3374e4a18e844f6f1356ce45b38e5b5212f015f..f34f23422ae888f7ea5d3085b41b87c2ac929346 100644 --- a/test/sumfact/stokes/stokes_dg.mini +++ b/test/sumfact/stokes/stokes_dg.mini @@ -13,9 +13,11 @@ name = {__name} extension = vtu [formcompiler] -numerical_jacobian = 0, 1 | expand num compare_l2errorsquared = 1e-8 + +[formcompiler.r] +numerical_jacobian = 0, 1 | expand num sumfact = 1 fastdg = 1, 0 | expand fastdg -{formcompiler.fastdg} == 1 and 
{formcompiler.numerical_jacobian} == 1 | exclude \ No newline at end of file +{formcompiler.r.fastdg} == 1 and {formcompiler.r.numerical_jacobian} == 1 | exclude \ No newline at end of file diff --git a/test/sumfact/stokes/stokes_dg.ufl b/test/sumfact/stokes/stokes_dg.ufl index 39c243a00c7b16c857f551813b6bf0f4a99ad065..7f873537ee0c6b5f1015091abef27d660823b331 100644 --- a/test/sumfact/stokes/stokes_dg.ufl +++ b/test/sumfact/stokes/stokes_dg.ufl @@ -21,16 +21,15 @@ h_e = Min(CellVolume(cell)('+'), CellVolume(cell)('-')) / FacetArea(cell) r = inner(grad(u), grad(v))*dx \ - p*div(v)*dx \ - q*div(u)*dx \ - + inner(avg(grad(u))*n, jump(v))*dS \ + - inner(avg(grad(u))*n, jump(v))*dS \ + sigma / h_e * inner(jump(u), jump(v))*dS \ - - eps * inner(avg(grad(v))*n, jump(u))*dS \ - - avg(p)*inner(jump(v), n)*dS \ - - avg(q)*inner(jump(u), n)*dS \ + + eps * inner(avg(grad(v))*n, jump(u))*dS \ + + avg(p)*inner(jump(v), n)*dS \ + + avg(q)*inner(jump(u), n)*dS \ - inner(grad(u)*n, v)*ds \ + sigma / h_e * inner(u-g_v, v)*ds \ + eps * inner(grad(v)*n, u-g_v)*ds \ + p*inner(v, n)*ds \ + q*inner(u-g_v, n)*ds -forms = [r] exact_solution = g_v, 8*(1.-x[0]) \ No newline at end of file