diff --git a/python/dune/codegen/options.py b/python/dune/codegen/options.py index e511ccee2064fd99475b40d0d559502457305513..18fbebab787e6c470fe4672786a72433331d5260 100644 --- a/python/dune/codegen/options.py +++ b/python/dune/codegen/options.py @@ -82,6 +82,7 @@ class CodegenFormOptionsArray(ImmutableRecord): fastdg = CodegenOption(default=False, helpstr="Use FastDGGridOperator from PDELab.") sumfact = CodegenOption(default=False, helpstr="Use sumfactorization") sumfact_regular_jacobians = CodegenOption(default=False, helpstr="Generate non sum-factorized jacobians (only useful if sumfact is set)") + sumfact_on_boundary = CodegenOption(default=True, helpstr="Whether boundary integrals should be vectorized. It might not be worth the hassle...") vectorization_quadloop = CodegenOption(default=False, helpstr="whether to generate code with explicit vectorization") vectorization_strategy = CodegenOption(default="none", helpstr="The identifier of the vectorization cost model. Possible values: none|explicit|model|target|autotune") vectorization_not_fully_vectorized_error = CodegenOption(default=False, helpstr="throw an error if nonquadloop vectorization did not fully vectorize") diff --git a/python/dune/codegen/sumfact/switch.py b/python/dune/codegen/sumfact/switch.py index 09c85af539b27c1e9eead6d95dc886f3a2824082..d18ab344b9c1c78d08c85185219a6b2facf95fc7 100644 --- a/python/dune/codegen/sumfact/switch.py +++ b/python/dune/codegen/sumfact/switch.py @@ -3,6 +3,7 @@ import csv from dune.codegen.generation import (backend, + get_backend, get_global_context_value, global_context, ) @@ -12,7 +13,7 @@ from dune.codegen.pdelab.signatures import (assembly_routine_args, assembly_routine_signature, kernel_name, ) -from dune.codegen.options import get_form_option, get_option +from dune.codegen.options import get_form_option, get_option, set_form_option from dune.codegen.cgen.clazz import ClassMember @@ -25,6 +26,14 @@ def generate_kernels_per_integral(integrals): yield generate_kernel(integrals) if measure == "exterior_facet": + # Maybe skip sum factorization on boundary integrals + if not get_form_option("sumfact_on_boundary"): + set_form_option("sumfact", False) + for k in get_backend(interface="generate_kernels_per_integral")(integrals): + yield k + set_form_option("sumfact", True) + return + # Generate all necessary kernels for facedir in range(dim): for facemod in range(2):