diff --git a/python/dune/codegen/options.py b/python/dune/codegen/options.py
index 9407709a515ef497b9f9b7561f570cf0411dd4ed..eb9d73aef388124ab61ad75faa0aa15f1252d3fb 100644
--- a/python/dune/codegen/options.py
+++ b/python/dune/codegen/options.py
@@ -3,6 +3,7 @@
 from argparse import ArgumentParser
 from os.path import abspath
 from pytools import ImmutableRecord, memoize
+from contextlib import contextmanager
 
 from dune.testtools.parametertree.parser import parse_ini_file
 
@@ -297,21 +298,41 @@ def get_form_option(key, form=None):
     return getattr(processed_form_opts, key)
 
 
-def option_switch(opt):
-    def _switch():
-        if isinstance(opt, tuple):
-            opts = opt
-        else:
-            assert isinstance(opt, str)
-            opts = (opt,)
-        try:
-            for o in opts:
-                if get_option(o):
-                    return o
-            return "default"
-        except AttributeError:
-            for o in opts:
-                if get_form_option(o):
-                    return o
-            return "default"
-    return _switch
+@contextmanager
+def option_context(conditional=True, **opts):
+    """ A context manager that sets a given option and restores it on exit. """
+    # Backup old values and set to new ones
+    if conditional:
+        backup = {}
+        for k, v in opts.items():
+            backup[k] = get_option(k)
+            set_option(k, v)
+
+    try:
+        yield
+    finally:
+        # Restore old values even if the context body raised
+        if conditional:
+            for k in opts.keys():
+                set_option(k, backup[k])
+
+
+@contextmanager
+def form_option_context(conditional=True, **opts):
+    """ A context manager that sets a given form option and restores it on exit """
+    if conditional:
+        form = opts.pop("form", None)
+
+        # Backup old values and set to new ones
+        backup = {}
+        for k, v in opts.items():
+            backup[k] = get_form_option(k, form=form)
+            set_form_option(k, v, form=form)
+
+    try:
+        yield
+    finally:
+        # Restore old values even if the context body raised
+        if conditional:
+            for k in opts.keys():
+                set_form_option(k, backup[k], form=form)
diff --git a/python/dune/codegen/pdelab/basis.py b/python/dune/codegen/pdelab/basis.py
index 39f867a7836c905d30e4ec124eedb6a214230e62..885194af5817bd30d6cc339603ab84f00bc7b990 100644
--- a/python/dune/codegen/pdelab/basis.py
+++ b/python/dune/codegen/pdelab/basis.py
@@ -8,9 +8,7 @@ from dune.codegen.generation import (basis_mixin,
                                      preamble,
                                      temporary_variable,
                                      )
-from dune.codegen.options import (option_switch,
-                                  get_form_option,
-                                  )
+from dune.codegen.options import get_form_option
 from dune.codegen.pdelab.argument import (name_applycontainer,
                                           name_coefficientcontainer,
                                           )
diff --git a/python/dune/codegen/pdelab/geometry.py b/python/dune/codegen/pdelab/geometry.py
index 434f9f56d1098bd4c69b49bb79274a4c6f224581..df6b91548a5ccf217b3e36a7d176be828a35f374 100644
--- a/python/dune/codegen/pdelab/geometry.py
+++ b/python/dune/codegen/pdelab/geometry.py
@@ -12,9 +12,7 @@ from dune.codegen.generation import (class_member,
                                      temporary_variable,
                                      valuearg,
                                      )
-from dune.codegen.options import (get_form_option,
-                                  option_switch,
-                                  )
+from dune.codegen.options import get_form_option
 from dune.codegen.loopy.target import dtype_floatingpoint, type_floatingpoint
 from dune.codegen.pdelab.quadrature import (quadrature_preamble,
                                             )
diff --git a/python/dune/codegen/pdelab/localoperator.py b/python/dune/codegen/pdelab/localoperator.py
index a9293b582a50eef979efb7ecc63f2ab984a20563..835fe44e24df9f0849391e1f50422512df43c044 100644
--- a/python/dune/codegen/pdelab/localoperator.py
+++ b/python/dune/codegen/pdelab/localoperator.py
@@ -7,8 +7,8 @@ import numpy as np
 
 from dune.codegen.options import (get_form_option,
                                   get_option,
-                                  option_switch,
-                                  set_form_option)
+                                  form_option_context,
+                                  )
 from dune.codegen.generation import (accumulation_mixin,
                                      base_class,
                                      class_basename,
@@ -41,6 +41,7 @@ from dune.codegen.cgen.clazz import (AccessModifier,
                                      )
 from dune.codegen.loopy.target import type_floatingpoint
 from dune.codegen.ufl.modified_terminals import Restriction
+from frozendict import frozendict
 
 import dune.codegen.loopy.mangler
 
@@ -452,7 +453,6 @@ def generate_accumulation_instruction(expr, visitor):
                    )
 
     from dune.codegen.generation import instruction
-    from dune.codegen.options import option_switch
     quad_inames = visitor.quadrature_inames()
     lfs_inames = frozenset(visitor.test_info.inames)
     if visitor.trial_info:
@@ -510,24 +510,35 @@ def visit_integral(integral):
 def generate_kernel(integrals):
     logger = logging.getLogger(__name__)
 
-    # Visit all integrals once to collect information (dry-run)!
-    logger.debug('generate_kernel: visit_integrals (dry run)')
-    with global_context(dry_run=True):
+    # Assert that metadata for a given measure type agrees. This is a limitation
+    # of our current approach that is hard to overcome.
+    def remove_nonuser_metadata(d):
+        return frozendict({k: v for k, v in d.items() if k != "estimated_polynomial_degree"})
+
+    meta_dicts = [remove_nonuser_metadata(i.metadata()) for i in integrals]
+    if len(set(meta_dicts)) > 1:
+        measure = get_global_context_value("measure")
+        raise CodegenUFLError("Measure {} used with varying metadata! dune-codegen does not currently support this.".format(measure))
+
+    with form_option_context(**meta_dicts[0]):
+        # Visit all integrals once to collect information (dry-run)!
+        logger.debug('generate_kernel: visit_integrals (dry run)')
+        with global_context(dry_run=True):
+            for integral in integrals:
+                visit_integral(integral)
+
+        # Now perform some checks on what should be done
+        from dune.codegen.sumfact.vectorization import decide_vectorization_strategy
+        logger.debug('generate_kernel: decide_vectorization_strategy')
+        decide_vectorization_strategy()
+
+        # Delete the cache contents and do the real thing!
+        logger.debug('generate_kernel: visit_integrals (no dry run)')
+        from dune.codegen.generation import delete_cache_items
+        delete_cache_items("kernel_default")
         for integral in integrals:
             visit_integral(integral)
 
-    # Now perform some checks on what should be done
-    from dune.codegen.sumfact.vectorization import decide_vectorization_strategy
-    logger.debug('generate_kernel: decide_vectorization_strategy')
-    decide_vectorization_strategy()
-
-    # Delete the cache contents and do the real thing!
-    logger.debug('generate_kernel: visit_integrals (no dry run)')
-    from dune.codegen.generation import delete_cache_items
-    delete_cache_items("kernel_default")
-    for integral in integrals:
-        visit_integral(integral)
-
     from dune.codegen.pdelab.signatures import kernel_name, assembly_routine_signature
     name = kernel_name()
     signature = assembly_routine_signature()
@@ -1026,13 +1037,9 @@ def generate_jacobian_kernels(form, original_form):
                         operator_kernels[(it, 'jacobian_apply')] = [LoopyKernelMethod(assembly_routine_signature(), kernel=None)]
     if get_form_option("generate_jacobians"):
         with global_context(form_type="jacobian"):
-            if get_form_option("generate_jacobians"):
-                if get_form_option("sumfact"):
-                    was_sumfact = True
-                    if get_form_option("sumfact_regular_jacobians"):
-                        old_geometry_mixins = get_form_option("geometry_mixins")
-                        set_form_option("geometry_mixins", "generic")
-                        set_form_option("sumfact", False)
+            with form_option_context(conditional=get_form_option("sumfact") and get_form_option("sumfact_regular_jacobians"),
+                                     geometry_mixins="generic",
+                                     sumfact=False):
                 for measure in set(i.integral_type() for i in jacform.integrals()):
                     if not measure_is_enabled(measure):
                         continue
@@ -1059,10 +1066,6 @@ def generate_jacobian_kernels(form, original_form):
                     with global_context(integral_type=it):
                         from dune.codegen.pdelab.signatures import assembly_routine_signature
                         operator_kernels[(it, 'jacobian')] = [LoopyKernelMethod(assembly_routine_signature(), kernel=None)]
-                if get_form_option("sumfact_regular_jacobians"):
-                    if was_sumfact:
-                        set_form_option("sumfact", True)
-                        set_form_option("geometry_mixins", old_geometry_mixins)
 
     return operator_kernels
 
diff --git a/python/dune/codegen/pdelab/quadrature.py b/python/dune/codegen/pdelab/quadrature.py
index 9814f6f2d389e4ec46d055ff2892f8058a330ebf..ca44056ce82037124cc3ea2a9781e6eb1feb9692 100644
--- a/python/dune/codegen/pdelab/quadrature.py
+++ b/python/dune/codegen/pdelab/quadrature.py
@@ -203,7 +203,7 @@ def quadrature_order():
       possible to use a different quadrature_order per direction.
     """
     if get_form_option("quadrature_order"):
-        quadrature_order = tuple(map(int, get_form_option("quadrature_order").split(',')))
+        quadrature_order = tuple(map(int, str(get_form_option("quadrature_order")).split(',')))
     else:
         quadrature_order = _estimate_quadrature_order()
 
diff --git a/python/dune/codegen/sumfact/autotune.py b/python/dune/codegen/sumfact/autotune.py
index 1e801f4550d17d89ef8972fe3ffa867d4a44ea25..7908ab3515eb43c1f667216481821024076bcf88 100644
--- a/python/dune/codegen/sumfact/autotune.py
+++ b/python/dune/codegen/sumfact/autotune.py
@@ -20,7 +20,7 @@ from cgen import ArrayOf, AlignedAttribute, Initializer
 from dune.codegen.generation import cache_restoring, delete_cache_items
 from dune.codegen.loopy.target import DuneTarget, type_floatingpoint
 from dune.codegen.sumfact.realization import realize_sumfact_kernel_function
-from dune.codegen.options import get_option, set_option
+from dune.codegen.options import get_option, option_context
 from dune.codegen.error import CodegenAutotuneError
 
 
@@ -185,296 +185,281 @@ def generate_standalone_code_google_benchmark(sf, filename):
     delete_cache_items("kernel_default")
 
     # Turn off opcounting
-    opcounting = get_option("opcounter")
-    set_option("opcounter", False)
-
-    # Extract sum factorization kernel
-    from dune.codegen.pdelab.localoperator import extract_kernel_from_cache
-    knl = realize_sumfact_kernel_function(sf)
-
-    # Add the implementation of the kernel.
-    # TODO: This can probably done in a safer way?
-    first_line = knl.member.lines[0]
-    arguments = first_line[first_line.find("(") + 1:first_line.find(")")]
-
-    with open(filename, "w") as f:
-        f.writelines(["// {}".format(first_line),
-                      "\n",
-                      "#include \"config.h\"\n",
-                      "#include \"benchmark/benchmark.h\"\n",
-                      "#include<dune/pdelab/finiteelementmap/qkdg.hh>\n",
-                      "#include<dune/codegen/common/vectorclass.hh>\n",
-                      "#include<dune/codegen/sumfact/onedquadrature.hh>\n",
-                      "#include<dune/codegen/sumfact/horizontaladd.hh>\n",
-                      "#include<random>\n",
-                      "#include<fstream>\n",
-                      "#include<iostream>\n",
-                      "\n"
-                      ])
-
-    write_global_data(sf, filename)
-
-    with open(filename, "a") as f:
-        arguments = ', '.join(sf.interface.signature_args)
-        if len(arguments) > 0:
-            arguments = ', ' + arguments
-        arguments = 'const char* buffer0, const char* buffer1' + arguments
-        f.write("void sumfact_kernel({})\n".format(arguments))
-        for line in knl.member.lines[1:]:
-            f.write("{}\n".format(line))
-
-        f.write("\n\n")
-        f.write("static void BM_sumfact_kernel(benchmark::State& state){\n")
-
-    write_setup_code(sf, filename, define_thetas=False)
-
-    additional_arguments = [i.split()[-1] for i in sf.interface.signature_args]
-    additional_arguments = ', '.join(additional_arguments)
-    if len(additional_arguments) > 0:
-        additional_arguments = ', ' + additional_arguments
-    with open(filename, "a") as f:
-        f.writelines(["  for (auto _ : state){\n",
-                      "    sumfact_kernel(buffer0, buffer1{});\n".format(additional_arguments),
-                      "  }\n",
-                      "}\n",
-                      "BENCHMARK(BM_sumfact_kernel);\n",
-                      "\n",
-                      "BENCHMARK_MAIN();"
-                      ])
+    with option_context(opcounter=False):
+        # Extract sum factorization kernel
+        from dune.codegen.pdelab.localoperator import extract_kernel_from_cache
+        knl = realize_sumfact_kernel_function(sf)
 
-    # Maybe turn opcounting on again
-    set_option("opcounter", opcounting)
+        # Add the implementation of the kernel.
+        # TODO: This can probably done in a safer way?
+        first_line = knl.member.lines[0]
+        arguments = first_line[first_line.find("(") + 1:first_line.find(")")]
+
+        with open(filename, "w") as f:
+            f.writelines(["// {}".format(first_line),
+                          "\n",
+                          "#include \"config.h\"\n",
+                          "#include \"benchmark/benchmark.h\"\n",
+                          "#include<dune/pdelab/finiteelementmap/qkdg.hh>\n",
+                          "#include<dune/codegen/common/vectorclass.hh>\n",
+                          "#include<dune/codegen/sumfact/onedquadrature.hh>\n",
+                          "#include<dune/codegen/sumfact/horizontaladd.hh>\n",
+                          "#include<random>\n",
+                          "#include<fstream>\n",
+                          "#include<iostream>\n",
+                          "\n"
+                          ])
+
+        write_global_data(sf, filename)
+
+        with open(filename, "a") as f:
+            arguments = ', '.join(sf.interface.signature_args)
+            if len(arguments) > 0:
+                arguments = ', ' + arguments
+            arguments = 'const char* buffer0, const char* buffer1' + arguments
+            f.write("void sumfact_kernel({})\n".format(arguments))
+            for line in knl.member.lines[1:]:
+                f.write("{}\n".format(line))
+
+            f.write("\n\n")
+            f.write("static void BM_sumfact_kernel(benchmark::State& state){\n")
+
+        write_setup_code(sf, filename, define_thetas=False)
+
+        additional_arguments = [i.split()[-1] for i in sf.interface.signature_args]
+        additional_arguments = ', '.join(additional_arguments)
+        if len(additional_arguments) > 0:
+            additional_arguments = ', ' + additional_arguments
+        with open(filename, "a") as f:
+            f.writelines(["  for (auto _ : state){\n",
+                          "    sumfact_kernel(buffer0, buffer1{});\n".format(additional_arguments),
+                          "  }\n",
+                          "}\n",
+                          "BENCHMARK(BM_sumfact_kernel);\n",
+                          "\n",
+                          "BENCHMARK_MAIN();"
+                          ])
 
 
 def generate_standalone_code(sf, filename):
     delete_cache_items("kernel_default")
 
     # Turn off opcounting
-    opcounting = get_option("opcounter")
-    set_option("opcounter", False)
-
-    # Extract sum factorization kernel
-    from dune.codegen.pdelab.localoperator import extract_kernel_from_cache
-    knl = realize_sumfact_kernel_function(sf)
-    first_line = knl.member.lines[0]
-
-    with open(filename, "w") as f:
-        f.writelines(["// {}".format(first_line),
-                      "\n",
-                      "#include \"config.h\"\n",
-                      "#include<dune/pdelab/finiteelementmap/qkdg.hh>\n",
-                      "#include<dune/codegen/common/tsc.hh>\n",
-                      "#include<dune/codegen/common/vectorclass.hh>\n",
-                      "#include<dune/codegen/sumfact/onedquadrature.hh>\n",
-                      "#include<dune/codegen/sumfact/horizontaladd.hh>\n",
-                      "#include<random>\n",
-                      "#include<fstream>\n",
-                      "#include<iostream>\n",
-                      "\n"
-                      ])
-
-        f.writelines(["int main(int argc, char** argv)\n",
-                      "{\n",
-                      ])
-
-    write_setup_code(sf, filename)
-
-    # Write measurement
-    with open(filename, "a") as f:
-        # Start a TSC timer
-        f.writelines(["  auto start = Dune::PDELab::TSC::start();\n",
-                      ])
-
-        # Add the implementation of the kernel.
-        repeats = int(1e9 / sf.operations)
-        f.write("  for(int i=0; i<{}; ++i)\n".format(repeats))
-        f.write("  {\n")
-        for line in knl.member.lines[1:]:
-            f.write("    {}\n".format(line))
-        f.write("  }\n")
-
-        # Stop the TSC timer and write the result to a file
-        f.writelines(["  auto stop = Dune::PDELab::TSC::stop();\n",
-                      "  std::ofstream file;\n",
-                      "  file.open(argv[1]);\n",
-                      "  file << Dune::PDELab::TSC::elapsed(start, stop) / {} << std::endl;\n".format(str(float(repeats))),
-                      "  file.close();\n",
-                      "  accum += output[dis(rng)];\n",
-                      "  std::cout << accum;\n",
-                      "}\n",
-                      ])
-
-    # Maybe turn opcounting on again
-    set_option("opcounter", opcounting)
+    with option_context(opcounter=False):
+        # Extract sum factorization kernel
+        from dune.codegen.pdelab.localoperator import extract_kernel_from_cache
+        knl = realize_sumfact_kernel_function(sf)
+        first_line = knl.member.lines[0]
+
+        with open(filename, "w") as f:
+            f.writelines(["// {}".format(first_line),
+                          "\n",
+                          "#include \"config.h\"\n",
+                          "#include<dune/pdelab/finiteelementmap/qkdg.hh>\n",
+                          "#include<dune/codegen/common/tsc.hh>\n",
+                          "#include<dune/codegen/common/vectorclass.hh>\n",
+                          "#include<dune/codegen/sumfact/onedquadrature.hh>\n",
+                          "#include<dune/codegen/sumfact/horizontaladd.hh>\n",
+                          "#include<random>\n",
+                          "#include<fstream>\n",
+                          "#include<iostream>\n",
+                          "\n"
+                          ])
+
+            f.writelines(["int main(int argc, char** argv)\n",
+                          "{\n",
+                          ])
+
+        write_setup_code(sf, filename)
+
+        # Write measurement
+        with open(filename, "a") as f:
+            # Start a TSC timer
+            f.writelines(["  auto start = Dune::PDELab::TSC::start();\n",
+                          ])
+
+            # Add the implementation of the kernel.
+            repeats = int(1e9 / sf.operations)
+            f.write("  for(int i=0; i<{}; ++i)\n".format(repeats))
+            f.write("  {\n")
+            for line in knl.member.lines[1:]:
+                f.write("    {}\n".format(line))
+            f.write("  }\n")
+
+            # Stop the TSC timer and write the result to a file
+            f.writelines(["  auto stop = Dune::PDELab::TSC::stop();\n",
+                          "  std::ofstream file;\n",
+                          "  file.open(argv[1]);\n",
+                          "  file << Dune::PDELab::TSC::elapsed(start, stop) / {} << std::endl;\n".format(str(float(repeats))),
+                          "  file.close();\n",
+                          "  accum += output[dis(rng)];\n",
+                          "  std::cout << accum;\n",
+                          "}\n",
+                          ])
 
 
 def generate_standalone_kernel_code(kernel, signature, filename, transformations=None):
     # Turn off opcounting
-    opcounting = get_option("opcounter")
-    set_option("opcounter", False)
-
-    # Remove opcounter from signature
-    p = re.compile('OpCounter::OpCounter<([^>]*)>')
-    assert len(signature) == 1
-    sig = signature[0]
-    sig = p.sub(r'\1', sig)
-    assert 'OpCounter' not in signature
-
-    # Which transformations were applied
-    codegen_transformations = ''
-    if transformations:
+    with option_context(opcounter=False):
+        # Remove opcounter from signature
+        p = re.compile('OpCounter::OpCounter<([^>]*)>')
+        assert len(signature) == 1
+        sig = signature[0]
+        sig = p.sub(r'\1', sig)
+        assert 'OpCounter' not in signature
+
+        # Which transformations were applied
         codegen_transformations = ''
-        for trafo in transformations:
-            codegen_transformations += '// {}\n'.format(trafo)
-
-    template = 'kernel_benchmark_template1.cc.in'
-    use_datasets = True
-
-    # Old benchmark template
-    # template = 'kernel_benchmark_template0.cc.in'
-    # use_datasets = False
-
-    template_filename = pkg_resources.resource_filename(__name__, template)
-    with open(template_filename, 'r') as f:
-        benchmark = f.read()
-
-    # Find function arguments and global arguments
-    arguments = sig[sig.find('(') + 1:sig.find(')')].split(',')
-    arguments = [a.split(' ')[-1] for a in arguments]
-    global_args = [a for a in kernel.args if a.name not in arguments]
-    buffer_arguments = [a for a in arguments if a.startswith('buff')]
-    input_arguments = [a for a in arguments if a not in buffer_arguments]
-
-    # Declare global arguments
-    codegen_declare_global_arguments = ''
-    target = DuneTarget()
-    for g in global_args:
-        decl_info = g.decl_info(target, True, g.dtype)
-        for idi in decl_info:
-            ast_builder = target.get_device_ast_builder()
-            arg_decl = lp.target.c.POD(ast_builder, idi.dtype, idi.name)
-            arg_decl = ArrayOf(arg_decl, reduce(mul, g.shape))
-            arg_decl = AlignedAttribute(g.dtype.itemsize * g.vector_size(target), arg_decl)
-            codegen_declare_global_arguments += '{}\n'.format(arg_decl)
-    codegen_declare_global_arguments = textwrap.indent(codegen_declare_global_arguments, '  ')
-
-    # Helper function for argument initialization
-    def _initialize_arg(arg):
-        if isinstance(arg, lp.ValueArg):
-            return []
-        real = type_floatingpoint()
-        size = reduce(mul, arg.shape)
-        fill_name = arg.name + '_fill'
-        lines = ['  {}* {} = (double *) {};'.format(real, fill_name, arg.name),
-                 '  for (std::size_t i=0; i<{}; ++i){{'.format(size),
-                 '    {}[i] = unif(re);'.format(fill_name),
-                 '  }']
-        return lines
-
-    # Initialize global arguments
-    codegen_initialize_global_arguments = ''
-    for arg in global_args:
-        lines = _initialize_arg(arg)
-        codegen_initialize_global_arguments += '\n'.join(lines) + '\n'
-    codegen_initialize_global_arguments = textwrap.indent(codegen_initialize_global_arguments, '  ')
-
-    codegen_initialize_input = ''
-
-    # Function we want to benchmark
-    codegen_benchmark_function = ''
-    codegen_benchmark_function += sig[0:sig.find(')') + 1]
-    codegen_benchmark_function += lp.generate_body(kernel)
-    codegen_benchmark_function = textwrap.indent(codegen_benchmark_function, '  ')
-
-    # Declare function arguments
-    codegen_declare_arguments = []
-    codegen_declare_input = []
-    function_arguments = [a for a in kernel.args if a.name in arguments]
-    for arg in function_arguments:
-        if 'buffer' in arg.name:
-            byte_size = reduce(mul, arg.shape) * 8
-            codegen_declare_arguments.append('  char {}[{}] __attribute__ ((aligned ({})));\n'.format(arg.name,
-                                                                                                      byte_size,
-                                                                                                      arg.alignment),)
-        elif isinstance(arg, lp.ValueArg):
-            assert 'jacobian_offset' in arg.name
-            decl = arg.get_arg_decl(ast_builder)
-            decl = Initializer(decl, 'unif_int(re)')
-            codegen_declare_arguments.append(('  {}\n'.format(decl)))
-        else:
-            assert 'fastdg' in arg.name
-            size = reduce(mul, arg.shape)
-            min_stride = min([tag.stride for tag in arg.dim_tags])
-            size *= min_stride
-            alignment = arg.dtype.itemsize
+        if transformations:
+            codegen_transformations = ''
+            for trafo in transformations:
+                codegen_transformations += '// {}\n'.format(trafo)
+
+        template = 'kernel_benchmark_template1.cc.in'
+        use_datasets = True
+
+        # Old benchmark template
+        # template = 'kernel_benchmark_template0.cc.in'
+        # use_datasets = False
+
+        template_filename = pkg_resources.resource_filename(__name__, template)
+        with open(template_filename, 'r') as f:
+            benchmark = f.read()
+
+        # Find function arguments and global arguments
+        arguments = sig[sig.find('(') + 1:sig.find(')')].split(',')
+        arguments = [a.split(' ')[-1] for a in arguments]
+        global_args = [a for a in kernel.args if a.name not in arguments]
+        buffer_arguments = [a for a in arguments if a.startswith('buff')]
+        input_arguments = [a for a in arguments if a not in buffer_arguments]
+
+        # Declare global arguments
+        codegen_declare_global_arguments = ''
+        target = DuneTarget()
+        for g in global_args:
+            decl_info = g.decl_info(target, True, g.dtype)
+            for idi in decl_info:
+                ast_builder = target.get_device_ast_builder()
+                arg_decl = lp.target.c.POD(ast_builder, idi.dtype, idi.name)
+                arg_decl = ArrayOf(arg_decl, reduce(mul, g.shape))
+                arg_decl = AlignedAttribute(g.dtype.itemsize * g.vector_size(target), arg_decl)
+                codegen_declare_global_arguments += '{}\n'.format(arg_decl)
+        codegen_declare_global_arguments = textwrap.indent(codegen_declare_global_arguments, '  ')
+
+        # Helper function for argument initialization
+        def _initialize_arg(arg):
+            if isinstance(arg, lp.ValueArg):
+                return []
             real = type_floatingpoint()
-            if use_datasets:
-                codegen_declare_input.append(('{} {}[datasets][{}] __attribute__ ((aligned ({})));\n'.format(real,
-                                                                                                             arg.name,
-                                                                                                             size,
-                                                                                                             alignment)))
+            size = reduce(mul, arg.shape)
+            fill_name = arg.name + '_fill'
+            lines = ['  {}* {} = (double *) {};'.format(real, fill_name, arg.name),
+                     '  for (std::size_t i=0; i<{}; ++i){{'.format(size),
+                     '    {}[i] = unif(re);'.format(fill_name),
+                     '  }']
+            return lines
+
+        # Initialize global arguments
+        codegen_initialize_global_arguments = ''
+        for arg in global_args:
+            lines = _initialize_arg(arg)
+            codegen_initialize_global_arguments += '\n'.join(lines) + '\n'
+        codegen_initialize_global_arguments = textwrap.indent(codegen_initialize_global_arguments, '  ')
+
+        codegen_initialize_input = ''
+
+        # Function we want to benchmark
+        codegen_benchmark_function = ''
+        codegen_benchmark_function += sig[0:sig.find(')') + 1]
+        codegen_benchmark_function += lp.generate_body(kernel)
+        codegen_benchmark_function = textwrap.indent(codegen_benchmark_function, '  ')
+
+        # Declare function arguments
+        codegen_declare_arguments = []
+        codegen_declare_input = []
+        function_arguments = [a for a in kernel.args if a.name in arguments]
+        for arg in function_arguments:
+            if 'buffer' in arg.name:
+                byte_size = reduce(mul, arg.shape) * 8
+                codegen_declare_arguments.append('  char {}[{}] __attribute__ ((aligned ({})));\n'.format(arg.name,
+                                                                                                          byte_size,
+                                                                                                          arg.alignment),)
+            elif isinstance(arg, lp.ValueArg):
+                assert 'jacobian_offset' in arg.name
+                decl = arg.get_arg_decl(ast_builder)
+                decl = Initializer(decl, 'unif_int(re)')
+                codegen_declare_arguments.append(('  {}\n'.format(decl)))
             else:
-                codegen_declare_input.append(('{} {}[{}] __attribute__ ((aligned ({})));\n'.format(real,
-                                                                                                   arg.name,
-                                                                                                   size,
-                                                                                                   alignment)))
-
-    codegen_declare_arguments = ''.join(codegen_declare_arguments)
-    codegen_declare_arguments = textwrap.indent(codegen_declare_arguments, '  ')
-    codegen_declare_input = ''.join(codegen_declare_input)
-    codegen_declare_input = textwrap.indent(codegen_declare_input, '  ')
-
-    # Initialize function arguments
-    codegen_initialize_arguments = ''
-    codegen_initialize_input = ''
-    for arg in function_arguments:
-        if 'fastdg' in arg.name:
-            if use_datasets:
-                lines = _initialize_arg(arg)
-                lines = ['  ' + a for a in lines]
-                lines = [a.replace(arg.name + ';', arg.name + '[i];') for a in lines]
-                lines.insert(0, 'for(std::size_t i=0; i<datasets; ++i){')
-                lines.append('}')
-                codegen_initialize_input += '\n'.join(lines) + '\n'
+                assert 'fastdg' in arg.name
+                size = reduce(mul, arg.shape)
+                min_stride = min([tag.stride for tag in arg.dim_tags])
+                size *= min_stride
+                alignment = arg.dtype.itemsize
+                real = type_floatingpoint()
+                if use_datasets:
+                    codegen_declare_input.append(('{} {}[datasets][{}] __attribute__ ((aligned ({})));\n'.format(real,
+                                                                                                                 arg.name,
+                                                                                                                 size,
+                                                                                                                 alignment)))
+                else:
+                    codegen_declare_input.append(('{} {}[{}] __attribute__ ((aligned ({})));\n'.format(real,
+                                                                                                       arg.name,
+                                                                                                       size,
+                                                                                                       alignment)))
+
+        codegen_declare_arguments = ''.join(codegen_declare_arguments)
+        codegen_declare_arguments = textwrap.indent(codegen_declare_arguments, '  ')
+        codegen_declare_input = ''.join(codegen_declare_input)
+        codegen_declare_input = textwrap.indent(codegen_declare_input, '  ')
+
+        # Initialize function arguments
+        codegen_initialize_arguments = ''
+        codegen_initialize_input = ''
+        for arg in function_arguments:
+            if 'fastdg' in arg.name:
+                if use_datasets:
+                    lines = _initialize_arg(arg)
+                    lines = ['  ' + a for a in lines]
+                    lines = [a.replace(arg.name + ';', arg.name + '[i];') for a in lines]
+                    lines.insert(0, 'for(std::size_t i=0; i<datasets; ++i){')
+                    lines.append('}')
+                    codegen_initialize_input += '\n'.join(lines) + '\n'
+                else:
+                    lines = _initialize_arg(arg)
+                    codegen_initialize_arguments += '\n'.join(lines) + '\n'
             else:
                 lines = _initialize_arg(arg)
                 codegen_initialize_arguments += '\n'.join(lines) + '\n'
+        codegen_initialize_arguments = textwrap.indent(codegen_initialize_arguments, '  ')
+        codegen_initialize_input = textwrap.indent(codegen_initialize_input, '  ')
+
+        # Call the benchmark function
+        if use_datasets:
+            arguments_with_datasets = arguments.copy()
+            arguments_with_datasets = [a if 'fastdg' not in a else a + '[i]' for a in arguments]
+            codegen_call_benchmark_function = 'for (std::size_t i=0; i<datasets; ++i){\n'
+            codegen_call_benchmark_function += '  ' + kernel.name + '({})'.format(','.join(arguments_with_datasets)) + ';\n'
+            for arg in input_arguments:
+                codegen_call_benchmark_function += 'benchmark::DoNotOptimize({}[i][0]);\n'.format(arg)
+            codegen_call_benchmark_function += '}'
         else:
-            lines = _initialize_arg(arg)
-            codegen_initialize_arguments += '\n'.join(lines) + '\n'
-    codegen_initialize_arguments = textwrap.indent(codegen_initialize_arguments, '  ')
-    codegen_initialize_input = textwrap.indent(codegen_initialize_input, '  ')
-
-    # Call the benchmark function
-    if use_datasets:
-        arguments_with_datasets = arguments.copy()
-        arguments_with_datasets = [a if 'fastdg' not in a else a + '[i]' for a in arguments]
-        codegen_call_benchmark_function = 'for (std::size_t i=0; i<datasets; ++i){\n'
-        codegen_call_benchmark_function += '  ' + kernel.name + '({})'.format(','.join(arguments_with_datasets)) + ';\n'
-        for arg in input_arguments:
-            codegen_call_benchmark_function += 'benchmark::DoNotOptimize({}[i][0]);\n'.format(arg)
-        codegen_call_benchmark_function += '}'
-    else:
-        codegen_call_benchmark_function = kernel.name + '({})'.format(','.join(arguments)) + ';\n'
-    codegen_call_benchmark_function = textwrap.indent(codegen_call_benchmark_function, '    ')
-
-    # Replace placeholders in benchmark template
-    benchmark = benchmark.replace('${CODEGEN_TRANSFORMATIONS}', codegen_transformations)
-    benchmark = benchmark.replace('${CODEGEN_DECLARE_GLOBAL_ARGUMENTS}', codegen_declare_global_arguments)
-    benchmark = benchmark.replace('${CODEGEN_DECLARE_INPUT}', codegen_declare_input)
-    benchmark = benchmark.replace('${CODEGEN_INITIALIZE_GLOBAL_ARGUMENTS}', codegen_initialize_global_arguments)
-    benchmark = benchmark.replace('${CODEGEN_INITIALIZE_INPUT}', codegen_initialize_input)
-    benchmark = benchmark.replace('${CODEGEN_BENCHMARK_FUNCTION}', codegen_benchmark_function)
-    benchmark = benchmark.replace('${CODEGEN_DECLARE_ARGUMENTS}', codegen_declare_arguments)
-    benchmark = benchmark.replace('${CODEGEN_INITIALIZE_ARGUMENTS}', codegen_initialize_arguments)
-    benchmark = benchmark.replace('${CODEGEN_CALL_BENCHMARK_FUNCTION}', codegen_call_benchmark_function)
-
-    # Write benchmark source file
-    with open(filename, 'w') as f:
-        f.writelines(benchmark)
-
-    # Maybe turn opcounting on again
-    set_option("opcounter", opcounting)
+            codegen_call_benchmark_function = kernel.name + '({})'.format(','.join(arguments)) + ';\n'
+        codegen_call_benchmark_function = textwrap.indent(codegen_call_benchmark_function, '    ')
+
+        # Replace placeholders in benchmark template
+        benchmark = benchmark.replace('${CODEGEN_TRANSFORMATIONS}', codegen_transformations)
+        benchmark = benchmark.replace('${CODEGEN_DECLARE_GLOBAL_ARGUMENTS}', codegen_declare_global_arguments)
+        benchmark = benchmark.replace('${CODEGEN_DECLARE_INPUT}', codegen_declare_input)
+        benchmark = benchmark.replace('${CODEGEN_INITIALIZE_GLOBAL_ARGUMENTS}', codegen_initialize_global_arguments)
+        benchmark = benchmark.replace('${CODEGEN_INITIALIZE_INPUT}', codegen_initialize_input)
+        benchmark = benchmark.replace('${CODEGEN_BENCHMARK_FUNCTION}', codegen_benchmark_function)
+        benchmark = benchmark.replace('${CODEGEN_DECLARE_ARGUMENTS}', codegen_declare_arguments)
+        benchmark = benchmark.replace('${CODEGEN_INITIALIZE_ARGUMENTS}', codegen_initialize_arguments)
+        benchmark = benchmark.replace('${CODEGEN_CALL_BENCHMARK_FUNCTION}', codegen_call_benchmark_function)
+
+        # Write benchmark source file
+        with open(filename, 'w') as f:
+            f.writelines(benchmark)
 
 
 def autotune_realization(sf=None, kernel=None, signature=None, transformations=None):
diff --git a/python/dune/codegen/sumfact/geometry.py b/python/dune/codegen/sumfact/geometry.py
index 811a201e9e11238bcdc4965ce2c7f75426fc3487..79ca2726bd3fab0dca0f7a21598ef13b9fe0dd0a 100644
--- a/python/dune/codegen/sumfact/geometry.py
+++ b/python/dune/codegen/sumfact/geometry.py
@@ -17,7 +17,7 @@ from dune.codegen.generation import (class_member,
                                      )
 from dune.codegen.loopy.flatten import flatten_index
 from dune.codegen.loopy.target import type_floatingpoint
-from dune.codegen.options import get_option
+from dune.codegen.options import get_form_option, get_option
 from dune.codegen.pdelab.geometry import (enforce_boundary_restriction,
                                           local_dimension,
                                           world_dimension,
@@ -41,7 +41,6 @@ from dune.codegen.sumfact.permutation import (permute_backward,
 from dune.codegen.sumfact.quadrature import additional_inames
 from dune.codegen.sumfact.symbolic import SumfactKernelInterfaceBase, SumfactKernel
 from dune.codegen.tools import get_pymbolic_basename, ImmutableCuttingRecord
-from dune.codegen.options import get_form_option, option_switch
 from dune.codegen.ufl.modified_terminals import Restriction
 
 from loopy.match import Writes
@@ -51,8 +50,13 @@ import numpy as np
 import loopy as lp
 
 
+class SumfactGeometryMixinBase(GenericPDELabGeometryMixin):
+    def nonsumfact_fallback(self):
+        return None
+
+
 @geometry_mixin("sumfact_multilinear")
-class SumfactMultiLinearGeometryMixin(GenericPDELabGeometryMixin):
+class SumfactMultiLinearGeometryMixin(SumfactGeometryMixinBase):
     def nonsumfact_fallback(self):
         return "generic"
 
@@ -242,7 +246,7 @@ class SumfactMultiLinearGeometryMixin(GenericPDELabGeometryMixin):
 
 
 @geometry_mixin("sumfact_axiparallel")
-class SumfactAxiParallelGeometryMixin(AxiparallelGeometryMixin):
+class SumfactAxiParallelGeometryMixin(SumfactGeometryMixinBase, AxiparallelGeometryMixin):
     def nonsumfact_fallback(self):
         return "axiparallel"
 
diff --git a/python/dune/codegen/sumfact/switch.py b/python/dune/codegen/sumfact/switch.py
index 65d35c17e4a8d2753e680c8c440fee6e9f4c6adf..6b8c43e4dac7bfd08f7bbbfb6a551b47fd02a499 100644
--- a/python/dune/codegen/sumfact/switch.py
+++ b/python/dune/codegen/sumfact/switch.py
@@ -12,7 +12,7 @@ from dune.codegen.pdelab.signatures import (assembly_routine_args,
                                             assembly_routine_signature,
                                             kernel_name,
                                             )
-from dune.codegen.options import get_form_option, get_option, set_form_option
+from dune.codegen.options import get_form_option, get_option, form_option_context
 from dune.codegen.cgen.clazz import ClassMember
 
 
@@ -26,21 +26,12 @@ def sumfact_generate_kernels_per_integral(integrals):
     if measure == "exterior_facet":
         # Maybe skip sum factorization on boundary integrals
         if not get_form_option("sumfact_on_boundary"):
-            set_form_option("sumfact", False)
-
-            # Try to find a fallback for sum factorized geometry mixins
-            geometry_backup = get_form_option("geometry_mixins")
-            mixin = construct_from_mixins(mixins=[geometry_backup])()
-            if hasattr(mixin, "nonsumfact_fallback"):
-                set_form_option("geometry_mixins", mixin.nonsumfact_fallback())
-
-            for k in generate_kernels_per_integral(integrals):
-                yield k
-
-            # Reset state
-            set_form_option("geometry_mixins", geometry_backup)
-            set_form_option("sumfact", True)
-            return
+            mixin = construct_from_mixins(mixins=[get_form_option("geometry_mixins")])()
+            geometry = mixin.nonsumfact_fallback() or get_form_option("geometry_mixins")
+            with form_option_context(sumfact=False, geometry_mixins=geometry):
+                for k in generate_kernels_per_integral(integrals):
+                    yield k
+                return
 
         # Generate all necessary kernels
         for facedir in range(dim):
diff --git a/python/dune/codegen/sumfact/vectorization.py b/python/dune/codegen/sumfact/vectorization.py
index e753652b10b3ac9b1765ee071bce1ccebd15f6b1..8da146281d6d5323303d215f42117ff5f011da37 100644
--- a/python/dune/codegen/sumfact/vectorization.py
+++ b/python/dune/codegen/sumfact/vectorization.py
@@ -20,7 +20,7 @@ from dune.codegen.sumfact.tabulation import (quadrature_points_per_direction,
                                              set_quadrature_points,
                                              )
 from dune.codegen.error import CodegenVectorizationError
-from dune.codegen.options import get_form_option, get_option, set_form_option
+from dune.codegen.options import get_form_option, get_option, form_option_context
 from dune.codegen.tools import add_to_frozendict, round_to_multiple, list_diff
 
 from pymbolic.mapper.flop_counter import FlopCounter
@@ -331,17 +331,16 @@ def level1_optimal_vectorization_strategy(sumfacts, width):
     # If we are using the 'target' strategy, we might want to log some information.
     if get_form_option("vectorization_strategy") == "target":
         # Print the achieved cost and the target cost on the screen
-        set_form_option("vectorization_strategy", "model")
-        target = float(get_form_option("vectorization_target"))
-        qp = min(optimal_strategies, key=lambda qp: abs(strategy_cost((qp, optimal_strategies[qp])) - target))
-        cost = strategy_cost((qp, optimal_strategies[qp]))
-
-        print("The target cost was:   {}".format(target))
-        print("The achieved cost was: {}".format(cost))
-        optimum = level1_optimal_vectorization_strategy(sumfacts, width)
-        print("The optimal cost would be: {}".format(strategy_cost(optimum)))
-        set_form_option("vectorization_strategy", "target")
-        print("The score in 'target' logic was: {}".format(strategy_cost((qp, optimal_strategies[qp]))))
+        with form_option_context(vectorization_strategy="model"):
+            target = float(get_form_option("vectorization_target"))
+            qp = min(optimal_strategies, key=lambda qp: abs(strategy_cost((qp, optimal_strategies[qp])) - target))
+            cost = strategy_cost((qp, optimal_strategies[qp]))
+
+            print("The target cost was:   {}".format(target))
+            print("The achieved cost was: {}".format(cost))
+            optimum = level1_optimal_vectorization_strategy(sumfacts, width)
+            print("The optimal cost would be: {}".format(strategy_cost(optimum)))
+            print("The score in 'target' logic was: {}".format(strategy_cost((qp, optimal_strategies[qp]))))
 
         # Print the employed vectorization strategy into a file
         suffix = ""
diff --git a/test/poisson/poisson_tensor.ufl b/test/poisson/poisson_tensor.ufl
index b527d05258667dae629f608a1a630e5f11f947b8..3591d4d70e17f8811cf32bb759212c7c33e0bb24 100644
--- a/test/poisson/poisson_tensor.ufl
+++ b/test/poisson/poisson_tensor.ufl
@@ -12,6 +12,9 @@ V = FiniteElement("CG", cell, 1)
 u = TrialFunction(V)
 v = TestFunction(V)
 
+# Test metadata setting of options
+dx = dx(metadata={"quadrature_order": 27})
+
 r= (inner(A*grad(u), grad(v)) + c*u*v -f*v)*dx
 exact_solution = g
 is_dirichlet = 1