diff --git a/python/dune/perftool/generation/loopy.py b/python/dune/perftool/generation/loopy.py
index 133a1f1712f011953f47515dd08575fbecdbe202..8e7e835137e0c9ab987db1e7e1c3b7bab1abdf6a 100644
--- a/python/dune/perftool/generation/loopy.py
+++ b/python/dune/perftool/generation/loopy.py
@@ -17,13 +17,17 @@ function_mangler = generator_factory(item_tags=("mangler",))
 silenced_warning = generator_factory(item_tags=("silenced_warning",), no_deco=True)
 
 
+class DuneGlobalArg(loopy.GlobalArg):
+    allowed_extra_kwargs = loopy.GlobalArg.allowed_extra_kwargs + ["managed"]
+
+
 @generator_factory(item_tags=("argument", "globalarg"),
                    cache_key_generator=lambda n, **kw: n)
-def globalarg(name, shape=loopy.auto, **kw):
+def globalarg(name, shape=loopy.auto, managed=True, **kw):
     if isinstance(shape, str):
         shape = (shape,)
     dtype = kw.pop("dtype", numpy.float64)
-    return loopy.GlobalArg(name, dtype=dtype, shape=shape, **kw)
+    return DuneGlobalArg(name, dtype=dtype, shape=shape, managed=managed, **kw)
 
 
 @generator_factory(item_tags=("argument", "constantarg"),
diff --git a/python/dune/perftool/loopy/target.py b/python/dune/perftool/loopy/target.py
index 55fb43c71c7218f7969e06a70339372a68acfb26..57ff1d237c48c0c9ed27ea36698410381d1aa718 100644
--- a/python/dune/perftool/loopy/target.py
+++ b/python/dune/perftool/loopy/target.py
@@ -1,5 +1,6 @@
 from dune.perftool.generation import post_include
 
+from dune.perftool.generation.loopy import DuneGlobalArg
 from dune.perftool.loopy.temporary import DuneTemporaryVariable
 from dune.perftool.loopy.vcl import VCLTypeRegistry
 from dune.perftool.generation import (include_file,
@@ -29,7 +30,7 @@ _registry = {'float32': 'float',
 class DuneExpressionToCExpressionMapper(ExpressionToCExpressionMapper):
     def map_subscript(self, expr, type_context):
         arr = self.find_array(expr)
-        if isinstance(arr, DuneTemporaryVariable) and not arr.managed:
+        if isinstance(arr, (DuneTemporaryVariable, DuneGlobalArg)) and not arr.managed:
             # If there is but one index, we do not need to handle this
             if isinstance(expr.index, (prim.Variable, int)):
                 return expr
diff --git a/python/dune/perftool/loopy/transformations/collect_rotate.py b/python/dune/perftool/loopy/transformations/collect_rotate.py
index e39595f9b35014937cd907f34ae784d038a34d95..5839873dceb5222ffc65edb02246b70243762973 100644
--- a/python/dune/perftool/loopy/transformations/collect_rotate.py
+++ b/python/dune/perftool/loopy/transformations/collect_rotate.py
@@ -66,14 +66,10 @@ def collect_vector_data_rotate(knl, insns, inames):
             basename = get_pymbolic_basename(expr)
             quantities.setdefault(basename, frozenset())
             quantities[basename] = quantities[basename].union(frozenset([expr]))
-    assert all(len(q) == 1 for q in quantities.values())
 
     # Add vector size buffers for all these quantities
-    replacemap_arr = {}
     replacemap_vec = {}
     for quantity in quantities:
-        expr, = quantities[quantity]
-
         # Check whether there is an instruction that writes this quantity within
         # the given inames. If so, we need a buffer array.
         iname_match = lp.match.And(tuple(lp.match.Iname(i) for i in inames))
@@ -83,38 +79,57 @@ def collect_vector_data_rotate(knl, insns, inames):
         all_writers.extend([i.id for i in write_insns])
 
         if write_insns:
+            # Determine the shape of the quantity
+            shape = knl.temporary_variables[quantity].shape
+
             arrname = quantity + '_buffered_arr'
             knl = add_temporary_with_vector_view(knl,
                                                  arrname,
                                                  dtype=np.float64,
-                                                 shape=(vec_size,),
-                                                 dim_tags="c",
+                                                 shape=shape + (vec_size,),
+                                                 dim_tags=",".join("c" for i in range(len(shape) + 1)),
                                                  base_storage=quantity + '_base_storage',
                                                  scope=lp.temp_var_scope.PRIVATE,
                                                  )
 
-            replacemap_arr[quantity] = prim.Subscript(prim.Variable(arrname), (prim.Variable('rotate_index'),))
-            replacemap_vec[expr] = prim.Subscript(prim.Variable(get_vector_view_name(arrname)), (0, prim.Variable(new_iname),))
+            def get_quantity_subscripts(e, zero=False):
+                if isinstance(e, prim.Subscript):
+                    index = e.index
+                    if isinstance(index, tuple):
+                        return index
+                    else:
+                        return (index,)
+                else:
+                    if zero:
+                        return (0,)
+                    else:
+                        return ()
+
+            for expr in quantities[quantity]:
+                replacemap_vec[expr] = prim.Subscript(prim.Variable(get_vector_view_name(arrname)), get_quantity_subscripts(expr, zero=True) + (prim.Variable(new_iname),))
 
             for insn in write_insns:
                 if isinstance(insn, lp.Assignment):
-                    new_insns.append(insn.copy(assignee=replacemap_arr[get_pymbolic_basename(insn.assignee)],
+                    assignee = prim.Subscript(prim.Variable(arrname), get_quantity_subscripts(insn.assignee) + (prim.Variable('rotate_index'),))
+                    new_insns.append(insn.copy(assignee=assignee,
                                                depends_on_is_final=True,
                                                )
                                      )
                 elif isinstance(insn, lp.CInstruction):
                     # Rip apart the code and change the assignee
                     assignee, expression = insn.code.split("=")
-                    assignee = assignee.strip()
-                    assert assignee in replacemap_arr
 
-                    code = "{} ={}".format(str(replacemap_arr[assignee]), expression)
+                    # TODO This is a bit whacky: It only works for scalar assignees
+                    # OTOH this code is on its way out anyway, because of CInstruction
+                    assignee = prim.Subscript(prim.Variable(arrname), (prim.Variable('rotate_index'),))
+
+                    code = "{} ={}".format(str(assignee), expression)
                     new_insns.append(insn.copy(code=code,
                                                depends_on_is_final=True,
                                                ))
                 else:
                     raise NotImplementedError
-        else:
+        elif quantity in knl.temporary_variables:
             # Add a vector view to this quantity
             knl = add_vector_view(knl, quantity)
             replacemap_vec[expr] = prim.Subscript(prim.Variable(get_vector_view_name(quantity)),
diff --git a/python/dune/perftool/options.py b/python/dune/perftool/options.py
index b294fbb15cb73b3e07f981fbce33d1b964009fc3..75d879949e6930313b69faec4022bd70a8287bee 100644
--- a/python/dune/perftool/options.py
+++ b/python/dune/perftool/options.py
@@ -36,7 +36,8 @@ def get_form_compiler_arguments():
     parser.add_argument("--print-transformations", action="store_true", help="print out dot files after ufl tree transformations")
     parser.add_argument("--print-transformations-dir", type=str, help="place where to put dot files (can be omitted)")
     parser.add_argument("--quadrature-order", type=int, help="Quadrature order used for all integrals.")
-    parser.add_argument("--diagonal-transformation-matrix", action="store_true", help="set option if the jacoby of the transformation is diagonal (axiparallel grids)")
+    parser.add_argument("--diagonal-transformation-matrix", action="store_true", help="set option if the jacobian of the transformation is diagonal (axiparallel grids)")
+    parser.add_argument("--constant-transformation-matrix", action="store_true", help="set option if the jacobian of the transformation is constant on a cell")
     parser.add_argument("--ini-file", type=str, help="An inifile to use. A generated driver will be hard-coded to it, a [formcompiler] section will be used as default values to form compiler arguments (use snake case)")
     parser.add_argument("--timer", action="store_true", help="measure times")
     parser.add_argument("--project-basedir", type=str, help="The base (build) directory of the dune-perftool project")
diff --git a/python/dune/perftool/pdelab/driver.py b/python/dune/perftool/pdelab/driver.py
index 6d47731038bb96652cf7f50fa27f3791a224423e..9b8efbea778fd2b510fa0534c996363dd242f173 100644
--- a/python/dune/perftool/pdelab/driver.py
+++ b/python/dune/perftool/pdelab/driver.py
@@ -176,9 +176,10 @@ def name_dimension():
 def typedef_grid(name):
     dim = name_dimension()
     if any(_driver_data['form'].ufl_cell().cellname() in x for x in ["vertex", "interval", "quadrilateral", "hexahedron"]):
-        # For Yasp Grids the jacobi of the transformation is diagonal
+        # For Yasp Grids the jacobi of the transformation is diagonal and constant on each cell
         from dune.perftool.options import set_option
         set_option('diagonal_transformation_matrix', True)
+        set_option('constant_transformation_matrix', True)
 
         gridt = "Dune::YaspGrid<{}>".format(dim)
         include_file("dune/grid/yaspgrid.hh", filetag="driver")
diff --git a/python/dune/perftool/pdelab/geometry.py b/python/dune/perftool/pdelab/geometry.py
index 63ffcd08ad8362c276b54034f9bc4a4e7a74ca47..9d8a1c28d1bfbd50af29e9c3f6c850b60a949de1 100644
--- a/python/dune/perftool/pdelab/geometry.py
+++ b/python/dune/perftool/pdelab/geometry.py
@@ -1,14 +1,18 @@
 from dune.perftool.ufl.modified_terminals import Restriction
 from dune.perftool.pdelab.restriction import restricted_name
-from dune.perftool.generation import (cached,
+from dune.perftool.generation import (backend,
+                                      cached,
                                       domain,
                                       get_backend,
                                       get_global_context_value,
+                                      globalarg,
                                       iname,
                                       include_file,
                                       preamble,
                                       temporary_variable,
+                                      valuearg,
                                       )
+from dune.perftool.options import option_switch
 from dune.perftool.pdelab.quadrature import (pymbolic_quadrature_position_in_cell,
                                              quadrature_preamble,
                                              )
@@ -17,6 +21,8 @@ from ufl.algorithms import MultiFunction
 from pymbolic.primitives import Variable
 from pymbolic.primitives import Expression as PymbolicExpression
 
+import numpy as np
+
 
 @preamble
 def define_reference_element(name):
@@ -281,6 +287,26 @@ def define_jacobian_inverse_transposed_temporary(restriction):
     return _define_jacobian_inverse_transposed_temporary
 
 
+@preamble
+@backend(interface="define_jit", name="constant_transformation_matrix")
+def define_constant_jacobian_inveser_transposed(name, restriction):
+    geo = name_cell_geometry(restriction)
+    pos = name_localcenter()
+    dim = name_dimension()
+
+    if restriction:
+        geo_in = name_in_cell_geometry(restriction)
+        pos = "{}.global({})".format(geo_in, pos)
+
+    globalarg(name, dtype=np.float64, shape=(dim, dim), managed=False)
+
+    return 'auto {} = {}.jacobianInverseTransposed({});'.format(name,
+                                                                geo,
+                                                                pos,
+                                                                )
+
+
+@backend(interface="define_jit", name="default")
 def define_jacobian_inverse_transposed(name, restriction):
     dim = name_dimension()
     temporary_variable(name, decl_method=define_jacobian_inverse_transposed_temporary(restriction), shape=(dim, dim))
@@ -297,10 +323,25 @@ def define_jacobian_inverse_transposed(name, restriction):
 
 def name_jacobian_inverse_transposed(restriction):
     name = restricted_name("jit", restriction)
-    define_jacobian_inverse_transposed(name, restriction)
+    get_backend(interface="define_jit", selector=option_switch("constant_transformation_matrix"))(name, restriction)
     return name
 
 
+@backend(interface="detjac", name="constant_transformation_matrix")
+@preamble
+def define_constant_jacobian_determinant(name):
+    geo = name_geometry()
+    pos = name_localcenter()
+
+    valuearg(name, dtype=np.float64)
+
+    return "auto {} = {}.integrationElement({});".format(name,
+                                                         geo,
+                                                         pos,
+                                                         )
+
+
+@backend(interface="detjac", name="default")
 def define_jacobian_determinant(name):
     temporary_variable(name, shape=())
     geo = name_geometry()
@@ -317,11 +358,11 @@ def define_jacobian_determinant(name):
 
 def name_jacobian_determinant():
     name = 'detjac'
-    define_jacobian_determinant(name)
+    get_backend(interface="detjac", selector=option_switch("constant_transformation_matrix"))(name)
     return name
 
 
 def name_facet_jacobian_determinant():
     name = 'fdetjac'
-    define_jacobian_determinant(name)
+    get_backend(interface="detjac", selector=option_switch("constant_transformation_matrix"))(name)
     return name
diff --git a/python/dune/perftool/pdelab/localoperator.py b/python/dune/perftool/pdelab/localoperator.py
index cd64a827e2a2ba90053ae3a61cabe8770d2b4207..e084853782f0d8c44ab85d984977c18f35a0e753 100644
--- a/python/dune/perftool/pdelab/localoperator.py
+++ b/python/dune/perftool/pdelab/localoperator.py
@@ -26,6 +26,8 @@ from dune.perftool.ufl.modified_terminals import Restriction
 from pymbolic.primitives import Variable
 from pytools import Record
 
+import loopy as lp
+
 
 def name_form(formdata, data):
     # Check wether the formdata has a name in UFL
@@ -508,6 +510,19 @@ def generate_kernel(integrals):
     from dune.perftool.loopy import heuristic_duplication
     kernel = heuristic_duplication(kernel)
 
+    # Maybe apply vectorization strategies
+    if get_option("vectorize"):
+        if get_option("sumfact"):
+            # Vectorization of the quadrature loop
+            insns = [i.id for i in lp.find_instructions(kernel, lp.match.Tagged("quadvec"))]
+            from dune.perftool.sumfact.quadrature import quadrature_inames
+            inames = quadrature_inames()
+
+            from dune.perftool.loopy.transformations.collect_rotate import collect_vector_data_rotate
+            kernel = collect_vector_data_rotate(kernel, insns, inames)
+        else:
+            raise NotImplementedError("Only vectorizing sumfactoized code right now!")
+
     # Now add the preambles to the kernel
     preambles = [(i, p) for i, p in enumerate(retrieve_cache_items("preamble"))]
     kernel = kernel.copy(preambles=preambles)
diff --git a/python/dune/perftool/pdelab/quadrature.py b/python/dune/perftool/pdelab/quadrature.py
index c0ddc6a99dac2c0f2f179e366e7d76846f28c7c4..72a42aa73ca20e0543f390ccbd193b5fc2895943 100644
--- a/python/dune/perftool/pdelab/quadrature.py
+++ b/python/dune/perftool/pdelab/quadrature.py
@@ -118,6 +118,8 @@ def name_quadrature_points():
     """Name of vector storing quadrature points as class member"""
     dim = _local_dim()
     name = "qp_order" + str(dim)
+    shape = (name_quadrature_bound(), dim)
+    globalarg(name, shape=shape, dtype=numpy.float64, managed=False)
     define_quadrature_points(name)
     fill_quadrature_points_cache(name)
     return name
diff --git a/python/dune/perftool/sumfact/basis.py b/python/dune/perftool/sumfact/basis.py
index 43b0ff43f682abca2ebcfe714a67bc129537cdb5..77edc9e0151799baa1f9c60c6f7355d1dbcc2006 100644
--- a/python/dune/perftool/sumfact/basis.py
+++ b/python/dune/perftool/sumfact/basis.py
@@ -41,32 +41,13 @@ def name_sumfact_base_buffer():
 
 @cached
 def sumfact_evaluate_coefficient_gradient(element, name, restriction, component):
-    # First we determine the rank of the tensor we are talking about
+    # Get a temporary for the gradient
     from ufl.functionview import select_subelement
     sub_element = select_subelement(element, component)
     rank = len(sub_element.value_shape()) + 1
-
-    # We do then set some variables accordingly
     shape = sub_element.value_shape() + (element.cell().geometric_dimension(),)
     shape_impl = ('arr',) * rank
-
-    from dune.perftool.pdelab.geometry import dimension_iname
-    idims = tuple(dimension_iname(count=i) for i in range(rank))
-    leaf_element = sub_element
-    from ufl import VectorElement, TensorElement
-    if isinstance(sub_element, (VectorElement, TensorElement)):
-        leaf_element = sub_element.sub_elements()[0]
-
-    # and proceed to call the necessary generator functions
     temporary_variable(name, shape=shape, shape_impl=shape_impl)
-    from dune.perftool.pdelab.spaces import name_lfs
-    lfs = name_lfs(element, restriction, component)
-    from dune.perftool.pdelab.basis import pymbolic_reference_gradient
-    basis = pymbolic_reference_gradient(leaf_element, restriction, 0, context='trialgrad')
-    from dune.perftool.tools import get_pymbolic_indices
-    index, _ = get_pymbolic_indices(basis)
-    if isinstance(sub_element, (VectorElement, TensorElement)):
-        lfs = lfs_child(lfs, idims[:-1], shape=shape_as_pymbolic(shape[:-1]), symmetry=element.symmetry())
 
     # Calculate values with sumfactorization
     theta = name_theta()
@@ -111,7 +92,7 @@ def sumfact_evaluate_coefficient_gradient(element, name, restriction, component)
         expression = Subscript(Variable(buf), tuple(Variable(i) for i in quadrature_inames()))
         instruction(assignee=assignee,
                     expression=expression,
-                    forced_iname_deps=frozenset(get_backend("quad_inames")()).union(frozenset(idims)),
+                    forced_iname_deps=frozenset(get_backend("quad_inames")()),
                     forced_iname_deps_is_final=True,
                     )
 
@@ -204,9 +185,6 @@ def pymbolic_basis(element, restriction, number):
 @backend(interface="evaluate_grad")
 @cached
 def evaluate_reference_gradient(element, name, restriction):
-    # from dune.perftool.pdelab.basis import name_leaf_lfs
-    # lfs = name_leaf_lfs(element, restriction)
-    # from dune.perftool.pdelab.spaces import name_lfs_bound
     from dune.perftool.pdelab.geometry import name_dimension
     temporary_variable(
         name,
diff --git a/python/dune/perftool/sumfact/sumfact.py b/python/dune/perftool/sumfact/sumfact.py
index f163217dcd1db4e73f13c7ee9ef8c3064bbade06..3624022806170316035c10b4fa4e98263d7d52ef 100644
--- a/python/dune/perftool/sumfact/sumfact.py
+++ b/python/dune/perftool/sumfact/sumfact.py
@@ -157,6 +157,7 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id):
                                   expression=expression,
                                   forced_iname_deps=frozenset(quadrature_inames() + visitor.inames),
                                   forced_iname_deps_is_final=True,
+                                  tags=frozenset({"quadvec"}),
                                   )
         contribution_ids.append(contrib_dep)
 
@@ -205,11 +206,6 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id):
         # Mark the transformation that moves the quadrature loop inside the trialfunction loops for application
         transform(nest_quadrature_loops, visitor.inames)
 
-    # Maybe try to vectorize!
-    if get_option("vectorize"):
-        from dune.perftool.loopy.transformations.collect_rotate import collect_vector_data_rotate
-        transform(collect_vector_data_rotate, contribution_ids, quadrature_inames())
-
 
 def sum_factorization_kernel(a_matrices, buf, insn_dep=frozenset({}), additional_inames=frozenset({})):
     """
diff --git a/test/sumfact/poisson/poisson_order1.mini b/test/sumfact/poisson/poisson_order1.mini
index 0c4919fea7ce92f8a897e7ace214fc8041b4d086..7591840603d2880c21542f1f3df3d3c8a818ef73 100644
--- a/test/sumfact/poisson/poisson_order1.mini
+++ b/test/sumfact/poisson/poisson_order1.mini
@@ -1,5 +1,8 @@
 __name = sumfact_poisson_order1_{__exec_suffix}
-__exec_suffix = numdiff, symdiff | expand num
+__exec_suffix = {diff_suffix}_{vec_suffix}
+
+diff_suffix = numdiff, symdiff | expand num
+vec_suffix = vec, nonvec | expand vec
 
 cells = 8 8
 extension = 1. 1.
@@ -14,3 +17,4 @@ numerical_jacobian = 1, 0 | expand num
 exact_solution_expression = g
 compare_l2errorsquared = 1e-4
 sumfact = 1
+vectorize = 1, 0 | expand vec
diff --git a/test/sumfact/poisson/poisson_order2.mini b/test/sumfact/poisson/poisson_order2.mini
index 88e4f3b89ca7f9ca82faa1680d85b4429e632ea1..8830f173ccb742221c987af29181938ee230f2d9 100644
--- a/test/sumfact/poisson/poisson_order2.mini
+++ b/test/sumfact/poisson/poisson_order2.mini
@@ -1,5 +1,8 @@
 __name = sumfact_poisson_order2_{__exec_suffix}
-__exec_suffix = numdiff, symdiff | expand num
+__exec_suffix = {diff_suffix}_{vec_suffix}
+
+diff_suffix = numdiff, symdiff | expand num
+vec_suffix = vec, nonvec | expand vec
 
 cells = 8 8
 extension = 1. 1.
@@ -14,3 +17,4 @@ numerical_jacobian = 1, 0 | expand num
 exact_solution_expression = g
 compare_l2errorsquared = 1e-8
 sumfact = 1
+vectorize = 1, 0 | expand vec