diff --git a/python/dune/perftool/loopy/__init__.py b/python/dune/perftool/loopy/__init__.py
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..2ef32a24fa3721737845e5ce54d42de834effb5b 100644
--- a/python/dune/perftool/loopy/__init__.py
+++ b/python/dune/perftool/loopy/__init__.py
@@ -0,0 +1,4 @@
+""" Export the part of the interface that is of interest to the rest of the project """
+
+from dune.perftool.loopy.transformations.collect_precompute import collect_vector_data_precompute
+from dune.perftool.loopy.transformations.duplicate import heuristic_duplication
diff --git a/python/dune/perftool/loopy/collectvector.py b/python/dune/perftool/loopy/transformations/collect_precompute.py
similarity index 89%
rename from python/dune/perftool/loopy/collectvector.py
rename to python/dune/perftool/loopy/transformations/collect_precompute.py
index 3d69d623f28851466fc44690ef876366f4f7aeb3..2324defcff748241ea7dbc1e6ab2f4a786565b34 100644
--- a/python/dune/perftool/loopy/collectvector.py
+++ b/python/dune/perftool/loopy/transformations/collect_precompute.py
@@ -1,9 +1,10 @@
-""" A kernel transformation that collects data until the vector size is reached """
+""" A kernel transformation that precomputes data and then splits the computation
+into chunks of vector size, independent of the divisibility of the loop bounds. """
 
 from dune.perftool.loopy.vcl import get_vcl_type_size
-from dune.perftool.loopy.vectorview import (add_vector_view,
-                                            get_vector_view_name,
-                                            )
+from dune.perftool.loopy.transformations.vectorview import (add_vector_view,
+                                                            get_vector_view_name,
+                                                            )
 from dune.perftool.tools import get_pymbolic_basename
 
 from loopy.kernel.creation import parse_domains
@@ -17,7 +18,7 @@ import loopy as lp
 import numpy as np
 
 
-def collect_vector_data(knl, insns, inames):
+def collect_vector_data_precompute(knl, insns, inames):
     #
     # Process/Assert/Standardize the input
     #
@@ -55,12 +56,9 @@ def collect_vector_data(knl, insns, inames):
 
         # Do precomputation of the quantity
         prec_quantity = "{}_precomputed".format(quantity)
-        compute_id = "{}_compute_id".format(quantity)
-        compute_ids.append(compute_id)
-
         knl = lp.precompute(knl, subst_name, inames,
                             temporary_name=prec_quantity,
-                            compute_insn_id=compute_id)
+                            )
 
         # Introduce a vector view of the precomputation result
         knl = add_vector_view(knl, prec_quantity)
diff --git a/python/dune/perftool/loopy/duplicate.py b/python/dune/perftool/loopy/transformations/duplicate.py
similarity index 100%
rename from python/dune/perftool/loopy/duplicate.py
rename to python/dune/perftool/loopy/transformations/duplicate.py
diff --git a/python/dune/perftool/loopy/vectorview.py b/python/dune/perftool/loopy/transformations/vectorview.py
similarity index 100%
rename from python/dune/perftool/loopy/vectorview.py
rename to python/dune/perftool/loopy/transformations/vectorview.py
diff --git a/python/dune/perftool/pdelab/localoperator.py b/python/dune/perftool/pdelab/localoperator.py
index 8c48334121000df5d0707a9594438787e25f58f1..73a7336782bce45263c0ee5aac5fa1a0a7d59690 100644
--- a/python/dune/perftool/pdelab/localoperator.py
+++ b/python/dune/perftool/pdelab/localoperator.py
@@ -501,13 +501,13 @@ def generate_kernel(integrals):
     from loopy import make_reduction_inames_unique
     kernel = make_reduction_inames_unique(kernel)
 
+    kernel = preprocess_kernel(kernel)
+
     # Apply the transformations that were gathered during tree traversals
     for trafo in transformations:
         kernel = trafo[0](kernel, *trafo[1])
 
-    kernel = preprocess_kernel(kernel)
-
-    from dune.perftool.loopy.duplicate import heuristic_duplication
+    from dune.perftool.loopy import heuristic_duplication
     kernel = heuristic_duplication(kernel)
 
     # Now add the preambles to the kernel