diff --git a/python/dune/codegen/sumfact/symbolic.py b/python/dune/codegen/sumfact/symbolic.py
index 45cd7548f68199116279122fac7db08f50a9ce53..25b3a79578364f9be94374e03399881e06dc9ae9 100644
--- a/python/dune/codegen/sumfact/symbolic.py
+++ b/python/dune/codegen/sumfact/symbolic.py
@@ -994,7 +994,16 @@ class VectorizedSumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable)
 
     def horizontal_index(self, sf):
         for i, k in enumerate(self.kernels):
-            if sf.inout_key == k.inout_key:
+            # We need to identify to which part of the vectorized kernel sf
+            # corresponds. Since splitting might change the cost_permutation we
+            # exclude it in the comparison below. We also make sure to check
+            # that derivatives are the same.
+            from copy import deepcopy
+            sf_interface = deepcopy(sf.interface)
+            sf_interface._cost_permutation=None
+            k_interface = deepcopy(k.interface)
+            k_interface._cost_permutation=None
+            if repr(sf_interface) == repr(k_interface):
                 if tuple(mat.derivative for mat in sf.matrix_sequence_quadrature_permuted) == tuple(mat.derivative for mat in k.matrix_sequence_quadrature_permuted):
                     return i
 
@@ -1050,7 +1059,7 @@ class VectorizedSumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable)
 
         quad_inames = quadrature_inames(element)
         sliced = 0
-        if len(sf.matrix_sequence_quadrature_permuted) == local_dimension():
+        if len(self.matrix_sequence_quadrature_permuted) == local_dimension():
             for d in range(local_dimension()):
                 if self.matrix_sequence_quadrature_permuted[d].slice_size:
                     sliced = prim.Variable(quad_inames[d])