diff --git a/python/dune/codegen/sumfact/autotune.py b/python/dune/codegen/sumfact/autotune.py
index 8537a819a2e3ed67d66bb67aa8ccb91c93ee0b34..a1d7442addc4e2d1d720b1cc81d8a914c1f75532 100644
--- a/python/dune/codegen/sumfact/autotune.py
+++ b/python/dune/codegen/sumfact/autotune.py
@@ -115,11 +115,12 @@ def write_setup_code(sf, filename, define_thetas=True):
         constructor_knl = lp.get_one_scheduled_kernel(constructor_knl)
 
         # Allocate buffers
+        alignment = get_option("max_vector_width") // 8
         size = max(product(m.quadrature_size for m in sf.matrix_sequence_quadrature_permuted) * sf.vector_width,
                    product(m.basis_size for m in sf.matrix_sequence_quadrature_permuted) * sf.vector_width)
         size = int(size * (get_option("precision_bits") / 8))
-        f.writelines(["  char buffer0[{}] __attribute__ ((aligned (32)));\n".format(size),
-                      "  char buffer1[{}] __attribute__ ((aligned (32)));\n".format(size),
+        f.writelines(["  char buffer0[{}] __attribute__ ((aligned ({})));\n".format(size, alignment),
+                      "  char buffer1[{}] __attribute__ ((aligned ({})));\n".format(size, alignment),
                       ])
 
         # Setup fastdg inputs
@@ -128,7 +129,7 @@ def write_setup_code(sf, filename, define_thetas=True):
                 f.write("{} = 0;\n".format(arg))
             else:
                 size = sf.interface.fastdg_interface_object_size
-                f.write("  RF {}[{}] __attribute__ ((aligned (32)));\n".format(arg.split()[-1], size))
+                f.write("  RF {}[{}] __attribute__ ((aligned ({})));\n".format(arg.split()[-1], size, alignment))
 
         # Write stuff into the input buffer
         f.writelines(["  {0} *input = ({0} *)buffer0;\n".format(real),