From ceb384b18b6cdfde72633c38d3832ce3f8c42489 Mon Sep 17 00:00:00 2001
From: Dominic Kempf <dominic.kempf@iwr.uni-heidelberg.de>
Date: Tue, 20 Mar 2018 09:52:59 +0100
Subject: [PATCH] Fixed up buffer size computation

---
 python/dune/perftool/loopy/target.py                     | 6 +++---
 .../perftool/loopy/transformations/vectorize_quad.py     | 2 +-
 python/dune/perftool/sumfact/realization.py              | 9 +++++----
 3 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/python/dune/perftool/loopy/target.py b/python/dune/perftool/loopy/target.py
index e6015102..9a00a876 100644
--- a/python/dune/perftool/loopy/target.py
+++ b/python/dune/perftool/loopy/target.py
@@ -190,15 +190,15 @@ class DuneASTBuilder(CASTBuilder):
             size = []
             for t in temps:
                 if isinstance(t, DuneTemporaryVariable) and t.custom_base_storage == bs:
-                    # TODO: Extract correct size
-                    alignment.append(8)
+                    # TODO: Extract alignment from the temporaries after switching to loopy 2018.1
+                    alignment.append(get_option("max_vector_width") // 8)
                     from pytools import product
                     size.append(product(t.shape))
 
             alignment = max(alignment)
             size = max(size)
 
-            decl =  "char {}[{}] __attribute__ ((aligned({})));".format(bs, size * alignment, alignment)
+            decl =  "char {}[{}] __attribute__ ((aligned({})));".format(bs, size * 8, alignment)
             ret.append(cgen.Line(decl))
 
         if self.target.declare_temporaries:
diff --git a/python/dune/perftool/loopy/transformations/vectorize_quad.py b/python/dune/perftool/loopy/transformations/vectorize_quad.py
index 5022ebaf..e3f30cc2 100644
--- a/python/dune/perftool/loopy/transformations/vectorize_quad.py
+++ b/python/dune/perftool/loopy/transformations/vectorize_quad.py
@@ -242,7 +242,7 @@ def _vectorize_quadrature_loop(knl, inames, suffix):
     for insn in insns:
         # Get a vector view of the lhs expression
         lhsname = get_pymbolic_basename(insn.assignee)
-        knl = add_vector_view(knl, lhsname, pad_to=vec_size)
+        knl = add_vector_view(knl, lhsname)
         lhsname = get_vector_view_name(lhsname)
         rotating = "gradvec" in insn.tags
 
diff --git a/python/dune/perftool/sumfact/realization.py b/python/dune/perftool/sumfact/realization.py
index 0ce8a595..d113f815 100644
--- a/python/dune/perftool/sumfact/realization.py
+++ b/python/dune/perftool/sumfact/realization.py
@@ -32,6 +32,7 @@ from dune.perftool.sumfact.accumulation import sumfact_iname
 from dune.perftool.loopy.target import dtype_floatingpoint
 from dune.perftool.loopy.vcl import ExplicitVCLCast
 
+from pytools import product
 from ufl import MixedElement
 
 import loopy as lp
@@ -78,17 +79,17 @@ def _realize_sum_factorization_kernel(sf):
 
     # Get all the necessary pieces for a function call
     funcname = name_kernel_implementation_function(sf)
-    #TODO calculate the size and alignment correctly
-    size = 10000
-    alignment = 8
     buffers = tuple(name_buffer_storage(sf.buffer, i) for i in range(2))
 
     # Make sure that the storage is allocated and has a certain minimum size
     # This is necessary to allocate buffers that will be passed to sumfact kernel
     # functions. Loopy has no knowledge of what happens with those...
     for buf in buffers:
+        # Determine the necessary buffer size: the product of all quadrature sizes
+        # times the vector width. This assumes the form is not underintegrated!
+        size = product(m.quadrature_size for m in sf.matrix_sequence) * sf.vector_width
         temporary_variable("{}_dummy".format(buf),
-                           shape=(10000,),
+                           shape=(size,),
                            custom_base_storage=buf,
                            decl_method=lambda n, k, di: None,
                            )
-- 
GitLab