From ceb384b18b6cdfde72633c38d3832ce3f8c42489 Mon Sep 17 00:00:00 2001 From: Dominic Kempf <dominic.kempf@iwr.uni-heidelberg.de> Date: Tue, 20 Mar 2018 09:52:59 +0100 Subject: [PATCH] Fixed up buffer size computation --- python/dune/perftool/loopy/target.py | 6 +++--- .../perftool/loopy/transformations/vectorize_quad.py | 2 +- python/dune/perftool/sumfact/realization.py | 9 +++++---- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/python/dune/perftool/loopy/target.py b/python/dune/perftool/loopy/target.py index e6015102..9a00a876 100644 --- a/python/dune/perftool/loopy/target.py +++ b/python/dune/perftool/loopy/target.py @@ -190,15 +190,15 @@ class DuneASTBuilder(CASTBuilder): size = [] for t in temps: if isinstance(t, DuneTemporaryVariable) and t.custom_base_storage == bs: - # TODO: Extract correct size - alignment.append(8) + #TODO Extract alignment from the temporaries after switching to loopy 2018.1 + alignment.append(get_option("max_vector_width") // 8) from pytools import product size.append(product(t.shape)) alignment = max(alignment) size = max(size) - decl = "char {}[{}] __attribute__ ((aligned({})));".format(bs, size * alignment, alignment) + decl = "char {}[{}] __attribute__ ((aligned({})));".format(bs, size * 8, alignment) ret.append(cgen.Line(decl)) if self.target.declare_temporaries: diff --git a/python/dune/perftool/loopy/transformations/vectorize_quad.py b/python/dune/perftool/loopy/transformations/vectorize_quad.py index 5022ebaf..e3f30cc2 100644 --- a/python/dune/perftool/loopy/transformations/vectorize_quad.py +++ b/python/dune/perftool/loopy/transformations/vectorize_quad.py @@ -242,7 +242,7 @@ def _vectorize_quadrature_loop(knl, inames, suffix): for insn in insns: # Get a vector view of the lhs expression lhsname = get_pymbolic_basename(insn.assignee) - knl = add_vector_view(knl, lhsname, pad_to=vec_size) + knl = add_vector_view(knl, lhsname) lhsname = get_vector_view_name(lhsname) rotating = "gradvec" in insn.tags diff --git a/python/dune/perftool/sumfact/realization.py b/python/dune/perftool/sumfact/realization.py index 0ce8a595..d113f815 100644 --- a/python/dune/perftool/sumfact/realization.py +++ b/python/dune/perftool/sumfact/realization.py @@ -32,6 +32,7 @@ from dune.perftool.sumfact.accumulation import sumfact_iname from dune.perftool.loopy.target import dtype_floatingpoint from dune.perftool.loopy.vcl import ExplicitVCLCast +from pytools import product from ufl import MixedElement import loopy as lp @@ -78,17 +79,17 @@ def _realize_sum_factorization_kernel(sf): # Get all the necessary pieces for a function call funcname = name_kernel_implementation_function(sf) - #TODO calculate the size and alignment correctly - size = 10000 - alignment = 8 buffers = tuple(name_buffer_storage(sf.buffer, i) for i in range(2)) # Make sure that the storage is allocated and has a certain minimum size # This is necessary to allocate buffers that will be passed to sumfact kernel # functions. Loopy has no knowledge of what happens with those... for buf in buffers: + # Determine the necessary size of the buffer. We assume that we do not + # underintegrate the form!!! + size = product(m.quadrature_size for m in sf.matrix_sequence) * sf.vector_width temporary_variable("{}_dummy".format(buf), - shape=(10000,), + shape=(size,), custom_base_storage=buf, decl_method=lambda n, k, di: None, ) -- GitLab