From 2de8f89914a9627347e971b05fe28c274757fb39 Mon Sep 17 00:00:00 2001 From: Dominic Kempf <dominic.kempf@iwr.uni-heidelberg.de> Date: Thu, 4 Oct 2018 11:23:06 +0200 Subject: [PATCH] A few fixes --- python/dune/perftool/sumfact/accumulation.py | 5 +++++ python/dune/perftool/sumfact/autotune.py | 14 ++++++++------ python/dune/perftool/sumfact/basis.py | 5 +++++ python/dune/perftool/sumfact/symbolic.py | 8 ++++++++ 4 files changed, 26 insertions(+), 6 deletions(-) diff --git a/python/dune/perftool/sumfact/accumulation.py b/python/dune/perftool/sumfact/accumulation.py index abad6788..d0c6fba6 100644 --- a/python/dune/perftool/sumfact/accumulation.py +++ b/python/dune/perftool/sumfact/accumulation.py @@ -243,6 +243,11 @@ class AccumulationOutput(SumfactKernelInterfaceBase, ImmutableRecord): else: return () + @property + def fastdg_interface_object_size(self): + size = sum(_local_sizes(self.trial_element)) if self.trial_element else 1 + return size * sum(_local_sizes(self.test_element)) + def _local_sizes(element): from ufl import FiniteElement, MixedElement diff --git a/python/dune/perftool/sumfact/autotune.py b/python/dune/perftool/sumfact/autotune.py index af324722..df8ab590 100644 --- a/python/dune/perftool/sumfact/autotune.py +++ b/python/dune/perftool/sumfact/autotune.py @@ -3,7 +3,7 @@ from dune.perftool.generation import cache_restoring, delete_cache_items from dune.perftool.loopy.target import DuneTarget from dune.perftool.sumfact.realization import realize_sumfact_kernel_function -from dune.perftool.options import get_option +from dune.perftool.options import get_option, set_option import loopy as lp from pytools import product @@ -82,8 +82,11 @@ def generate_standalone_code(sf, filename, logname): ]) # Setup a polynomial object (normally done in the LocalOperator members) + opcounting = get_option("opcounter") + set_option("opcounter", False) from dune.perftool.loopy.target import type_floatingpoint real = type_floatingpoint() + set_option("opcounter", opcounting) f.write(" using RF = {};\n".format(real)) f.write(" using DF = {};\n".format(real)) @@ -102,6 +105,7 @@ def generate_standalone_code(sf, filename, logname): # Allocate buffers size = max(product(m.quadrature_size for m in sf.matrix_sequence) * sf.vector_width, product(m.basis_size for m in sf.matrix_sequence) * sf.vector_width) + size = int(size * (get_option("precision_bits") / 8)) f.writelines([" char buffer0[{}] __attribute__ ((aligned (32)));\n".format(size), " char buffer1[{}] __attribute__ ((aligned (32)));\n".format(size), ]) @@ -111,10 +115,8 @@ def generate_standalone_code(sf, filename, logname): if "jacobian" in arg: f.write("{} = 0;\n".format(arg)) else: - basis_size = product(m.basis_size for m in sf.matrix_sequence) - if sf.within_inames: - basis_size = basis_size * basis_size - f.write("RF {}[{}] __attribute__ ((aligned (32)));\n".format(arg.split()[-1], product(m.basis_size for m in sf.matrix_sequence))) + size = sf.interface.fastdg_interface_object_size + f.write("RF {}[{}] __attribute__ ((aligned (32)));\n".format(arg.split()[-1], size)) # Write stuff into the input buffer f.writelines([" {0} *input = ({0} *)buffer0;\n".format(real), @@ -162,7 +164,7 @@ def generate_standalone_code(sf, filename, logname): ]) # Add the implementation of the kernel. - f.write(" for(int i=0; i<10000000; ++i)\n") + f.write(" for(int i=0; i<{}; ++i)\n".format(int(1e9 / sf.operations))) f.write(" {\n") for line in knl.member.lines[1:]: f.write(" {}\n".format(line)) diff --git a/python/dune/perftool/sumfact/basis.py b/python/dune/perftool/sumfact/basis.py index e9584d1d..22f5ffad 100644 --- a/python/dune/perftool/sumfact/basis.py +++ b/python/dune/perftool/sumfact/basis.py @@ -138,6 +138,11 @@ class LFSSumfactKernelInput(SumfactKernelInterfaceBase, ImmutableRecord): else: return () + @property + def fastdg_interface_object_size(self): + from dune.perftool.sumfact.accumulation import _local_sizes + return sum(_local_sizes(self.element)) + def _basis_functions_per_direction(element): """Number of basis functions per direction """ diff --git a/python/dune/perftool/sumfact/symbolic.py b/python/dune/perftool/sumfact/symbolic.py index fb283a05..3365ddca 100644 --- a/python/dune/perftool/sumfact/symbolic.py +++ b/python/dune/perftool/sumfact/symbolic.py @@ -123,6 +123,10 @@ class VectorSumfactKernelInput(SumfactKernelInterfaceBase): def function_name_suffix(self): return "".join(i.function_name_suffix for i in remove_duplicates(self.interfaces)) + @property + def fastdg_interface_object_size(self): + return self.interfaces[0].fastdg_interface_object_size + class VectorSumfactKernelOutput(SumfactKernelInterfaceBase): def __init__(self, interfaces): @@ -211,6 +215,10 @@ class VectorSumfactKernelOutput(SumfactKernelInterfaceBase): def function_name_suffix(self): return "".join(i.function_name_suffix for i in remove_duplicates(self.interfaces)) + @property + def fastdg_interface_object_size(self): + return self.interfaces[0].fastdg_interface_object_size + class SumfactKernelBase(object): pass -- GitLab