From d4f9076457d8b59a90d805d441d8fddf84ee17d1 Mon Sep 17 00:00:00 2001
From: Dominic Kempf <dominic.kempf@iwr.uni-heidelberg.de>
Date: Wed, 31 Jul 2019 11:51:26 +0200
Subject: [PATCH] Precompute the cache_key

In the /sumfact_poisson_dg_3d_unstructured_deg2_symdiff_nonquadvec_gradvec
test case, this reduces the time spent evaluating cache_key from >100s to 6s.
---
 python/dune/codegen/sumfact/symbolic.py | 26 ++++++++++++++++++-------
 1 file changed, 19 insertions(+), 7 deletions(-)

diff --git a/python/dune/codegen/sumfact/symbolic.py b/python/dune/codegen/sumfact/symbolic.py
index dfd9383f..8d81ea04 100644
--- a/python/dune/codegen/sumfact/symbolic.py
+++ b/python/dune/codegen/sumfact/symbolic.py
@@ -562,6 +562,9 @@ class SumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable):
         ImmutableRecord.__init__(self, **defaultdict)
         prim.Variable.__init__(self, "SUMFACT")
 
+        # Cache for cache_key; stays None until the key is first computed
+        self._cached_cache_key = None
+
     #
     # The methods/fields needed to get a well-formed pymbolic node
     #
@@ -617,12 +620,15 @@ class SumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable):
         Any two sum factorization kernels having the same cache_key
         are realized simultaneously!
         """
-        if self.buffer is None:
-            # During dry run, we return something unique to this kernel
-            return repr(self)
-        else:
-            # Later we identify parallely implemented kernels by the assigned buffer
-            return self.buffer
+        if self._cached_cache_key is None:
+            if self.buffer is None:
+                # During dry run, we return something unique to this kernel
+                self._cached_cache_key = repr(self)
+            else:
+                # Later we identify kernels implemented in parallel by the assigned buffer
+                self._cached_cache_key = self.buffer
+
+        return self._cached_cache_key
 
     @property
     def inout_key(self):
@@ -865,6 +871,9 @@ class VectorizedSumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable)
 
         prim.Variable.__init__(self, "VecSUMFAC")
 
+        # Cache for cache_key; stays None until the key is first computed
+        self._cached_cache_key = None
+
     def __getinitargs__(self):
         return (self.kernels, self.horizontal_width, self.vertical_width, self.buffer, self.insn_dep)
 
@@ -897,7 +906,10 @@ class VectorizedSumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable)
         Any two sum factorization kernels having the same cache_key
         are realized simulatenously!
         """
-        return (self.matrix_sequence_quadrature_permuted, self.restriction, self.stage, self.buffer)
+        if self._cached_cache_key is None:
+            self._cached_cache_key = (self.matrix_sequence_quadrature_permuted, self.restriction, self.stage, self.buffer)
+
+        return self._cached_cache_key
 
     #
     # Deduce all data fields of normal sum factorization kernels from the underlying kernels
-- 
GitLab