Skip to content
Snippets Groups Projects
Commit d4f90764 authored by Dominic Kempf's avatar Dominic Kempf
Browse files

Precompute the cache_key

In the sumfact_poisson_dg_3d_unstructured_deg2_symdiff_nonquadvec_gradvec
test case, this reduces the time spent evaluating the cache_key from >100s to 6s.
parent 164dc781
No related branches found
No related tags found
No related merge requests found
...@@ -562,6 +562,9 @@ class SumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable): ...@@ -562,6 +562,9 @@ class SumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable):
ImmutableRecord.__init__(self, **defaultdict) ImmutableRecord.__init__(self, **defaultdict)
prim.Variable.__init__(self, "SUMFACT") prim.Variable.__init__(self, "SUMFACT")
# Precompute and cache a number of keys
self._cached_cache_key = None
# #
# The methods/fields needed to get a well-formed pymbolic node # The methods/fields needed to get a well-formed pymbolic node
# #
...@@ -617,12 +620,15 @@ class SumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable): ...@@ -617,12 +620,15 @@ class SumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable):
Any two sum factorization kernels having the same cache_key Any two sum factorization kernels having the same cache_key
are realized simultaneously! are realized simultaneously!
""" """
if self.buffer is None: if self._cached_cache_key is None:
# During dry run, we return something unique to this kernel if self.buffer is None:
return repr(self) # During dry run, we return something unique to this kernel
else: self._cached_cache_key = repr(self)
# Later we identify parallely implemented kernels by the assigned buffer else:
return self.buffer # Later we identify parallely implemented kernels by the assigned buffer
self._cached_cache_key = self.buffer
return self._cached_cache_key
@property @property
def inout_key(self): def inout_key(self):
...@@ -865,6 +871,9 @@ class VectorizedSumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable) ...@@ -865,6 +871,9 @@ class VectorizedSumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable)
prim.Variable.__init__(self, "VecSUMFAC") prim.Variable.__init__(self, "VecSUMFAC")
# Precompute and cache a number of keys
self._cached_cache_key = None
def __getinitargs__(self): def __getinitargs__(self):
return (self.kernels, self.horizontal_width, self.vertical_width, self.buffer, self.insn_dep) return (self.kernels, self.horizontal_width, self.vertical_width, self.buffer, self.insn_dep)
...@@ -897,7 +906,10 @@ class VectorizedSumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable) ...@@ -897,7 +906,10 @@ class VectorizedSumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable)
Any two sum factorization kernels having the same cache_key Any two sum factorization kernels having the same cache_key
are realized simultaneously! are realized simultaneously!
""" """
return (self.matrix_sequence_quadrature_permuted, self.restriction, self.stage, self.buffer) if self._cached_cache_key is None:
self._cached_cache_key = (self.matrix_sequence_quadrature_permuted, self.restriction, self.stage, self.buffer)
return self._cached_cache_key
# #
# Deduce all data fields of normal sum factorization kernels from the underlying kernels # Deduce all data fields of normal sum factorization kernels from the underlying kernels
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment