diff --git a/python/dune/codegen/sumfact/accumulation.py b/python/dune/codegen/sumfact/accumulation.py index f327fceb0ba5e31d14dadaa8d977e7147c6655ed..ae4a763063f0dc25303fc517356b58b02551eebc 100644 --- a/python/dune/codegen/sumfact/accumulation.py +++ b/python/dune/codegen/sumfact/accumulation.py @@ -44,7 +44,7 @@ from dune.codegen.sumfact.switch import (get_facedir, ) from dune.codegen.sumfact.symbolic import SumfactKernel, SumfactKernelInterfaceBase from dune.codegen.ufl.modified_terminals import extract_modified_arguments -from dune.codegen.tools import get_pymbolic_basename, get_leaf +from dune.codegen.tools import get_pymbolic_basename, get_leaf, ImmutableCuttingRecord from dune.codegen.error import CodegenError from pytools import ImmutableRecord, product @@ -90,7 +90,7 @@ def accum_iname(element, bound, i): return sumfact_iname(bound, "accum{}".format(suffix)) -class AccumulationOutput(SumfactKernelInterfaceBase, ImmutableRecord): +class AccumulationOutput(SumfactKernelInterfaceBase, ImmutableCuttingRecord): def __init__(self, matrix_sequence=None, accumvar=None, @@ -112,26 +112,20 @@ class AccumulationOutput(SumfactKernelInterfaceBase, ImmutableRecord): # recalculating it in the property. dim = world_dimension() quadrature_permutation = sumfact_quadrature_permutation_strategy(dim, restriction[0]) - - # Calculate cost optimal permutation matrix_sequence = permute_forward(matrix_sequence, quadrature_permutation) - cost_permutation = sumfact_cost_permutation_strategy(matrix_sequence, self.stage) # TODO: Isnt accumvar superfluous in the presence of all the other infos? # Note: Do not put matrix_sequence into the Record. That screws up the vectorization strategy! 
- ImmutableRecord.__init__(self, - accumvar=accumvar, - restriction=restriction, - test_element=test_element, - test_element_index=test_element_index, - trial_element=trial_element, - trial_element_index=trial_element_index, - _quadrature_permutation=quadrature_permutation, - _cost_permutation=cost_permutation, - ) - - def __repr__(self): - return ImmutableRecord.__repr__(self) + ImmutableCuttingRecord.__init__(self, + accumvar=accumvar, + restriction=restriction, + test_element=test_element, + test_element_index=test_element_index, + trial_element=trial_element, + trial_element_index=trial_element_index, + _quadrature_permutation=quadrature_permutation, + _permuted_matrix_sequence=matrix_sequence, + ) def get_keyword_arguments(self): """Get dictionary of keyword arguments needed to initialize this class @@ -141,7 +135,7 @@ class AccumulationOutput(SumfactKernelInterfaceBase, ImmutableRecord): this dict to create an interface. """ dict = self.get_copy_kwargs() - del dict['_cost_permutation'] + del dict['_permuted_matrix_sequence'] del dict['_quadrature_permutation'] dict['matrix_sequence'] = None return dict @@ -152,7 +146,7 @@ class AccumulationOutput(SumfactKernelInterfaceBase, ImmutableRecord): @property def cost_permutation(self): - return self._cost_permutation + return sumfact_cost_permutation_strategy(self._permuted_matrix_sequence, self.stage) @property def stage(self): diff --git a/python/dune/codegen/sumfact/autotune.py b/python/dune/codegen/sumfact/autotune.py index 4b9a1d132214652a9258cb59b46364117aeece1f..68d81957e00917d55383282c748879f7f96db587 100644 --- a/python/dune/codegen/sumfact/autotune.py +++ b/python/dune/codegen/sumfact/autotune.py @@ -13,6 +13,7 @@ import os import re import subprocess import filelock +import hashlib def get_cmake_cache_entry(entry): @@ -192,10 +193,12 @@ def autotune_realization(sf): os.mkdir(dir) basename = "autotune_sumfact_{}".format(sf.function_name) - name = os.path.join(dir, 
"autotune_sumfact_{}".format(sf.function_name)) + basename = hashlib.sha256(basename.encode()).hexdigest() + filename = os.path.join(dir, "{}.cc".format(basename)) logname = os.path.join(dir, "{}.log".format(basename)) - lock = "{}.lock".format(name) + lock = os.path.join(dir, "{}.lock".format(basename)) + executable = os.path.join(dir, basename) # Generate and compile a benchmark program with cache_restoring(): @@ -203,20 +206,20 @@ def autotune_realization(sf): if not os.path.isfile(logname): generate_standalone_code(sf, filename) - ret = subprocess.call(compiler_invocation(name, filename)) + devnull = open(os.devnull, 'w') + ret = subprocess.call(compiler_invocation(executable, filename), stdout=devnull, stderr=subprocess.STDOUT) if ret != 0: raise CodegenAutotuneError("Compilation of autotune executable failed. Invocation: {}".format(" ".join(compiler_invocation(name, filename)))) # Check whether the user specified an execution wrapper call = [] - wrapper = get_cmake_cache_entry("DUNE_PERFTOOL_BENCHMARK_WRAPPER") + wrapper = get_cmake_cache_entry("DUNE_CODEGEN_BENCHMARK_WRAPPER") if wrapper: call.append(wrapper) # Run the benchmark program - call.append(name) + call.append(executable) call.append(logname) - devnull = open(os.devnull, 'w') ret = subprocess.call(call, stdout=devnull, stderr=subprocess.STDOUT) if ret != 0: raise CodegenAutotuneError("Execution of autotune benchmark failed. 
Invocation: {}".format(" ".join(call))) diff --git a/python/dune/codegen/sumfact/basis.py b/python/dune/codegen/sumfact/basis.py index 9fac39426df4339a5804237418e5edfcd6e092c6..c9b75eb445af01e9420e850650b12054dd0115f3 100644 --- a/python/dune/codegen/sumfact/basis.py +++ b/python/dune/codegen/sumfact/basis.py @@ -44,13 +44,13 @@ from dune.codegen.options import get_form_option from dune.codegen.pdelab.driver import FEM_name_mangling from dune.codegen.pdelab.restriction import restricted_name from dune.codegen.pdelab.spaces import name_lfs, name_lfs_bound, name_leaf_lfs -from dune.codegen.tools import maybe_wrap_subscript +from dune.codegen.tools import maybe_wrap_subscript, ImmutableCuttingRecord from dune.codegen.pdelab.basis import shape_as_pymbolic from dune.codegen.sumfact.accumulation import sumfact_iname from ufl import MixedElement, VectorElement, TensorElement, TensorProductElement -from pytools import product, ImmutableRecord +from pytools import product from loopy.match import Writes @@ -235,7 +235,7 @@ class SumfactBasisMixin(GenericBasisMixin): return prim.Subscript(var, vsf.quadrature_index(sf, self)) -class LFSSumfactKernelInput(SumfactKernelInterfaceBase, ImmutableRecord): +class LFSSumfactKernelInput(SumfactKernelInterfaceBase, ImmutableCuttingRecord): def __init__(self, matrix_sequence=None, coeff_func=None, @@ -252,25 +252,17 @@ class LFSSumfactKernelInput(SumfactKernelInterfaceBase, ImmutableRecord): # recalculating it in the property. dim = world_dimension() quadrature_permutation = sumfact_quadrature_permutation_strategy(dim, restriction) - matrix_sequence = permute_forward(matrix_sequence, quadrature_permutation) - cost_permutation = sumfact_cost_permutation_strategy(matrix_sequence, self.stage) # Note: Do not put matrix_sequence into the Record. That screws up the vectorization strategy! 
- ImmutableRecord.__init__(self, - coeff_func=coeff_func, - element=element, - element_index=element_index, - restriction=restriction, - _quadrature_permutation=quadrature_permutation, - _cost_permutation=cost_permutation, - ) - - def __repr__(self): - return ImmutableRecord.__repr__(self) - - def __str__(self): - return repr(self) + ImmutableCuttingRecord.__init__(self, + coeff_func=coeff_func, + element=element, + element_index=element_index, + restriction=restriction, + _quadrature_permutation=quadrature_permutation, + _permuted_matrix_sequence=matrix_sequence, + ) def get_keyword_arguments(self): """Get dictionary of keyword arguments needed to initialize this class @@ -280,7 +272,7 @@ class LFSSumfactKernelInput(SumfactKernelInterfaceBase, ImmutableRecord): this dict to create an interface. """ dict = self.get_copy_kwargs() - del dict['_cost_permutation'] + del dict['_permuted_matrix_sequence'] del dict['_quadrature_permutation'] dict['matrix_sequence'] = None return dict @@ -291,7 +283,7 @@ class LFSSumfactKernelInput(SumfactKernelInterfaceBase, ImmutableRecord): @property def cost_permutation(self): - return self._cost_permutation + return sumfact_cost_permutation_strategy(self._permuted_matrix_sequence, self.stage) @property def stage(self): diff --git a/python/dune/codegen/sumfact/geometry.py b/python/dune/codegen/sumfact/geometry.py index 08e9d43e8fe685b3b24b60bd10be418b5f078ad7..d37558f5c40f42355b5fa99ed9a4ea25ab7ade28 100644 --- a/python/dune/codegen/sumfact/geometry.py +++ b/python/dune/codegen/sumfact/geometry.py @@ -41,12 +41,10 @@ from dune.codegen.sumfact.permutation import (permute_backward, from dune.codegen.sumfact.quadrature import additional_inames from dune.codegen.sumfact.switch import get_facedir, get_facemod from dune.codegen.sumfact.symbolic import SumfactKernelInterfaceBase, SumfactKernel -from dune.codegen.tools import get_pymbolic_basename +from dune.codegen.tools import get_pymbolic_basename, ImmutableCuttingRecord from 
dune.codegen.options import get_form_option, option_switch from dune.codegen.ufl.modified_terminals import Restriction -from pytools import ImmutableRecord - from loopy.match import Writes import pymbolic.primitives as prim @@ -337,7 +335,7 @@ def global_corner_iname(restriction): return name -class GeoCornersInput(SumfactKernelInterfaceBase, ImmutableRecord): +class GeoCornersInput(SumfactKernelInterfaceBase, ImmutableCuttingRecord): def __init__(self, matrix_sequence=None, direction=None, @@ -361,23 +359,15 @@ class GeoCornersInput(SumfactKernelInterfaceBase, ImmutableRecord): # recalculating it in the property. dim = world_dimension() quadrature_permutation = sumfact_quadrature_permutation_strategy(dim, restriction) - matrix_sequence = permute_forward(matrix_sequence, quadrature_permutation) - cost_permutation = sumfact_cost_permutation_strategy(matrix_sequence, self.stage) # Note: Do not put matrix_sequence into the Record. That screws up the vectorization strategy! - ImmutableRecord.__init__(self, - direction=direction, - restriction=restriction, - _quadrature_permutation=quadrature_permutation, - _cost_permutation=cost_permutation, - ) - - def __repr__(self): - return ImmutableRecord.__repr__(self) - - def __str__(self): - return repr(self) + ImmutableCuttingRecord.__init__(self, + direction=direction, + restriction=restriction, + _quadrature_permutation=quadrature_permutation, + _permuted_matrix_sequence=matrix_sequence, + ) def get_keyword_arguments(self): """Get dictionary of keyword arguments needed to initialize this class @@ -387,7 +377,7 @@ class GeoCornersInput(SumfactKernelInterfaceBase, ImmutableRecord): this dict to create an interface. 
""" dict = self.get_copy_kwargs() - del dict['_cost_permutation'] + del dict['_permuted_matrix_sequence'] del dict['_quadrature_permutation'] dict['matrix_sequence'] = None return dict @@ -398,7 +388,7 @@ class GeoCornersInput(SumfactKernelInterfaceBase, ImmutableRecord): @property def cost_permutation(self): - return self._cost_permutation + return sumfact_cost_permutation_strategy(self._permuted_matrix_sequence, self.stage) @property def stage(self): diff --git a/python/dune/codegen/sumfact/symbolic.py b/python/dune/codegen/sumfact/symbolic.py index 2768ece8993b01565a2eeacb4311fa976174c236..60641b0ce4af4ddef525a4c03a438c4ebe277c0c 100644 --- a/python/dune/codegen/sumfact/symbolic.py +++ b/python/dune/codegen/sumfact/symbolic.py @@ -214,9 +214,6 @@ class SumfactKernelInterfaceBase(object): def function_name_suffix(self): return "" - def __repr__(self): - return "SumfactKernelInterfaceBase()" - class VectorSumfactKernelInput(SumfactKernelInterfaceBase): def __init__(self, interfaces): @@ -240,8 +237,6 @@ class VectorSumfactKernelInput(SumfactKernelInterfaceBase): # permutation. For both structured and unstructured grids the order of # the global directions should be the same leading to the same cost # permutation for all those sum factorization kernels. - for i in self.interfaces: - assert i.cost_permutation == self.interfaces[0].cost_permutation return self.interfaces[0].cost_permutation @property @@ -608,7 +603,7 @@ class SumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable): # TODO: For now we do not vectorize SumfactKernels with different # quadrature_permutation. 
This should be handled like upper/lower # vectorization - return tuple(m.quadrature_size for m in self.matrix_sequence_quadrature_permuted) + tuple(m.basis_size for m in self.matrix_sequence_quadrature_permuted) + (self.stage, self.buffer, self.interface.within_inames) + (self.interface.direct_is_possible, self.interface.quadrature_permutation) + return tuple(m.basis_size for m in self.matrix_sequence_quadrature_permuted) + (self.stage, self.buffer, self.interface.within_inames) + (self.interface.direct_is_possible, self.interface.quadrature_permutation) @property def cache_key(self): @@ -847,20 +842,9 @@ class VectorizedSumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable) # For now we don't mix direct and non_direct input. Could be done in an upper/lower way. assert len(set(tuple(k.interface.direct_is_possible for k in kernels))) == 1 - # Assert properties of the matrix sequence of the underlying kernels - for i in range(kernels[0].length): - assert len(set(tuple(k.matrix_sequence_quadrature_permuted[i].rows for k in kernels))) == 1 - assert len(set(tuple(k.matrix_sequence_quadrature_permuted[i].cols for k in kernels))) == 1 - assert len(set(tuple(k.matrix_sequence_quadrature_permuted[i].direction for k in kernels))) == 1 - assert len(set(tuple(k.matrix_sequence_quadrature_permuted[i].transpose for k in kernels))) == 1 - # Join the instruction dependencies of all subkernels insn_dep = insn_dep.union(k.insn_dep for k in kernels) - # Assert that the cost_permutation is the same for all kernels - for k in kernels: - assert k.interface.cost_permutation == kernels[0].interface.cost_permutation - # We currently assume that all subkernels are consecutive, 0-based within the vector assert None not in kernels @@ -1013,16 +997,7 @@ class VectorizedSumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable) def horizontal_index(self, sf): for i, k in enumerate(self.kernels): - # We need to identify to which part of the vectorized kernel sf - # corresponds. 
Since splitting might change the cost_permutation we - # exclude it in the comparison below. We also make sure to check - # that derivatives are the same. - from copy import deepcopy - sf_interface = deepcopy(sf.interface) - sf_interface._cost_permutation = None - k_interface = deepcopy(k.interface) - k_interface._cost_permutation = None - if repr(sf_interface) == repr(k_interface): + if sf.interface == k.interface: if tuple(mat.derivative for mat in sf.matrix_sequence_quadrature_permuted) == tuple(mat.derivative for mat in k.matrix_sequence_quadrature_permuted): return i diff --git a/python/dune/codegen/sumfact/tabulation.py b/python/dune/codegen/sumfact/tabulation.py index 7540cc89790392c9ce55cd17b69fcd96370d2de5..9def97eb3ba4fdae280cc65e7f12ca73164d1146 100644 --- a/python/dune/codegen/sumfact/tabulation.py +++ b/python/dune/codegen/sumfact/tabulation.py @@ -145,11 +145,8 @@ class BasisTabulationMatrixArray(BasisTabulationMatrixBase): assert isinstance(tabs, tuple) # Assert that all the basis tabulations match in size! 
-        assert len(set(t.quadrature_size for t in tabs)) == 1
         assert len(set(t.basis_size for t in tabs)) == 1
         assert len(set(t.transpose for t in tabs)) == 1
-        assert len(set(t.direction for t in tabs)) == 1
-        assert len(set(t.slice_size for t in tabs)) == 1
 
         self.tabs = tabs
         if width is None:
diff --git a/python/dune/codegen/sumfact/vectorization.py b/python/dune/codegen/sumfact/vectorization.py
index 5788cbe84693255d81c15c2745344f3eaea35450..e753652b10b3ac9b1765ee071bce1ccebd15f6b1 100644
--- a/python/dune/codegen/sumfact/vectorization.py
+++ b/python/dune/codegen/sumfact/vectorization.py
@@ -367,8 +367,6 @@ def level1_optimal_vectorization_strategy(sumfacts, width):
 
 
 def level2_optimal_vectorization_strategy(sumfacts, width, qp):
-    set_quadrature_points(qp)
-
     # Find the sets of simultaneously realizable kernels
     keys = frozenset(sf.parallel_key for sf in sumfacts)
 
diff --git a/python/dune/codegen/tools.py b/python/dune/codegen/tools.py
index d5c0a18ebe8f7e32316aa459c959ab1eb9ba75f8..0726adc8ff612825e1270f41ee95f2943e04738f 100644
--- a/python/dune/codegen/tools.py
+++ b/python/dune/codegen/tools.py
@@ -4,6 +4,38 @@ from __future__ import absolute_import
 import loopy as lp
 import pymbolic.primitives as prim
 import frozendict
+import pytools
+
+
+class ImmutableCuttingRecord(pytools.ImmutableRecord):
+    """
+    A record implementation that drops fields starting with an underscore
+    from repr, hash and equality computation.
+
+    Records that differ only in underscore-prefixed fields therefore
+    compare equal and hash identically.
+    """
+    @classmethod
+    def _compared_fields(cls):
+        """Return the sorted names of all fields without a leading underscore"""
+        # Sorting makes repr and hash independent of the iteration order
+        # of the fields container (presumably a set in pytools - confirm).
+        return tuple(sorted(f for f in cls.fields if not f.startswith("_")))
+
+    def __repr__(self):
+        """Return 'ClassName(field=value, ...)' for all compared fields"""
+        args = ", ".join("{}={!r}".format(f, getattr(self, f)) for f in self._compared_fields())
+        return "{}({})".format(type(self).__name__, args)
+
+    def __hash__(self):
+        """Hash the record type together with the compared field values"""
+        return hash((type(self),) + tuple(getattr(self, f) for f in self._compared_fields()))
+
+    def __eq__(self, other):
+        """Compare the record type and the compared field values"""
+        if type(self) is not type(other):
+            return False
+        return all(getattr(self, f) == getattr(other, f) for f in self._compared_fields())
 
 
 def get_pymbolic_basename(expr):