diff --git a/applications/knl/poisson_dg/poisson_dg.ufl b/applications/knl/poisson_dg/poisson_dg.ufl index 2c0f2c34436f1c6b76817d6abad344fae3183d62..7b5c3e548f81688a0735256411fd54e9d49fa9f4 100644 --- a/applications/knl/poisson_dg/poisson_dg.ufl +++ b/applications/knl/poisson_dg/poisson_dg.ufl @@ -12,21 +12,21 @@ n = FacetNormal(cell)('+') alpha = 1.0 h_ext = CellVolume(cell) / FacetArea(cell) -cse_gamma_ext = (alpha * degree * (degree + dim - 1)) / h_ext +gamma_ext = (alpha * degree * (degree + dim - 1)) / h_ext h_int = Min(CellVolume(cell)('+'), CellVolume(cell)('-')) / FacetArea(cell) -cse_gamma_int = (alpha * degree * (degree + dim - 1)) / h_int +gamma_int = (alpha * degree * (degree + dim - 1)) / h_int theta = -1.0 r = inner(grad(u), grad(v))*dx \ + inner(n, avg(grad(u)))*jump(v)*dS \ - + cse_gamma_int*jump(u)*jump(v)*dS \ + + gamma_int*jump(u)*jump(v)*dS \ - theta*jump(u)*inner(avg(grad(v)), n)*dS \ - inner(n, grad(u))*v*ds \ - + cse_gamma_ext*u*v*ds \ + + gamma_ext*u*v*ds \ + theta*u*inner(grad(v), n)*ds \ - f*v*dx \ - theta*g*inner(grad(v), n)*ds \ - - cse_gamma_ext*g*v*ds + - gamma_ext*g*v*ds forms = [r] diff --git a/applications/knl/poisson_dg_tensor/poisson_dg_tensor.ufl b/applications/knl/poisson_dg_tensor/poisson_dg_tensor.ufl index 00267f15b9331b4705138cfb1a4698ee357a5767..d3c95a5226e26981cf78e5d8b78a686150a9310d 100644 --- a/applications/knl/poisson_dg_tensor/poisson_dg_tensor.ufl +++ b/applications/knl/poisson_dg_tensor/poisson_dg_tensor.ufl @@ -16,20 +16,20 @@ n = FacetNormal(cell)('+') alpha = 3.0 h_ext = CellVolume(cell) / FacetArea(cell) -cse_gamma_ext = (alpha * degree * (degree + dim - 1)) / h_ext +gamma_ext = (alpha * degree * (degree + dim - 1)) / h_ext h_int = Min(CellVolume(cell)('+'), CellVolume(cell)('-')) / FacetArea(cell) -cse_gamma_int = (alpha * degree * (degree + dim - 1)) / h_int +gamma_int = (alpha * degree * (degree + dim - 1)) / h_int theta = -1.0 r = (inner(A*grad(u), grad(v)) + (c*u-f)*v)*dx \ + inner(n, A*avg(grad(u)))*jump(v)*dS \ - + cse_gamma_int*jump(u)*jump(v)*dS \ + + gamma_int*jump(u)*jump(v)*dS \ - theta*jump(u)*inner(A*avg(grad(v)), n)*dS \ - inner(n, A*grad(u))*v*ds \ - + cse_gamma_ext*u*v*ds \ + + gamma_ext*u*v*ds \ + theta*u*inner(A*grad(v), n)*ds \ - theta*g*inner(A*grad(v), n)*ds \ - - cse_gamma_ext*g*v*ds + - gamma_ext*g*v*ds forms = [r] diff --git a/applications/poisson_dg/poisson_dg.ufl b/applications/poisson_dg/poisson_dg.ufl index 2c0f2c34436f1c6b76817d6abad344fae3183d62..7b5c3e548f81688a0735256411fd54e9d49fa9f4 100644 --- a/applications/poisson_dg/poisson_dg.ufl +++ b/applications/poisson_dg/poisson_dg.ufl @@ -12,21 +12,21 @@ n = FacetNormal(cell)('+') alpha = 1.0 h_ext = CellVolume(cell) / FacetArea(cell) -cse_gamma_ext = (alpha * degree * (degree + dim - 1)) / h_ext +gamma_ext = (alpha * degree * (degree + dim - 1)) / h_ext h_int = Min(CellVolume(cell)('+'), CellVolume(cell)('-')) / FacetArea(cell) -cse_gamma_int = (alpha * degree * (degree + dim - 1)) / h_int +gamma_int = (alpha * degree * (degree + dim - 1)) / h_int theta = -1.0 r = inner(grad(u), grad(v))*dx \ + inner(n, avg(grad(u)))*jump(v)*dS \ - + cse_gamma_int*jump(u)*jump(v)*dS \ + + gamma_int*jump(u)*jump(v)*dS \ - theta*jump(u)*inner(avg(grad(v)), n)*dS \ - inner(n, grad(u))*v*ds \ - + cse_gamma_ext*u*v*ds \ + + gamma_ext*u*v*ds \ + theta*u*inner(grad(v), n)*ds \ - f*v*dx \ - theta*g*inner(grad(v), n)*ds \ - - cse_gamma_ext*g*v*ds + - gamma_ext*g*v*ds forms = [r] diff --git a/applications/poisson_dg_tensor/poisson_dg_tensor.ufl b/applications/poisson_dg_tensor/poisson_dg_tensor.ufl index 00267f15b9331b4705138cfb1a4698ee357a5767..d3c95a5226e26981cf78e5d8b78a686150a9310d 100644 --- a/applications/poisson_dg_tensor/poisson_dg_tensor.ufl +++ b/applications/poisson_dg_tensor/poisson_dg_tensor.ufl @@ -16,20 +16,20 @@ n = FacetNormal(cell)('+') alpha = 3.0 h_ext = CellVolume(cell) / FacetArea(cell) -cse_gamma_ext = (alpha * degree * (degree + dim - 1)) / h_ext +gamma_ext = (alpha * degree * (degree + dim - 1)) / h_ext h_int = Min(CellVolume(cell)('+'), CellVolume(cell)('-')) / FacetArea(cell) -cse_gamma_int = (alpha * degree * (degree + dim - 1)) / h_int +gamma_int = (alpha * degree * (degree + dim - 1)) / h_int theta = -1.0 r = (inner(A*grad(u), grad(v)) + (c*u-f)*v)*dx \ + inner(n, A*avg(grad(u)))*jump(v)*dS \ - + cse_gamma_int*jump(u)*jump(v)*dS \ + + gamma_int*jump(u)*jump(v)*dS \ - theta*jump(u)*inner(A*avg(grad(v)), n)*dS \ - inner(n, A*grad(u))*v*ds \ - + cse_gamma_ext*u*v*ds \ + + gamma_ext*u*v*ds \ + theta*u*inner(A*grad(v), n)*ds \ - theta*g*inner(A*grad(v), n)*ds \ - - cse_gamma_ext*g*v*ds + - gamma_ext*g*v*ds forms = [r] diff --git a/python/dune/perftool/compile.py b/python/dune/perftool/compile.py index d2639ac6934b10a936711744c515890eba2d0a02..8d1dcc503ff8971db3d11858e47d372f834c7948 100644 --- a/python/dune/perftool/compile.py +++ b/python/dune/perftool/compile.py @@ -84,9 +84,6 @@ def read_ufl(uflfile): # Enrich data by some additional objects we deem worth keeping if get_option("exact_solution_expression"): data.object_by_name[get_option("exact_solution_expression")] = namespace[get_option("exact_solution_expression")] - for name, expr in namespace.items(): - if name.startswith("cse"): - data.object_by_name[name] = namespace[name] formdatas = [] forms = data.forms diff --git a/python/dune/perftool/generation/__init__.py b/python/dune/perftool/generation/__init__.py index d8ab37fbb57f176e4776eb0436de1ef88363254d..c819772e874a6264a05276873d7ff553fb41188b 100644 --- a/python/dune/perftool/generation/__init__.py +++ b/python/dune/perftool/generation/__init__.py @@ -32,7 +32,6 @@ from dune.perftool.generation.cpp import (base_class, from dune.perftool.generation.loopy import (barrier, constantarg, - construct_subst_rule, domain, function_mangler, get_temporary_name, @@ -43,8 +42,6 @@ from dune.perftool.generation.loopy import (barrier, kernel_cached, noop_instruction, silenced_warning, - set_subst_rule, - subst_rule, temporary_variable, transform, valuearg, diff --git a/python/dune/perftool/generation/loopy.py b/python/dune/perftool/generation/loopy.py index d098c798fe13f1bdd34fd4502fc3b8a0b4f6fa1c..8cc073fc68c89572dd0dd5b9e5947cc5690f5439 100644 --- a/python/dune/perftool/generation/loopy.py +++ b/python/dune/perftool/generation/loopy.py @@ -198,31 +198,3 @@ def loopy_class_member(name, classtag=None, potentially_vectorized=False, **kwar globalarg(name, **kwargs) return name - - -@generator_factory(item_tags=("substrule_name",), - context_tags="kernel", - cache_key_generator=lambda e, n: e) -def _substrule_name(expr, name): - return name - - -@generator_factory(item_tags=("substrule_impl",), - context_tags="kernel", - cache_key_generator=lambda n, e, **ex: e, - ) -def subst_rule(name, expr, exists=False): - _substrule_name(expr, name) - return exists - - -def set_subst_rule(name, expr): - subst_rule(name, expr, exists=True) - - -@generator_factory(item_tags=("substrule",), - context_tags="kernel") -def construct_subst_rule(expr, visitor): - name = _substrule_name(expr, None) - assert name - return lp.SubstitutionRule(name, (), visitor(expr)) diff --git a/python/dune/perftool/loopy/transformations/collect_rotate.py b/python/dune/perftool/loopy/transformations/collect_rotate.py index b64503b4f52eb853edd770de513cda8faa28c6ff..167de058f85b773978d4d6cef101101df72bf840 100644 --- a/python/dune/perftool/loopy/transformations/collect_rotate.py +++ b/python/dune/perftool/loopy/transformations/collect_rotate.py @@ -3,6 +3,7 @@ is filled and then does vector computations """ from dune.perftool.generation import (function_mangler, include_file, + loopy_class_member, ) from dune.perftool.loopy.vcl import get_vcl_type, get_vcl_type_size from dune.perftool.loopy.transformations.vectorview import (add_temporary_with_vector_view, @@ -11,7 +12,7 @@ from dune.perftool.loopy.transformations.vectorview import (add_temporary_with_v ) from dune.perftool.loopy.symbolic import substitute from dune.perftool.sumfact.quadrature import quadrature_inames -from dune.perftool.tools import get_pymbolic_basename, get_pymbolic_tag +from dune.perftool.tools import get_pymbolic_basename, get_pymbolic_tag, ceildiv from dune.perftool.options import get_option from loopy.kernel.creation import parse_domains @@ -19,6 +20,7 @@ from loopy.symbolic import pw_aff_to_expr from loopy.match import Tagged from loopy.symbolic import DependencyMapper +from pytools import product import pymbolic.primitives as prim import loopy as lp @@ -234,6 +236,23 @@ def collect_vector_data_rotate(knl): replacemap_vec[expr] = prim.Subscript(prim.Variable(get_vector_view_name(quantity)), (vector_indices.get(1), prim.Variable(new_iname)), ) + elif quantity in [a.name for a in knl.args]: + arg, = [a for a in knl.args if a.name == quantity] + tags = set(get_pymbolic_tag(expr) for expr in quantity_exprs) + if tags and tags.pop() == "operator_precomputed": + expr, = quantity_exprs + shape=(ceildiv(product(s for s in arg.shape), vec_size), vec_size) + name = loopy_class_member(quantity, + shape=shape, + dim_tags="f,vec", + potentially_vectorized=True, + classtag="operator", + dtype=np.float64, + ) + knl = knl.copy(args=knl.args + [lp.GlobalArg(name, shape=shape, dim_tags="c,vec", dtype=np.float64)]) + replacemap_vec[expr] = prim.Subscript(prim.Variable(name), + (vector_indices.get(1), prim.Variable(new_iname)), + ) new_insns = [i.copy(expression=substitute(i.expression, replacemap_arr)) for i in new_insns] diff --git a/python/dune/perftool/options.py b/python/dune/perftool/options.py index 7d7712d91e394998aeb0377a11bd62eb7cdf9c62..8dff610bd5750df4176f684aeb3542ad476ffcab 100644 --- a/python/dune/perftool/options.py +++ b/python/dune/perftool/options.py @@ -65,6 +65,7 @@ class PerftoolOptionsArray(ImmutableRecord): # Arguments that are mainly to be set by logic depending on other options max_vector_width = PerftoolOption(default=256, helpstr=None) unroll_dimension_loops = PerftoolOption(default=False, helpstr="whether loops over the gemetric dimension should be unrolled.") + precompute_quadrature_info = PerftoolOption(default=True, helpstr="whether loops over the gemetric dimension should be unrolled.") # Until more sophisticated logic is needed, we keep the actual option data in this module diff --git a/python/dune/perftool/pdelab/__init__.py b/python/dune/perftool/pdelab/__init__.py index ac9ad0e46f5203a23b52ef52090f28cd2a811b72..d05bcad57ddde467de202a09f337c873317c5452 100644 --- a/python/dune/perftool/pdelab/__init__.py +++ b/python/dune/perftool/pdelab/__init__.py @@ -35,6 +35,10 @@ from dune.perftool.pdelab.tensors import pymbolic_list_tensor, pymbolic_identity class PDELabInterface(object): + def __init__(self): + # The visitor instance will be registered by its init method + self.visitor = None + # # TODO: The following ones are actually entirely PDELab independent! # They should be placed elsewhere and be used directly in the visitor. @@ -63,16 +67,16 @@ class PDELabInterface(object): def pymbolic_reference_gradient(self, element, restriction, number): return pymbolic_reference_gradient(element, restriction, number) - def pymbolic_trialfunction_gradient(self, element, restriction, component, visitor=None): + def pymbolic_trialfunction_gradient(self, element, restriction, component): return pymbolic_trialfunction_gradient(element, restriction, component) - def pymbolic_apply_function_gradient(self, element, restriction, component, visitor=None): + def pymbolic_apply_function_gradient(self, element, restriction, component): return pymbolic_apply_function_gradient(element, restriction, component) - def pymbolic_trialfunction(self, element, restriction, component, visitor=None): + def pymbolic_trialfunction(self, element, restriction, component): return pymbolic_trialfunction(element, restriction, component) - def pymbolic_apply_function(self, element, restriction, component, visitor=None): + def pymbolic_apply_function(self, element, restriction, component): return pymbolic_apply_function(element, restriction, component) # @@ -89,8 +93,8 @@ class PDELabInterface(object): # Tensor expression related generator functions # - def pymbolic_list_tensor(self, o, visitor): - return pymbolic_list_tensor(o, visitor) + def pymbolic_list_tensor(self, o): + return pymbolic_list_tensor(o, self.visitor) def pymbolic_identity(self, o): return pymbolic_identity(o) @@ -99,7 +103,7 @@ class PDELabInterface(object): # Geometry related generator functions # - def pymbolic_spatial_coordinate(self, visitor=None): + def pymbolic_spatial_coordinate(self): return to_global(pymbolic_quadrature_position()) def name_facet_jacobian_determinant(self): diff --git a/python/dune/perftool/pdelab/localoperator.py b/python/dune/perftool/pdelab/localoperator.py index 3bae1ea9554c7c5f5a751be17128bf215c8b0b55..4e7227e12b1925ee59588023685471460046c3e6 100644 --- a/python/dune/perftool/pdelab/localoperator.py +++ b/python/dune/perftool/pdelab/localoperator.py @@ -19,7 +19,6 @@ from dune.perftool.generation import (backend, post_include, retrieve_cache_functions, retrieve_cache_items, - set_subst_rule, template_parameter, ) from dune.perftool.cgen.clazz import (AccessModifier, @@ -408,25 +407,6 @@ def visit_integrals(integrals): from dune.perftool.ufl.visitor import UFL2LoopyVisitor visitor = UFL2LoopyVisitor(interface, measure, indexmap) - # Inspect the UFL file for manual common subexpression elimination stuff! - data = get_global_context_value("data") - for name, expr in data.object_by_name.items(): - if name.startswith("cse"): - set_subst_rule(name, expr) - - # Ensure CSE on detjac * quadrature weight - domain = accterm.term.ufl_domain() - if measure == "cell": - set_subst_rule("integration_factor_cell1", - uc.QuadratureWeight(domain) * uc.Abs(uc.JacobianDeterminant(domain))) - set_subst_rule("integration_factor_cell2", - uc.Abs(uc.JacobianDeterminant(domain)) * uc.QuadratureWeight(domain)) - else: - set_subst_rule("integration_factor_facet1", - uc.FacetJacobianDeterminant(domain) * uc.QuadratureWeight(domain)) - set_subst_rule("integration_factor_facet2", - uc.QuadratureWeight(domain) * uc.FacetJacobianDeterminant(domain)) - get_backend(interface="accum_insn")(visitor, accterm, measure, subdomain_id) diff --git a/python/dune/perftool/pdelab/tensors.py b/python/dune/perftool/pdelab/tensors.py index a8e15d846be6cbb958508652b105cb8a1c64a83e..2996ac5e24ecb5abf59980cf5ded3af223a4d4bd 100644 --- a/python/dune/perftool/pdelab/tensors.py +++ b/python/dune/perftool/pdelab/tensors.py @@ -25,7 +25,7 @@ def define_list_tensor(name, expr, visitor, stack=()): instruction(assignee=prim.Subscript(prim.Variable(name), stack + (i,)), expression=visitor.call(child), forced_iname_deps=frozenset(visitor.interface.quadrature_inames()), - depends_on=frozenset({lp.match.Or(tuple(lp.match.Writes(v.name) for v in deps))}).union(frozenset({lp.match.Tagged("sumfact_stage1")})), + depends_on=frozenset({lp.match.Tagged("sumfact_stage1")}), tags=frozenset({"quad"}), ) diff --git a/python/dune/perftool/sumfact/__init__.py b/python/dune/perftool/sumfact/__init__.py index f860a1fcade1275b12f80ecd8d85bbc70799efca..36d18d54abc1d6bc3c5fa9b19f7cce2142662c29 100644 --- a/python/dune/perftool/sumfact/__init__.py +++ b/python/dune/perftool/sumfact/__init__.py @@ -31,24 +31,24 @@ class SumFactInterface(PDELabInterface): def pymbolic_reference_gradient(self, element, restriction, number): return pymbolic_reference_gradient(element, restriction, number) - def pymbolic_trialfunction_gradient(self, element, restriction, component, visitor=None): - ret, indices = pymbolic_coefficient_gradient(element, restriction, component, name_coefficientcontainer, visitor.indices) - visitor.indices = indices + def pymbolic_trialfunction_gradient(self, element, restriction, component): + ret, indices = pymbolic_coefficient_gradient(element, restriction, component, name_coefficientcontainer, self.visitor.indices) + self.visitor.indices = indices return ret - def pymbolic_trialfunction(self, element, restriction, component, visitor=None): - ret, indices = pymbolic_coefficient(element, restriction, component, name_coefficientcontainer, visitor.indices) - visitor.indices = indices + def pymbolic_trialfunction(self, element, restriction, component): + ret, indices = pymbolic_coefficient(element, restriction, component, name_coefficientcontainer, self.visitor.indices) + self.visitor.indices = indices return ret - def pymbolic_apply_function_gradient(self, element, restriction, component, visitor=None): - ret, indices = pymbolic_coefficient_gradient(element, restriction, component, name_applycontainer, visitor.indices) - visitor.indices = indices + def pymbolic_apply_function_gradient(self, element, restriction, component): + ret, indices = pymbolic_coefficient_gradient(element, restriction, component, name_applycontainer, self.visitor.indices) + self.visitor.indices = indices return ret - def pymbolic_apply_function(self, element, restriction, component, visitor=None): - ret, indices = pymbolic_coefficient(element, restriction, component, name_applycontainer, visitor.indices) - visitor.indices = indices + def pymbolic_apply_function(self, element, restriction, component): + ret, indices = pymbolic_coefficient(element, restriction, component, name_applycontainer, self.visitor.indices) + self.visitor.indices = indices return ret def quadrature_inames(self): @@ -57,8 +57,8 @@ class SumFactInterface(PDELabInterface): def pymbolic_quadrature_weight(self): return quadrature_weight() - def pymbolic_spatial_coordinate(self, visitor=None): + def pymbolic_spatial_coordinate(self): import dune.perftool.sumfact.geometry - ret, indices = get_backend(interface="spatial_coordinate", selector=option_switch("diagonal_transformation_matrix"))(visitor.indices) - visitor.indices = indices + ret, indices = get_backend(interface="spatial_coordinate", selector=option_switch("diagonal_transformation_matrix"))(self.visitor.indices) + self.visitor.indices = indices return ret diff --git a/python/dune/perftool/sumfact/geometry.py b/python/dune/perftool/sumfact/geometry.py index 9a8d4f1f46f0f44c88dace42b93fe1927bc0b698..fce0ec9b8b403ed2b5439f70b418421823ff1804 100644 --- a/python/dune/perftool/sumfact/geometry.py +++ b/python/dune/perftool/sumfact/geometry.py @@ -149,6 +149,6 @@ def pymbolic_spatial_coordinate_axiparallel(visitor_indices): if face is not None and index > face: iindex = iindex - 1 from dune.perftool.sumfact.quadrature import pymbolic_quadrature_position - x = prim.Subscript(pymbolic_quadrature_position(), (iindex,)) + x = pymbolic_quadrature_position(iindex) return prim.Subscript(prim.Variable(lowcorner), (index,)) + x * prim.Subscript(prim.Variable(meshwidth), (index,)), None diff --git a/python/dune/perftool/sumfact/quadrature.py b/python/dune/perftool/sumfact/quadrature.py index 5e6935c3ba368e94300d2e22e6885f98e544af29..d12b7aeb84da5c107235c7cf5872245a0e0500e1 100644 --- a/python/dune/perftool/sumfact/quadrature.py +++ b/python/dune/perftool/sumfact/quadrature.py @@ -4,6 +4,7 @@ from dune.perftool.generation import (backend, get_global_context_value, iname, instruction, + loopy_class_member, temporary_variable, ) @@ -15,6 +16,7 @@ from dune.perftool.pdelab.argument import name_accumulation_variable from dune.perftool.pdelab.geometry import (dimension_iname, local_dimension, ) +from dune.perftool.options import get_option from loopy import CallMangleInfo from loopy.symbolic import FunctionIdentifier @@ -26,7 +28,9 @@ from pymbolic.primitives import (Call, Variable, ) -import numpy +import pymbolic.primitives as prim +import loopy as lp +import numpy as np def nest_quadrature_loops(kernel, inames): @@ -58,7 +62,7 @@ class BaseWeight(FunctionIdentifier): @function_mangler def base_weight_function_mangler(target, func, dtypes): if isinstance(func, BaseWeight): - return CallMangleInfo(func.name, (NumpyType(numpy.float64),), ()) + return CallMangleInfo(func.name, (NumpyType(np.float64),), ()) def pymbolic_base_weight(): @@ -82,6 +86,18 @@ def quadrature_inames(): return tuple(sumfact_quad_iname(d, quadrature_points_per_direction()) for d in range(local_dimension())) +@iname(kernel="operator") +def constructor_quad_iname(name, d, bound): + name = "{}_{}".format(name, d) + domain(name, quadrature_points_per_direction(), kernel="operator") + return name + + +def constructor_quadrature_inames(dim, num1d): + name = "quadiname_dim{}_num{}".format(dim, num1d) + return tuple(constructor_quad_iname(name, d, quadrature_points_per_direction()) for d in range(local_dimension())) + + def define_recursive_quadrature_weight(name, dir): iname = quadrature_inames()[dir] temporary_variable(name, shape=(), shape_impl=()) @@ -107,25 +123,76 @@ def recursive_quadrature_weight(dir=0): def quadrature_weight(): - return recursive_quadrature_weight() + # Return non-precomputed version + if not get_option("precompute_quadrature_info"): + return recursive_quadrature_weight() + + dim = local_dimension() + num1d = quadrature_points_per_direction() + name = "quad_weights_dim{}_num{}".format(dim, num1d) + + # Add a class member + loopy_class_member(name, + dtype=np.float64, + shape=(num1d,) * dim, + classtag="operator", + dim_tags=",".join(["f"] * dim), + managed=True, + potentially_vectorized=True, + ) + + # Precompute it in the constructor + inames = constructor_quadrature_inames(dim, num1d) + instruction(assignee=prim.Subscript(prim.Variable(name), tuple(prim.Variable(i) for i in inames)), + expression=prim.Product(tuple(Subscript(Variable(name_oned_quadrature_weights()), (prim.Variable(i),)) for i in inames)), + within_inames=frozenset(inames), + kernel="operator", + ) + return prim.Subscript(lp.symbolic.TaggedVariable(name, "operator_precomputed"), tuple(prim.Variable(i) for i in quadrature_inames())) -def define_quadrature_position(name): - for i in range(local_dimension()): - instruction(expression=Subscript(Variable(name_oned_quadrature_points()), (Variable(quadrature_inames()[i]),)), - assignee=Subscript(Variable(name), (i,)), - forced_iname_deps=frozenset(quadrature_inames()), - forced_iname_deps_is_final=True, - tags=frozenset({"quad"}), - ) + +def define_quadrature_position(name, index): + instruction(expression=Subscript(Variable(name_oned_quadrature_points()), (Variable(quadrature_inames()[index]),)), + assignee=Subscript(Variable(name), (index,)), + forced_iname_deps=frozenset(quadrature_inames()), + forced_iname_deps_is_final=True, + tags=frozenset({"quad"}), + ) @backend(interface="quad_pos", name="sumfact") -def pymbolic_quadrature_position(): - name = 'pos' - temporary_variable(name, shape=(local_dimension(),), shape_impl=("fv",)) - define_quadrature_position(name) - return Variable(name) +def pymbolic_quadrature_position(index): + # Return the non-precomputed version + if not get_option("precompute_quadrature_info"): + name = 'pos' + temporary_variable(name, shape=(local_dimension(),), shape_impl=("fv",)) + define_quadrature_position(name, index) + return prim.Subscript(prim.Variable(name), (index,)) + + assert isinstance(index, int) + dim = local_dimension() + num1d = quadrature_points_per_direction() + name = "quad_points_dim{}_num{}_dir{}".format(dim, num1d, index) + + loopy_class_member(name, + dtype=np.float64, + shape=(num1d,) * dim, + classtag="operator", + dim_tags=",".join(["f"] * dim), + managed=True, + potentially_vectorized=True, + ) + + # Precompute it in the constructor + inames = constructor_quadrature_inames(dim, num1d) + instruction(assignee=prim.Subscript(prim.Variable(name), tuple(prim.Variable(i) for i in inames)), + expression=Subscript(Variable(name_oned_quadrature_points()), (prim.Variable(inames[index]))), + within_inames=frozenset(inames), + kernel="operator", + ) + + return prim.Subscript(lp.symbolic.TaggedVariable(name, "operator_precomputed"), tuple(prim.Variable(i) for i in quadrature_inames())) @backend(interface="qp_in_cell", name="sumfact") diff --git a/python/dune/perftool/sumfact/tabulation.py b/python/dune/perftool/sumfact/tabulation.py index 9ccd5891c60d6840ceb2e9e3baf4981a54a62ed9..c0405c6a244faa71fe52e4fbe7ce326cab220722 100644 --- a/python/dune/perftool/sumfact/tabulation.py +++ b/python/dune/perftool/sumfact/tabulation.py @@ -25,7 +25,7 @@ from dune.perftool.pdelab.localoperator import (name_domain_field, lop_template_range_field, ) from dune.perftool.pdelab.quadrature import quadrature_order -from dune.perftool.tools import maybe_wrap_subscript +from dune.perftool.tools import maybe_wrap_subscript, ceildiv from loopy import CallMangleInfo from loopy.symbolic import FunctionIdentifier from loopy.types import NumpyType @@ -37,10 +37,6 @@ import loopy as lp import numpy as np -def ceildiv(a, b): - return -(-a // b) - - class BasisTabulationMatrixBase(object): pass diff --git a/python/dune/perftool/tools.py b/python/dune/perftool/tools.py index e6259e44b0cd4d030587741f8361e804b8fd3e55..a0dfaf29af06abcdc3f3e49d71291e663536e130 100644 --- a/python/dune/perftool/tools.py +++ b/python/dune/perftool/tools.py @@ -59,3 +59,7 @@ def get_pymbolic_tag(expr): return get_pymbolic_tag(expr.aggregate) raise NotImplementedError("Cannot determine tag on {}".format(expr)) + + +def ceildiv(a, b): + return -(-a // b) diff --git a/python/dune/perftool/ufl/visitor.py b/python/dune/perftool/ufl/visitor.py index 6c7ddb1afa67c9abc3ed1e007a91e4ad4dbd94ee..9ec9afd1bc65990126f9c225335d975a305733c5 100644 --- a/python/dune/perftool/ufl/visitor.py +++ b/python/dune/perftool/ufl/visitor.py @@ -3,11 +3,7 @@ This module defines the main visitor algorithm transforming ufl expressions to pymbolic and loopy. """ from dune.perftool.error import PerftoolUFLError -from dune.perftool.generation import (construct_subst_rule, - domain, - get_global_context_value, - subst_rule, - ) +from dune.perftool.generation import get_global_context_value, domain from dune.perftool.ufl.flatoperators import get_operands from dune.perftool.ufl.modified_terminals import (ModifiedTerminalTracker, Restriction, @@ -39,11 +35,11 @@ import pymbolic.primitives as prim class UFL2LoopyVisitor(ModifiedTerminalTracker): - def __init__(self, interface, measure, dimension_indices, donot_check_substrules=None): + def __init__(self, interface, measure, dimension_indices): self.interface = interface + self.interface.visitor = self self.measure = measure self.dimension_indices = dimension_indices - self.donot_check_substrules = donot_check_substrules # Call base class constructors super(UFL2LoopyVisitor, self).__init__() @@ -56,18 +52,7 @@ class UFL2LoopyVisitor(ModifiedTerminalTracker): return self.call(o) - def call(self, o): - if o != self.donot_check_substrules and subst_rule(None, o): - rule = construct_subst_rule(o, - type(self)(self.interface, - self.measure, - self.dimension_indices, - donot_check_substrules=o, - ) - ) - return prim.Call(prim.Variable(rule.name), ()) - else: - return MultiFunction.__call__(self, o) + call = MultiFunction.__call__ # # Form argument/coefficients handlers: @@ -131,14 +116,14 @@ class UFL2LoopyVisitor(ModifiedTerminalTracker): if self.reference_grad: if o.count() == 0: - return self.interface.pymbolic_trialfunction_gradient(o.ufl_element(), restriction, self.component, visitor=self) + return self.interface.pymbolic_trialfunction_gradient(o.ufl_element(), restriction, self.component) else: - return self.interface.pymbolic_apply_function_gradient(o.ufl_element(), restriction, self.component, visitor=self) + return self.interface.pymbolic_apply_function_gradient(o.ufl_element(), restriction, self.component) else: if o.count() == 0: - return self.interface.pymbolic_trialfunction(o.ufl_element(), restriction, self.component, visitor=self) + return self.interface.pymbolic_trialfunction(o.ufl_element(), restriction, self.component) else: - return self.interface.pymbolic_apply_function(o.ufl_element(), restriction, self.component, visitor=self) + return self.interface.pymbolic_apply_function(o.ufl_element(), restriction, self.component) # Check if this is a parameter function else: @@ -231,7 +216,12 @@ class UFL2LoopyVisitor(ModifiedTerminalTracker): return self._index_or_fixed_index(o) def list_tensor(self, o): - return self.interface.pymbolic_list_tensor(o, self) + if all(isinstance(i, int) for i in self.indices): + index = self.indices[0] + self.indices = self.indices[1:] + return self.call(o.ufl_operands[index]) + else: + return self.interface.pymbolic_list_tensor(o) def identity(self, o): return self.interface.pymbolic_identity(o) @@ -355,12 +345,21 @@ class UFL2LoopyVisitor(ModifiedTerminalTracker): if get_global_context_value("driver", False): return prim.Variable("x") else: - return self.interface.pymbolic_spatial_coordinate(self) + return self.interface.pymbolic_spatial_coordinate() def facet_normal(self, o): # The normal must be restricted to be well-defined assert self.restriction is not Restriction.NONE + # Optimize facet normal on axiparallel grids + from dune.perftool.options import get_option + if get_option("diagonal_transformation_matrix"): + index, = self.indices + from dune.perftool.sumfact.switch import get_facedir + if isinstance(index, int) and index != get_facedir(self.restriction): + self.indices = None + return 0 + if self.restriction == Restriction.POSITIVE: return Variable(self.interface.name_unit_outer_normal()) if self.restriction == Restriction.NEGATIVE: diff --git a/test/sumfact/poisson/CMakeLists.txt b/test/sumfact/poisson/CMakeLists.txt index 72fbf5af11794ff5d3b80ed16fe6576c071103ea..824f2721f163a46d6a76ac9c256c95b3f9885c4c 100644 --- a/test/sumfact/poisson/CMakeLists.txt +++ b/test/sumfact/poisson/CMakeLists.txt @@ -13,7 +13,7 @@ dune_add_formcompiler_system_test(UFLFILE poisson_3d.ufl #================ # Opcounter tests #================ -dune_add_formcompiler_system_test(UFLFILE opcount_poisson_2d_order2.ufl +dune_add_formcompiler_system_test(UFLFILE poisson_2d.ufl BASENAME opcount_sumfact_poisson_2d_order2 INIFILE opcount_poisson_2d_order2.mini ) diff --git a/test/sumfact/poisson/opcount_poisson_2d_order2.mini b/test/sumfact/poisson/opcount_poisson_2d_order2.mini index 627f5bb3eb7cee357a8dc10db82353491153d648..d5fe76eca69bc4e3cd2e12800f816675d83ff50a 100644 --- a/test/sumfact/poisson/opcount_poisson_2d_order2.mini +++ b/test/sumfact/poisson/opcount_poisson_2d_order2.mini @@ -18,3 +18,6 @@ compare_l2errorsquared = 1e-8 sumfact = 1 opcounter = 1 instrumentation_level = 4 + +[formcompiler.ufl_variants] +degree = 2 diff --git a/test/sumfact/poisson/opcount_poisson_2d_order2.ufl b/test/sumfact/poisson/opcount_poisson_2d_order2.ufl deleted file mode 100644 index 23982245c5917beb2e74fbedc0ecf399fec549c2..0000000000000000000000000000000000000000 --- a/test/sumfact/poisson/opcount_poisson_2d_order2.ufl +++ /dev/null @@ -1,10 +0,0 @@ -cell = "quadrilateral" - -f = Expression("Dune::FieldVector<oc::OpCounter<double>,2> c(0.5); c-= x; return 4.*(1.-c.two_norm2())*exp(-1.*c.two_norm2());", cell=cell) -g = Expression("Dune::FieldVector<oc::OpCounter<double>,2> c(0.5); c-= x; return exp(-1.*c.two_norm2());", cell=cell) - -V = FiniteElement("CG", cell, 2, dirichlet_expression=g) -u = TrialFunction(V) -v = TestFunction(V) - -forms = [(inner(grad(u), grad(v)) - f*v)*dx]