diff --git a/patches/apply_patches.sh b/patches/apply_patches.sh index 0ac07e6614e86c9a439584a71d6f17b0a1a8a90d..e7a0b5f373139932981fa0e491924b685b6076ff 100755 --- a/patches/apply_patches.sh +++ b/patches/apply_patches.sh @@ -2,7 +2,6 @@ pushd python/loopy git apply ../../patches/loopy/Current.patch -git apply ../../patches/loopy/0001-Allow-using-globalargs-as-base-storage.patch popd pushd dune/perftool/vectorclass diff --git a/patches/loopy/0001-Allow-using-globalargs-as-base-storage.patch b/patches/loopy/0001-Allow-using-globalargs-as-base-storage.patch deleted file mode 100644 index 5df322c990864c60614eb1cd3b3bdb928dd76acc..0000000000000000000000000000000000000000 --- a/patches/loopy/0001-Allow-using-globalargs-as-base-storage.patch +++ /dev/null @@ -1,63 +0,0 @@ -From b15ad57a11b774bde714f058c85248990c461048 Mon Sep 17 00:00:00 2001 -From: Dominic Kempf <dominic.kempf@iwr.uni-heidelberg.de> -Date: Tue, 23 May 2017 12:55:25 +0200 -Subject: [PATCH] Allow using globalargs as base storage - ---- - loopy/preprocess.py | 2 +- - loopy/target/c/__init__.py | 23 ++++++++++++----------- - 2 files changed, 13 insertions(+), 12 deletions(-) - -diff --git a/loopy/preprocess.py b/loopy/preprocess.py -index 17226b6..65a9df1 100644 ---- a/loopy/preprocess.py -+++ b/loopy/preprocess.py -@@ -165,7 +165,7 @@ def find_temporary_scope(kernel): - for temp_var in six.itervalues(kernel.temporary_variables): - if temp_var.base_storage is not None: - # no nesting allowed -- if temp_var.base_storage in kernel_var_names: -+ if temp_var.base_storage in kernel.temporary_variables: - raise LoopyError("base_storage for temporary '%s' is '%s', " - "which is an existing variable name" - % (temp_var.name, temp_var.base_storage)) -diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py -index e4835a3..7c6a617 100644 ---- a/loopy/target/c/__init__.py -+++ b/loopy/target/c/__init__.py -@@ -439,20 +439,21 @@ class CASTBuilder(ASTBuilderBase): - assert tv.initializer is None - - offset = 0 -- base_storage_sizes.setdefault(tv.base_storage, []).append( -- tv.nbytes) -- base_storage_to_scope.setdefault(tv.base_storage, []).append( -- tv.scope) -+ if not tv.base_storage in kernel.args: -+ base_storage_sizes.setdefault(tv.base_storage, []).append( -+ tv.nbytes) -+ base_storage_to_scope.setdefault(tv.base_storage, []).append( -+ tv.scope) - -- align_size = tv.dtype.itemsize -+ align_size = tv.dtype.itemsize - -- from loopy.kernel.array import VectorArrayDimTag -- for dim_tag, axis_len in zip(tv.dim_tags, tv.shape): -- if isinstance(dim_tag, VectorArrayDimTag): -- align_size *= axis_len -+ from loopy.kernel.array import VectorArrayDimTag -+ for dim_tag, axis_len in zip(tv.dim_tags, tv.shape): -+ if isinstance(dim_tag, VectorArrayDimTag): -+ align_size *= axis_len - -- base_storage_to_align_bytes.setdefault(tv.base_storage, []).append( -- align_size) -+ base_storage_to_align_bytes.setdefault(tv.base_storage, []).append( -+ align_size) - - for idi in decl_info: - cast_decl = POD(self, idi.dtype, "") --- -2.1.4 - diff --git a/python/dune/perftool/blockstructured/basis.py b/python/dune/perftool/blockstructured/basis.py index 59d55174faa0f9fbc0133bcf6cbd523913477a4f..77b266d75d9b0312b5bd9e8e8763223b05dd5852 100644 --- a/python/dune/perftool/blockstructured/basis.py +++ b/python/dune/perftool/blockstructured/basis.py @@ -21,6 +21,8 @@ from dune.perftool.pdelab.spaces import type_leaf_gfs from dune.perftool.pdelab.restriction import restricted_name from dune.perftool.blockstructured.spaces import lfs_inames +from ufl import MixedElement + import pymbolic.primitives as prim @@ -79,7 +81,7 @@ def evaluate_basis(leaf_element, name, restriction): def pymbolic_basis(leaf_element, restriction, number, context=''): - assert leaf_element.num_sub_elements() == 0 + assert not isinstance(leaf_element, MixedElement) name = "phi_{}".format(FEM_name_mangling(leaf_element)) name = restricted_name(name, restriction) evaluate_basis(leaf_element, name, restriction) @@ -102,7 +104,7 @@ def evaluate_reference_gradient(leaf_element, name, restriction): def pymbolic_reference_gradient(leaf_element, restriction, number, context=''): - assert leaf_element.num_sub_elements() == 0 + assert not isinstance(leaf_element, MixedElement) name = "js_{}".format(FEM_name_mangling(leaf_element)) name = restricted_name(name, restriction) evaluate_reference_gradient(leaf_element, name, restriction) diff --git a/python/dune/perftool/pdelab/basis.py b/python/dune/perftool/pdelab/basis.py index 2bda1822594203be1e406e6452154fb13fd8f8f5..0c7ab18a63f12b9e870992aad88143d65229ef67 100644 --- a/python/dune/perftool/pdelab/basis.py +++ b/python/dune/perftool/pdelab/basis.py @@ -35,7 +35,11 @@ from dune.perftool.pdelab.restriction import restricted_name from dune.perftool.pdelab.driver import (isPk, isQk, isDG) + from pymbolic.primitives import Product, Subscript, Variable + +from ufl import MixedElement + from loopy import Reduction @@ -107,7 +111,7 @@ def evaluate_basis(leaf_element, name, restriction): def pymbolic_basis(leaf_element, restriction, number, context=''): - assert leaf_element.num_sub_elements() == 0 + assert not isinstance(leaf_element, MixedElement) name = "phi_{}".format(FEM_name_mangling(leaf_element)) name = restricted_name(name, restriction) evaluate_basis(leaf_element, name, restriction) @@ -135,7 +139,7 @@ def evaluate_reference_gradient(leaf_element, name, restriction): def pymbolic_reference_gradient(leaf_element, restriction, number, context=''): - assert leaf_element.num_sub_elements() == 0 + assert not isinstance(leaf_element, MixedElement) name = "js_{}".format(FEM_name_mangling(leaf_element)) name = restricted_name(name, restriction) evaluate_reference_gradient(leaf_element, name, restriction) @@ -156,7 +160,7 @@ def shape_as_pymbolic(shape): @kernel_cached def evaluate_coefficient(visitor, element, name, container, restriction, index): sub_element = element - if element.num_sub_elements() > 0: + if isinstance(element, MixedElement): sub_element = element.extract_component(index)[1] from ufl import FiniteElement @@ -189,7 +193,7 @@ def evaluate_coefficient(visitor, element, name, container, restriction, index): @kernel_cached def evaluate_coefficient_gradient(visitor, element, name, container, restriction, index): sub_element = element - if element.num_sub_elements() > 0: + if isinstance(element, MixedElement): sub_element = element.extract_component(index)[1] from ufl import FiniteElement assert isinstance(sub_element, FiniteElement) diff --git a/python/dune/perftool/pdelab/driver/gridfunctionspace.py b/python/dune/perftool/pdelab/driver/gridfunctionspace.py index bf47920a94513d0075b92d3b65bc8357081e371e..bf20ff6695941280b39d5680a6ca087ca4e54da2 100644 --- a/python/dune/perftool/pdelab/driver/gridfunctionspace.py +++ b/python/dune/perftool/pdelab/driver/gridfunctionspace.py @@ -223,31 +223,31 @@ def name_test_gfs(): return name_gfs(element, is_dirichlet) -def name_gfs(element, is_dirichlet, treepath=()): +def name_gfs(element, is_dirichlet, treepath=(), root=True): if isinstance(element, (VectorElement, TensorElement)): subel = element.sub_elements()[0] - subgfs = name_gfs(subel, is_dirichlet[:subel.value_size()], treepath=treepath + (0,)) + subgfs = name_gfs(subel, is_dirichlet[:subel.value_size()], treepath=treepath + (0,), root=False) name = "{}_pow{}gfs_{}".format(subgfs, element.num_sub_elements(), "_".join(str(t) for t in treepath)) - define_power_gfs(element, is_dirichlet, name, subgfs) + define_power_gfs(element, is_dirichlet, name, subgfs, root) return name elif isinstance(element, MixedElement): k = 0 subgfs = [] for i, subel in enumerate(element.sub_elements()): - subgfs.append(name_gfs(subel, is_dirichlet[k:k + subel.value_size()], treepath=treepath + (i,))) + subgfs.append(name_gfs(subel, is_dirichlet[k:k + subel.value_size()], treepath=treepath + (i,), root=False)) k = k + subel.value_size() name = "_".join(subgfs) name = "{}_{}".format(name, "_".join(str(t) for t in treepath)) - define_composite_gfs(element, is_dirichlet, name, tuple(subgfs)) + define_composite_gfs(element, is_dirichlet, name, tuple(subgfs), root) return name else: assert isinstance(element, (FiniteElement, TensorProductElement)) name = "{}{}_gfs_{}".format(FEM_name_mangling(element).lower(), "_dirichlet" if is_dirichlet[0] else "", "_".join(str(t) for t in treepath)) - define_gfs(element, is_dirichlet, name) + define_gfs(element, is_dirichlet, name, root) return name @@ -263,34 +263,34 @@ def type_trial_gfs(): return type_gfs(element, is_dirichlet) -def type_gfs(element, is_dirichlet): +def type_gfs(element, is_dirichlet, root=True): if isinstance(element, (VectorElement, TensorElement)): subel = element.sub_elements()[0] - subgfs = type_gfs(subel, is_dirichlet[:subel.value_size()]) + subgfs = type_gfs(subel, is_dirichlet[:subel.value_size()], root=False) name = "{}_POW{}GFS".format(subgfs, element.num_sub_elements()) - typedef_power_gfs(element, is_dirichlet, name, subgfs) + typedef_power_gfs(element, is_dirichlet, name, subgfs, root) return name elif isinstance(element, MixedElement): k = 0 subgfs = [] for subel in element.sub_elements(): - subgfs.append(type_gfs(subel, is_dirichlet[k:k + subel.value_size()])) + subgfs.append(type_gfs(subel, is_dirichlet[k:k + subel.value_size()], root=False)) k = k + subel.value_size() name = "_".join(subgfs) - typedef_composite_gfs(element, name, tuple(subgfs)) + typedef_composite_gfs(element, name, tuple(subgfs), root) return name else: assert isinstance(element, (FiniteElement, TensorProductElement)) name = "{}{}_GFS".format(FEM_name_mangling(element).upper(), "_dirichlet" if is_dirichlet[0] else "", ) - typedef_gfs(element, is_dirichlet, name) + typedef_gfs(element, is_dirichlet, name, root) return name @preamble -def define_gfs(element, is_dirichlet, name): - gfstype = type_gfs(element, is_dirichlet) +def define_gfs(element, is_dirichlet, name, root): + gfstype = type_gfs(element, is_dirichlet, root=root) gv = name_leafview() fem = name_fem(element) return ["{} {}({}, {});".format(gfstype, name, gv, fem), @@ -298,24 +298,23 @@ def define_gfs(element, is_dirichlet, name): @preamble -def define_power_gfs(element, is_dirichlet, name, subgfs): - gv = name_leafview() - fem = name_fem(element.sub_elements()[0]) - gfstype = type_gfs(element, is_dirichlet) - return ["{} {}({}, {});".format(gfstype, name, gv, fem), - "{}.name(\"{}\");".format(name, name)] +def define_power_gfs(element, is_dirichlet, name, subgfs, root): + gfstype = type_gfs(element, is_dirichlet, root=root) + names = ["using namespace Dune::Indices;"] + names = names + ["{0}.child(_{1}).name(\"{0}_{1}\");".format(name, i) for i in range(element.num_sub_elements())] + return ["{} {}({});".format(gfstype, name, subgfs)] + names @preamble -def define_composite_gfs(element, is_dirichlet, name, subgfs): - gfstype = type_gfs(element, is_dirichlet) +def define_composite_gfs(element, is_dirichlet, name, subgfs, root): + gfstype = type_gfs(element, is_dirichlet, root=root) return ["{} {}({});".format(gfstype, name, ", ".join(subgfs)), "{}.update();".format(name)] @preamble -def typedef_gfs(element, is_dirichlet, name): - vb = type_vectorbackend(element) +def typedef_gfs(element, is_dirichlet, name, root): + vb = type_vectorbackend(element, root) gv = type_leafview() fem = type_fem(element) cass = type_constraintsassembler(bool(is_dirichlet[0])) @@ -323,58 +322,50 @@ def typedef_gfs(element, is_dirichlet, name): @preamble -def typedef_power_gfs(element, is_dirichlet, name, subgfs): - include_file("dune/pdelab/gridfunctionspace/vectorgridfunctionspace.hh", filetag="driver") - gv = type_leafview() - vb = type_vectorbackend(element) - fem = type_fem(element.sub_elements()[0]) - dim = element.num_sub_elements() - cass = type_constraintsassembler(bool(is_dirichlet[0])) - return "using {} = Dune::PDELab::VectorGridFunctionSpace<{}, {}, {}, {}, {}, {}>;".format(name, gv, fem, dim, vb, vb, cass) +def typedef_power_gfs(element, is_dirichlet, name, subgfs, root): + include_file("dune/pdelab/gridfunctionspace/powergridfunctionspace.hh", filetag="driver") + vb = type_vectorbackend(element, root) + ot = type_orderingtag(False) + + return "using {} = Dune::PDELab::PowerGridFunctionSpace<{}, {}, {}, {}>;".format(name, subgfs, element.num_sub_elements(), vb, ot) @preamble -def typedef_composite_gfs(element, name, subgfs): - vb = type_vectorbackend(element) - ot = type_orderingtag() +def typedef_composite_gfs(element, name, subgfs, root): + vb = type_vectorbackend(element, root) + ot = type_orderingtag(isinstance(element, FiniteElement)) args = ", ".join(subgfs) return "using {} = Dune::PDELab::CompositeGridFunctionSpace<{}, {}, {}>;".format(name, vb, ot, args) @preamble -def define_blocksize(name, element): - from dune.perftool.pdelab.driver import isDG, isQuadrilateral - assert isDG(element) - assert isQuadrilateral(element.cell()) - dimension = get_dimension() - degree = element.degree() - return "static const int {} = Dune::QkStuff::QkSize<{}, {}>::value;".format(name, degree, dimension) - - -def name_blocksize(element): - name = "blocksize" - define_blocksize(name, element) - return name - - -@preamble -def typedef_vectorbackend(name, element): +def typedef_vectorbackend(name, element, root): include_file("dune/pdelab/backend/istl.hh", filetag="driver") - if get_option("fastdg"): - blocksize = name_blocksize(element) - return "using {} = Dune::PDELab::istl::VectorBackend<Dune::PDELab::istl::Blocking::fixed, {}>;".format(name, blocksize) + if get_option("fastdg") and root: + blocking = "Dune::PDELab::ISTL::Blocking::fixed" else: - return "using {} = Dune::PDELab::istl::VectorBackend<Dune::PDELab::istl::Blocking::none, 1>;".format(name) + blocking = "Dune::PDELab::ISTL::Blocking::none" + if isinstance(element, MixedElement): + blocksize = "" + else: + include_file("dune/pdelab/finiteelement/qkdglagrange.hh", filetag="driver") + blocksize = ", Dune::QkStuff::QkSize<{}, {}>::value".format(element.degree(), get_dimension()) -def type_vectorbackend(element): - name = "VectorBackend" - typedef_vectorbackend(name, element) + return "using {} = Dune::PDELab::ISTL::VectorBackend<{}{}>;".format(name, blocking, blocksize) + + +def type_vectorbackend(element, root): + name = "VectorBackend{}".format(FEM_name_mangling(element).upper()) + typedef_vectorbackend(name, element, root) return name -def type_orderingtag(): - return "Dune::PDELab::LexicographicOrderingTag" +def type_orderingtag(leaf): + if leaf or not get_option("fastdg"): + return "Dune::PDELab::LexicographicOrderingTag" + else: + return "Dune::PDELab::EntityBlockedOrderingTag" @preamble diff --git a/python/dune/perftool/pdelab/driver/gridoperator.py b/python/dune/perftool/pdelab/driver/gridoperator.py index eb6cfd893ddd68c4f987e7dea77c3a501c9c71b6..39071cc8db29d90ff5189808ef4991a2c2794707 100644 --- a/python/dune/perftool/pdelab/driver/gridoperator.py +++ b/python/dune/perftool/pdelab/driver/gridoperator.py @@ -134,7 +134,7 @@ def name_dofestimate(): @preamble def typedef_matrixbackend(name): include_file("dune/pdelab/backend/istl.hh", filetag="driver") - return "using {} = Dune::PDELab::istl::BCRSMatrixBackend<>;".format(name) + return "using {} = Dune::PDELab::ISTL::BCRSMatrixBackend<>;".format(name) def type_matrixbackend(): diff --git a/python/dune/perftool/sumfact/accumulation.py b/python/dune/perftool/sumfact/accumulation.py index 19f19f8c8483f7a9d2864a3ca08e4d88db10f4be..dac38218879442f9cc9c91d9a3c41cd2f3cbec5c 100644 --- a/python/dune/perftool/sumfact/accumulation.py +++ b/python/dune/perftool/sumfact/accumulation.py @@ -266,9 +266,12 @@ def generate_accumulation_instruction(expr, visitor): stage=3, position_priority=priority, accumvar=accumvar, - trial_element=trial_leaf_element, + test_element=test_info.element, + test_element_index=test_info.element_index, + trial_element=trial_info.element, + trial_element_index=trial_info.element_index, input=AlreadyAssembledInput(index=(test_info.element_index,)), - component_index=test_info.element_index, + predicates=predicates, ) from dune.perftool.sumfact.vectorization import attach_vectorization_info diff --git a/python/dune/perftool/sumfact/basis.py b/python/dune/perftool/sumfact/basis.py index 00ef29661f33d4c7f32600babe622789caca50d3..d01c8ef8edead963d0bd56fd4e34c956484957d3 100644 --- a/python/dune/perftool/sumfact/basis.py +++ b/python/dune/perftool/sumfact/basis.py @@ -160,10 +160,10 @@ def pymbolic_coefficient_gradient(element, restriction, index, coeff_func, visit @kernel_cached def pymbolic_coefficient(element, restriction, index, coeff_func, visitor_indices): sub_element = element - if element.num_sub_elements() > 0: + if isinstance(element, MixedElement): sub_element = element.extract_component(index)[1] from ufl import FiniteElement - assert isinstance(sub_element, FiniteElement) + assert isinstance(sub_element, (FiniteElement, TensorProductElement)) # Basis functions per direction basis_size = _basis_functions_per_direction(sub_element) @@ -262,7 +262,7 @@ def pymbolic_basis(element, restriction, number): if number == 0: return 1 - assert element.num_sub_elements() == 0 + assert not isinstance(element, MixedElement) name = "phi_{}".format(FEM_name_mangling(element)) name = restricted_name(name, restriction) diff --git a/python/dune/perftool/sumfact/realization.py b/python/dune/perftool/sumfact/realization.py index 5c9e4a5b50fd07d39c1d4639982d05ca82a40224..5be6fc825ef0ee63e313b65a7aecd9c0f3f113d5 100644 --- a/python/dune/perftool/sumfact/realization.py +++ b/python/dune/perftool/sumfact/realization.py @@ -9,7 +9,9 @@ from dune.perftool.generation import (barrier, globalarg, instruction, post_include, + preamble, silenced_warning, + temporary_variable, transform, ) from dune.perftool.loopy.buffer import (get_buffer_temporary, @@ -27,6 +29,8 @@ from dune.perftool.sumfact.permutation import (sumfact_permutation_strategy, from dune.perftool.sumfact.accumulation import sumfact_iname from dune.perftool.loopy.vcl import ExplicitVCLCast +from ufl import MixedElement + import loopy as lp import numpy as np import pymbolic.primitives as prim @@ -39,6 +43,11 @@ def realize_sum_factorization_kernel(sf, **kwargs): return _realize_sum_factorization_kernel(sf, **kwargs) +@preamble +def alias_data_array(name, data): + return "auto {} = {}.data();".format(name, data) + + @generator_factory(item_tags=("sumfactkernel",), context_tags=("kernel",), cache_key_generator=lambda s, **kw: s.cache_key) @@ -67,10 +76,11 @@ def _realize_sum_factorization_kernel(sf): sf.input.realize(sf, 0, insn_dep) insn_dep = insn_dep.union(frozenset({lp.match.Writes("input_{}".format(sf.buffer))})) - - direct_output = None - if get_option('fastdg') and sf.stage == 3: - direct_output = sf.accumvar + ".data()" + else: + if sf.input.element_index is None: + direct_input_arg = "{}_access".format(direct_input) + else: + direct_input_arg = "{}_access_comp{}".format(direct_input, sf.input.element_index) # Prepare some dim_tags/shapes for later use ftags = ",".join(["f"] * sf.length) @@ -128,13 +138,19 @@ def _realize_sum_factorization_kernel(sf): input_inames = permute_backward(input_inames, perm) inp_shape = permute_backward(inp_shape, perm) - globalarg(direct_input, dtype=np.float64, shape=inp_shape, dim_tags=novec_ftags) + globalarg(direct_input_arg, + dtype=np.float64, + shape=inp_shape, + dim_tags=novec_ftags, + offset=_dof_offset(sf.input.element, sf.input.element_index), + ) + alias_data_array(direct_input_arg, direct_input) if matrix.vectorized: input_summand = prim.Call(ExplicitVCLCast(np.float64, vector_width=sf.vector_width), - (prim.Subscript(prim.Variable(direct_input), + (prim.Subscript(prim.Variable(direct_input_arg), input_inames),)) else: - input_summand = prim.Subscript(prim.Variable(direct_input), + input_summand = prim.Subscript(prim.Variable(direct_input_arg), input_inames + vec_iname) else: # If we did permute the order of a matrices above we also @@ -192,17 +208,41 @@ def _realize_sum_factorization_kernel(sf): # In case of direct output we directly accumulate the result # of the Sumfactorization into some global data structure. - if l == len(matrix_sequence) - 1 and direct_output is not None: + if l == len(matrix_sequence) - 1 and get_option('fastdg') and sf.stage == 3: ft = get_global_context_value("form_type") + if sf.test_element_index is None: + direct_output = "{}_access".format(sf.accumvar) + else: + direct_output = "{}_access_comp{}".format(sf.accumvar, sf.test_element_index) if ft == 'residual' or ft == 'jacobian_apply': - globalarg(direct_output, dtype=np.float64, shape=output_shape, dim_tags=novec_ftags) + globalarg(direct_output, + dtype=np.float64, + shape=output_shape, + dim_tags=novec_ftags, + offset=_dof_offset(sf.test_element, sf.test_element_index), + ) + alias_data_array(direct_output, sf.accumvar) + assignee = prim.Subscript(prim.Variable(direct_output), output_inames) else: assert ft == 'jacobian' + + direct_output = "{}x{}".format(direct_output, sf.trial_element_index) + rowsize = sum(tuple(s for s in _local_sizes(sf.trial_element))) + element = sf.trial_element + if isinstance(element, MixedElement): + element = element.extract_component(sf.trial_element_index)[1] + other_shape = tuple(element.degree() + 1 for e in range(sf.length)) + from pytools import product + manual_strides = tuple("stride:{}".format(rowsize * product(output_shape[:i])) for i in range(sf.length)) + dim_tags = "{},{}".format(novec_ftags, ",".join(manual_strides)) globalarg(direct_output, dtype=np.float64, - shape=output_shape + output_shape, - dim_tags=novec_ftags + "," + novec_ftags) + shape=other_shape + output_shape, + offset=rowsize * _dof_offset(sf.test_element, sf.test_element_index) + _dof_offset(sf.trial_element, sf.trial_element_index), + dim_tags=dim_tags, + ) + alias_data_array(direct_output, sf.accumvar) # TODO: It is at least questionnable, whether using the *order* of the inames in here # for indexing is a good idea. Then again, it is hard to find an alternative. _ansatz_inames = tuple(prim.Variable(i) for i in sf.within_inames) @@ -226,6 +266,7 @@ def _realize_sum_factorization_kernel(sf): forced_iname_deps_is_final=True, depends_on=insn_dep, tags=frozenset({"sumfact_stage{}".format(sf.stage)}), + predicates=sf.predicates, ) }) @@ -248,3 +289,22 @@ def _realize_sum_factorization_kernel(sf): silenced_warning('read_no_write({})'.format(out)) return lp.TaggedVariable(out, sf.tag), insn_dep + + +def _local_sizes(element): + from ufl import FiniteElement, MixedElement + if isinstance(element, MixedElement): + for subel in element.sub_elements(): + for s in _local_sizes(subel): + yield s + else: + assert isinstance(element, FiniteElement) + yield (element.degree() + 1)**element.cell().geometric_dimension() + + +def _dof_offset(element, component): + if component is None: + return 0 + else: + sizes = tuple(s for s in _local_sizes(element)) + return sum(sizes[0:component]) diff --git a/python/dune/perftool/sumfact/symbolic.py b/python/dune/perftool/sumfact/symbolic.py index a0cc1054afac0a67f312667f7fa4243700c1ccac..7aa9c327001c807740956a3549966d7de0d5f6a1 100644 --- a/python/dune/perftool/sumfact/symbolic.py +++ b/python/dune/perftool/sumfact/symbolic.py @@ -8,6 +8,8 @@ from dune.perftool.sumfact.tabulation import BasisTabulationMatrixBase, BasisTab from pytools import ImmutableRecord, product +from ufl import MixedElement + import pymbolic.primitives as prim import loopy as lp import frozendict @@ -37,8 +39,11 @@ class SumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable): insn_dep=frozenset(), input=None, accumvar=None, - component_index=None, + test_element=None, + test_element_index=None, trial_element=None, + trial_element_index=None, + predicates=frozenset(), ): """Create a sum factorization kernel @@ -100,10 +105,12 @@ class SumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable): other. input: An SumfactKernelInputBase instance describing the input of the kernel accumvar: The accumulation variable to accumulate into - component_index: Index to get the right input coefficient - (needed for systems of PDEs) trial_element: The leaf element of the trial function space. - Used to correctly nest stage 3 in the sumfact case. + Used to correctly nest stage 3 in the jacobian case. + test_element: The leaf element of the test function space + Used to compute offsets in the fastdg case. + test_element_index: the component of the test_element + trial_element_index: the component of the trial_element """ # Assert the inputs! assert isinstance(matrix_sequence, tuple) @@ -152,7 +159,7 @@ class SumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable): Any two sum factorization kernels having the same cache_key are realized simulatenously! """ - return (self.matrix_sequence, self.restriction, self.stage, self.buffer, self.component_index) + return (self.matrix_sequence, self.restriction, self.stage, self.buffer, self.test_element_index) @property def input_key(self): @@ -186,7 +193,10 @@ class SumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable): return () else: from dune.perftool.sumfact.basis import lfs_inames - return lfs_inames(self.trial_element, self.restriction) + element = self.trial_element + if isinstance(element, MixedElement): + element = element.extract_component(self.trial_element_index)[1] + return lfs_inames(element, self.restriction) def vec_index(self, sf): """ Map an unvectorized sumfact kernel object to its position @@ -203,7 +213,10 @@ class SumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable): return tuple(mat.quadrature_size for mat in self.matrix_sequence) def quadrature_index(self, _): - quad_inames = quadrature_inames(self.trial_element) + element = self.trial_element + if element is not None and isinstance(element, MixedElement): + element = element.extract_component(self.trial_element_index)[1] + quad_inames = quadrature_inames(element) if len(self.matrix_sequence) == local_dimension(): return tuple(prim.Variable(i) for i in quad_inames) @@ -309,6 +322,7 @@ class VectorizedSumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable) assert len(set(k.length for k in kernels)) == 1 assert len(set(k.restriction for k in kernels)) == 1 assert len(set(k.within_inames for k in kernels)) == 1 + assert len(set(k.predicates for k in kernels)) == 1 # Assert properties of the matrix sequence of the underlying kernels for i in range(kernels[0].length): @@ -379,10 +393,26 @@ class VectorizedSumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable) def within_inames(self): return self.kernels[0].within_inames + @property + def test_element(self): + return self.kernels[0].test_element + + @property + def test_element_index(self): + return self.kernels[0].test_element_index + @property def trial_element(self): return self.kernels[0].trial_element + @property + def trial_element_index(self): + return self.kernels[0].trial_element_index + + @property + def predicates(self): + return self.kernels[0].predicates + @property def input(self): assert len(set(k.input for k in self.kernels)) == 1 @@ -437,7 +467,10 @@ class VectorizedSumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable) return 0 def _quadrature_index(self, sf): - quad_inames = quadrature_inames(self.trial_element) + element = self.trial_element + if element is not None and isinstance(element, MixedElement): + element = element.extract_component(self.trial_element_index)[1] + quad_inames = quadrature_inames(element) index = [] if len(self.matrix_sequence) == local_dimension(): @@ -466,7 +499,10 @@ class VectorizedSumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable) return tuple(index) def vec_index(self, sf): - quad_inames = quadrature_inames(self.trial_element) + element = self.trial_element + if element is not None and isinstance(element, MixedElement): + element = element.extract_component(self.trial_element_index)[1] + quad_inames = quadrature_inames(element) sliced = 0 if len(sf.matrix_sequence) == local_dimension(): for d in range(local_dimension()): diff --git a/python/loopy b/python/loopy index 3b74c2863b4ed82da208375f9f29bba08a3a7c7c..095644a7d7a52e2ae43c747e380ddf6003c9ee18 160000 --- a/python/loopy +++ b/python/loopy @@ -1 +1 @@ -Subproject commit 3b74c2863b4ed82da208375f9f29bba08a3a7c7c +Subproject commit 095644a7d7a52e2ae43c747e380ddf6003c9ee18 diff --git a/test/heatequation/driver.hh b/test/heatequation/driver.hh index 1aaf9f171747325885cfebf6a74103e30575d138..4222ca89cea72d35021faabee8344c24a5cc63f0 100644 --- a/test/heatequation/driver.hh +++ b/test/heatequation/driver.hh @@ -30,7 +30,7 @@ void driver (const GV& gv, const FEM& fem, Dune::ParameterTree& ptree) // Make grid function space typedef Dune::PDELab::ConformingDirichletConstraints CON; - typedef Dune::PDELab::istl::VectorBackend<> VBE; + typedef Dune::PDELab::ISTL::VectorBackend<> VBE; typedef Dune::PDELab::GridFunctionSpace<GV,FEM,CON,VBE> GFS; GFS gfs(gv,fem); gfs.name("Vh"); @@ -63,7 +63,7 @@ void driver (const GV& gv, const FEM& fem, Dune::ParameterTree& ptree) // LOP lop(ptree, params_poisson); - typedef Dune::PDELab::istl::BCRSMatrixBackend<> MBE; + typedef Dune::PDELab::ISTL::BCRSMatrixBackend<> MBE; int degree = ptree.get("fem.degree",(int)1); MBE mbe((int)pow(1+2*degree,dim)); typedef Dune::PDELab::GridOperator<GFS,GFS,LOP,MBE, diff --git a/test/laplace/reference_driver.hh b/test/laplace/reference_driver.hh index db1b55946833b34ae8e925737c7c127241b57f2a..02791b7dcb785463376e4dca0c15407f496d0c74 100644 --- a/test/laplace/reference_driver.hh +++ b/test/laplace/reference_driver.hh @@ -88,7 +88,7 @@ class CDProb }; -void driver(int argc, char** argv){ typedef Dune::PDELab::istl::VectorBackend<Dune::PDELab::istl::Blocking::none, 1> VectorBackend; +void driver(int argc, char** argv){ typedef Dune::PDELab::ISTL::VectorBackend<Dune::PDELab::ISTL::Blocking::none, 1> VectorBackend; static const int dim = 2; typedef Dune::UGGrid<dim> Grid; typedef Grid::LeafGridView GV; @@ -110,7 +110,7 @@ void driver(int argc, char** argv){ typedef Dune::PDELab::istl::VectorBackend<D VTKWriter vtkwriter(gv, sublevel); using LOP = Dune::PDELab::ConvectionDiffusionDG<CDProb<GV, R>, DG1_FEM>; typedef DG1_DIRICHLET_GFS::ConstraintsContainer<R>::Type DG1_CC; - typedef Dune::PDELab::istl::BCRSMatrixBackend<> MatrixBackend; + typedef Dune::PDELab::ISTL::BCRSMatrixBackend<> MatrixBackend; typedef Dune::PDELab::GridOperator<DG1_DIRICHLET_GFS, DG1_DIRICHLET_GFS, LOP, MatrixBackend, DF, R, R, DG1_CC, DG1_CC> GO; typedef GO::Traits::Domain V; V x(dg1_dirichlet_gfs); diff --git a/test/nonlinear/reference_driver.hh b/test/nonlinear/reference_driver.hh index db0b072f1e0cd3050d2b6a0ac36adffc91f4cf1c..e24b24e1c534fac76d6c82bf12ec45426d9c327c 100644 --- a/test/nonlinear/reference_driver.hh +++ b/test/nonlinear/reference_driver.hh @@ -56,7 +56,7 @@ void driver (int argc, char** argv){ typedef Dune::PDELab::PkLocalFiniteElementMap<GV,DF,double,1> FEM; FEM fem(gv); typedef Dune::PDELab::ConformingDirichletConstraints CON; - typedef Dune::PDELab::istl::VectorBackend<> VBE; + typedef Dune::PDELab::ISTL::VectorBackend<> VBE; typedef Dune::PDELab::GridFunctionSpace<GV,FEM,CON,VBE> GFS; GFS gfs(gv,fem); gfs.name("Vh"); @@ -85,7 +85,7 @@ void driver (int argc, char** argv){ LOP lop(problem); // Make a global operator - typedef Dune::PDELab::istl::BCRSMatrixBackend<> MBE; + typedef Dune::PDELab::ISTL::BCRSMatrixBackend<> MBE; int degree = initree.get("fem.degree",(int)1); MBE mbe((int)pow(1+2*degree,dim)); typedef Dune::PDELab::GridOperator< diff --git a/test/poisson/reference_driver.hh b/test/poisson/reference_driver.hh index 6c3e45f2f2c59e8c7a08554aa288e86d6dac3cba..c12054dfbc81ae1ed2350fc6b1dc91755f07b8cd 100644 --- a/test/poisson/reference_driver.hh +++ b/test/poisson/reference_driver.hh @@ -94,7 +94,7 @@ class CDProb }; -void driver(int argc, char** argv){ typedef Dune::PDELab::istl::VectorBackend<Dune::PDELab::istl::Blocking::none, 1> VectorBackend; +void driver(int argc, char** argv){ typedef Dune::PDELab::ISTL::VectorBackend<Dune::PDELab::ISTL::Blocking::none, 1> VectorBackend; static const int dim = 2; typedef Dune::UGGrid<dim> Grid; typedef Grid::LeafGridView GV; @@ -116,7 +116,7 @@ void driver(int argc, char** argv){ typedef Dune::PDELab::istl::VectorBackend<D VTKWriter vtkwriter(gv, sublevel); using LOP = Dune::PDELab::ConvectionDiffusionDG<CDProb<GV, R>, DG1_FEM>; typedef DG1_DIRICHLET_GFS::ConstraintsContainer<R>::Type DG1_CC; - typedef Dune::PDELab::istl::BCRSMatrixBackend<> MatrixBackend; + typedef Dune::PDELab::ISTL::BCRSMatrixBackend<> MatrixBackend; typedef Dune::PDELab::GridOperator<DG1_DIRICHLET_GFS, DG1_DIRICHLET_GFS, LOP, MatrixBackend, DF, R, R, DG1_CC, DG1_CC> GO; typedef GO::Traits::Domain V; V x(dg1_dirichlet_gfs); diff --git a/test/sumfact/stokes/stokes_3d_dg.mini b/test/sumfact/stokes/stokes_3d_dg.mini index 28063941746a213957276465c8fa993d4fe2c07f..dac8dbc82bebd1edd8eecfbc17a197e094c69995 100644 --- a/test/sumfact/stokes/stokes_3d_dg.mini +++ b/test/sumfact/stokes/stokes_3d_dg.mini @@ -1,4 +1,7 @@ -__name = sumfact_stokes_3d_dg +__name = sumfact_stokes_3d_dg_{__exec_suffix} + +__exec_suffix = {fastdg_suffix} +fastdg_suffix = fastdg, nonfastdg | expand fastdg cells = 4 4 4 extension = 1. 1. 1. @@ -11,3 +14,4 @@ extension = vtu [formcompiler] numerical_jacobian = 0 sumfact = 1 +fastdg = 1, 0 | expand fastdg diff --git a/test/sumfact/stokes/stokes_dg.mini b/test/sumfact/stokes/stokes_dg.mini index e533756a11d14de50003dcb5e89c12928e000416..e3374e4a18e844f6f1356ce45b38e5b5212f015f 100644 --- a/test/sumfact/stokes/stokes_dg.mini +++ b/test/sumfact/stokes/stokes_dg.mini @@ -1,8 +1,8 @@ __name = sumfact_stokes_dg_{__exec_suffix} -__exec_suffix = {diff_suffix}_{vec_suffix} +__exec_suffix = {diff_suffix}_{fastdg_suffix} diff_suffix = symdiff, numdiff | expand num -vec_suffix = quadvec, nonquadvec | expand vec +fastdg_suffix = fastdg, nonfastdg | expand fastdg cells = 8 8 extension = 1. 1. @@ -14,6 +14,8 @@ extension = vtu [formcompiler] numerical_jacobian = 0, 1 | expand num -vectorize_quad = 1, 0 | expand vec compare_l2errorsquared = 1e-8 sumfact = 1 +fastdg = 1, 0 | expand fastdg + +{formcompiler.fastdg} == 1 and {formcompiler.numerical_jacobian} == 1 | exclude \ No newline at end of file