From 56997384b8ceb9410d27ad0e40a88121a138fdfa Mon Sep 17 00:00:00 2001 From: Dominic Kempf <dominic.kempf@iwr.uni-heidelberg.de> Date: Tue, 25 Apr 2017 15:41:08 +0200 Subject: [PATCH] Add global coordinate calculation for axiparallel grids (without dune-geometry) --- dune/perftool/common/muladd_workarounds.hh | 26 ++++++++ python/dune/perftool/loopy/target.py | 1 + python/dune/perftool/pdelab/tensors.py | 6 +- python/dune/perftool/sumfact/__init__.py | 6 +- python/dune/perftool/sumfact/geometry.py | 73 ++++++++++++++++++++-- 5 files changed, 103 insertions(+), 9 deletions(-) create mode 100644 dune/perftool/common/muladd_workarounds.hh diff --git a/dune/perftool/common/muladd_workarounds.hh b/dune/perftool/common/muladd_workarounds.hh new file mode 100644 index 00000000..9ca81ed0 --- /dev/null +++ b/dune/perftool/common/muladd_workarounds.hh @@ -0,0 +1,26 @@ +#ifndef DUNE_PERFTOOL_MULADD_WORKAROUNDS_HH +#define DUNE_PERFTOOL_MULADD_WORKAROUNDS_HH + +/* We are currently having some issues with FMA nodes not being + * eliminated correctly upon code generation. We "solve" the problem + * for now with overloads of the mul_add function for scalars. + */ + +#include<dune/perftool/common/opcounter.hh> + + +inline double mul_add(double op1, double& op2, double op3) +{ + return op1 * op2 + op3; +} + +#ifdef ENABLE_COUNTER + +op::OpCounter<double> mul_add(op::OpCounter<double> op1, op::OpCounter<double>& op2, op::OpCounter<double> op3) +{ + return op1 * op2 + op3; +} + +#endif + +#endif diff --git a/python/dune/perftool/loopy/target.py b/python/dune/perftool/loopy/target.py index bd73b61e..83d42e9d 100644 --- a/python/dune/perftool/loopy/target.py +++ b/python/dune/perftool/loopy/target.py @@ -84,6 +84,7 @@ class DuneExpressionToCExpressionMapper(ExpressionToCExpressionMapper): def map_fused_multiply_add(self, expr, type_context): if self.codegen_state.vectorization_info: + include_file("dune/perftool/common/muladd_workarounds.hh", filetag="operatorfile") # If this is vectorized we call the VCL function mul_add return prim.Call(prim.Variable("mul_add"), (self.rec(expr.mul_op1, type_context), diff --git a/python/dune/perftool/pdelab/tensors.py b/python/dune/perftool/pdelab/tensors.py index 496c5250..a8e15d84 100644 --- a/python/dune/perftool/pdelab/tensors.py +++ b/python/dune/perftool/pdelab/tensors.py @@ -19,12 +19,14 @@ def define_list_tensor(name, expr, visitor, stack=()): if isinstance(child, ListTensor): define_list_tensor(name, child, visitor, stack=stack + (i,)) else: - # TODO: the dependency below is not very nice! + visexpr = visitor.call(child) + from loopy.symbolic import DependencyMapper + deps = DependencyMapper(include_subscripts=False, include_lookups=False, include_calls=False)(visexpr) instruction(assignee=prim.Subscript(prim.Variable(name), stack + (i,)), expression=visitor.call(child), forced_iname_deps=frozenset(visitor.interface.quadrature_inames()), + depends_on=frozenset({lp.match.Or(tuple(lp.match.Writes(v.name) for v in deps))}).union(frozenset({lp.match.Tagged("sumfact_stage1")})), tags=frozenset({"quad"}), - depends_on=frozenset({"sumfact_stage1"}), ) diff --git a/python/dune/perftool/sumfact/__init__.py b/python/dune/perftool/sumfact/__init__.py index 6a07678a..3b519622 100644 --- a/python/dune/perftool/sumfact/__init__.py +++ b/python/dune/perftool/sumfact/__init__.py @@ -1,3 +1,5 @@ +from dune.perftool.generation import get_backend +from dune.perftool.options import option_switch from dune.perftool.pdelab.argument import (name_applycontainer, name_coefficientcontainer, ) @@ -60,7 +62,7 @@ class SumFactInterface(PDELabInterface): # return to_global(pymbolic_quadrature_position()) def pymbolic_spatial_coordinate(self, visitor=None): - from dune.perftool.sumfact.geometry import pymbolic_spatial_coordinate - ret, indices = pymbolic_spatial_coordinate(visitor.indices) + import dune.perftool.sumfact.geometry + ret, indices = get_backend(interface="spatial_coordinate", selector=option_switch("diagonal_transformation_matrix"))(visitor.indices) visitor.indices = indices return ret diff --git a/python/dune/perftool/sumfact/geometry.py b/python/dune/perftool/sumfact/geometry.py index 02e7a75b..9a8d4f1f 100644 --- a/python/dune/perftool/sumfact/geometry.py +++ b/python/dune/perftool/sumfact/geometry.py @@ -1,23 +1,29 @@ """ Sum factorized geometry evaluations """ -from dune.perftool.generation import (domain, +from dune.perftool.generation import (backend, + domain, get_backend, get_counted_variable, iname, instruction, kernel_cached, + preamble, temporary_variable, + globalarg, ) from dune.perftool.loopy.buffer import get_buffer_temporary from dune.perftool.pdelab.geometry import (local_dimension, world_dimension, + name_geometry, ) from dune.perftool.sumfact.symbolic import SumfactKernelInputBase from dune.perftool.sumfact.vectorization import attach_vectorization_info +from dune.perftool.options import option_switch from pytools import ImmutableRecord import pymbolic.primitives as prim +import numpy as np @iname @@ -38,8 +44,6 @@ class GeoCornersInput(SumfactKernelInputBase, ImmutableRecord): ) ciname = corner_iname() - - from dune.perftool.pdelab.geometry import name_geometry geo = name_geometry() # NB: We need to realize this as a C instruction, because the corner @@ -61,9 +65,9 @@ class GeoCornersInput(SumfactKernelInputBase, ImmutableRecord): tags=frozenset({"sumfact_stage{}".format(sf.stage)}), ) - @kernel_cached -def pymbolic_spatial_coordinate(visitor_indices): +@backend(interface="spatial_coordinate", name="default") +def pymbolic_spatial_coordinate_multilinear(visitor_indices): assert len(visitor_indices) == 1 # Construct the matrix sequence for the evaluation of the global coordinate. @@ -89,3 +93,62 @@ def pymbolic_spatial_coordinate(visitor_indices): var, _ = realize_sum_factorization_kernel(vsf) return prim.Subscript(var, vsf.quadrature_index(sf)), None + + +@preamble +def define_corner(name, low): + geo = name_geometry() + return "auto {} = {}.corner({});".format(name, + geo, + 0 if low else 2 ** local_dimension() - 1) + + +@preamble +def define_mesh_width(name): + define_corner(name, False) + lower = name_lowerleft_corner() + return "{} -= {};".format(name, lower) + + +def name_lowerleft_corner(): + name = "lowerleft_corner" + globalarg(name, dtype=np.float64, shape=(world_dimension(),)) + define_corner(name, True) + return name + + +def name_meshwidth(): + name = "meshwidth" + globalarg(name, dtype=np.float64, shape=(world_dimension(),)) + define_mesh_width(name) + return name + + +@kernel_cached +@backend(interface="spatial_coordinate", name="diagonal_transformation_matrix") +def pymbolic_spatial_coordinate_axiparallel(visitor_indices): + assert len(visitor_indices) == 1 + index, = visitor_indices + + # Urgh: *SOMEHOW* construct a face direction + from dune.perftool.pdelab.restriction import Restriction + restriction = Restriction.NONE + from dune.perftool.generation import get_global_context_value + if get_global_context_value("integral_type") == "interior_facet": + restriction = Restriction.NEGATIVE + from dune.perftool.sumfact.switch import get_facedir + face = get_facedir(restriction) + + lowcorner = name_lowerleft_corner() + meshwidth = name_meshwidth() + + if index == face: + x = 0 + else: + iindex = index + if face is not None and index > face: + iindex = iindex - 1 + from dune.perftool.sumfact.quadrature import pymbolic_quadrature_position + x = prim.Subscript(pymbolic_quadrature_position(), (iindex,)) + + return prim.Subscript(prim.Variable(lowcorner), (index,)) + x * prim.Subscript(prim.Variable(meshwidth), (index,)), None -- GitLab