diff --git a/python/dune/perftool/pdelab_preambles.py b/python/dune/perftool/pdelab_preambles.py index ed9d6fad49043804e094f416cce0593bff6885f0..4f426a9f532917df982e6edcd3cafa6b2a42bac4 100644 --- a/python/dune/perftool/pdelab_preambles.py +++ b/python/dune/perftool/pdelab_preambles.py @@ -1,4 +1,5 @@ -from dune.perftool.preambles import dune_preambel, dune_symbol +from dune.perftool.preambles import dune_preamble, dune_symbol, loop_domain, loopy_iname, temporary_variable, loopy_c_instruction, loopy_expr_instruction +from dune.perftool.pdelab_names import name @dune_symbol def test_function_name(arg, grad): @@ -12,10 +13,59 @@ def trial_function_name(arg, grad): def index_name(index): return str(index._indices[0]) -@dune_symbol -def argument_index(arg): - return "arg{}".format(chr(ord("i") + arg.number())) +@loop_domain +def argument_loop_domain(name): + return name + +@loopy_iname +def argument_iname(arg): + name = "arg{}".format(chr(ord("i") + arg.number())) + argument_loop_domain(name) + return name @dune_symbol -def dimension(arg): +def dimension(): return "dim" + +@loop_domain +def quadrature_loop_domain(name): + return name + +@loopy_iname +def quadrature_iname(): + quadrature_loop_domain("q") + return "q" + +from dune.perftool.preambles import _dune_decorator_factory, CInstructionCacheItem +quadrature_preamble = _dune_decorator_factory(cache_item_type=CInstructionCacheItem, inames=quadrature_iname()) + +@dune_symbol +def quadrature_rule(): + return "rule" + +@quadrature_preamble(assignees=name("quadrature_factor")) +def define_quadrature_factor(): + rule = quadrature_rule() + return "auto {} = {}->weight();".format(name("quadrature_factor"), rule) + +@temporary_variable +def quadrature_factor(): + define_quadrature_factor() + return name("quadrature_factor") + +@loop_domain(upperbound=dimension()) +def dimension_loop_domain(name): + return name + +@loopy_iname +def dimension_iname(index): + dimension_loop_domain(index_name(index)) + return index_name(index) 
+ +@dune_symbol +def residual_name(): + return "r" + +@loopy_expr_instruction +def accumulation_instruction(expr): + return expr \ No newline at end of file diff --git a/python/dune/perftool/preambles.py b/python/dune/perftool/preambles.py index a11eed6de55507db3d38f6c6458182010d4ee4cd..ad902b907cfb990cc29cf8bfb7f04b4a6c9f2541 100644 --- a/python/dune/perftool/preambles.py +++ b/python/dune/perftool/preambles.py @@ -2,40 +2,70 @@ a complex requirement structure. This includes: * preambles triggering the creation of other preambles * a caching mechanism to avoid duplicated preambles where harmful -""" - - -class UFL2LoopyDataCache(dict): - """ The cache data structure - - The data is stored as key value pairs of the following form: - (function, cache_tuple) -> (content, priority_tag, preamble) - The parameters are: - function : the function that generates the preamble code snippet - cache_tuple : A frozen (sub)set of arguments to the function. - The map from function arguments to cache tuple controls - the amount of caching. - content : The content to store. No assumptions made. - priority_tag : Will later decide the ordering of the preambles. - preamble : A bool whether this cache entry does generate a loopy preamble. - This is usually not the case, when you generate something - like symbol names. Those are inserted into other snippets, - but not on their own right. - """ - def __init__(self): - self.counter = 0 - - def register(self, cachekey, content): - self[cachekey] = (content, self.counter) - self.counter = self.counter + 1 +TODO rename to generation_cache. I will use it for much more than preambles. +""" - def extract_preambles(self): - return [(p[1], p[0]) for p in self.values() if p[0].generate_preambel] +from pytools import Record + +# Base class for all cache items. 
+class UFL2LoopyCacheItem(Record): + __slots__ = ["content", "returnValue"] + + def __init__(self, content): + self.content = content + self.returnValue = content + +class NoReturnCacheItem(UFL2LoopyCacheItem): + def __init__(self, content): + UFL2LoopyCacheItem.__init__(self, content) + self.returnValue = None + +class LoopyInameCacheItem(UFL2LoopyCacheItem): + pass + +class SymbolCacheItem(UFL2LoopyCacheItem): + pass + +class PreambleCacheItem(NoReturnCacheItem): + counter = 0 + def __init__(self, content): + NoReturnCacheItem.__init__(self, content) + self.content = (PreambleCacheItem.counter, content) + PreambleCacheItem.counter = PreambleCacheItem.counter + 1 + +class TemporaryVariableCacheItem(UFL2LoopyCacheItem): + import numpy + def __init__(self, content, dtype=numpy.float64): + UFL2LoopyCacheItem.__init__(self, content) + from loopy import TemporaryVariable + self.content = TemporaryVariable(content, dtype) + self.returnValue = content + +class InstructionCacheItem(NoReturnCacheItem): + def __init__(self, content): + NoReturnCacheItem.__init__(self, content) + +class CInstructionCacheItem(InstructionCacheItem): + def __init__(self, content, inames=[], assignees=[]): + InstructionCacheItem.__init__(self, content) + from loopy import CInstruction + self.content = CInstruction(inames, content, assignees=assignees) + +class ExpressionInstructionCacheItem(InstructionCacheItem): + pass + +class LoopDomainCacheItem(UFL2LoopyCacheItem): + def __init__(self, content, upperbound=None): + if upperbound: + self.content = "{{ [{0}] : 0<={0}<{1} }}".format(content, upperbound) + else: + self.content = "{{ [{0}] : 0<={0}<{0}_n }}".format(content) + self.returnValue = content -# have one such cache on the module level. It is easier than handing around an instance of it. -_cache = UFL2LoopyDataCache() +# have one cache on the module level. It is easier than handing around an instance of it.
+_cache = {} def freeze(data): @@ -67,29 +97,43 @@ def freeze(data): # we don't know how to handle this object, so we give up raise TypeError('Cannot freeze non-hashable object {} of type {}'.format(data, type(data))) +class _NoCachingCounter(object): + counter = 0 + def get(self): + _NoCachingCounter.counter = _NoCachingCounter.counter + 1 + return _NoCachingCounter.counter + + +def no_caching(*a): + return _NoCachingCounter().get() + class _RegisteredFunction(object): """ The data structure for a function that accesses UFL2LoopyDataCache """ def __init__(self, func, cache_key_generator=lambda *a : freeze(a), - generate_preamble=False, - return_content=False + cache_item_type=UFL2LoopyCacheItem, + **kwargs ): self.func = func self.cache_key_generator = cache_key_generator - self.generate_preamble = generate_preamble - self.return_content = return_content + self.cache_item_type = cache_item_type + self.kwargs = kwargs + + assert issubclass(cache_item_type, UFL2LoopyCacheItem) def __call__(self, *args): + # Get the cache key from the given arguments cache_key = (self, self.cache_key_generator(*args)) + # check whether we have a cache hit if cache_key in _cache: - if self.return_content: - return _cache[cache_key][0] + # and return the result depending on the cache item type + return _cache[cache_key].returnValue else: - content = self.func(*args) - _cache.register(cache_key, content) - if self.return_content: - return content + # evaluate the original function and wrap it in a cache item + content = self.cache_item_type(self.func(*args), **self.kwargs) + _cache[cache_key] = content + return content.returnValue def _dune_decorator_factory(**factory_kwargs): @@ -109,14 +153,10 @@ def _dune_decorator_factory(**factory_kwargs): cache_key_generator : function A function that maps the arguments to the function to an immutable cache key. Defaults to generate_cache_tuple. - generate_preambel : bool - Whether a code snippet should be generated.
- This is usually not the case, when you generate something - like symbol names. Those are inserted into other snippets, - but not on their own right. - return_content : bool - Whether the content of the cache should be returned on subsequent function calls. - If the cache is only used for requirements tracking, this is not needed. + cache_item_type : subclass of UFL2LoopyCacheItem + The type to wrap the contents in when storing in the cache. + + Any excess keyword arguments will be forwarded to the CacheItem! """ def _dec(*args, **kwargs): # Modify the kwargs according to the factorys kwargs @@ -130,9 +170,23 @@ def _dune_decorator_factory(**factory_kwargs): return _dec +def cache_preambles(): + return [v.content for v in _cache.values() if isinstance(v, PreambleCacheItem)] + +def cache_instructions(): + return [v.content for v in _cache.values() if isinstance(v, InstructionCacheItem)] + +def cache_temporaries(): + return {v.returnValue: v.content for v in _cache.values() if isinstance(v, TemporaryVariableCacheItem)} -# A decorator for a dune preambel -dune_preambel = _dune_decorator_factory(generate_preamble=True) +def cache_loop_domains(): + return [v.content for v in _cache.values() if isinstance(v, LoopDomainCacheItem)] -# A decorator for a dune symbol name -dune_symbol = _dune_decorator_factory(return_content=True) +# Define some decorators that will be useful!
+loop_domain = _dune_decorator_factory(cache_item_type=LoopDomainCacheItem) +dune_preamble = _dune_decorator_factory(cache_item_type=PreambleCacheItem) +loopy_iname = _dune_decorator_factory(cache_item_type=LoopyInameCacheItem) +dune_symbol = _dune_decorator_factory(cache_item_type=SymbolCacheItem) +temporary_variable = _dune_decorator_factory(cache_item_type=TemporaryVariableCacheItem) +loopy_c_instruction = _dune_decorator_factory(cache_item_type=CInstructionCacheItem) +loopy_expr_instruction = _dune_decorator_factory(cache_item_type=ExpressionInstructionCacheItem) diff --git a/python/dune/perftool/target.py b/python/dune/perftool/target.py index b783f9c9c53a1342a7a942baebb8d4aa25bbb71b..ce420b128efaf8e308a1278ead45f9782ec9935a 100644 --- a/python/dune/perftool/target.py +++ b/python/dune/perftool/target.py @@ -4,28 +4,22 @@ import six from loopy.target import TargetBase from loopy.target.c.codegen.expression import LoopyCCodeMapper +class AllToDouble(dict): + """ This imitates a dict that maps everything to double and logs the requested keys """ + def __getitem__(self, key): + self.__setitem__(key, numpy.float64) + return numpy.float64 + + _registry = {'float32': 'float', - 'int32' : 'int'} + 'int32' : 'int', + 'float64' : 'double'} class MyMapper(LoopyCCodeMapper): var_subst_map = {} -def dune_function_manglers(): - # OpenCL example impls: target/opencl/__init__.py:108 - return [] - -def dune_symbol_manglers(): - # OpenCL example impls: target/opencl/__init__.py:137 - return [] - class DuneTarget(TargetBase): - def function_manglers(self): - return super(DuneTarget, self).function_manglers() + dune_function_manglers() - - def symbol_manglers(self): - return super(DuneTarget, self).symbol_manglers() + dune_symbol_manglers() - def get_or_register_dtype(self, names, dtype=None): return dtype @@ -44,7 +38,6 @@ class DuneTarget(TargetBase): return MyMapper(codegen_state) def generate_code(self, kernel, codegen_state, impl_arg_info): - print "Somebody wants me to 
generate code!" from cgen import Block body = Block() diff --git a/python/dune/perftool/transformer.py b/python/dune/perftool/transformer.py index 5053bec202131cabbcc0d196566b8bcd7e110f53..ec393f18d628d07ed82e5b637fe877c7e3600864 100644 --- a/python/dune/perftool/transformer.py +++ b/python/dune/perftool/transformer.py @@ -3,66 +3,43 @@ from ufl.algorithms import MultiFunction from pymbolic.primitives import Variable, Subscript, Sum, Product from loopy.kernel.data import ExpressionInstruction, CInstruction +import loopy +import numpy # For now, import all implemented preambles, restrict later from dune.perftool.pdelab_preambles import * - +from dune.perftool.restriction import Restriction class UFLVisitor(MultiFunction): - def __init__(self): - MultiFunction.__init__(self) - # Have a cache for the loop domains - self.loop_domains = {} - self.loop_instructions = [] - self.temporary_variables = {} - + def __call__(self, o): # Make this multifunction stateful: Store information similar to uflacs' modified terminal self.grad = False self.reference_grad = False self.index = None + self.restriction = Restriction.NONE - # We do always have a quadrature loop: - # We need an additional loop domain - self.loop_domains.setdefault("quadrature", "{ [q] : 0<=q<qn }") - - # We need instructions to evaluate weight and position - # Add a temporary variable for the integration factor - self.temporary_variables["fac"] = loopy.TemporaryVariable("fac", dtype=numpy.float32) - - # Add the CInstruction's needed to correctly set up the quadrature. - self.loop_instructions.append( - CInstruction( - "q", - code="fac = r->weight();", - assignees="fac" - ) - ) + # Have a shortcut for the base class call operator + self.call = lambda *a : MultiFunction.__call__(self, *a) + # Have a list of argument indices that this term depends on. self.argument_indices = [] - def add_accumulation(self, loopyexpr): - # TODO how to determine the accumulation loop details here?
+ loopyexpr = self.call(o) + # TODO no jacobian support yet! assert len(self.argument_indices) == 1 assignee = Subscript(Variable("r"), Variable(self.argument_indices[0])) - loopyexpr = Sum((assignee, Product((loopyexpr, Variable("fac"))))) - - self.loop_instructions.append( - ExpressionInstruction( - assignee=assignee, - expression=loopyexpr - ) - ) + loopyexpr = Sum((assignee, Product((Variable(quadrature_factor()), loopyexpr)))) - # Reset the argument indices. - self.argument_indices = [] + instruction = ExpressionInstruction(assignee=assignee, expression=loopyexpr) + accumulation_instruction(instruction) def grad(self, o): assert(len(o.operands()) == 1) self.grad = True - ret = self(o.operands()[0]) + ret = self.call(o.operands()[0]) self.grad = False return ret @@ -70,8 +47,7 @@ class UFLVisitor(MultiFunction): assert(len(o.operands()) == 0) # We do need an argument loop domain for this argument - argindex = argument_index(o) - self.loop_domains.setdefault(o, "{{[{0}] : 0 <= {0} < {0}n}}".format(argindex)) + argindex = argument_iname(o) # Generate the variable name for the test function name = test_function_name(o, self.grad) @@ -89,16 +65,8 @@ class UFLVisitor(MultiFunction): index = index_name(self.index) return Subscript(Variable(name), Variable(index)) - def index_sum(self, o): - # Add a loop domain based on the index - index = index_name(o.operands()[1]) - self.loop_domains.setdefault(index, "{{[{0}] : 0 <= {0} < {1}}}".format(index, o.dimension())) - # Get the expression - loopyexpr = self(o.operands()[0]) - self.add_accumulation(loopyexpr) - def product(self, o): - return Product(tuple(self(op) for op in o.operands())) + return Product(tuple(self.call(op) for op in o.operands())) def multi_index(self, o): # I don't think we should ever find a multi index, because its father should take care of it. @@ -107,6 +75,30 @@ class UFLVisitor(MultiFunction): def indexed(self, o): # TODO in the long run, this is a stack of indices. 
self.index = o.operands()[1] - ret = self(o.operands()[0]) + ret = self.call(o.operands()[0]) self.index = None return ret + + +class TopSumSeparation(MultiFunction): + """ A multifunction that separates the toplevel sum """ + def __init__(self): + MultiFunction.__init__(self) + self.visitor = UFLVisitor() + + def expr(self, o): + print "Call TopSumSeparation.expr with {}".format(o) + self.visitor(o) + + def sum(self, o): + for op in o.operands(): + self(op) + + def index_sum(self, o): + print "Call TopSumSeparation.index_sum with {}".format(o) + dimension_iname(o.operands()[1]) + self(o.operands()[0]) + + +def transform_expression(expr): + return TopSumSeparation()(expr)