diff --git a/python/dune/perftool/compile.py b/python/dune/perftool/compile.py index dd3dbb14ce152d48e206e1de4eaf7736b5e3a3e9..83bfecc46030f1c528f9e51644fd8563dc98c396 100644 --- a/python/dune/perftool/compile.py +++ b/python/dune/perftool/compile.py @@ -19,33 +19,32 @@ def read_ufl(uflfile): def generate_driver(formdata, filename): # The driver module uses a global variable for ease of use - from dune.perftool.driver import set_formdata + from dune.perftool.pdelab.driver import set_formdata set_formdata(formdata) # The vtkoutput is the generating method that triggers all others. # Alternatively, one could use the `solve` method. - from dune.perftool.driver import vtkoutput + from dune.perftool.pdelab.driver import vtkoutput vtkoutput() - from dune.perftool.generation import cache_preambles, cache_includes - includes = cache_includes() + from dune.perftool.generation import retrieve_cache_items # Get all preambles for the driver and sort them. - driver_content = [i[1] for i in sorted(cache_preambles(), key=lambda x : x[0])] + driver_content = [i[1] for i in sorted(retrieve_cache_items(item_name="driver_preamble"), key=lambda x : x[0])] - # And flatten out those, that conained nested lists + # And flatten out those, that contained nested lists def flatjoin(l): if isinstance(l, str): return "\n" + l return "".join(flatjoin(i) for i in l) - from cgen import LiteralBlock, FunctionDeclaration + from cgen import LiteralBlock driver = LiteralBlock(flatjoin(driver_content)) # Write the file. f = open(filename, 'w') - f.write("\n".join(cache_includes())) - f.write("\nvoid driver(int argc, char** argv)\n") + f.write("\n".join(retrieve_cache_items(item_name="driver_include"))) + f.write("\n\nvoid driver(int argc, char** argv)\n") f.write("\n".join(driver.generate())) # Reset the caching data structure @@ -53,18 +52,13 @@ def generate_driver(formdata, filename): delete_cache() -def generate_localoperator(): - pass - - def compile_form(): from dune.perftool.options import get_option ufldata = read_ufl(get_option("uflfile")) - generate_localoperator() - if get_option("driver_file"): generate_driver(ufldata, get_option("driver_file")) if get_option("operator_file"): + from dune.perftool.pdelab.localoperator import generate_localoperator generate_localoperator(ufldata, get_option("operator_file")) diff --git a/python/dune/perftool/generation.py b/python/dune/perftool/generation.py index 3eb7b5b368f541ac4e14d6ad8fce2eea094200bd..9db423254cfb0a09bee0bc80d298f44e52bfbfd6 100644 --- a/python/dune/perftool/generation.py +++ b/python/dune/perftool/generation.py @@ -2,84 +2,13 @@ a complex requirement structure. This includes: * preambles triggering the creation of other preambles * a caching mechanism to avoid duplicated preambles where harmful - -TODO rename to generation_cache. I will use it for much more than preambles. """ -from pytools import Record - -# Base class for all cache items. -class UFL2LoopyCacheItem(Record): - __slots__ = ["content", "returnValue"] - - def __init__(self, content): - self.content = content - self.returnValue = content - -class NoReturnCacheItem(UFL2LoopyCacheItem): - def __init__(self, content): - UFL2LoopyCacheItem.__init__(self, content) - self.returnValue = None - -class LoopyInameCacheItem(UFL2LoopyCacheItem): - pass - -class SymbolCacheItem(UFL2LoopyCacheItem): - pass - -class PreambleCacheItem(NoReturnCacheItem): - counter = 0 - def __init__(self, content): - NoReturnCacheItem.__init__(self, content) - self.content = (PreambleCacheItem.counter, content) - PreambleCacheItem.counter = PreambleCacheItem.counter + 1 - -class TemporaryVariableCacheItem(UFL2LoopyCacheItem): - import numpy - def __init__(self, content, dtype=numpy.float64): - UFL2LoopyCacheItem.__init__(self, content) - from loopy import TemporaryVariable - self.content = TemporaryVariable(content, dtype) - self.returnVariable = content - -class InstructionCacheItem(NoReturnCacheItem): - def __init__(self, content): - NoReturnCacheItem.__init__(self, content) - -class CInstructionCacheItem(InstructionCacheItem): - def __init__(self, content, inames=[], assignees=[]): - InstructionCacheItem.__init__(self, content) - from loopy import CInstruction - self.content = CInstruction(inames, content, assignees=assignees) - -class ExpressionInstructionCacheItem(InstructionCacheItem): - pass - -class LoopDomainCacheItem(UFL2LoopyCacheItem): - def __init__(self, content, upperbound=None): - if upperbound: - self.content = "{{ [{0}] : 0<={0}<{1} }}".format(content, upperbound) - else: - self.content = "{{ [{0}] : 0<={0}<{0}_n }}".format(content) - self.returnValue = content - -class IncludeCacheItem(NoReturnCacheItem): - def __init__(self, content): - NoReturnCacheItem.__init__(self, content) - from cgen import Include - self.content = "#include<{}>".format(content) - - # have one cache the module level. It is easier than handing around an instance of it. _cache = {} -# Have an easy button to delete the cache from other modules -def delete_cache(): - global _cache - _cache = {} - -def freeze(data): +def _freeze(data): """ A function that deterministically generates an immutable item from the given data. Used for cache key generation. """ @@ -96,14 +25,14 @@ def freeze(data): # Check if the given data is already hashable if isinstance(data, Hashable): if isinstance(data, Iterable): - return type(data)(freeze(i) for i in data) + return type(data)(_freeze(i) for i in data) return data # convert standard mutable containers if isinstance(data, MutableMapping): - return tuple((freeze(k), freeze(v)) for k,v in data.iteritems()) + return tuple((_freeze(k), _freeze(v)) for k,v in data.iteritems()) if isinstance(data, Iterable): - return tuple(freeze(i) for i in data) + return tuple(_freeze(i) for i in data) # we don't know how to handle this object, so we give up raise TypeError('Cannot freeze non-hashable object {} of type {}'.format(data, type(data))) @@ -119,39 +48,76 @@ def no_caching(*a): return _NoCachingCounter.get() +class _CacheItemMeta(type): + """ A meta class for cache items. Keyword arguments are forwarded + th decorator factory below (check the documentation there) + """ + def __new__(cls, name, bases, d, counted=False, on_store=lambda x: x): + rettype = type(name, bases, d) + if counted: + original_on_store = on_store + setattr(rettype, '_count', 0) + def add_count(x): + rettype._count = rettype._count + 1 + return (rettype._count, original_on_store(x)) + on_store = add_count + + def _init(s, x): + s.content = on_store(x) + + setattr(rettype, '__init__', _init) + + return rettype + + +from pytools import memoize as _memo +@_memo(use_kwargs=True) +def _construct_cache_item_type(name, **kwargs): + """ Wrap the generation of cache item types from the meta class. + At the same time, memoization assures that types are the same for + multiple cache items + """ + return _CacheItemMeta.__new__(_CacheItemMeta, name, (), {}, **kwargs) + + class _RegisteredFunction(object): """ The data structure for a function that accesses UFL2LoopyDataCache """ + import sys def __init__(self, func, - cache_key_generator=lambda *a : freeze(a), - cache_item_type=UFL2LoopyCacheItem, + cache_key_generator=lambda *a : a, + item_name="CacheItemType", **kwargs ): self.func = func self.cache_key_generator = cache_key_generator - self.cache_item_type = cache_item_type - self.kwargs = kwargs + self.itemtype = _construct_cache_item_type(item_name, **kwargs) + + if item_name == "CacheItemType": + from IPython import embed; embed() - assert issubclass(cache_item_type, UFL2LoopyCacheItem) + # If the specified cache does not exist, create it now. + if self.itemtype not in _cache: + _cache[self.itemtype] = {} def __call__(self, *args): # Get the cache key from the given arguments - cache_key = (self, self.cache_key_generator(*args)) + cache_key = (self, _freeze(self.cache_key_generator(*args))) # check whether we have a cache hit - if cache_key in _cache: + if cache_key in _cache[self.itemtype]: # and return the result depending on the cache item type - return _cache[cache_key].returnValue + return _cache[self.itemtype][cache_key].content else: # evaluate the original function and wrap it in a cache item - content = self.cache_item_type(self.func(*args), **self.kwargs) - _cache[cache_key] = content - return content.returnValue + citem = self.itemtype(self.func(*args)) + _cache[self.itemtype][cache_key] = citem + return citem.content -def _dune_decorator_factory(**factory_kwargs): +def generator_factory(**factory_kwargs): """ A function decorator factory Generates a function decorator, that turns a given function into - a function that stores its result in the UFL2LoopyDataCache. + a function that stores its result in a data cache. The generated decorator may be used with or without keyword arguments. @@ -162,13 +128,24 @@ def _dune_decorator_factory(**factory_kwargs): Possible keyword arguments: --------------------------- cache_key_generator : function - A function that maps the arguments to the function to an immutable cache key. - Defaults to generate_cache_tuple. - cache_item_type : type of UFL2LoopyCacheItem - The type of to wrap the contents in when storing in the cache. - - Any excess keyword arguments will be forwarded to the CacheItem! + A function that maps the arguments of the function to a subset that + determines whether to use a caches result. The return type is arbitrary, + as it will be turned immutable by the cache machine afterwards. + Defaults to identity. + item_name : str + A name to give to the cache item type name. Necessary if you want use + isinstance on the cache with good results... + on_store : function + A function to apply to the return value of the decorated function + before storing in the cache. May be used to apply wrappers. + counted : bool + Will add a counted tag to the cache item. The type of the stored + items automatically turns to a tuple with the counttag being the first entry. + no_deco : bool + Instead of a decorator, return a function that uses identity as a body. """ + no_deco = factory_kwargs.pop("no_deco", False) + def _dec(*args, **kwargs): # Modify the kwargs according to the factorys kwargs for k in factory_kwargs: @@ -179,32 +156,40 @@ def _dune_decorator_factory(**factory_kwargs): else: return lambda f: _RegisteredFunction(f, **kwargs) - return _dec + if no_deco: + return _dec(lambda x: x) + else: + return _dec -def cache_preambles(): - return [v.content for v in _cache.values() if isinstance(v, PreambleCacheItem)] -def cache_instructions(): - return [v.content for v in _cache.values() if isinstance(v, InstructionCacheItem)] +def retrieve_cache_items(item_name=None, generator_function=None, decorator=None): + """ Retrieve items from the cache. These can be selected through various modes: + 1. Setting item_name to the name specified in the decorator. This will return *ALL* + items with that name, especially if multiple items share the same name. + 2. Passing a generator_function will return all items in the cache that are of + the same type as the items generated by the given function. + 3. Passing a decorator will return all items in the cache that are of the same + type as the items generated by the given decorator + """ + # Only one mode can be active + assert sum(bool(t) for t in [item_name, generator_function, decorator]) == 1 -def cache_temporaries(): - return {v.returnValue: v.content for v in _cache.values() if isinstance(v, TemporaryVariableCacheItem)} + if decorator: + item_name = decorator.factory_kwargs.get("item_name", ) -def cache_loop_domains(): - return [v.content for v in _cache.values() if isinstance(v, LoopDomainCacheItem)] + if item_name: + for itemtype in _cache: + if itemtype.__name__ == item_name: + for item in _cache[itemtype].values(): + yield item.content -def cache_includes(): - return [v.content for v in _cache.values() if isinstance(v, IncludeCacheItem)] + if generator_function: + for item in _cache[generator_function.itemtype].values(): + yield item.content -# Define some decorators that will be useful! -loop_domain = _dune_decorator_factory(cache_item_type=LoopDomainCacheItem) -dune_preamble = _dune_decorator_factory(cache_item_type=PreambleCacheItem) -loopy_iname = _dune_decorator_factory(cache_item_type=LoopyInameCacheItem) -dune_symbol = _dune_decorator_factory(cache_item_type=SymbolCacheItem) -temporary_variable = _dune_decorator_factory(cache_item_type=TemporaryVariableCacheItem) -loopy_c_instruction = _dune_decorator_factory(cache_item_type=CInstructionCacheItem) -loopy_expr_instruction = _dune_decorator_factory(cache_item_type=ExpressionInstructionCacheItem) -@_dune_decorator_factory(cache_item_type=IncludeCacheItem) -def dune_include(incfile): - return incfile \ No newline at end of file +def delete_cache(exclude=[]): + """ delete the cache - maybe apply some restrictions later """ + for k in _cache: + if k.__name__ not in exclude: + _cache[k] = {} diff --git a/python/dune/perftool/options.py b/python/dune/perftool/options.py index efb2cda7cf79af57eb3a82b4c3c7aef0466183de..956213b756c54c3c9458d93ad507f5c11bb1ba13 100644 --- a/python/dune/perftool/options.py +++ b/python/dune/perftool/options.py @@ -11,9 +11,9 @@ def get_form_compiler_arguments(): parser.add_argument("--driver-file", type=str, help="The filename for the generated driver header") parser.add_argument("--operator-file", type=str, help="The filename for the generated local operator header") parser.add_argument('--version', action='version', version='%(prog)s 0.1') + parser.add_argument("--numerical-jacobian", action="store_true", help="use numerical jacobians (only makes sense, if uflpdelab for some reason fails to generate analytic jacobians)") # These are the options that I deemed necessary in uflpdelab -# parser.add_argument("--numerical-jacobian", action="store_true", help="use numerical jacobians (only makes sense, if uflpdelab for some reason fails to generate analytic jacobians)") # parser.add_argument("--param-class-file", type=str, help="The filename for the generated parameter class header") # parser.add_argument("--export-trafo-graphs", action="store_true", help="export expression graphs after the application of each single transformation") # parser.add_argument("--parameter-tree", action="store_true", help="have the generated local operate take a Dune::ParameterTree as constructor argument") diff --git a/python/dune/perftool/pdelab/__init__.py b/python/dune/perftool/pdelab/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..fcfea64b783e04420f817661e0cadd7204099efb --- /dev/null +++ b/python/dune/perftool/pdelab/__init__.py @@ -0,0 +1,19 @@ +""" The pdelab specific parts of the code generation process """ + +# Define the generators that are used throughout all pdelab specific code generations. +from dune.perftool.generation import generator_factory +dune_symbol = generator_factory(item_name="dune_symbol") +dune_preamble = generator_factory(item_name="dune_preamble", counted=True) +dune_include = generator_factory(on_store=lambda i: "#include<{}>".format(i), item_name="dune_include", no_deco=True) + +from dune.perftool.transformer import quadrature_iname +from loopy import CInstruction + +def quadrature_preamble(assignees=[]): + return generator_factory(counted=True, item_name="quadrature_preamble", on_store=lambda code: CInstruction(quadrature_iname(), code, assignees=assignees)) + + +# Now define some commonly used generators that do not fall into a specific category +@dune_symbol +def name_index(index): + return str(index._indices[0]) diff --git a/python/dune/perftool/pdelab/argument.py b/python/dune/perftool/pdelab/argument.py new file mode 100644 index 0000000000000000000000000000000000000000..172f7681771d656f37ec86934c4fdf2482babf1d --- /dev/null +++ b/python/dune/perftool/pdelab/argument.py @@ -0,0 +1,28 @@ +""" Generator functions related to trial and test functions and the accumulation loop""" + +from dune.perftool.pdelab import dune_symbol, dune_preamble + + +@dune_symbol +def name_testfunction(arg, index, grad, restriction): + if len(arg.ufl_element().sub_elements()) > 0: + pass + return "{}a{}".format("grad_" if grad else "", arg.number()) + +@dune_symbol +def name_trialfunction(arg, index, grad, restriction): + return "{}c{}".format("grad_" if grad else "", arg.count()) + +@dune_symbol +def name_testfunctionspace(*a): + #TODO + return "lfsv" + +@dune_symbol +def name_trialfunctionspace(*a): + #TODO + return "lfsu" + +@dune_symbol +def name_residual(): + return "r" diff --git a/python/dune/perftool/driver.py b/python/dune/perftool/pdelab/driver.py similarity index 99% rename from python/dune/perftool/driver.py rename to python/dune/perftool/pdelab/driver.py index 539c4fe435533922cbbe217917255166c3ecb464..92f3549454f1dede277d7cc81cb28f954551df5b 100644 --- a/python/dune/perftool/driver.py +++ b/python/dune/perftool/pdelab/driver.py @@ -7,7 +7,7 @@ to switch these to cgen expression. OTOH, there is not much to be gained there. """ -from dune.perftool.generation import dune_include, dune_preamble, dune_symbol +from dune.perftool.pdelab import dune_symbol, dune_preamble, dune_include # Have a global variable with the entire form data. This allows functions that depend # deterministically on the entire data set to directly access it instead of passing it diff --git a/python/dune/perftool/pdelab/geometry.py b/python/dune/perftool/pdelab/geometry.py new file mode 100644 index 0000000000000000000000000000000000000000..bc2bc74c37d6dec90897efba3d8e42e63ea93ce1 --- /dev/null +++ b/python/dune/perftool/pdelab/geometry.py @@ -0,0 +1,6 @@ +from dune.perftool.pdelab import dune_symbol + +@dune_symbol +def name_dimension(): + #TODO preamble define_dimension + return "dim" diff --git a/python/dune/perftool/pdelab/localoperator.py b/python/dune/perftool/pdelab/localoperator.py new file mode 100644 index 0000000000000000000000000000000000000000..45d12cc60dcb27f688ea2e490bab42523d022300 --- /dev/null +++ b/python/dune/perftool/pdelab/localoperator.py @@ -0,0 +1,147 @@ +from pytools import memoize +from dune.perftool.options import get_option +from dune.perftool.generation import generator_factory + +# Define the generators used in-here +operator_include = generator_factory(item_name="operator_include", on_store=lambda i: "#include<{}>".format(i), no_deco=True) +base_class = generator_factory(item_name="operator_base_classes", counted=True, no_deco=True) +initializer_list = generator_factory(item_name="operator_initializerlist", counted=True) + +@memoize +def measure_specific_details(measure): + # The return dictionary that this memoized method will grant direct access to. + ret = {} + + if measure == "cell": + base_class('Dune::PDELab::FullVolumePattern') + if get_option("numerical_jacobian"): + base_class("Dune::PDELab::NumericalJacobianVolume<{}>".format()) + + ret["residual_signature"] = ['template<typename EG, typename LFSV0, typename X, typename LFSV1, typename R>', + 'void alpha_volume(const EG& eg, const LFSV0& lfsv0, const X& x, const LFSV1& lfsv1, R& r) const'] + ret["jacobian_signature"] = ['template<typename EG, typename LFSV0, typename X, typename LFSV1, typename J>', + 'void jacobian_volume(const EG& eg, const LFSV0& lfsv0, const X& x, const LFSV1& lfsv1, J& jac) const'] + + if measure == "exterior_facet": + base_class('Dune::PDELab::FullBoundaryPattern') + + ret["residual_signature"] = ['template<typename IG, typename LFSV0, typename X, typename LFSV1, typename R>', + 'void alpha_boundary(const IG& ig, const LFSV0& lfsv0, const X& x, const LFSV1& lfsv1, R& r) const'] + ret["jacobian_signature"] = ['template<typename IG, typename LFSV0, typename X, typename LFSV1, typename J>', + 'void jacobian_boundary(const IG& ig, const LFSV0& lfsv0, const X& x, const LFSV1& lfsv1, J& jac) const'] + + if measure == "interior_facet": + base_class('Dune::PDELab::FullSkeletonPattern') + + ret["residual_signature"] = ['template<typename IG, typename LFSV0_S, typename X, typename LFSV1_S, typename LFSV0_N, typename R, typename LFSV1_N>', + 'void alpha_skeleton(const IG& ig, const LFSV0_S& lfsv0_s, const X& x_s, const LFSV1_S& lfsv1_s, const LFSV0_N& lfsv0_n, const X& x_n, const LFSV1_N& lfsv1_n, R& r_s, R& r_n) const'] + ret["jacobian_signature"] = ['template<typename IG, typename LFSV0_S, typename X, typename LFSV1_S, typename LFSV0_N, typename LFSV1_N, typename Jac>', + 'void jacobian_skeleton(const IG& ig, const LFSV0_S& lfsv0_s, const X& x_s, const LFSV1_S& lfsv1_s, const LFSV0_N& lfsv0_n, const X& x_n, const LFSV1_N& lfsv1_n, Jac& jac_ss, Jac& jac_sn, Jac& jac_ns, Jac& jac_nn) const'] + + return ret + + +def generate_term(integrand=None, measure=None): + assert integrand and measure + + # Delete all non-include parts of the cache. + # TODO: add things such as base classes as cache items. + from dune.perftool.generation import delete_cache + delete_cache(exclude="dune_include") + + # Get the measure specifics + specifics = measure_specific_details(measure) + + # Transform the expression. All relevant results are then in the cache + from dune.perftool.transformer import transform_expression + transform_expression(integrand) + + # Extract the information, which is needed to create a loopy kernel. + # First extracting it, might be useful to alter it before kernel generation. + from dune.perftool.generation import retrieve_cache_items + from dune.perftool.target import DuneTarget + domains = [i for i in retrieve_cache_items(item_name="loopdomain")] + instructions = [i for i in retrieve_cache_items(item_name="c_instruction")] \ + + [i for i in retrieve_cache_items(item_name="expr_instruction")] \ + + [i[1] for i in retrieve_cache_items(item_name="quadrature_preamble")] + temporaries = {i.name:i for i in retrieve_cache_items(item_name="temporary")} + preambles = [i[1] for i in retrieve_cache_items(item_name="dune_preamble")] + + print "Printing the information that we found:\n\nDomains:" + for d in domains: + print d + print "\nInstructions:" + for i in instructions: + print i + print "\nTemporaries:" + for t, v in temporaries.items(): + print "{}: {}".format(t, v) + print "\nPreambles:" + for p in preambles: + print p + +# import loopy +# import numpy +# from loopy import make_kernel, preprocess_kernel +# from dune.perftool.target import DuneTarget +# k = make_kernel(domains, instructions, +# [loopy.GlobalArg("grad_a0", numpy.float64, shape=("argi_n", "dim")), +# loopy.GlobalArg("grad_c0", numpy.float64, shape=("dim",)), +# loopy.ValueArg("dim", numpy.int32), +# loopy.ValueArg("q_n", numpy.int32), +# loopy.ValueArg("argi_n", numpy.int32)], +# temporary_variables=temporaries, target=DuneTarget()) +# k = preprocess_kernel(k) + # Create the kernel + from loopy import make_kernel, preprocess_kernel, add_dtypes + kernel = make_kernel(domains, instructions, temporary_variables=temporaries, preambles=preambles, target=DuneTarget()) + import numpy + kernel = add_dtypes(kernel, dict(grad_a0=numpy.float64, grad_c0=numpy.float64)) + kernel = preprocess_kernel(kernel) + + # Return the actual code (might instead return kernels...) + from loopy import generate_code + return str(generate_code(kernel)[0]) + + +def generate_localoperator(ufldata, operatorfile): + form = ufldata.preprocessed_form + + operator_methods = [] + + # For the moment, I do assume that there is but one integral of each type. This might differ + # if you use different quadrature orders for different terms. + assert len(form.integrals()) == len(set(i.integral_type() for i in form.integrals())) + + # Reset the generation cache + from dune.perftool.generation import delete_cache + delete_cache() + + # Generate the necessary residual methods + for integral in form.integrals(): + body = generate_term(integrand=integral.integrand(), measure=integral.integral_type()) + signature = measure_specific_details(integral.integral_type())["residual_signature"] + operator_methods.append((signature, body)) + + # Generate the necessary jacobian methods + from dune.perftool.options import get_option + if get_option("numerical_jacobian"): + operator_include("dune/pdelab/localoperator/defaultimp.hh") + else: + pass + + # TODO: JacobianApply for matrix-free computations. + + # Manage includes and base classes that we always need + operator_include('dune/pdelab/gridfunctionspace/gridfunctionspaceutilities.hh') + operator_include('dune/pdelab/localoperator/idefault.hh') + operator_include('dune/pdelab/localoperator/flags.hh') + operator_include('dune/pdelab/localoperator/pattern.hh') + operator_include('dune/common/parametertree.hh') + operator_include('dune/geometry/quadraturerules.hh') + + base_class('Dune::PDELab::LocalOperatorDefaultFlags') + + from IPython import embed; embed() + + print operator_methods[0] diff --git a/python/dune/perftool/pdelab/quadrature.py b/python/dune/perftool/pdelab/quadrature.py new file mode 100644 index 0000000000000000000000000000000000000000..06b167541b98b9e9baa089c2fc133febcbed0fae --- /dev/null +++ b/python/dune/perftool/pdelab/quadrature.py @@ -0,0 +1,18 @@ +from dune.perftool.generation import generator_factory +from dune.perftool.transformer import quadrature_iname, loopy_temporary_variable +from dune.perftool.pdelab import dune_symbol, quadrature_preamble, dune_preamble + +@dune_symbol +def quadrature_rule(): + return "rule" + +@quadrature_preamble(assignees="fac") +def define_quadrature_factor(fac): + rule = quadrature_rule() + return "auto {} = {}->weight();".format(fac, rule) + +@dune_symbol +def name_factor(): + loopy_temporary_variable("fac") + define_quadrature_factor("fac") + return "fac" diff --git a/python/dune/perftool/pdelab_names.py b/python/dune/perftool/pdelab_names.py deleted file mode 100644 index ec0c7aea3f5d4cc56fa03bfa18115374b9d19b3c..0000000000000000000000000000000000000000 --- a/python/dune/perftool/pdelab_names.py +++ /dev/null @@ -1,4 +0,0 @@ -_namedict = {"quadrature_factor": "fac"} - -def name(key): - return _namedict[key] \ No newline at end of file diff --git a/python/dune/perftool/pdelab_preambles.py b/python/dune/perftool/pdelab_preambles.py deleted file mode 100644 index c3e3c7252095a9a3d7e1441ca1be58b770b6e8a3..0000000000000000000000000000000000000000 --- a/python/dune/perftool/pdelab_preambles.py +++ /dev/null @@ -1,71 +0,0 @@ -from dune.perftool.generation import dune_preamble, dune_symbol, loop_domain, loopy_iname, temporary_variable, loopy_c_instruction, loopy_expr_instruction -from dune.perftool.pdelab_names import name - -@dune_symbol -def test_function_name(arg, grad): - return "{}a{}".format("grad_" if grad else "", arg.number()) - -@dune_symbol -def trial_function_name(arg, grad): - return "{}c{}".format("grad_" if grad else "", arg.count()) - -@dune_symbol -def index_name(index): - return str(index._indices[0]) - -@loop_domain -def argument_loop_domain(name): - return name - -@loopy_iname -def argument_iname(arg): - name = "arg{}".format(chr(ord("i") + arg.number())) - argument_loop_domain(name) - return name - -@dune_symbol -def dimension(): - return "dim" - -@loop_domain -def quadrature_loop_domain(name): - return name - -@loopy_iname -def quadrature_iname(): - quadrature_loop_domain("q") - return "q" - -from dune.perftool.generation import _dune_decorator_factory, CInstructionCacheItem -quadrature_preamble = _dune_decorator_factory(cache_item_type=CInstructionCacheItem, inames=quadrature_iname()) - -@dune_symbol -def quadrature_rule(): - return "rule" - -@quadrature_preamble(assignees=name("quadrature_factor")) -def define_quadrature_factor(): - rule = quadrature_rule() - return "auto {} = {}->weight();".format(name("quadrature_factor"), rule) - -@temporary_variable -def quadrature_factor(): - define_quadrature_factor() - return name("quadrature_factor") - -@loop_domain(upperbound=dimension()) -def dimension_loop_domain(name): - return name - -@loopy_iname -def dimension_iname(index): - dimension_loop_domain(index_name(index)) - return index_name(index) - -@dune_symbol -def residual_name(): - return "r" - -@loopy_expr_instruction -def accumulation_instruction(expr): - return expr \ No newline at end of file diff --git a/python/dune/perftool/transformer.py b/python/dune/perftool/transformer.py index ec393f18d628d07ed82e5b637fe877c7e3600864..63db1e350bec6cf3656785b87587022ddca40cbd 100644 --- a/python/dune/perftool/transformer.py +++ b/python/dune/perftool/transformer.py @@ -1,17 +1,49 @@ # Generate a loop kernel from that cell integral. This will map the UFL IR # to the loopy IR. from ufl.algorithms import MultiFunction +# Spread the pymbolic import statements to where they are used. from pymbolic.primitives import Variable, Subscript, Sum, Product -from loopy.kernel.data import ExpressionInstruction, CInstruction import loopy import numpy +import ufl -# For now, import all implemented preambles, restrict later -from dune.perftool.pdelab_preambles import * from dune.perftool.restriction import Restriction +# Define the generators that are used here +from dune.perftool.generation import generator_factory +loopy_iname = generator_factory(item_name="inames") +loopy_expr_instruction = generator_factory(item_name="expr_instruction", no_deco=True) +loopy_temporary_variable = generator_factory(item_name="temporary", on_store=lambda n: loopy.TemporaryVariable(n, dtype=numpy.float64), no_deco=True) +loopy_c_instruction = generator_factory(item_name="c_instruction", no_deco=True) + +@generator_factory(item_name="loopdomain") +def loopy_domain(iname, shape): + return "{{ [{0}] : 0<={0}<{1} }}".format(iname, shape) + +@loopy_iname +def dimension_iname(index): + from dune.perftool.pdelab import name_index + from dune.perftool.pdelab.geometry import name_dimension + iname = name_index(index) + dimname = name_dimension() + loopy_domain(iname, dimname) + return iname + +@loopy_iname +def argument_iname(arg): + # TODO extract the {iname}_n thing by a preamble + iname = "arg{}".format(chr(ord("i") + arg.number())) + loopy_domain(iname, iname + "_n") + return iname + +@loopy_iname +def quadrature_iname(): + loopy_domain("q", "q_n") + return "q" + + class UFLVisitor(MultiFunction): - def __call__(self, o): + def __call__(self, o, init_inames): # Make this multifunction stateful: Store information similar to uflacs' modified terminal self.grad = False self.reference_grad = False @@ -21,22 +53,44 @@ class UFLVisitor(MultiFunction): # Have a short cut for the bases class call operator self.call = lambda *a : MultiFunction.__call__(self, *a) - # Have a list of argument indices that this term depends on. - self.argument_indices = [] + # Have a list of arguments that this term depends on. + self.arguments = [] + self.inames = init_inames + # Visit the expression. The return value will be a pymbolic expression. loopyexpr = self.call(o) # TODO no jacobian support yet! - assert len(self.argument_indices) == 1 + assert len(self.arguments) == 1 + + # The expression is completely processes, an accumulation instruction can be issued! + # Have some data structures for the analysis of the given arguments. + accumargs = [] + + # visit the given arguments and extract necessary information + for arg, index, grad, restriction in self.arguments: + from dune.perftool.pdelab.argument import name_testfunctionspace + accumargs.append(name_testfunctionspace(arg, index, restriction)) + accumargs.append(argument_iname(arg)) + + # Explicitly state the dependency on the quadrature iname + self.inames.append(quadrature_iname()) + + # Evaluate the update factor + from dune.perftool.pdelab.quadrature import name_factor + loopyexpr = Product((loopyexpr, Variable(name_factor()))) + # TODO: Some unique name mangling on this temporary variable is necessary! + expr_tv = loopy_temporary_variable("xyz") + loopy_expr_instruction(loopy.ExpressionInstruction(assignee=Variable("xyz"), expression=loopyexpr)) - assignee = Subscript(Variable("r"), Variable(self.argument_indices[0])) - loopyexpr = Sum((assignee, Product((Variable(quadrature_factor()), loopyexpr)))) + from dune.perftool.pdelab.argument import name_residual + residual = name_residual() + + loopy_c_instruction(loopy.CInstruction(self.inames, "{}.accumulate({}, {})".format(residual, ", ".join(accumargs), Variable("xyz")))) - instruction = ExpressionInstruction(assignee=assignee, expression=loopyexpr) - accumulation_instruction(instruction) def grad(self, o): - assert(len(o.operands()) == 1) + assert len(o.operands()) == 1 self.grad = True ret = self.call(o.operands()[0]) @@ -44,26 +98,31 @@ class UFLVisitor(MultiFunction): return ret def argument(self, o): - assert(len(o.operands()) == 0) + assert len(o.operands()) == 0 - # We do need an argument loop domain for this argument - argindex = argument_iname(o) + # Construct a tuple that represents this argument and all its modifications + self.arguments.append((o, self.index, self.grad, self.restriction)) # Generate the variable name for the test function - name = test_function_name(o, self.grad) - index = index_name(self.index) - - # Register that index as argument loop index for later - self.argument_indices.append(argindex) + from dune.perftool.pdelab.argument import name_testfunction + name = name_testfunction(o, self.index, self.grad, self.restriction) + index = argument_iname(o) + self.inames.append(index) - return Subscript(Variable(name), Variable(index)) + return Subscript(Variable(name), (Variable(index),)) +# return Subscript(Variable(name), Variable(index)) def coefficient(self, o): - assert(len(o.operands()) == 0) + assert len(o.operands()) == 0 + # TODO: Implement coefficient functions + assert o.count() == 0 + + # TODO implement non-trialfunction coefficients! + from dune.perftool.pdelab.argument import name_trialfunction + from dune.perftool.pdelab import name_index + name = name_trialfunction(o, self.index, self.grad, self.restriction) - name = trial_function_name(o, self.grad) - index = index_name(self.index) - return Subscript(Variable(name), Variable(index)) + return Variable(name) def product(self, o): return Product(tuple(self.call(op) for op in o.operands())) @@ -73,11 +132,30 @@ class UFLVisitor(MultiFunction): raise ValueError def indexed(self, o): - # TODO in the long run, this is a stack of indices. - self.index = o.operands()[1] - ret = self.call(o.operands()[0]) - self.index = None - return ret + # TODO currently, we assume single indices + assert len(o.operands()[1]) == 1 + + # We have two types of indices in our expressions: + # * those that should result in variable subscripts + # * those that should result in function space child selection + # I think it is correct to assume that Indexed expressions + # are of the latter type if and only if their first operand is + # an argument or coefficient. + if isinstance(o.operands()[0], (ufl.Argument, ufl.Coefficient)): + assert not self.index + self.index = o.operands()[1] + ret = self.call(o.operands()[0]) + self.index = None + return ret + else: + from dune.perftool.pdelab import name_index +# return Subscript(self.call(o.operands()[0]), Variable(name_index(o.operands()[1]))) + ret = self.call(o.operands()[0]) + index = Variable(name_index(o.operands()[1])) + if isinstance(ret, Subscript): + return Subscript(ret.aggregate, ret.index + (index,)) + else: + return Subscript(ret, (index,)) class TopSumSeparation(MultiFunction): @@ -85,18 +163,19 @@ class TopSumSeparation(MultiFunction): def __init__(self): MultiFunction.__init__(self) self.visitor = UFLVisitor() + self.inames = [] def expr(self, o): - print "Call TopSumSeparation.expr with {}".format(o) - self.visitor(o) + self.visitor(o, self.inames) def sum(self, o): for op in o.operands(): self(op) def index_sum(self, o): - print "Call TopSumSeparation.index_sum with {}".format(o) - dimension_iname(o.operands()[1]) + # Generate an iname: We do this here and restrict ourselves to shape dim. + # TODO revisit when implementing general index sums + self.inames.append(dimension_iname(o.operands()[1])) self(o.operands()[0])