diff --git a/python/dune/perftool/loopy/duplicate.py b/python/dune/perftool/loopy/duplicate.py new file mode 100644 index 0000000000000000000000000000000000000000..7f88e2ff0b1f9f983d49b146bd02b1a274859c03 --- /dev/null +++ b/python/dune/perftool/loopy/duplicate.py @@ -0,0 +1,21 @@ +""" Iname duplication strategies to make kernels schedulable """ + +from loopy import (has_schedulable_iname_nesting, + get_iname_duplication_options, + duplicate_inames, + ) + + +def heuristic_duplication(kernel): + # If the given kernel is schedulable, nothing needs to be done. + if has_schedulable_iname_nesting(kernel): + return kernel + + # List all duplication options and return the transformed + # kernel if one such duplication transformation was enough to solve the problem. + for iname, within in get_iname_duplication_options(kernel): + dup_kernel = duplicate_inames(kernel, iname, within) + if has_schedulable_iname_nesting(dup_kernel): + return dup_kernel + + raise NotImplementedError("Your kernel needs multiple iname duplications! No generic algorithm implemented for that yet! (#39)") \ No newline at end of file diff --git a/python/dune/perftool/pdelab/localoperator.py b/python/dune/perftool/pdelab/localoperator.py index 612eb77b1e4e4b23469ae3be02298ec5ed07c915..01de0906aa7c5bdc3e47daa7c7d86ab7c75a7ebb 100644 --- a/python/dune/perftool/pdelab/localoperator.py +++ b/python/dune/perftool/pdelab/localoperator.py @@ -244,6 +244,11 @@ def generate_kernel(integrals): subdomain_id = integral.subdomain_id() subdomain_data = integral.subdomain_data() + # Maybe make the jacobian inverse diagonal! + if get_option('diagonal_transformation_matrix'): + from dune.perftool.ufl.transformations.axiparallel import diagonal_jacobian + integrand = diagonal_jacobian(integrand) + from dune.perftool.ufl.dimensionindex import dimension_index_mapping dimension_indices = dimension_index_mapping(integrand) @@ -285,23 +290,14 @@ def generate_kernel(integrals): kernel = preprocess_kernel(kernel) - # see whether we need iname duplication to make this thing schedulable! - # TODO: Use a clever strategy here, instead of random transformation until the problem is resolved - from loopy import has_schedulable_iname_nesting, get_iname_duplication_options, duplicate_inames - while not has_schedulable_iname_nesting(kernel): - # If there is a duplication that solves the problem with just one duplication, we pick that one - iname, within = (None, None) - for i, w in get_iname_duplication_options(kernel): - if has_schedulable_iname_nesting(duplicate_inames(kernel, i, w)): - iname, within = (i, w) - - # Otherwise pick a random one. - if iname is None: - iname, within = next(get_iname_duplication_options(kernel)) - - # Do the transformation - print("Applying iname duplication to measure {}: iname {}; within {}".format(measure, iname, within)) - kernel = duplicate_inames(kernel, iname, within) + from dune.perftool.loopy.duplicate import heuristic_duplication + kernel = heuristic_duplication(kernel) + + # This is also silly, but 1D DG Schemes never need the geometry, so the quadrature + # statement actually introduces a preamble at a stage where preambles cannot be generated + # anymore. TODO think about how to avoid this!!! + from dune.perftool.pdelab.quadrature import quadrature_loop_statement + quadrature_loop_statement() # Loopy might have introduced some temporary variables during preprocessing. As I want to have my own # temporary declaration code right now, I call the declaration preamble manually. diff --git a/python/dune/perftool/ufl/transformations/axiparallel.py b/python/dune/perftool/ufl/transformations/axiparallel.py new file mode 100644 index 0000000000000000000000000000000000000000..6cfbf2fe00be890d9342949a863aba3284d044f0 --- /dev/null +++ b/python/dune/perftool/ufl/transformations/axiparallel.py @@ -0,0 +1,52 @@ +""" +The Jacobian Inverse Transposed for axiparallel cube grids is diagonal. +We enforce this through a post-processing transformation (instead of hacking it into UFL) +""" + +from dune.perftool.ufl.transformations import ufl_transformation +from ufl.algorithms import MultiFunction +from ufl.classes import (Indexed, + JacobianInverse, + MultiIndex, + Product, + Restricted, + ) + + +class DiagonalJIT(MultiFunction): + def __init__(self): + self.replace = {} + MultiFunction.__init__(self) + + def expr(self, o): + return self.reuse_if_untouched(o, *tuple(self(op) for op in o.ufl_operands)) + + def multi_index(self, o): +# from pudb import set_trace; set_trace() + return MultiIndex(tuple(self.replace.get(i, i) for i in o)) + + def index_sum(self, o): + e, i = o.ufl_operands + if isinstance(e, Product): + p1, p2 = e.ufl_operands + if isinstance(p1, Indexed): + p, ii = p1.ufl_operands + if isinstance(p, Restricted): + p = p.ufl_operands[0] + if isinstance(p, JacobianInverse): + assert(i[0] == ii[0]) + self.replace[ii[0]] = ii[1] + ret = self(e) + self.replace = {} + return ret + + return self.reuse_if_untouched(o, self(e), i) + + +@ufl_transformation(name='axiparallel') +def diagonal_jacobian(e): + """ + This transformations can generically be described as: + \sum_k J^{(+/-)}_{k,i}x_k ==> J^{(+/-)}_{i,i}x_i + """ + return DiagonalJIT()(e) diff --git a/test/stokes/stokes_stress.mini b/test/stokes/stokes_stress.mini index 4481e91d21e683dc8452c84c35ca03eca7a2182c..09e874f12b1c06c46846eeb73bb57ca727319ac8 100644 --- a/test/stokes/stokes_stress.mini +++ b/test/stokes/stokes_stress.mini @@ -1,5 +1,6 @@ __name = stokes_stress_{__exec_suffix} -__exec_suffix = symdiff, numdiff | expand num +#__exec_suffix = symdiff, numdiff | expand num +__exec_suffix = numdiff lowerleft = 0.0 0.0 upperright = 1.0 1.0 @@ -13,6 +14,7 @@ reference = hagenpoiseuille_ref extension = vtu [formcompiler] -numerical_jacobian = 0, 1 | expand num +#numerical_jacobian = 0, 1 | expand num +numerical_jacobian = 1 exact_solution_expression = g compare_l2errorsquared = 1e-6