diff --git a/python/dune/perftool/loopy/__init__.py b/python/dune/perftool/loopy/__init__.py index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..2ef32a24fa3721737845e5ce54d42de834effb5b 100644 --- a/python/dune/perftool/loopy/__init__.py +++ b/python/dune/perftool/loopy/__init__.py @@ -0,0 +1,4 @@ +""" Export the interface interesting to the rest of the project """ + +from dune.perftool.loopy.transformations.collect_precompute import collect_vector_data_precompute +from dune.perftool.loopy.transformations.duplicate import heuristic_duplication diff --git a/python/dune/perftool/loopy/collectvector.py b/python/dune/perftool/loopy/transformations/collect_precompute.py similarity index 89% rename from python/dune/perftool/loopy/collectvector.py rename to python/dune/perftool/loopy/transformations/collect_precompute.py index 3d69d623f28851466fc44690ef876366f4f7aeb3..2324defcff748241ea7dbc1e6ab2f4a786565b34 100644 --- a/python/dune/perftool/loopy/collectvector.py +++ b/python/dune/perftool/loopy/transformations/collect_precompute.py @@ -1,9 +1,10 @@ -""" A kernel transformation that collects data until the vector size is reached """ +""" A kernel transformation that precomputes data and then splits computation +in chunks of vector size independent of divisibility of the loop bounds. """ from dune.perftool.loopy.vcl import get_vcl_type_size -from dune.perftool.loopy.vectorview import (add_vector_view, - get_vector_view_name, - ) +from dune.perftool.loopy.transformations.vectorview import (add_vector_view, + get_vector_view_name, + ) from dune.perftool.tools import get_pymbolic_basename from loopy.kernel.creation import parse_domains @@ -17,7 +18,7 @@ import loopy as lp import numpy as np -def collect_vector_data(knl, insns, inames): +def collect_vector_data_precompute(knl, insns, inames): # # Process/Assert/Standardize the input # @@ -55,12 +56,9 @@ def collect_vector_data(knl, insns, inames): # Do precomputation of the quantity prec_quantity = "{}_precomputed".format(quantity) - compute_id = "{}_compute_id".format(quantity) - compute_ids.append(compute_id) - knl = lp.precompute(knl, subst_name, inames, temporary_name=prec_quantity, - compute_insn_id=compute_id) + ) # Introduce a vector view of the precomputation result knl = add_vector_view(knl, prec_quantity) diff --git a/python/dune/perftool/loopy/duplicate.py b/python/dune/perftool/loopy/transformations/duplicate.py similarity index 100% rename from python/dune/perftool/loopy/duplicate.py rename to python/dune/perftool/loopy/transformations/duplicate.py diff --git a/python/dune/perftool/loopy/vectorview.py b/python/dune/perftool/loopy/transformations/vectorview.py similarity index 100% rename from python/dune/perftool/loopy/vectorview.py rename to python/dune/perftool/loopy/transformations/vectorview.py diff --git a/python/dune/perftool/pdelab/localoperator.py b/python/dune/perftool/pdelab/localoperator.py index 8c48334121000df5d0707a9594438787e25f58f1..73a7336782bce45263c0ee5aac5fa1a0a7d59690 100644 --- a/python/dune/perftool/pdelab/localoperator.py +++ b/python/dune/perftool/pdelab/localoperator.py @@ -501,13 +501,13 @@ def generate_kernel(integrals): from loopy import make_reduction_inames_unique kernel = make_reduction_inames_unique(kernel) + kernel = preprocess_kernel(kernel) + # Apply the transformations that were gathered during tree traversals for trafo in transformations: kernel = trafo[0](kernel, *trafo[1]) - kernel = preprocess_kernel(kernel) - - from dune.perftool.loopy.duplicate import heuristic_duplication + from dune.perftool.loopy import heuristic_duplication kernel = heuristic_duplication(kernel) # Now add the preambles to the kernel