From cdd98d129413c807255f00ab566345c973dc802d Mon Sep 17 00:00:00 2001
From: Marcel Koch <marcel.koch@uni-muenster.de>
Date: Fri, 15 Feb 2019 16:03:51 +0100
Subject: [PATCH] refactoring: move nested _do_vectorization to module-level do_vectorization

---
 .../codegen/blockstructured/vectorization.py  | 97 ++++++++++---------
 1 file changed, 49 insertions(+), 48 deletions(-)

diff --git a/python/dune/codegen/blockstructured/vectorization.py b/python/dune/codegen/blockstructured/vectorization.py
index cfdfc794..d5224e6f 100644
--- a/python/dune/codegen/blockstructured/vectorization.py
+++ b/python/dune/codegen/blockstructured/vectorization.py
@@ -488,6 +488,54 @@ def realize_tail(knl, inner_iname, outer_iname, outer_bound, tail_iname, vcl_siz
     return lp.make_reduction_inames_unique(knl)
 
 
+def do_vectorization(knl, orig_iname, vec_iname, iname_bound, vcl_size, level=0):
+    inner_iname = vec_iname + '_inner'
+    outer_iname = vec_iname + '_outer'
+    # Split vec_iname by vcl_size, tag the inner iname 'vec', route leftover iterations to a tail; returns the kernel.
+    tail_size = iname_bound % vcl_size  # iterations left over after splitting iname_bound by vcl_size
+    if get_form_option('vectorization_blockstructured_tail'):
+        tail_vcl_size = vcl_size
+        while tail_vcl_size > tail_size:  # halve the width until it no longer exceeds the tail
+            tail_vcl_size = tail_vcl_size // 2
+        vectorize_tail = tail_vcl_size > 1  # a width of 0 or 1 leaves nothing to vectorize
+    else:
+        vectorize_tail = False
+
+    # Add the tail manually, since split_iname with slabs would try to vectorize the tail as well.
+    if tail_size > 0:
+        # Fake a suitable loop bound: restrict vec_iname to the largest multiple of vcl_size below iname_bound.
+        vectorizable_bound = (iname_bound // vcl_size) * vcl_size
+        from loopy.kernel.tools import DomainChanger
+        domch = DomainChanger(knl, (vec_iname,))
+        knl = knl.copy(domains=domch.get_domains_with(
+            BasicSet('{{ [{0}]: 0<={0}<{1} }}'.format(vec_iname, vectorizable_bound))))
+
+        knl = lp.split_iname(knl, vec_iname, vcl_size, outer_iname=outer_iname, inner_iname=inner_iname)
+        # realize_tail gets the unshrunk bound; presumably it adds the leftover iterations over tail_iname - confirm.
+        tail_iname = vec_iname + '_inner' + '_tail'
+        knl = realize_tail(knl, inner_iname, outer_iname, iname_bound, tail_iname, vcl_size, level)
+    else:
+        knl = lp.split_iname(knl, vec_iname, vcl_size)
+
+    knl = lp.tag_inames(knl, [(inner_iname, 'vec')])
+    # Split axis number `level` of every kernel argument whose name ends in 'alias' or 'tail' by vcl_size.
+    array_alias = [a for a in knl.arg_dict.keys() if a.endswith('alias') or a.endswith('tail')]
+    knl = lp.split_array_axis(knl, array_alias, level, vcl_size)
+    # Helper passes not shown in this hunk. NOTE(review): presumably order-sensitive - confirm before reordering.
+    knl = add_vcl_temporaries(knl, vcl_size)
+    knl = add_vcl_iname_array(knl, orig_iname, inner_iname, vcl_size, level)
+    knl = add_vcl_accum_insns(knl, inner_iname, outer_iname, vcl_size, level)
+    knl = add_vcl_access(knl, inner_iname, vcl_size, level)
+    # Instructions tagged 'tail_<level>' (but not 'head*') get a dependency on those tagged 'vectorized_<level>'.
+    if tail_size > 0:
+        knl = lp.add_dependency(knl, And((Tagged('tail_{}'.format(level)), Not(Tagged('head*')))),
+                                Tagged('vectorized_{}'.format(level)))
+        if vectorize_tail:  # recurse on the tail iname with the narrower width, one level deeper
+            knl = do_vectorization(knl, orig_iname, tail_iname, tail_size, tail_vcl_size, level + 1)
+
+    return knl
+
+
 def vectorize_micro_elements(knl):
     vec_iname = "subel_x"
     orig_iname = vec_iname
@@ -495,52 +543,5 @@ def vectorize_micro_elements(knl):
         vcl_size = get_vcl_type_size(np.float64)
         knl = add_iname_array(knl, vec_iname)
 
-        def _do_vectorization(knl, vec_iname, iname_bound, vcl_size, level=0):
-            inner_iname = vec_iname + '_inner'
-            outer_iname = vec_iname + '_outer'
-
-            tail_size = iname_bound % vcl_size
-            if get_form_option('vectorization_blockstructured_tail'):
-                tail_vcl_size = vcl_size
-                while tail_vcl_size > tail_size:
-                    tail_vcl_size = tail_vcl_size // 2
-                vectorize_tail = tail_vcl_size > 1
-            else:
-                vectorize_tail = False
-
-            # manually add tail, since split_iname with slabs tries to vectorize the tail
-            if tail_size > 0:
-                # fake suitable loop bound
-                vectorizable_bound = (iname_bound // vcl_size) * vcl_size
-                from loopy.kernel.tools import DomainChanger
-                domch = DomainChanger(knl, (vec_iname,))
-                knl = knl.copy(domains=domch.get_domains_with(
-                    BasicSet('{{ [{0}]: 0<={0}<{1} }}'.format(vec_iname, vectorizable_bound))))
-
-                knl = lp.split_iname(knl, vec_iname, vcl_size, outer_iname=outer_iname, inner_iname=inner_iname)
-
-                tail_iname = vec_iname + '_inner' + '_tail'
-                knl = realize_tail(knl, inner_iname, outer_iname, iname_bound, tail_iname, vcl_size, level)
-            else:
-                knl = lp.split_iname(knl, vec_iname, vcl_size)
-
-            knl = lp.tag_inames(knl, [(inner_iname, 'vec')])
-
-            array_alias = [a for a in knl.arg_dict.keys() if a.endswith('alias') or a.endswith('tail')]
-            knl = lp.split_array_axis(knl, array_alias, level, vcl_size)
-
-            knl = add_vcl_temporaries(knl, vcl_size)
-            knl = add_vcl_iname_array(knl, orig_iname, inner_iname, vcl_size, level)
-            knl = add_vcl_accum_insns(knl, inner_iname, outer_iname, vcl_size, level)
-            knl = add_vcl_access(knl, inner_iname, vcl_size, level)
-
-            if tail_size > 0:
-                knl = lp.add_dependency(knl, And((Tagged('tail_{}'.format(level)), Not(Tagged('head*')))),
-                                        Tagged('vectorized_{}'.format(level)))
-                if vectorize_tail:
-                    knl = _do_vectorization(knl, tail_iname, tail_size, tail_vcl_size, level + 1)
-
-            return knl
-
-        knl = _do_vectorization(knl, orig_iname, get_form_option('number_of_blocks'), vcl_size)
+        knl = do_vectorization(knl, orig_iname, orig_iname, get_form_option('number_of_blocks'), vcl_size)
     return knl
-- 
GitLab