Skip to content
Snippets Groups Projects
Commit 9e9392e1 authored by Marcel Koch
Browse files

find suitable vcl size

parent 406d2502
No related branches found
No related tags found
No related merge requests found
...@@ -483,7 +483,11 @@ def vectorize_micro_elements(knl): ...@@ -483,7 +483,11 @@ def vectorize_micro_elements(knl):
outer_iname = vec_iname + '_outer' outer_iname = vec_iname + '_outer'
tail_size = iname_bound % vcl_size tail_size = iname_bound % vcl_size
vectorize_tail = ((vcl_size // 2) > 1) and (tail_size >= (vcl_size // 2)) tail_vcl_size = vcl_size
while tail_vcl_size > tail_size:
tail_vcl_size = tail_vcl_size // 2
assert(tail_vcl_size > 0)
vectorize_tail = tail_vcl_size > 1
# manually add tail, since split_iname with slabs tries to vectorize the tail # manually add tail, since split_iname with slabs tries to vectorize the tail
if tail_size > 0: if tail_size > 0:
...@@ -512,7 +516,7 @@ def vectorize_micro_elements(knl): ...@@ -512,7 +516,7 @@ def vectorize_micro_elements(knl):
knl = add_vcl_access(knl, inner_iname, vcl_size, level) knl = add_vcl_access(knl, inner_iname, vcl_size, level)
if tail_size > 0 and vectorize_tail: if tail_size > 0 and vectorize_tail:
knl = _do_vectorization(knl, tail_iname, tail_size, vcl_size // 2, level + 1) knl = _do_vectorization(knl, tail_iname, tail_size, tail_vcl_size, level + 1)
return knl return knl
......
0% Loading
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment