Skip to content
Snippets Groups Projects
Commit cdd98d12 authored by Marcel Koch's avatar Marcel Koch
Browse files

refactoring

parent 57ad4513
No related branches found
No related tags found
No related merge requests found
...@@ -488,6 +488,54 @@ def realize_tail(knl, inner_iname, outer_iname, outer_bound, tail_iname, vcl_siz ...@@ -488,6 +488,54 @@ def realize_tail(knl, inner_iname, outer_iname, outer_bound, tail_iname, vcl_siz
return lp.make_reduction_inames_unique(knl) return lp.make_reduction_inames_unique(knl)
def do_vectorization(knl, orig_iname, vec_iname, iname_bound, vcl_size, level=0):
    """Split ``vec_iname`` into chunks of ``vcl_size`` and tag the inner iname as 'vec'.

    When ``iname_bound`` is not a multiple of ``vcl_size`` the domain of
    ``vec_iname`` is first shrunk to the largest multiple of ``vcl_size`` and
    the remaining iterations are handed to ``realize_tail``.  If the form
    option ``'vectorization_blockstructured_tail'`` is set and a vector width
    greater than one fits into the remainder, the tail iname is vectorized as
    well by calling this function again with ``level + 1``.

    :arg knl: the kernel to transform
    :arg orig_iname: iname name forwarded unchanged to ``add_vcl_iname_array``
        and to the recursive call
    :arg vec_iname: name of the iname that gets split
    :arg iname_bound: exclusive upper bound of ``vec_iname``
    :arg vcl_size: chunk length used for the split
    :arg level: recursion depth; used in the ``tail_<level>`` and
        ``vectorized_<level>`` tag names and forwarded to the helper
        transformations
    :returns: the transformed kernel
    """
    inner_iname = vec_iname + '_inner'
    outer_iname = vec_iname + '_outer'

    full_chunks, tail_size = divmod(iname_bound, vcl_size)
    has_tail = tail_size > 0

    vectorize_tail = False
    if get_form_option('vectorization_blockstructured_tail'):
        # halve the vector width until it fits into the remainder
        tail_vcl_size = vcl_size
        while tail_vcl_size > tail_size:
            tail_vcl_size //= 2
        vectorize_tail = tail_vcl_size > 1

    if has_tail:
        # The tail is added manually, since split_iname with slabs tries to
        # vectorize the tail.  Pretend the loop bound is a multiple of vcl_size.
        vectorizable_bound = full_chunks * vcl_size
        from loopy.kernel.tools import DomainChanger
        domain_changer = DomainChanger(knl, (vec_iname,))
        shrunk_domain = BasicSet('{{ [{0}]: 0<={0}<{1} }}'.format(vec_iname, vectorizable_bound))
        knl = knl.copy(domains=domain_changer.get_domains_with(shrunk_domain))

        knl = lp.split_iname(knl, vec_iname, vcl_size,
                             outer_iname=outer_iname, inner_iname=inner_iname)

        tail_iname = inner_iname + '_tail'
        knl = realize_tail(knl, inner_iname, outer_iname, iname_bound, tail_iname, vcl_size, level)
    else:
        knl = lp.split_iname(knl, vec_iname, vcl_size)

    knl = lp.tag_inames(knl, [(inner_iname, 'vec')])

    # every kernel argument whose name ends in 'alias' or 'tail' gets its axis split
    array_alias = [name for name in knl.arg_dict if name.endswith(('alias', 'tail'))]
    knl = lp.split_array_axis(knl, array_alias, level, vcl_size)

    knl = add_vcl_temporaries(knl, vcl_size)
    knl = add_vcl_iname_array(knl, orig_iname, inner_iname, vcl_size, level)
    knl = add_vcl_accum_insns(knl, inner_iname, outer_iname, vcl_size, level)
    knl = add_vcl_access(knl, inner_iname, vcl_size, level)

    if has_tail:
        # instructions tagged tail_<level> (but not head*) depend on the
        # instructions tagged vectorized_<level>
        tail_insns = And((Tagged('tail_{}'.format(level)), Not(Tagged('head*'))))
        knl = lp.add_dependency(knl, tail_insns, Tagged('vectorized_{}'.format(level)))
        if vectorize_tail:
            knl = do_vectorization(knl, orig_iname, tail_iname, tail_size, tail_vcl_size, level + 1)

    return knl
def vectorize_micro_elements(knl): def vectorize_micro_elements(knl):
vec_iname = "subel_x" vec_iname = "subel_x"
orig_iname = vec_iname orig_iname = vec_iname
...@@ -495,52 +543,5 @@ def vectorize_micro_elements(knl): ...@@ -495,52 +543,5 @@ def vectorize_micro_elements(knl):
vcl_size = get_vcl_type_size(np.float64) vcl_size = get_vcl_type_size(np.float64)
knl = add_iname_array(knl, vec_iname) knl = add_iname_array(knl, vec_iname)
def _do_vectorization(knl, vec_iname, iname_bound, vcl_size, level=0): knl = do_vectorization(knl, orig_iname, orig_iname, get_form_option('number_of_blocks'), vcl_size)
inner_iname = vec_iname + '_inner'
outer_iname = vec_iname + '_outer'
tail_size = iname_bound % vcl_size
if get_form_option('vectorization_blockstructured_tail'):
tail_vcl_size = vcl_size
while tail_vcl_size > tail_size:
tail_vcl_size = tail_vcl_size // 2
vectorize_tail = tail_vcl_size > 1
else:
vectorize_tail = False
# manually add tail, since split_iname with slabs tries to vectorize the tail
if tail_size > 0:
# fake suitable loop bound
vectorizable_bound = (iname_bound // vcl_size) * vcl_size
from loopy.kernel.tools import DomainChanger
domch = DomainChanger(knl, (vec_iname,))
knl = knl.copy(domains=domch.get_domains_with(
BasicSet('{{ [{0}]: 0<={0}<{1} }}'.format(vec_iname, vectorizable_bound))))
knl = lp.split_iname(knl, vec_iname, vcl_size, outer_iname=outer_iname, inner_iname=inner_iname)
tail_iname = vec_iname + '_inner' + '_tail'
knl = realize_tail(knl, inner_iname, outer_iname, iname_bound, tail_iname, vcl_size, level)
else:
knl = lp.split_iname(knl, vec_iname, vcl_size)
knl = lp.tag_inames(knl, [(inner_iname, 'vec')])
array_alias = [a for a in knl.arg_dict.keys() if a.endswith('alias') or a.endswith('tail')]
knl = lp.split_array_axis(knl, array_alias, level, vcl_size)
knl = add_vcl_temporaries(knl, vcl_size)
knl = add_vcl_iname_array(knl, orig_iname, inner_iname, vcl_size, level)
knl = add_vcl_accum_insns(knl, inner_iname, outer_iname, vcl_size, level)
knl = add_vcl_access(knl, inner_iname, vcl_size, level)
if tail_size > 0:
knl = lp.add_dependency(knl, And((Tagged('tail_{}'.format(level)), Not(Tagged('head*')))),
Tagged('vectorized_{}'.format(level)))
if vectorize_tail:
knl = _do_vectorization(knl, tail_iname, tail_size, tail_vcl_size, level + 1)
return knl
knl = _do_vectorization(knl, orig_iname, get_form_option('number_of_blocks'), vcl_size)
return knl return knl
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment