Skip to content
Snippets Groups Projects
Commit 2b951aa8 authored by Marcel Koch
Browse files

adjust permute to general vector size

parent e84baaf2
No related branches found
No related tags found
No related merge requests found
......@@ -33,6 +33,7 @@ def add_vcl_accum_insns(knl, iname_inner, iname_outer):
vng = knl.get_var_name_generator()
idg = knl.get_instruction_id_generator()
new_vec_temporaries = dict()
vcl_size = get_vcl_type_size(np.float64)
for insn in knl.instructions:
# somehow CInstructions are not hashable....
if isinstance(insn, lp.MultiAssignmentBase) and insn in accum_insns:
......@@ -61,11 +62,11 @@ def add_vcl_accum_insns(knl, iname_inner, iname_outer):
identifier_b = vng('b')
new_vec_temporaries[identifier_a] = DuneTemporaryVariable(identifier_a, dtype=np.float64,
shape=(2,)*(world_dimension()-1)
+(get_vcl_type_size(np.float64),),
+(vcl_size,),
managed=True, scope=lp.temp_var_scope.PRIVATE,
dim_tags=('f',)*(world_dimension()-1)+('vec',))
new_vec_temporaries[identifier_b] = DuneTemporaryVariable(identifier_b, dtype=np.float64,
shape=(get_vcl_type_size(np.float64),), managed=True,
shape=(vcl_size,), managed=True,
scope=lp.temp_var_scope.PRIVATE, dim_tags=('vec',))
var_a = prim.Subscript(prim.Variable(identifier_a),
......@@ -108,7 +109,7 @@ def add_vcl_accum_insns(knl, iname_inner, iname_outer):
# r+=a[iy]
id_accum = idg('insn_mod_accum')
expr_accum = prim.Sum((var_a,
prim.Call(prim.Variable('permute4d<-1,{}>'.format(','.join(map(str,range(get_vcl_type_size(np.float64)-1))))),
prim.Call(prim.Variable('permute{}d<-1,{}>'.format(vcl_size,','.join(map(str,range(vcl_size-1))))),
(var_b,)),
substitute(insn.assignee, {iname_ix:0})))
new_insns.append(lp.Assignment(assignee=substitute(insn.assignee,{iname_ix:0}),
......@@ -121,7 +122,7 @@ def add_vcl_accum_insns(knl, iname_inner, iname_outer):
)
# a[iy] = permute
id_permute = idg('insn_permute')
expr_permute = prim.Call(prim.Variable('permute4d<3,{}>'.format(','.join(['-1']*(get_vcl_type_size(np.float64)-1)))),
expr_permute = prim.Call(prim.Variable('permute{}d<3,{}>'.format(vcl_size,','.join(['-1']*(vcl_size-1)))),
(var_b,))
new_insns.append(lp.Assignment(assignee=var_a,
expression=expr_permute,
......@@ -134,7 +135,7 @@ def add_vcl_accum_insns(knl, iname_inner, iname_outer):
# tail handling, uses tail alias
id_accum_tail = idg('insn_accum_tail')
subst_map = {iname_inner: 0, iname_outer: get_option("number_of_blocks")//get_vcl_type_size(np.float64),
subst_map = {iname_inner: 0, iname_outer: get_option("number_of_blocks")//vcl_size,
iname_ix: 0, insn.assignee_name: prim.Variable(insn.assignee_name+'_tail'),
**replace_tail_inames}
assignee_tail = substitute(insn.assignee, subst_map)
......
0% Loading… Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment