Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
D
dune-codegen
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
This is an archived project. Repository and other project resources are read-only.
Show more breadcrumbs
Christian Heinigk
dune-codegen
Commits
fed5721e
Commit
fed5721e
authored
6 years ago
by
Marcel Koch
Browse files
Options
Downloads
Patches
Plain Diff
ensure ordering: vectorized code before tail
parent
70b9b0bb
No related branches found
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
python/dune/codegen/blockstructured/vectorization.py
+60
-33
60 additions, 33 deletions
python/dune/codegen/blockstructured/vectorization.py
with
60 additions
and
33 deletions
python/dune/codegen/blockstructured/vectorization.py
+
60
−
33
View file @
fed5721e
...
@@ -41,7 +41,7 @@ def add_vcl_temporaries(knl, vcl_size):
...
@@ -41,7 +41,7 @@ def add_vcl_temporaries(knl, vcl_size):
iname_to_tag
=
dict
(
**
knl
.
iname_to_tag
,
**
{
init_iname
:
VectorizeTag
()}))
iname_to_tag
=
dict
(
**
knl
.
iname_to_tag
,
**
{
init_iname
:
VectorizeTag
()}))
def
add_vcl_accum_insns
(
knl
,
inner_iname
,
outer_iname
,
vcl_size
):
def
add_vcl_accum_insns
(
knl
,
inner_iname
,
outer_iname
,
vcl_size
,
level
):
nptype
=
dtype_floatingpoint
()
nptype
=
dtype_floatingpoint
()
accum_insns
=
lp
.
find_instructions
(
knl
,
And
((
Tagged
(
'
accum
'
),
Iname
(
inner_iname
))))
accum_insns
=
lp
.
find_instructions
(
knl
,
And
((
Tagged
(
'
accum
'
),
Iname
(
inner_iname
))))
...
@@ -93,13 +93,14 @@ def add_vcl_accum_insns(knl, inner_iname, outer_iname, vcl_size):
...
@@ -93,13 +93,14 @@ def add_vcl_accum_insns(knl, inner_iname, outer_iname, vcl_size):
var_right
=
prim
.
Subscript
(
prim
.
Variable
(
identifier_right
),
(
prim
.
Variable
(
inner_iname
),))
var_right
=
prim
.
Subscript
(
prim
.
Variable
(
identifier_right
),
(
prim
.
Variable
(
inner_iname
),))
# init a
# init a
id_init_a
=
idg
(
'
{}
_init_
'
+
identifier_left
)
id_init_a
=
idg
(
'
insn
_init_
'
+
identifier_left
)
new_insns
.
append
(
lp
.
Assignment
(
assignee
=
substitute
(
var_left
,
replace_head_inames
),
new_insns
.
append
(
lp
.
Assignment
(
assignee
=
substitute
(
var_left
,
replace_head_inames
),
expression
=
0
,
expression
=
0
,
id
=
id_init_a
,
id
=
id_init_a
,
within_inames
=
(
insn
.
within_inames
-
frozenset
({
outer_iname
})
-
within_inames
=
(
insn
.
within_inames
-
frozenset
({
outer_iname
})
-
inames_micro
)
|
inames_head
,
inames_micro
)
|
inames_head
,
tags
=
frozenset
({
'
head_vec{}
'
.
format
(
vcl_size
)})))
tags
=
frozenset
({
'
head_vec{}
'
.
format
(
vcl_size
),
'
vectorized_{}
'
.
format
(
level
)})))
# set the values for a and b
# set the values for a and b
expr_right
=
substitute
(
expr_without_r
,
{
iname_ix
:
1
})
expr_right
=
substitute
(
expr_without_r
,
{
iname_ix
:
1
})
...
@@ -111,12 +112,14 @@ def add_vcl_accum_insns(knl, inner_iname, outer_iname, vcl_size):
...
@@ -111,12 +112,14 @@ def add_vcl_accum_insns(knl, inner_iname, outer_iname, vcl_size):
expression
=
expr_right
,
expression
=
expr_right
,
id
=
id_set_right
,
id
=
id_set_right
,
depends_on
=
insn
.
depends_on
,
depends_on
=
insn
.
depends_on
,
within_inames
=
insn
.
within_inames
-
frozenset
({
iname_ix
})))
within_inames
=
insn
.
within_inames
-
frozenset
({
iname_ix
}),
tags
=
frozenset
({
'
vectorized_{}
'
.
format
(
level
)})))
new_insns
.
append
(
lp
.
Assignment
(
assignee
=
var_left
,
new_insns
.
append
(
lp
.
Assignment
(
assignee
=
var_left
,
expression
=
expr_left
,
expression
=
expr_left
,
id
=
id_set_left
,
id
=
id_set_left
,
depends_on
=
insn
.
depends_on
|
frozenset
({
id_init_a
}),
depends_on
=
insn
.
depends_on
|
frozenset
({
id_init_a
}),
within_inames
=
insn
.
within_inames
-
frozenset
({
iname_ix
})))
within_inames
=
insn
.
within_inames
-
frozenset
({
iname_ix
}),
tags
=
frozenset
({
'
vectorized_{}
'
.
format
(
level
)})))
# r+=a[iy]
# r+=a[iy]
id_accum
=
idg
(
'
{}_mod_accum
'
.
format
(
insn
.
id
))
id_accum
=
idg
(
'
{}_mod_accum
'
.
format
(
insn
.
id
))
...
@@ -130,7 +133,8 @@ def add_vcl_accum_insns(knl, inner_iname, outer_iname, vcl_size):
...
@@ -130,7 +133,8 @@ def add_vcl_accum_insns(knl, inner_iname, outer_iname, vcl_size):
depends_on
=
insn
.
depends_on
|
frozenset
({
id_set_left
,
depends_on
=
insn
.
depends_on
|
frozenset
({
id_set_left
,
id_init_a
,
id_set_right
}),
id_init_a
,
id_set_right
}),
within_inames
=
insn
.
within_inames
-
frozenset
({
iname_ix
}),
within_inames
=
insn
.
within_inames
-
frozenset
({
iname_ix
}),
tags
=
frozenset
({
'
accum_vec{}
'
.
format
(
vcl_size
)})))
tags
=
frozenset
({
'
accum_vec{}
'
.
format
(
vcl_size
),
'
vectorized_{}
'
.
format
(
level
)})))
# a[iy] = permute
# a[iy] = permute
id_permute
=
idg
(
'
{}_permute
'
.
format
(
insn
.
id
))
id_permute
=
idg
(
'
{}_permute
'
.
format
(
insn
.
id
))
expr_permute
=
prim
.
Call
(
VCLPermute
(
nptype
,
vcl_size
,
(
vcl_size
-
1
,)
+
(
-
1
,)
*
(
vcl_size
-
1
)),
expr_permute
=
prim
.
Call
(
VCLPermute
(
nptype
,
vcl_size
,
(
vcl_size
-
1
,)
+
(
-
1
,)
*
(
vcl_size
-
1
)),
...
@@ -140,7 +144,8 @@ def add_vcl_accum_insns(knl, inner_iname, outer_iname, vcl_size):
...
@@ -140,7 +144,8 @@ def add_vcl_accum_insns(knl, inner_iname, outer_iname, vcl_size):
id
=
id_permute
,
id
=
id_permute
,
depends_on
=
insn
.
depends_on
|
frozenset
({
id_set_left
,
id_init_a
,
id_set_right
,
depends_on
=
insn
.
depends_on
|
frozenset
({
id_set_left
,
id_init_a
,
id_set_right
,
id_accum
}),
id_accum
}),
within_inames
=
insn
.
within_inames
-
frozenset
({
iname_ix
})
within_inames
=
insn
.
within_inames
-
frozenset
({
iname_ix
}),
tags
=
frozenset
({
'
vectorized_{}
'
.
format
(
level
)})
))
))
# tail handling, uses tail alias
# tail handling, uses tail alias
...
@@ -161,7 +166,8 @@ def add_vcl_accum_insns(knl, inner_iname, outer_iname, vcl_size):
...
@@ -161,7 +166,8 @@ def add_vcl_accum_insns(knl, inner_iname, outer_iname, vcl_size):
frozenset
(
write_to_tail_ids
)),
frozenset
(
write_to_tail_ids
)),
within_inames
=
(
insn
.
within_inames
-
frozenset
({
inner_iname
,
outer_iname
})
-
within_inames
=
(
insn
.
within_inames
-
frozenset
({
inner_iname
,
outer_iname
})
-
inames_micro
)
|
inames_tail
,
inames_micro
)
|
inames_tail
,
tags
=
frozenset
({
'
tail_vec{}
'
.
format
(
vcl_size
)})))
tags
=
frozenset
({
'
tail_vec{}
'
.
format
(
vcl_size
),
'
vectorized_{}
'
.
format
(
level
)})))
else
:
else
:
if
insn
.
id
.
endswith
(
'
tail
'
)
and
insn
.
id
.
replace
(
'
_tail
'
,
''
)
in
accum_ids
:
if
insn
.
id
.
endswith
(
'
tail
'
)
and
insn
.
id
.
replace
(
'
_tail
'
,
''
)
in
accum_ids
:
accum_id
=
insn
.
id
.
replace
(
'
_tail
'
,
''
)
accum_id
=
insn
.
id
.
replace
(
'
_tail
'
,
''
)
...
@@ -240,7 +246,8 @@ def add_vcl_access(knl, inner_iname, vcl_size, level=0):
...
@@ -240,7 +246,8 @@ def add_vcl_access(knl, inner_iname, vcl_size, level=0):
call_load
=
prim
.
Call
(
VCLLoad
(
name_vec
),
(
prim
.
Sum
((
prim
.
Variable
(
name_alias
),
flat_index
)),))
call_load
=
prim
.
Call
(
VCLLoad
(
name_vec
),
(
prim
.
Sum
((
prim
.
Variable
(
name_alias
),
flat_index
)),))
load_insns
.
append
(
lp
.
CallInstruction
(
assignees
=
(),
expression
=
call_load
,
load_insns
.
append
(
lp
.
CallInstruction
(
assignees
=
(),
expression
=
call_load
,
id
=
load_id
,
within_inames
=
insn
.
within_inames
|
insn
.
reduction_inames
(),
id
=
load_id
,
within_inames
=
insn
.
within_inames
|
insn
.
reduction_inames
(),
depends_on
=
insn
.
depends_on
|
write_ids
,))
depends_on
=
insn
.
depends_on
|
write_ids
,
tags
=
frozenset
({
'
vectorized_{}
'
.
format
(
level
)})))
read_dependencies
.
setdefault
(
id
,
set
())
read_dependencies
.
setdefault
(
id
,
set
())
read_dependencies
[
id
].
add
(
load_id
)
read_dependencies
[
id
].
add
(
load_id
)
...
@@ -268,7 +275,8 @@ def add_vcl_access(knl, inner_iname, vcl_size, level=0):
...
@@ -268,7 +275,8 @@ def add_vcl_access(knl, inner_iname, vcl_size, level=0):
store_insns
.
append
(
lp
.
CallInstruction
(
assignees
=
(),
expression
=
call_store
,
store_insns
.
append
(
lp
.
CallInstruction
(
assignees
=
(),
expression
=
call_store
,
id
=
store_id
,
within_inames
=
insn
.
within_inames
,
id
=
store_id
,
within_inames
=
insn
.
within_inames
,
depends_on
=
(
insn
.
depends_on
|
frozenset
({
id
})
|
read_dependencies
[
id
]
|
depends_on
=
(
insn
.
depends_on
|
frozenset
({
id
})
|
read_dependencies
[
id
]
|
write_ids
)))
write_ids
),
tags
=
frozenset
({
'
vectorized_{}
'
.
format
(
level
)})))
# replace alias with vcl vector, except for accumulation assignee
# replace alias with vcl vector, except for accumulation assignee
vector_alias
=
[
a
for
a
in
knl
.
arg_dict
if
a
.
endswith
(
alias_suffix
)]
vector_alias
=
[
a
for
a
in
knl
.
arg_dict
if
a
.
endswith
(
alias_suffix
)]
...
@@ -291,14 +299,15 @@ def add_vcl_access(knl, inner_iname, vcl_size, level=0):
...
@@ -291,14 +299,15 @@ def add_vcl_access(knl, inner_iname, vcl_size, level=0):
new_insns
.
append
(
insn
)
new_insns
.
append
(
insn
)
knl_with_subst_insns
=
knl_with_subst_insns
.
copy
(
instructions
=
new_insns
)
knl_with_subst_insns
=
knl_with_subst_insns
.
copy
(
instructions
=
new_insns
)
# substitution rule for alias[ex_outer,ex_inner, ey, ix, iy] -> vec[ex_inner]
# substitution rule for alias[[ex_o]*l,ex_inner, ey, ix, iy] -> vec[ex_inner]
parameters
=
'
,
'
.
join
([
'
ex_o{}
'
.
format
(
l
)
for
l
in
range
(
level
+
1
)])
+
\
parameters
=
'
,
'
.
join
([
'
ex_o{}
'
.
format
(
l
)
for
l
in
range
(
level
+
1
)])
+
\
'
,v_i,
'
+
\
'
,v_i,
'
+
\
'
,
'
.
join
([
'
e
'
+
d
for
d
in
dim_names
[
1
:
dim
]])
+
\
'
,
'
.
join
([
'
e
'
+
d
for
d
in
dim_names
[
1
:
dim
]])
+
\
'
,ix,
'
+
\
'
,ix,
'
+
\
'
,
'
.
join
([
'
i
'
+
d
for
d
in
dim_names
[
1
:
dim
]])
'
,
'
.
join
([
'
i
'
+
d
for
d
in
dim_names
[
1
:
dim
]])
knl_with_subst_insns
=
lp
.
extract_subst
(
knl_with_subst_insns
,
alias
+
'
_subst
'
,
'
{}[{}]
'
.
format
(
alias
,
parameters
),
knl_with_subst_insns
=
lp
.
extract_subst
(
knl_with_subst_insns
,
parameters
=
parameters
)
alias
+
'
_subst
'
,
'
{}[{}]
'
.
format
(
alias
,
parameters
),
parameters
=
parameters
)
new_subst
=
knl_with_subst_insns
.
substitutions
.
copy
()
new_subst
=
knl_with_subst_insns
.
substitutions
.
copy
()
rule
=
new_subst
[
alias
+
'
_subst
'
]
rule
=
new_subst
[
alias
+
'
_subst
'
]
rule
.
expression
=
prim
.
Subscript
(
prim
.
Variable
(
alias
.
replace
(
alias_suffix
,
vector_sufix
)),
rule
.
expression
=
prim
.
Subscript
(
prim
.
Variable
(
alias
.
replace
(
alias_suffix
,
vector_sufix
)),
...
@@ -330,10 +339,12 @@ def add_vcl_access(knl, inner_iname, vcl_size, level=0):
...
@@ -330,10 +339,12 @@ def add_vcl_access(knl, inner_iname, vcl_size, level=0):
raise
CodegenVectorizationError
raise
CodegenVectorizationError
new_insns
.
append
(
insn
.
copy
(
assignee
=
assignee_vec
,
new_insns
.
append
(
insn
.
copy
(
assignee
=
assignee_vec
,
depends_on
=
(
insn
.
depends_on
|
read_dependencies
[
insn
.
id
]
|
depends_on
=
(
insn
.
depends_on
|
read_dependencies
[
insn
.
id
]
|
write_ids
)))
write_ids
),
tags
=
insn
.
tags
|
frozenset
({
'
vectorized_{}
'
.
format
(
level
)})))
else
:
else
:
new_insns
.
append
(
insn
.
copy
(
depends_on
=
(
insn
.
depends_on
|
read_dependencies
[
insn
.
id
]
|
new_insns
.
append
(
insn
.
copy
(
depends_on
=
(
insn
.
depends_on
|
read_dependencies
[
insn
.
id
]
|
write_ids
)))
write_ids
),
tags
=
insn
.
tags
|
frozenset
({
'
vectorized_{}
'
.
format
(
level
)})))
return
knl
.
copy
(
instructions
=
new_insns
+
load_insns
+
store_insns
)
return
knl
.
copy
(
instructions
=
new_insns
+
load_insns
+
store_insns
)
...
@@ -387,29 +398,28 @@ def add_iname_array(knl, iname):
...
@@ -387,29 +398,28 @@ def add_iname_array(knl, iname):
return
knl
return
knl
def
add_vcl_iname_array
(
knl
,
iname
,
vec_iname
,
vcl_size
):
def
add_vcl_iname_array
(
knl
,
iname
,
vec_iname
,
vcl_size
,
level
):
insns_with_macro_points
=
lp
.
find_instructions
(
knl
,
And
((
Tagged
(
iname
),
Iname
(
vec_iname
))))
insns_with_macro_points
=
lp
.
find_instructions
(
knl
,
And
((
Tagged
(
iname
),
Iname
(
vec_iname
))))
if
insns_with_macro_points
:
if
insns_with_macro_points
:
iname_array
=
iname
+
'
_arr
'
iname_array
=
iname
+
'
_arr
'
vector_name
=
iname
+
'
_vec{}
'
.
format
(
vcl_size
)
vector_name
=
iname
+
'
_vec{}
'
.
format
(
vcl_size
)
new_temporaries
=
dict
()
new_temporaries
=
{
vector_name
:
DuneTemporaryVariable
(
vector_name
,
managed
=
True
,
new_temporaries
[
vector_name
]
=
DuneTemporaryVariable
(
vector_name
,
managed
=
True
,
shape
=
(
get_form_option
(
'
number_of_blocks
'
),),
shape
=
(
get_form_option
(
'
number_of_blocks
'
),),
scope
=
lp
.
temp_var_scope
.
PRIVATE
,
dtype
=
np
.
float64
,
scope
=
lp
.
temp_var_scope
.
PRIVATE
,
dtype
=
np
.
float64
,
base_storage
=
iname_array
+
'
_buff
'
,
base_storage
=
iname_array
+
'
_buff
'
,
_base_storage_access_may_be_aliasing
=
True
)}
_base_storage_access_may_be_aliasing
=
True
)
silenced_warning
=
[
"
read_no_write({})
"
.
format
(
vector_name
)]
silenced_warning
=
[
"
read_no_write({})
"
.
format
(
vector_name
)]
replacemap
=
dict
()
replacemap
=
{
iname_array
:
prim
.
Variable
(
vector_name
)}
replacemap
[
iname_array
]
=
prim
.
Variable
(
vector_name
)
new_insns
=
[]
new_insns
=
[]
for
insn
in
knl
.
instructions
:
for
insn
in
knl
.
instructions
:
if
insn
in
insns_with_macro_points
:
if
insn
in
insns_with_macro_points
:
transformed_insn
=
insn
.
with_transformed_expressions
(
lambda
expr
:
substitute
(
expr
,
replacemap
))
transformed_insn
=
insn
.
with_transformed_expressions
(
lambda
expr
:
substitute
(
expr
,
replacemap
))
new_insns
.
append
(
transformed_insn
.
copy
(
depends_on
=
'
init_{}_buffer
'
.
format
(
iname_array
)))
new_insns
.
append
(
transformed_insn
.
copy
(
depends_on
=
'
init_{}_buffer
'
.
format
(
iname_array
),
tags
=
insn
.
tags
|
frozenset
({
'
vectorized_{}
'
.
format
(
level
)})))
else
:
else
:
new_insns
.
append
(
insn
)
new_insns
.
append
(
insn
)
...
@@ -423,7 +433,7 @@ def add_vcl_iname_array(knl, iname, vec_iname, vcl_size):
...
@@ -423,7 +433,7 @@ def add_vcl_iname_array(knl, iname, vec_iname, vcl_size):
return
knl
return
knl
def
realize_tail
(
knl
,
inner_iname
,
outer_iname
,
outer_bound
,
tail_iname
,
vcl_size
):
def
realize_tail
(
knl
,
inner_iname
,
outer_iname
,
outer_bound
,
tail_iname
,
vcl_size
,
level
):
tail_size
=
get_form_option
(
'
number_of_blocks
'
)
%
vcl_size
tail_size
=
get_form_option
(
'
number_of_blocks
'
)
%
vcl_size
new_dom
=
BasicSet
(
"
{{ [{0}] : 0<={0}<{1} }}
"
.
format
(
tail_iname
,
tail_size
))
new_dom
=
BasicSet
(
"
{{ [{0}] : 0<={0}<{1} }}
"
.
format
(
tail_iname
,
tail_size
))
...
@@ -451,7 +461,8 @@ def realize_tail(knl, inner_iname, outer_iname, outer_bound, tail_iname, vcl_siz
...
@@ -451,7 +461,8 @@ def realize_tail(knl, inner_iname, outer_iname, outer_bound, tail_iname, vcl_siz
new_within_inames
=
frozenset
((
iname
+
'
_tail
'
if
iname
==
inner_iname
else
iname
new_within_inames
=
frozenset
((
iname
+
'
_tail
'
if
iname
==
inner_iname
else
iname
for
iname
in
insn
.
within_inames
))
-
frozenset
({
outer_iname
})
for
iname
in
insn
.
within_inames
))
-
frozenset
({
outer_iname
})
new_insns
.
append
(
new_insn
.
copy
(
id
=
insn
.
id
+
'
_tail
'
,
depends_on
=
new_depends_on
,
new_insns
.
append
(
new_insn
.
copy
(
id
=
insn
.
id
+
'
_tail
'
,
depends_on
=
new_depends_on
,
within_inames
=
new_within_inames
))
within_inames
=
new_within_inames
,
tags
=
insn
.
tags
|
frozenset
({
'
tail_{}
'
.
format
(
level
)})))
knl
=
knl
.
copy
(
domains
=
knl
.
domains
+
[
new_dom
],
instructions
=
knl
.
instructions
+
new_insns
,
knl
=
knl
.
copy
(
domains
=
knl
.
domains
+
[
new_dom
],
instructions
=
knl
.
instructions
+
new_insns
,
temporary_variables
=
dict
(
**
knl
.
temporary_variables
,
**
temporaries_to_duplicate
))
temporary_variables
=
dict
(
**
knl
.
temporary_variables
,
**
temporaries_to_duplicate
))
...
@@ -471,6 +482,21 @@ def realize_tail(knl, inner_iname, outer_iname, outer_bound, tail_iname, vcl_siz
...
@@ -471,6 +482,21 @@ def realize_tail(knl, inner_iname, outer_iname, outer_bound, tail_iname, vcl_siz
return
lp
.
make_reduction_inames_unique
(
knl
)
return
lp
.
make_reduction_inames_unique
(
knl
)
def add_tail_dependencies(knl, level):
    """Make the tail instructions of ``level`` depend on the vectorized ones.

    Every instruction tagged ``'tail_<level>'`` gets the ids of all
    instructions tagged ``'vectorized_<level>'`` added to its ``depends_on``
    set. All other instructions are passed through untouched.

    :arg knl: the kernel whose instructions are inspected.
    :arg level: value formatted into the ``'vectorized_{}'`` and ``'tail_{}'``
        tag names that select the two instruction groups.
    :returns: a copy of ``knl`` carrying the updated instruction list.
    """
    vectorized_ids = frozenset(
        insn.id
        for insn in lp.find_instructions(knl, Tagged('vectorized_{}'.format(level))))

    # Membership is decided by instruction id instead of comparing whole
    # instruction objects against a list for every instruction of the kernel.
    # NOTE(review): relies on instruction ids being unique within a kernel.
    tail_ids = frozenset(
        insn.id
        for insn in lp.find_instructions(knl, Tagged('tail_{}'.format(level))))

    new_insns = [
        insn.copy(depends_on=insn.depends_on | vectorized_ids)
        if insn.id in tail_ids else insn
        for insn in knl.instructions]

    return knl.copy(instructions=new_insns)
def
vectorize_micro_elements
(
knl
):
def
vectorize_micro_elements
(
knl
):
vec_iname
=
"
subel_x
"
vec_iname
=
"
subel_x
"
orig_iname
=
vec_iname
orig_iname
=
vec_iname
...
@@ -503,7 +529,7 @@ def vectorize_micro_elements(knl):
...
@@ -503,7 +529,7 @@ def vectorize_micro_elements(knl):
knl
=
lp
.
split_iname
(
knl
,
vec_iname
,
vcl_size
,
outer_iname
=
outer_iname
,
inner_iname
=
inner_iname
)
knl
=
lp
.
split_iname
(
knl
,
vec_iname
,
vcl_size
,
outer_iname
=
outer_iname
,
inner_iname
=
inner_iname
)
tail_iname
=
vec_iname
+
'
_inner
'
+
'
_tail
'
tail_iname
=
vec_iname
+
'
_inner
'
+
'
_tail
'
knl
=
realize_tail
(
knl
,
inner_iname
,
outer_iname
,
iname_bound
,
tail_iname
,
vcl_size
)
knl
=
realize_tail
(
knl
,
inner_iname
,
outer_iname
,
iname_bound
,
tail_iname
,
vcl_size
,
level
)
else
:
else
:
knl
=
lp
.
split_iname
(
knl
,
vec_iname
,
vcl_size
)
knl
=
lp
.
split_iname
(
knl
,
vec_iname
,
vcl_size
)
...
@@ -512,16 +538,17 @@ def vectorize_micro_elements(knl):
...
@@ -512,16 +538,17 @@ def vectorize_micro_elements(knl):
array_alias
=
[
a
for
a
in
knl
.
arg_dict
.
keys
()
if
a
.
endswith
(
'
alias
'
)
or
a
.
endswith
(
'
tail
'
)]
array_alias
=
[
a
for
a
in
knl
.
arg_dict
.
keys
()
if
a
.
endswith
(
'
alias
'
)
or
a
.
endswith
(
'
tail
'
)]
knl
=
lp
.
split_array_axis
(
knl
,
array_alias
,
level
,
vcl_size
)
knl
=
lp
.
split_array_axis
(
knl
,
array_alias
,
level
,
vcl_size
)
knl
=
add_vcl_iname_array
(
knl
,
orig_iname
,
inner_iname
,
vcl_size
)
knl
=
add_vcl_temporaries
(
knl
,
vcl_size
)
knl
=
add_vcl_temporaries
(
knl
,
vcl_size
)
knl
=
add_vcl_accum_insns
(
knl
,
inner_iname
,
outer_iname
,
vcl_size
)
knl
=
add_vcl_iname_array
(
knl
,
orig_iname
,
inner_iname
,
vcl_size
,
level
)
knl
=
add_vcl_accum_insns
(
knl
,
inner_iname
,
outer_iname
,
vcl_size
,
level
)
knl
=
add_vcl_access
(
knl
,
inner_iname
,
vcl_size
,
level
)
knl
=
add_vcl_access
(
knl
,
inner_iname
,
vcl_size
,
level
)
if
tail_size
>
0
and
vectorize_tail
:
if
tail_size
>
0
:
knl
=
_do_vectorization
(
knl
,
tail_iname
,
tail_size
,
tail_vcl_size
,
level
+
1
)
knl
=
add_tail_dependencies
(
knl
,
level
)
if
vectorize_tail
:
knl
=
_do_vectorization
(
knl
,
tail_iname
,
tail_size
,
tail_vcl_size
,
level
+
1
)
return
knl
return
knl
knl
=
_do_vectorization
(
knl
,
orig_iname
,
get_form_option
(
'
number_of_blocks
'
),
vcl_size
)
knl
=
_do_vectorization
(
knl
,
orig_iname
,
get_form_option
(
'
number_of_blocks
'
),
vcl_size
)
return
knl
return
knl
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment