Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
D
dune-codegen
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
This is an archived project. Repository and other project resources are read-only.
Show more breadcrumbs
Christian Heinigk
dune-codegen
Commits
2c9cdfc6
Commit
2c9cdfc6
authored
7 years ago
by
Dominic Kempf
Browse files
Options
Downloads
Patches
Plain Diff
First improvement of vectorization strategy
parent
f84650ac
No related branches found
Branches containing commit
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
python/dune/perftool/sumfact/symbolic.py
+23
-8
23 additions, 8 deletions
python/dune/perftool/sumfact/symbolic.py
python/dune/perftool/sumfact/vectorization.py
+21
-25
21 additions, 25 deletions
python/dune/perftool/sumfact/vectorization.py
with
44 additions
and
33 deletions
python/dune/perftool/sumfact/symbolic.py
+
23
−
8
View file @
2c9cdfc6
...
@@ -130,12 +130,35 @@ class SumfactKernel(ImmutableRecord, prim.Variable):
...
@@ -130,12 +130,35 @@ class SumfactKernel(ImmutableRecord, prim.Variable):
mapper_method
=
"
map_sumfact_kernel
"
mapper_method
=
"
map_sumfact_kernel
"
#
# Some cache key definitions
# Watch out for the documentation to see which key is used under what circumstances
#
@property
def
cache_key
(
self
):
"""
The cache key that can be used in generation magic
Any two sum factorization kernels having the same cache_key
are realized simultaneously!
"""
return
(
self
.
matrix_sequence
,
self
.
restriction
,
self
.
stage
,
self
.
buffer
)
@property
def
input_key
(
self
):
"""
A cache key for the input coefficients
Any two sum factorization kernels having the same input_key
work on the same input coefficient (and are suitable for simultaneous
treatment because of that)
"""
return
(
self
.
restriction
,
self
.
stage
,
self
.
coeff_func
,
self
.
element
,
self
.
component
,
self
.
accumvar
)
#
#
# Some convenience methods to extract information about the sum factorization kernel
# Some convenience methods to extract information about the sum factorization kernel
#
#
@property
@property
def
length
(
self
):
def
length
(
self
):
"""
The number of matrices to apply
"""
return
len
(
self
.
matrix_sequence
)
return
len
(
self
.
matrix_sequence
)
@property
@property
...
@@ -146,14 +169,6 @@ class SumfactKernel(ImmutableRecord, prim.Variable):
...
@@ -146,14 +169,6 @@ class SumfactKernel(ImmutableRecord, prim.Variable):
def
transposed
(
self
):
def
transposed
(
self
):
return
next
(
iter
(
self
.
matrix_sequence
)).
transpose
return
next
(
iter
(
self
.
matrix_sequence
)).
transpose
@property
def
cache_key
(
self
):
"""
The cache key that can be used in generation magic,
Any two sum factorization kernels having the same cache_key
are realized simultaneously!
"""
return
hash
((
self
.
matrix_sequence
,
self
.
restriction
,
self
.
stage
,
self
.
buffer
))
@property
@property
def
vec_index
(
self
):
def
vec_index
(
self
):
"""
A tuple with the vector index
"""
A tuple with the vector index
...
...
This diff is collapsed.
Click to expand it.
python/dune/perftool/sumfact/vectorization.py
+
21
−
25
View file @
2c9cdfc6
...
@@ -39,13 +39,12 @@ def no_vectorization(sumfacts):
...
@@ -39,13 +39,12 @@ def no_vectorization(sumfacts):
input
=
get_counted_variable
(
"
input
"
)))
input
=
get_counted_variable
(
"
input
"
)))
def
decide_stage_vectorization_strategy
(
sumfacts
,
stage
,
restriction
):
def
horizontal_vectorization_strategy
(
sumfacts
):
stage_sumfacts
=
frozenset
([
sf
for
sf
in
sumfacts
if
sf
.
stage
==
stage
and
sf
.
restriction
==
restriction
])
if
len
(
sumfacts
)
in
(
3
,
4
):
if
len
(
stage_sumfacts
)
in
(
3
,
4
):
# Map the sum factorization to their position in the joint kernel
# Map the sum factorization to their position in the joint kernel
position_mapping
=
{}
position_mapping
=
{}
available
=
set
(
range
(
4
))
available
=
set
(
range
(
4
))
for
sf
in
stage_
sumfacts
:
for
sf
in
sumfacts
:
if
sf
.
preferred_position
is
not
None
:
if
sf
.
preferred_position
is
not
None
:
# This asserts that no two kernels want to take the same position
# This asserts that no two kernels want to take the same position
# Later on, more complicated stuff might be necessary here.
# Later on, more complicated stuff might be necessary here.
...
@@ -54,7 +53,7 @@ def decide_stage_vectorization_strategy(sumfacts, stage, restriction):
...
@@ -54,7 +53,7 @@ def decide_stage_vectorization_strategy(sumfacts, stage, restriction):
position_mapping
[
sf
]
=
sf
.
preferred_position
position_mapping
[
sf
]
=
sf
.
preferred_position
# Choose a position for those that have no preferred one!
# Choose a position for those that have no preferred one!
for
sumf
in
stage_
sumfacts
:
for
sumf
in
sumfacts
:
if
sumf
.
preferred_position
is
None
:
if
sumf
.
preferred_position
is
None
:
position_mapping
[
sumf
]
=
available
.
pop
()
position_mapping
[
sumf
]
=
available
.
pop
()
...
@@ -64,37 +63,37 @@ def decide_stage_vectorization_strategy(sumfacts, stage, restriction):
...
@@ -64,37 +63,37 @@ def decide_stage_vectorization_strategy(sumfacts, stage, restriction):
# Collect the large matrices!
# Collect the large matrices!
large_matrix_sequence
=
[]
large_matrix_sequence
=
[]
for
i
in
range
(
len
(
next
(
iter
(
stage_
sumfacts
)).
matrix_sequence
)):
for
i
in
range
(
len
(
next
(
iter
(
sumfacts
)).
matrix_sequence
)):
# Assert that the matrices of all sum factorizations have the same size
# Assert that the matrices of all sum factorizations have the same size
assert
len
(
set
(
tuple
(
sf
.
matrix_sequence
[
i
].
rows
for
sf
in
stage_
sumfacts
)))
==
1
assert
len
(
set
(
tuple
(
sf
.
matrix_sequence
[
i
].
rows
for
sf
in
sumfacts
)))
==
1
assert
len
(
set
(
tuple
(
sf
.
matrix_sequence
[
i
].
cols
for
sf
in
stage_
sumfacts
)))
==
1
assert
len
(
set
(
tuple
(
sf
.
matrix_sequence
[
i
].
cols
for
sf
in
sumfacts
)))
==
1
# Collect the derivative information
# Collect the derivative information
derivative
=
[
False
]
*
4
derivative
=
[
False
]
*
4
for
sf
in
stage_
sumfacts
:
for
sf
in
sumfacts
:
derivative
[
position_mapping
[
sf
]]
=
sf
.
matrix_sequence
[
i
].
derivative
derivative
[
position_mapping
[
sf
]]
=
sf
.
matrix_sequence
[
i
].
derivative
large
=
BasisTabulationMatrixArray
(
rows
=
next
(
iter
(
stage_
sumfacts
)).
matrix_sequence
[
i
].
rows
,
large
=
BasisTabulationMatrixArray
(
rows
=
next
(
iter
(
sumfacts
)).
matrix_sequence
[
i
].
rows
,
cols
=
next
(
iter
(
stage_
sumfacts
)).
matrix_sequence
[
i
].
cols
,
cols
=
next
(
iter
(
sumfacts
)).
matrix_sequence
[
i
].
cols
,
transpose
=
next
(
iter
(
stage_
sumfacts
)).
matrix_sequence
[
i
].
transpose
,
transpose
=
next
(
iter
(
sumfacts
)).
matrix_sequence
[
i
].
transpose
,
derivative
=
tuple
(
derivative
),
derivative
=
tuple
(
derivative
),
face
=
next
(
iter
(
stage_
sumfacts
)).
matrix_sequence
[
i
].
face
,
face
=
next
(
iter
(
sumfacts
)).
matrix_sequence
[
i
].
face
,
)
)
large_matrix_sequence
.
append
(
large
)
large_matrix_sequence
.
append
(
large
)
for
sumf
in
stage_
sumfacts
:
for
sumf
in
sumfacts
:
_cache_vectorization_info
(
sumf
,
_cache_vectorization_info
(
sumf
,
sumf
.
copy
(
matrix_sequence
=
tuple
(
large_matrix_sequence
),
sumf
.
copy
(
matrix_sequence
=
tuple
(
large_matrix_sequence
),
buffer
=
buf
,
buffer
=
buf
,
input
=
inp
,
input
=
inp
,
index
=
position_mapping
[
sumf
],
index
=
position_mapping
[
sumf
],
padding
=
frozenset
(
available
),
padding
=
frozenset
(
available
),
insn_dep
=
frozenset
().
union
(
sf
.
insn_dep
for
sf
in
stage_
sumfacts
),
insn_dep
=
frozenset
().
union
(
sf
.
insn_dep
for
sf
in
sumfacts
),
)
)
)
)
else
:
else
:
# Disable vectorization strategy
# Disable vectorization strategy
no_vectorization
(
stage_
sumfacts
)
no_vectorization
(
sumfacts
)
def
decide_vectorization_strategy
():
def
decide_vectorization_strategy
():
...
@@ -108,12 +107,9 @@ def decide_vectorization_strategy():
...
@@ -108,12 +107,9 @@ def decide_vectorization_strategy():
if
not
get_option
(
"
vectorize_grads
"
):
if
not
get_option
(
"
vectorize_grads
"
):
no_vectorization
(
sumfacts
)
no_vectorization
(
sumfacts
)
else
:
else
:
res
=
(
Restriction
.
NONE
,
Restriction
.
POSITIVE
,
Restriction
.
NEGATIVE
)
# Currently we base our idea here on the fact that we only group sum
# Stage 1 kernels
# factorization kernels with the same input.
for
restriction
in
res
:
inputkeys
=
set
(
sf
.
input_key
for
sf
in
sumfacts
)
decide_stage_vectorization_strategy
(
sumfacts
,
1
,
restriction
)
for
inputkey
in
inputkeys
:
sumfact_filter
=
[
sf
for
sf
in
sumfacts
if
sf
.
input_key
==
inputkey
]
# Stage 3 kernels
horizontal_vectorization_strategy
(
sumfact_filter
)
import
itertools
as
it
for
restriction
in
it
.
product
(
res
,
res
):
decide_stage_vectorization_strategy
(
sumfacts
,
3
,
restriction
)
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment