Skip to content
Snippets Groups Projects
Commit 733675a9 authored by Dominic Kempf's avatar Dominic Kempf
Browse files

WIP checkpoint 1

parent 796c8301
No related branches found
No related tags found
No related merge requests found
......@@ -7,6 +7,7 @@ from pymbolic.mapper.substitutor import make_subst_func
import loopy as lp
import pymbolic.primitives as prim
from pytools import ImmutableRecord
from six.moves import intern
......@@ -15,35 +16,72 @@ from six.moves import intern
#
class SumfactKernel(prim.Variable):
class SumfactKernel(ImmutableRecord, prim.Variable):
def __init__(self,
a_matrices,
buffer,
stage,
preferred_position,
restriction,
a_matrices=None,
buffer=None,
stage=1,
preferred_position=None,
restriction=0,
within_inames=frozenset(),
input=None,
padding=frozenset(),
index=None,
):
# Check the input and apply defaults where necessary
assert isinstance(a_matrices, tuple)
from dune.perftool.sumfact.amatrix import AMatrixBase
assert all(isinstance(m, AMatrixBase) for m in a_matrices)
assert stage in (1, 3)
if preferred_position is not None:
assert isinstance(preferred_position, int)
if not isinstance(restriction, tuple):
restriction = (restriction, 0)
self.a_matrices = a_matrices
self.buffer = buffer
self.stage = stage
self.preferred_position = preferred_position
self.restriction = restriction
assert isinstance(within_inames, frozenset)
ImmutableRecord.__init__(self,
a_matrices=a_matrices,
buffer=buffer,
stage=stage,
preferred_position=preferred_position,
restriction=restriction,
within_inames=within_inames,
input=input,
padding=padding,
index=index,
)
prim.Variable.__init__(self, "SUMFACT")
#
# The methods/fields needed to get a well-formed pymbolic node
#
def __getinitargs__(self):
return (self.a_matrices, self.buffer, self.stage, self.preferred_position, self.restriction)
return (self.a_matrices, self.buffer, self.stage, self.preferred_position, self.restriction, self.within_inames, self.input, self.padding, self.index)
def stringifier(self):
return lp.symbolic.StringifyMapper
init_arg_names = ("a_matrices", "buffer", "stage", "preferred_position", "restriction")
init_arg_names = ("a_matrices", "buffer", "stage", "preferred_position", "restriction", "within_inames", "input", "padding", "index")
mapper_method = "map_sumfact_kernel"
#
# Some convenience methods to extract information about the sum factorization kernel
#
@property
def length(self):
return len(self.a_matrices)
@property
def vectorized(self):
return next(iter(a_matrices)).vectorized
class FusedMultiplyAdd(prim.Expression):
""" Represents an FMA operation """
......
......@@ -35,15 +35,19 @@ import loopy as lp
import numpy
class AMatrix(ImmutableRecord):
class AMatrixBase(ImmutableRecord):
pass
class AMatrix(AMatrixBase):
def __init__(self, rows, cols, transpose=False, derivative=False, face=None):
ImmutableRecord.__init__(self,
rows=rows,
cols=cols,
transpose=transpose,
derivative=derivative,
face=face,
)
AMatrixBase.__init__(self,
rows=rows,
cols=cols,
transpose=transpose,
derivative=derivative,
face=face,
)
@property
def name(self):
......@@ -57,16 +61,16 @@ class AMatrix(ImmutableRecord):
return lp.TaggedVariable(name, "sumfac")
class LargeAMatrix(ImmutableRecord):
class LargeAMatrix(AMatrixBase):
def __init__(self, rows, cols, transpose, derivative, face):
assert isinstance(derivative, tuple)
ImmutableRecord.__init__(self,
rows=rows,
cols=cols,
transpose=transpose,
derivative=derivative,
face=face,
)
AMatrixBase.__init__(self,
rows=rows,
cols=cols,
transpose=transpose,
derivative=derivative,
face=face,
)
@property
def name(self):
......
......@@ -6,6 +6,7 @@ multiplication with the test function is part of the sum factorization kernel.
from dune.perftool.generation import (backend,
domain,
get_counted_variable,
get_counter,
get_global_context_value,
iname,
......@@ -74,9 +75,26 @@ def pymbolic_coefficient_gradient(element, restriction, component, coeff_func, v
facemod=get_facemod(restriction),
)
# Get the vectorization info. If this happens during the dry run, we get dummies
from dune.perftool.sumfact.vectorization import get_vectorization_info
a_matrices, buf, inp, index, padding = get_vectorization_info(a_matrices, restriction)
sf = SumfactKernel(a_matrices=a_matrices,
restriction=restriction,
preferred_position=i,
)
from dune.perftool.sumfact.vectorization import attach_vectorization_info
sf = attach_vectorization_info(sf)
# Extract again, for compatibility
# TODO away!
a_matrices = sf.a_matrices
buf = sf.buffer
inp = sf.input
index = sf.index
padding = sf.padding
if buf is None:
buf = get_counted_variable("buffer")
if inp is None:
inp = get_counted_variable("input")
# Initialize the buffer for the sum fact kernel
shape = (product(mat.cols for mat in a_matrices),)
......@@ -101,7 +119,8 @@ def pymbolic_coefficient_gradient(element, restriction, component, coeff_func, v
# Add a sum factorization kernel that implements the
# evaluation of the gradients of basis functions at quadrature
# points (stage 1)
var, insn_dep = sum_factorization_kernel(a_matrices,
if not get_global_context_value("dry_run", False):
var, insn_dep = sum_factorization_kernel(a_matrices,
buf,
1,
preferred_position=i,
......@@ -110,6 +129,9 @@ def pymbolic_coefficient_gradient(element, restriction, component, coeff_func, v
outshape=tuple(mat.rows for mat in a_matrices if mat.face is None),
direct_input=direct_input,
)
else:
var = sf
buffers.append(var)
# Check whether we want to return early with something that has the indexing
......@@ -144,9 +166,26 @@ def pymbolic_coefficient(element, restriction, component, coeff_func, visitor):
a_matrices = construct_amatrix_sequence(facedir=get_facedir(restriction),
facemod=get_facemod(restriction),)
# Get the vectorization info. If this happens during the dry run, we get dummies
from dune.perftool.sumfact.vectorization import get_vectorization_info
a_matrices, buf, inp, index, padding = get_vectorization_info(a_matrices, restriction)
sf = SumfactKernel(a_matrices=a_matrices,
restriction=restriction,
)
# TODO: Move this away!
from dune.perftool.sumfact.vectorization import attach_vectorization_info
sf = attach_vectorization_info(sf)
# Extract again, for compatibility
# TODO away!
a_matrices = sf.a_matrices
buf = sf.buffer
inp = sf.input
index = sf.index
padding = sf.padding
if buf is None:
buf = get_counted_variable("buffer")
if inp is None:
inp = get_counted_variable("input")
# Flip flop buffers for sumfactorization
shape = (product(mat.cols for mat in a_matrices),)
......@@ -169,7 +208,8 @@ def pymbolic_coefficient(element, restriction, component, coeff_func, visitor):
# Add a sum factorization kernel that implements the evaluation of
# the basis functions at quadrature points (stage 1)
var, _ = sum_factorization_kernel(a_matrices,
if not get_global_context_value("dry_run", False):
var, _ = sum_factorization_kernel(a_matrices,
buf,
1,
preferred_position=None,
......@@ -178,6 +218,8 @@ def pymbolic_coefficient(element, restriction, component, coeff_func, visitor):
restriction=restriction,
direct_input=direct_input,
)
else:
var = sf
if index:
index = (index,)
......
......@@ -14,6 +14,7 @@ from dune.perftool.generation import (backend,
dump_accumulate_timer,
function_mangler,
generator_factory,
get_counted_variable,
get_counter,
get_global_context_value,
globalarg,
......@@ -143,9 +144,28 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id):
facemod=get_facemod(accterm.argument.restriction),
)
# Get the vectorization info. If this happens during the dry run, we get dummies
from dune.perftool.sumfact.vectorization import get_vectorization_info
a_matrices, buf, inp, index, padding = get_vectorization_info(a_matrices, (accterm.argument.restriction, restriction))
sf = SumfactKernel(a_matrices=a_matrices,
restriction=(accterm.argument.restriction, restriction),
stage=3,
preferred_position=i if accterm.new_indices else None
)
# TODO: Move this away!
from dune.perftool.sumfact.vectorization import attach_vectorization_info
sf = attach_vectorization_info(sf)
# Extract again, for compatibility
# TODO away!
a_matrices = sf.a_matrices
buf = sf.buffer
inp = sf.input
index = sf.index
padding = sf.padding
if buf is None:
buf = get_counted_variable("buffer")
if inp is None:
inp = get_counted_variable("input")
# Initialize a base storage for this buffer and get a temporay pointing to it
shape = tuple(mat.cols for mat in a_matrices if mat.face is None)
......@@ -262,7 +282,8 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id):
# Add a sum factorization kernel that implements the multiplication
# with the test function (stage 3)
pref_pos = i if accterm.new_indices else None
result, insn_dep = sum_factorization_kernel(a_matrices,
if not get_global_context_value("dry_run", False):
result, insn_dep = sum_factorization_kernel(a_matrices,
buf,
3,
insn_dep=insn_dep,
......@@ -273,7 +294,8 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id):
direct_output=direct_output,
visitor=visitor
)
else:
result = sf
# Determine the expression to accumulate with. This depends on the vectorization strategy!
result = prim.Subscript(result, tuple(prim.Variable(i) for i in inames))
vecinames = ()
......
""" Sum factorization vectorization """
from dune.perftool.loopy.symbolic import SumfactKernel
from dune.perftool.generation import (generator_factory,
get_counted_variable,
get_global_context_value,
)
from dune.perftool.pdelab.restriction import (Restriction,
restricted_name,
......@@ -12,35 +14,27 @@ from dune.perftool.options import get_option
import loopy as lp
@generator_factory(item_tags=("vecinfo", "dryrundata"), cache_key_generator=lambda a, r, *args: (a, r))
def vectorization_info(a_matrices, restriction, new_a_matrices, buf, inp, index, padding):
return (new_a_matrices, buf, inp, index, padding)
@generator_factory(item_tags=("vecinfo", "dryrundata"), cache_key_generator=lambda o, n: o)
def _cache_vectorization_info(old, new):
if new is None:
raise PerftoolError("Vectorization info for sum factorization kernel was not gathered correctly!")
else:
print("Registering old: {}".format(repr(old)))
print("Registering new: {}".format(repr(new)))
return new
def get_vectorization_info(a_matrices, restriction):
if not isinstance(restriction, tuple):
restriction = (restriction, 0)
from dune.perftool.generation import get_global_context_value
def attach_vectorization_info(sf):
assert isinstance(sf, SumfactKernel)
if get_global_context_value("dry_run"):
# Return dummy data
return (a_matrices, get_counted_variable("buffer"), get_counted_variable("input"), None, frozenset())
# Try getting the vectorization info collected after dry run
vec = vectorization_info(a_matrices, restriction, None, None, None, None, None)
if vec[0] is None:
raise PerftoolError("Sumfact Vectorization data should have been collected in dry run, but wasnt")
return vec
return sf
else:
return _cache_vectorization_info(sf, None)
def no_vectorization(sumfacts):
for sumf in sumfacts:
vectorization_info(sumf.a_matrices,
sumf.restriction,
sumf.a_matrices,
get_counted_variable("buffer"),
get_counted_variable("input"),
None,
frozenset())
for sf in sumfacts:
_cache_vectorization_info(sf, sf)
def decide_stage_vectorization_strategy(sumfacts, stage, restriction):
......@@ -88,7 +82,13 @@ def decide_stage_vectorization_strategy(sumfacts, stage, restriction):
large_a_matrices.append(large)
for sumf in stage_sumfacts:
vectorization_info(sumf.a_matrices, sumf.restriction, tuple(large_a_matrices), buf, inp, position_mapping[sumf], frozenset(available))
_cache_vectorization_info(sumf,
sumf.copy(a_matrices=tuple(large_a_matrices),
buffer=buf,
input=inp,
index=position_mapping[sumf],
padding=frozenset(available))
)
else:
# Disable vectorization strategy
no_vectorization(stage_sumfacts)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment