diff --git a/python/dune/perftool/loopy/target.py b/python/dune/perftool/loopy/target.py index f6a663dba4d5083f70fa437873c24a32a405e8fe..07a2f87cebe354d4d9bd56dd9690895d6435674d 100644 --- a/python/dune/perftool/loopy/target.py +++ b/python/dune/perftool/loopy/target.py @@ -2,6 +2,7 @@ from dune.perftool.generation import post_include from dune.perftool.loopy.temporary import DuneTemporaryVariable from dune.perftool.pdelab.spaces import LFSLocalIndex +from dune.perftool.loopy.types import VCLTypeRegistry from loopy.target import (TargetBase, ASTBuilderBase, @@ -10,6 +11,7 @@ from loopy.target import (TargetBase, from loopy.target.c import CASTBuilder from loopy.target.c.codegen.expression import ExpressionToCExpressionMapper, CExpressionToCodeMapper from loopy.symbolic import FunctionIdentifier +from loopy.types import NumpyType from pymbolic.primitives import Call, Subscript, Variable @@ -72,8 +74,14 @@ class DuneTarget(TargetBase): return DuneASTBuilder(self) def dtype_to_typename(self, dtype): - # For now, we do this the simplest possible way - return _registry[dtype.dtype.name] + if dtype.dtype.kind == "V": + return VCLTypeRegistry.names[dtype.dtype] + else: + return _registry[dtype.dtype.name] def is_vector_dtype(self, dtype): return False + + def vector_dtype(self, base, count): + return NumpyType(VCLTypeRegistry.types[base.numpy_dtype, count], + target=self) diff --git a/python/dune/perftool/loopy/types.py b/python/dune/perftool/loopy/types.py new file mode 100644 index 0000000000000000000000000000000000000000..48eff539ac50be5e5305fe6379bc6bd443359c11 --- /dev/null +++ b/python/dune/perftool/loopy/types.py @@ -0,0 +1,38 @@ +""" +Our extensions to the loopy type system +""" +import numpy as np + + +class VCLTypeRegistry: + pass + + +def _populate_vcl_type_registry(): + VCLTypeRegistry.types = {} + VCLTypeRegistry.names = {} + + # The base types that we are working with! 
+ for base_name, base_type, abbrev in [('float', np.float32, 'f'), + ('double', np.float64, 'd'), + ]: + # The vector width in bits we are considering! + for vector_bits in [128, 256, 512]: + # Calculate the number of lanes (elements of the base type) per vector + count = vector_bits // (np.dtype(base_type).itemsize * 8) + + # Define the name of this vector type + name = "Vec{}{}".format(count, abbrev) + + # Construct the numpy dtype! + fieldnames = tuple("x" + str(i) for i in range(count)) + dtype = np.dtype(dict(names=fieldnames, + formats=[base_type] * count, + ) + ) + + VCLTypeRegistry.types[np.dtype(base_type), count] = dtype + VCLTypeRegistry.names[dtype] = name + + +_populate_vcl_type_registry()