From b2f42bb9b384a7a924b5cafecf4b28815d57d413 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20He=C3=9F?= <rene.hess@iwr.uni-heidelberg.de> Date: Thu, 13 Oct 2016 15:54:34 +0200 Subject: [PATCH] Create buffers and some prep. for sumfact --- python/dune/perftool/pdelab/quadrature.py | 2 +- python/dune/perftool/sumfact/amatrix.py | 19 +++- python/dune/perftool/sumfact/sumfact.py | 123 ++++++++++++++++++++++ test/sumfact/poisson/poisson.ufl | 8 +- 4 files changed, 145 insertions(+), 7 deletions(-) create mode 100644 python/dune/perftool/sumfact/sumfact.py diff --git a/python/dune/perftool/pdelab/quadrature.py b/python/dune/perftool/pdelab/quadrature.py index 91a25b33..637d391c 100644 --- a/python/dune/perftool/pdelab/quadrature.py +++ b/python/dune/perftool/pdelab/quadrature.py @@ -70,7 +70,7 @@ def name_order(): # TODO how to determine integration order in a generic fashion here? # Or maybe just read it from the ini file? Or define some mapping from the # finite elements? - return "2" + return "4" def quadrature_loop_statement(): diff --git a/python/dune/perftool/sumfact/amatrix.py b/python/dune/perftool/sumfact/amatrix.py index f1b8a620..637c77de 100644 --- a/python/dune/perftool/sumfact/amatrix.py +++ b/python/dune/perftool/sumfact/amatrix.py @@ -29,7 +29,7 @@ def quadrature_points_per_direction(): # as soon as we have a generic implementation # Quadrature order - q = 2 + q = 4 # Quadrature points in per direction nb_qp = q // 2 + 1 @@ -159,6 +159,7 @@ def construct_theta(name): @class_member("operator") def typedef_theta(name): + include_file("dune/perftool/sumfact/alignedmatvec.hh", filetag="operatorfile") alignment = name_alignment() range_field = lop_template_range_field() return "using {} = AlignedMatrix<{}, {}>;".format(name, range_field, alignment) @@ -187,5 +188,17 @@ def name_theta(): def evaluate_coefficient_gradient_sumfact(): - include_file("dune/perftool/sumfact/alignedmatvec.hh", filetag="operatorfile") - name_theta() + # TODO this code is WIP and mainly used for experiments. + + name = name_theta() + # TODO Hack + # m = name_number_of_quadrature_points_per_direction() + # n = name_number_of_basis_functions_per_direction() + m = quadrature_points_per_direction() + n = basis_functions_per_direction() + from dune.perftool.sumfact.sumfact import AMatrix, sumfact + a_matrix = AMatrix(name, m, n) + a_matrices = (a_matrix, a_matrix) + inp = 0 + out = -1 + sumfact(a_matrices, inp, out) diff --git a/python/dune/perftool/sumfact/sumfact.py b/python/dune/perftool/sumfact/sumfact.py new file mode 100644 index 00000000..47111314 --- /dev/null +++ b/python/dune/perftool/sumfact/sumfact.py @@ -0,0 +1,123 @@ +from dune.perftool.generation import (class_member, + constructor_block, + domain, + iname, + instruction, + no_caching, + preamble, + ) + +from dune.perftool.sumfact.amatrix import name_alignment + +from dune.perftool.pdelab.localoperator import ( + lop_template_range_field, +) + +class AMatrix(object): + def __init__(self, a_matrix, m, n): + self.a_matrix = a_matrix + self.m = m + self.n = n + + def __str__(self): + return "a_matrix: {}\nm: {}\nn: {}".format(self.a_matrix, self.m, self.n) + + +class _GlobalBufferCounter(object): + counter = 0 + + def get(self): + _GlobalBufferCounter.counter = _GlobalBufferCounter.counter + 1 + return _GlobalBufferCounter.counter + + +@constructor_block("operator") +def construct_buffer(name, a_matrices): + # buffer1 = (T*) aligned_malloc(8*std::max(static_cast<size_t>(M),static_cast<size_t>(N))*sizeof(T),alignment); + # if (buffer1==0) throw std::bad_alloc(); + range_field = lop_template_range_field() + + # Upper bound for the number of elements in the buffer. Note: This + # is usually much too large and could be improved. + max_size = 1 + for mat in a_matrices: + max_size *= max(mat.n, mat.m) + + alignment = name_alignment() + return ["{} = ({}*) aligned_malloc({}, {});".format(name, range_field, max_size, alignment), + "if ({}==0) throw std::bad_alloc();".format(name)] + + + +def type_buffer(): + range_field = lop_template_range_field() + return "mutable {}*".format(range_field) + + +@class_member("operator") +def define_buffer(name, a_matrices): + buffer_type = type_buffer() + construct_buffer(name, a_matrices) + return "{} {};".format(buffer_type, name) + + +def name_buffer(a_matrices): + name = "sumfact_buffer_" + str(_GlobalBufferCounter().get()) + define_buffer(name, a_matrices) + return name + + +def sumfact(a_matrices, inp, out): + # TODO: Think about caching. + + # TODO: Some code suggests that the code works for different n/m + # per direction (eg. n_1 for x-direction and n_2 for y-direction, + # of course n!=m is allowed!). This is not tested at all + assert len(a_matrices) > 0 + n = a_matrices[0].n + m = a_matrices[0].m + for mat in a_matrices: + assert n == mat.n + assert m == mat.m + + # Special case for 1D + if len(a_matrices) == 1: + sumfact_kernel(a_matrices[0], inp, out) + else: + # Create buffer + buffer_1 = name_buffer(a_matrices) + if len(a_matrices) > 2: + buffer_2 = name_buffer(a_matrices) + + if len(a_matrices)%2 == 0: + # First kernel + sumfact_kernel(a_matrices[0], inp, buffer_1) + + # In between kernels + for mat_1, mat_2 in zip(a_matrices[1:-2:2], a_matrices[2:-1:2]): + sumfact_kernel(mat_1, buffer_1, buffer_2) + sumfact_kernel(mat_2, buffer_2, buffer_1) + + # Last kernel + sumfact_kernel(a_matrices[-1], buffer_1, out) + None + else: + # First kernel + sumfact_kernel(a_matrices[0], inp, buffer_1) + + # Second kernel + sumfact_kernel(a_matrices[1], buffer_1, buffer_2) + + # In between kernels + for mat_2, mat_1 in zip(a_matrices[2:-2:2], a_matrices[3:-1:2]): + sumfact_kernel(mat_2, buffer_2, buffer_1) + sumfact_kernel(mat_1, buffer_1, buffer_2) + + # Last kernel + sumfact_kernel(a_matrices[-1], buffer_2, out) + + +@preamble(cache_key_generator=no_caching) +def sumfact_kernel(a_matrix, inp, out): + print("sumfact kernel, inp: {}, out: {}".format(inp, out)) + return "//// palpo" diff --git a/test/sumfact/poisson/poisson.ufl b/test/sumfact/poisson/poisson.ufl index 8c62dd3a..576c603e 100644 --- a/test/sumfact/poisson/poisson.ufl +++ b/test/sumfact/poisson/poisson.ufl @@ -1,7 +1,9 @@ -f = Expression("Dune::FieldVector<double,2> c(0.5); c-= x; return 4.*(1.-c.two_norm2())*std::exp(-1.*c.two_norm2());") -g = Expression("Dune::FieldVector<double,2> c(0.5); c-= x; return std::exp(-1.*c.two_norm2());") +cell = "quadrilateral" -V = FiniteElement("CG", "triangle", 1, dirichlet_expression=g) +f = Expression("Dune::FieldVector<double,2> c(0.5); c-= x; return 4.*(1.-c.two_norm2())*std::exp(-1.*c.two_norm2());", cell=cell) +g = Expression("Dune::FieldVector<double,2> c(0.5); c-= x; return std::exp(-1.*c.two_norm2());", cell=cell) + +V = FiniteElement("CG", cell, 1, dirichlet_expression=g) u = TrialFunction(V) v = TestFunction(V) -- GitLab