diff --git a/python/dune/codegen/sumfact/autotune.py b/python/dune/codegen/sumfact/autotune.py index 717b9d804ef849b29e1c3fe3c4d7702010efc6e1..bbe00a78f4552932d29f4cc1a18c07c8384db78a 100644 --- a/python/dune/codegen/sumfact/autotune.py +++ b/python/dune/codegen/sumfact/autotune.py @@ -129,7 +129,7 @@ def write_setup_code(sf, filename, define_thetas=True): f.write("{} = 0;\n".format(arg)) else: size = sf.interface.fastdg_interface_object_size - f.write("RF {}[{}] __attribute__ ((aligned (32)));\n".format(arg.split()[-1], size)) + f.write(" RF {}[{}] __attribute__ ((aligned (32)));\n".format(arg.split()[-1], size)) # Write stuff into the input buffer f.writelines([" {0} *input = ({0} *)buffer0;\n".format(real), @@ -214,9 +214,13 @@ def generate_standalone_code_google_benchmark(sf, filename): write_setup_code(sf, filename, define_thetas=False) + additional_arguments = [i.split()[-1] for i in sf.interface.signature_args] + additional_arguments = ', '.join(additional_arguments) + if len(additional_arguments) > 0: + additional_arguments = ', ' + additional_arguments with open(filename, "a") as f: f.writelines([" for (auto _ : state){\n", - " sumfact_kernel(buffer0, buffer1);\n", + " sumfact_kernel(buffer0, buffer1{});\n".format(additional_arguments), " }\n", "}\n", "BENCHMARK(BM_sumfact_kernel);\n", diff --git a/test/sumfact/poisson/CMakeLists.txt b/test/sumfact/poisson/CMakeLists.txt index 3b16c1da023a17b077df6c1acac29a667288cf39..5bb10fbcd303a916ce1ab353d7ecac8972f5dbe4 100644 --- a/test/sumfact/poisson/CMakeLists.txt +++ b/test/sumfact/poisson/CMakeLists.txt @@ -125,7 +125,12 @@ dune_add_formcompiler_system_test(UFLFILE poisson_dg_3d.ufl #====================================== if(benchmark_FOUND) dune_add_formcompiler_system_test(UFLFILE poisson_3d.ufl - BASENAME sumfact_poisson_3d_benchmark - INIFILE poisson_3d_benchmark.mini - ) + BASENAME sumfact_poisson_3d_benchmark + INIFILE poisson_3d_benchmark.mini + ) + + dune_add_formcompiler_system_test(UFLFILE 
poisson_dg_volumes_3d.ufl + BASENAME sumfact_poisson_fastdg_volumes_3d_benchmark + INIFILE poisson_fastdg_volumes_3d_benchmark.mini + ) endif() diff --git a/test/sumfact/poisson/poisson_dg_volumes_3d.ufl b/test/sumfact/poisson/poisson_dg_volumes_3d.ufl new file mode 100644 index 0000000000000000000000000000000000000000..4e491864547989ce900fa0c4bac527fe673e14b1 --- /dev/null +++ b/test/sumfact/poisson/poisson_dg_volumes_3d.ufl @@ -0,0 +1,38 @@ +cell = hexahedron +dim = 3 + +x = SpatialCoordinate(cell) +c = (0.5-x[0])**2 + (0.5-x[1])**2 + (0.5-x[2])**2 +g = exp(-1.*c) +f = 2*(3.-2*c)*g + +V = FiniteElement("DG", cell, degree) + +u = TrialFunction(V) +v = TestFunction(V) + +n = FacetNormal(cell)('+') + +# penalty factor +alpha = 1.0 +h_ext = CellVolume(cell) / FacetArea(cell) +gamma_ext = (alpha * degree * (degree + dim - 1)) / h_ext +h_int = Min(CellVolume(cell)('+'), CellVolume(cell)('-')) / FacetArea(cell) +gamma_int = (alpha * degree * (degree + dim - 1)) / h_int + +# SIPG: -1.0, IIPG: 0.0, NIPG: 1.0 +theta = 1.0 + +r = inner(grad(u), grad(v))*dx \ + - f*v*dx + + # - inner(n, avg(grad(u)))*jump(v)*dS \ + # + gamma_int*jump(u)*jump(v)*dS \ + # + theta*jump(u)*inner(avg(grad(v)), n)*dS \ + # - inner(n, grad(u))*v*ds \ + # + gamma_ext*u*v*ds \ + # + theta*u*inner(grad(v), n)*ds \ + # - gamma_ext*g*v*ds \ + # - theta*g*inner(grad(v), n)*ds + +exact_solution = g diff --git a/test/sumfact/poisson/poisson_fastdg_volumes_3d_benchmark.mini b/test/sumfact/poisson/poisson_fastdg_volumes_3d_benchmark.mini new file mode 100644 index 0000000000000000000000000000000000000000..253c36bbb6910588b3cabedfa73f3c950ed46e1f --- /dev/null +++ b/test/sumfact/poisson/poisson_fastdg_volumes_3d_benchmark.mini @@ -0,0 +1,32 @@ +__name = sumfact_poisson_fastdg_volumes_3d_benchmark_{__exec_suffix} +__exec_suffix = {deg_suffix}_{diff_suffix}_{quadvec_suffix}_{gradvec_suffix} + +deg_suffix = deg{formcompiler.ufl_variants.degree} +diff_suffix = symdiff +quadvec_suffix = quadvec +gradvec_suffix = 
autotunevec + +cells = 8 8 8 +extension = 1. 1. 1. + +[wrapper.vtkcompare] +name = {__name} +reference = poisson_ref +extension = vtu + +[formcompiler] +# Since this test builds a DG scheme without skeleton terms, the solution is garbage. +# This test only checks the generation of microbenchmarks. +# compare_l2errorsquared = 1e-4 +autotune_google_benchmark = 1 + +[formcompiler.r] +numerical_jacobian = 0 +sumfact = 1 +vectorization_quadloop = 1 +vectorization_strategy = autotune +fastdg = 1 +geometry_mixins = sumfact_equidistant + +[formcompiler.ufl_variants] +degree = 1