diff --git a/bin/donkey_benchmark_wrapper.py b/bin/donkey_benchmark_wrapper.py
new file mode 100644
index 0000000000000000000000000000000000000000..8e5740c800582e1f31ede4530e8da7d60429c431
--- /dev/null
+++ b/bin/donkey_benchmark_wrapper.py
@@ -0,0 +1,34 @@
+#!/usr/bin/env python
+"""Run a benchmark executable through srun and wait for its result file.
+
+Usage: donkey_benchmark_wrapper.py <executable> <output_file>
+
+Both arguments are forwarded to the command launched by srun. If that command
+returns a non-zero code, the code is passed to sys.exit. Otherwise the script
+blocks until <output_file> is visible as a regular file on the filesystem.
+"""
+
+import os
+import subprocess
+import sys
+import time
+
+# Both the executable and the output file are required below.
+if len(sys.argv) < 3:
+    sys.exit("Usage: {} <executable> <output_file>".format(sys.argv[0]))
+
+# Run the actual command. All arguments are forwarded, because the generated
+# benchmark program opens its first argument as the file to write results to.
+command = "srun -p haswell10c -n 20 -c 2 --cpu_bin=verbose,core".split()
+command.extend(sys.argv[1:])
+ret = subprocess.call(command)
+
+# If that failed - fail!
+if ret != 0:
+    sys.exit(ret)
+
+# If that was successful, wait for the output file to be available on the filesystem
+# This step is necessary because the NFS synchronization is too slow for our workflow.
+# NOTE: there is no timeout; this loops until the file appears.
+while not os.path.isfile(sys.argv[2]):
+    time.sleep(0.1)
diff --git a/bin/donkey_benchmark_wrapper.sh b/bin/donkey_benchmark_wrapper.sh
deleted file mode 100755
index 4c4a93ca829b8a320b261fc389f130c302862b64..0000000000000000000000000000000000000000
--- a/bin/donkey_benchmark_wrapper.sh
+++ /dev/null
@@ -1,6 +0,0 @@
-#!/bin/bash
-
-srun -p haswell10c -n 20 -c 2 --cpu_bin=verbose,core $1
-
-# Give the file system some time to get our results file handled
-sleep 0.1s
diff --git a/python/dune/perftool/sumfact/autotune.py b/python/dune/perftool/sumfact/autotune.py
index ffe643362f3816f2512125da06c75805bceaa748..41a8ff141a6614a3ba1bf28ff2556cd3d2ac87d2 100644
--- a/python/dune/perftool/sumfact/autotune.py
+++ b/python/dune/perftool/sumfact/autotune.py
@@ -63,7 +63,7 @@ def compiler_invocation(name, filename):
     return compile_flags
 
 
-def generate_standalone_code(sf, filename, logname):
+def generate_standalone_code(sf, filename):
     delete_cache_items("kernel_default")
 
     with open(filename, "w") as f:
@@ -174,7 +174,7 @@ def generate_standalone_code(sf, filename, logname):
         # Stop the TSC timer and write the result to a file
         f.writelines(["  auto stop = Dune::PDELab::TSC::stop();\n",
                       "  std::ofstream file;\n",
-                      "  file.open(\"{}\");\n".format(logname),
+                      "  file.open(argv[1]);\n",
                       "  file << Dune::PDELab::TSC::elapsed(start, stop) << std::endl;\n",
                       "  file.close();\n",
                       "  accum += output[dis(rng)];\n",
@@ -200,7 +200,7 @@ def autotune_realization(sf):
     with cache_restoring():
         with filelock.FileLock(lock):
             if not os.path.isfile(logname):
-                generate_standalone_code(sf, filename, logname)
+                generate_standalone_code(sf, filename)
 
                 ret = subprocess.call(compiler_invocation(name, filename))
                 assert ret == 0
@@ -213,6 +213,7 @@ def autotune_realization(sf):
 
                 # Run the benchmark program
                 call.append(name)
+                call.append(logname)
                 devnull = open(os.devnull, 'w')
                 ret = subprocess.call(call, stdout=devnull, stderr=subprocess.STDOUT)
                 assert ret == 0