Manual/semi-automatic performance regression checking #356

Draft · wants to merge 4 commits into base: develop
62 changes: 62 additions & 0 deletions profiling/performance_regression_checking.sh
@@ -0,0 +1,62 @@
#!/bin/bash
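# Compare profiling performance between the current commit (HEAD) and its parent
# (HEAD~1): the parent is checked out into a temporary git worktree, the profiling
# test suite is run in both trees, and the cumulative profile times are compared.
# The script is assumed to be run from the profiling/ directory of the repository.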

cd ..

# Remember where we start from
current_repo=$(pwd)

# This is where we want to check the other worktree out to
cmp_repo=$current_repo/../pyrealm_performance_check

# Adding the worktree
echo "Add worktree" $cmp_repo
git worktree add $cmp_repo HEAD~1

# Go there and activate poetry environment
cd $cmp_repo
poetry install
#source .venv/bin/activate

# Run the profiling on old commit
echo "Run profiling tests on old commit"
poetry run /usr/bin/time -v pytest -m "profiling" --profile-svg

# Go back into the current repo and run there
cd $current_repo
poetry install
echo "Run profiling tests on new commit"
poetry run /usr/bin/time -v pytest -m "profiling" --profile-svg

# Compare the profiling outputs
cd profiling
python -c "
from pathlib import Path
import simple_benchmarking
import pandas as pd

prof_path_old = Path('$cmp_repo'+'/prof/combined.prof')
print(prof_path_old)
df_old = simple_benchmarking.run_simple_benchmarking(prof_path=prof_path_old)
cumtime_old = (df_old.sum(numeric_only=True)['cumtime'])
print('Old time:', cumtime_old)

prof_path_new = Path('$current_repo'+'/prof/combined.prof')
print(prof_path_new)
df_new = simple_benchmarking.run_simple_benchmarking(prof_path=prof_path_new)
cumtime_new = (df_new.sum(numeric_only=True)['cumtime'])
print('New time:', cumtime_new)

if cumtime_old < cumtime_new:
print('We got slower. :(')
elif cumtime_new < cumtime_old:
print('We got quicker! :)')
else:
print('Times haven\'t changed')
"
cd ..
# Remove the working tree for the comparison commit
echo "Clean up"
git worktree remove --force $cmp_repo
git worktree prune

echo "Done"
96 changes: 96 additions & 0 deletions profiling/simple_benchmarking.py
@@ -0,0 +1,96 @@
"""Run profile benchmarking and generate benchmarking graphics."""

import datetime
import pstats
import sys
import textwrap
from argparse import ArgumentParser
from io import StringIO
from pathlib import Path

import pandas as pd


def run_simple_benchmarking(prof_path: Path) -> pd.DataFrame:
"""Run a simplified benchmarking version.

    The function reads the contents of a ``.prof`` file (typically
    ``prof/combined.prof``) generated by running the profiling test suite and returns
    the profiling data as a standardised ``pandas.DataFrame``.

Args:
prof_path: Path to the profiling output.
"""

# Import the profile data, write the stats report to a StringIO and seek the start
    # to allow the data to be read. The print_stats() call deliberately does not filter
    # for 'pyrealm' because that string can appear in virtual environment paths and
    # leads to inconsistent behaviour across platforms.
sio = StringIO()
p = pstats.Stats(str(prof_path), stream=sio)
p.sort_stats(pstats.SortKey.CUMULATIVE).print_stats()
sio.seek(0)

# Consume lines from the report to find the header row
header_found = False
while not header_found:
header = sio.readline()
if "ncalls" in header:
header_found = True

# Set replacement non-duplicated headers
column_names = [
"ncalls",
"tottime",
"tottime_percall",
"cumtime",
"cumtime_percall",
"filename:lineno(function)",
]

# Convert to a DataFrame using fixed width format
df = pd.read_fwf(sio, engine="python", names=column_names, infer_nrows=10)

# Add a timestamp from the file creation date
m_time = datetime.datetime.fromtimestamp(prof_path.stat().st_mtime)
df["timestamp"] = m_time.isoformat(timespec="seconds")

return df


def run_simple_benchmarking_cli() -> None:
"""Run the simple benchmarking."""

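    # Under python -OO, docstrings are stripped and __doc__ is None, so fall back to a
    # placeholder description for the argument parser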
if run_simple_benchmarking_cli.__doc__ is not None:
doc = " " + run_simple_benchmarking_cli.__doc__
else:
doc = "Python in -OO mode"

parser = ArgumentParser(
description=textwrap.dedent(doc),
)
parser.add_argument(
"prof_path",
type=Path,
help="Path to pytest-profiling output",
)

args = parser.parse_args()

    # Check that the profiling results exist
    if not args.prof_path.exists():
        raise FileNotFoundError(f"Cannot find the profiling file at {args.prof_path}.")

    # run_simple_benchmarking returns a DataFrame, so check for an empty result rather
    # than relying on truthiness, which is ambiguous for DataFrames
    df = run_simple_benchmarking(prof_path=args.prof_path)

    if df.empty:
        print("Benchmarking failed.")
        sys.exit(1)

    print("Benchmarking passed.")
    sys.exit(0)


if __name__ == "__main__":
run_simple_benchmarking_cli()
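
A minimal sketch of the command-line usage, assuming the profiling test suite has already written prof/combined.prof in the repository root (the path used by the shell script above); the exact paths are illustrative:

    # generate the profiling data, then summarise it as a DataFrame
    poetry run pytest -m "profiling" --profile-svg
    poetry run python profiling/simple_benchmarking.py prof/combined.prof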
2 changes: 1 addition & 1 deletion tests/profiling/pmodel/test_profiling_pmodel.py
@@ -28,7 +28,7 @@ def test_profiling_pmodel(pmodel_profile_data):
gpp_c3_annual = pmod_c3.gpp * (60 * 60 * 24 * 365) * 1e-6
gpp_c4_annual = pmod_c4.gpp * (60 * 60 * 24 * 365) * 1e-6

-    # Fit the competition model - making some extrenely poor judgements about what
+    # Fit the competition model - making some extremely poor judgements about what
# is cropland and what is below the minimum temperature that really should be
# fixed.
comp = C3C4Competition(