Manual/semi-automatic performance regression checking #356

Draft · wants to merge 4 commits into base: develop
62 changes: 62 additions & 0 deletions profiling/performance_regression_checking.sh
@@ -0,0 +1,62 @@
#!/bin/bash
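# Compare profiling performance between the current commit (HEAD) and its parent
# (HEAD~1): the parent is checked out into a temporary git worktree, the profiling
# test suite is run in both trees, and the cumulative profile times are compared.
# The script is assumed to be run from the profiling/ directory of the repository.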

cd ..

# Remember where we start from
current_repo=$(pwd)

# This is where we want to check the other worktree out to
cmp_repo=$current_repo/../pyrealm_performance_check

# Adding the worktree
echo "Add worktree" $cmp_repo
git worktree add $cmp_repo HEAD~1

# Go there and activate poetry environment
cd $cmp_repo
poetry install
#source .venv/bin/activate

# Run the profiling on old commit
echo "Run profiling tests on old commit"
poetry run /usr/bin/time -v pytest -m "profiling" --profile-svg

# Go back into the current repo and run there
cd $current_repo
poetry install
echo "Run profiling tests on new commit"
poetry run /usr/bin/time -v pytest -m "profiling" --profile-svg

# Compare the profiling outputs
cd profiling
python -c "
from pathlib import Path
import simple_benchmarking
import pandas as pd

prof_path_old = Path('$cmp_repo'+'/prof/combined.prof')
print(prof_path_old)
df_old = simple_benchmarking.run_simple_benchmarking(prof_path=prof_path_old)
cumtime_old = (df_old.sum(numeric_only=True)['cumtime'])
print('Old time:', cumtime_old)

prof_path_new = Path('$current_repo'+'/prof/combined.prof')
print(prof_path_new)
df_new = simple_benchmarking.run_simple_benchmarking(prof_path=prof_path_new)
cumtime_new = (df_new.sum(numeric_only=True)['cumtime'])
print('New time:', cumtime_new)

if cumtime_old < cumtime_new:
print('We got slower. :(')
elif cumtime_new < cumtime_old:
print('We got quicker! :)')
else:
print('Times haven\'t changed')
"
cd ..
# Remove the working tree for the comparison commit
echo "Clean up"
git worktree remove --force $cmp_repo
git worktree prune

echo "Done"
96 changes: 96 additions & 0 deletions profiling/simple_benchmarking.py
@@ -0,0 +1,96 @@
"""Run profile benchmarking and generate benchmarking graphics."""

import datetime
import pstats
import sys
import textwrap
from argparse import ArgumentParser
from io import StringIO
from pathlib import Path

import pandas as pd


def run_simple_benchmarking(prof_path: Path) -> pd.DataFrame:
"""Run a simplified benchmarking version.

    The function reads the contents of a ``.prof`` file (typically
    ``prof/combined.prof``) generated by running the profiling test suite and returns
    the profiling data as a standardised ``pandas.DataFrame``.

Args:
prof_path: Path to the profiling output.
"""

# Import the profile data, write the stats report to a StringIO and seek the start
    # to allow the data to be read. The print_stats() call deliberately does not filter
    # for 'pyrealm' because that string can appear in virtual environment paths and
    # leads to inconsistent behaviour across platforms.
sio = StringIO()
p = pstats.Stats(str(prof_path), stream=sio)
p.sort_stats(pstats.SortKey.CUMULATIVE).print_stats()
sio.seek(0)

# Consume lines from the report to find the header row
header_found = False
while not header_found:
header = sio.readline()
if "ncalls" in header:
header_found = True

# Set replacement non-duplicated headers
column_names = [
"ncalls",
"tottime",
"tottime_percall",
"cumtime",
"cumtime_percall",
"filename:lineno(function)",
]

# Convert to a DataFrame using fixed width format
df = pd.read_fwf(sio, engine="python", names=column_names, infer_nrows=10)

# Add a timestamp from the file creation date
m_time = datetime.datetime.fromtimestamp(prof_path.stat().st_mtime)
df["timestamp"] = m_time.isoformat(timespec="seconds")

return df


def run_simple_benchmarking_cli() -> None:
"""Run the simple benchmarking."""

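    # Under python -OO, docstrings are stripped and __doc__ is None, so fall back to a
    # placeholder description for the argument parser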
if run_simple_benchmarking_cli.__doc__ is not None:
doc = " " + run_simple_benchmarking_cli.__doc__
else:
doc = "Python in -OO mode"

parser = ArgumentParser(
description=textwrap.dedent(doc),
)
parser.add_argument(
"prof_path",
type=Path,
help="Path to pytest-profiling output",
)

args = parser.parse_args()

    # Check that the profiling results exist
    if not args.prof_path.exists():
        raise FileNotFoundError(f"Cannot find the profiling file at {args.prof_path}.")

    # run_simple_benchmarking returns a DataFrame, so check for an empty result rather
    # than relying on truthiness, which is ambiguous for DataFrames
    df = run_simple_benchmarking(prof_path=args.prof_path)

    if df.empty:
        print("Benchmarking failed.")
        sys.exit(1)

    print("Benchmarking passed.")
    sys.exit(0)


if __name__ == "__main__":
run_simple_benchmarking_cli()
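
A minimal sketch of the command-line usage, assuming the profiling test suite has already written prof/combined.prof in the repository root (the path used by the shell script above); the exact paths are illustrative:

    # generate the profiling data, then summarise it as a DataFrame
    poetry run pytest -m "profiling" --profile-svg
    poetry run python profiling/simple_benchmarking.py prof/combined.prof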
2 changes: 1 addition & 1 deletion tests/profiling/pmodel/test_profiling_pmodel.py
@@ -28,7 +28,7 @@ def test_profiling_pmodel(pmodel_profile_data):
gpp_c3_annual = pmod_c3.gpp * (60 * 60 * 24 * 365) * 1e-6
gpp_c4_annual = pmod_c4.gpp * (60 * 60 * 24 * 365) * 1e-6

-    # Fit the competition model - making some extrenely poor judgements about what
+    # Fit the competition model - making some extremely poor judgements about what
# is cropland and what is below the minimum temperature that really should be
# fixed.
comp = C3C4Competition(