Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Unit Testing Helper #136

Merged
merged 13 commits into from
Nov 14, 2024
3 changes: 2 additions & 1 deletion .github/workflows/coverage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,6 @@ jobs:
coverage:
uses: pylhc/.github/.github/workflows/coverage.yml@master
with:
src-dir: tfs
src-dir: tfs
pytest-options: -m "not cern_network" --cov-report term-missing
secrets: inherit
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# TFS-Pandas Changelog

## Version 3.9.0

- Added:
- A new, publicly available module, `tfs.testing`, has been added. It provides an assert function to compare `TfsDataFrame` objects, similar to the one provided by `pandas` for `DataFrame` objects, and is intended for use in unit tests.

## Version 3.8.2

- Changed:
Expand Down
5 changes: 5 additions & 0 deletions doc/modules/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,11 @@ API Reference
:noindex:


.. automodule:: tfs.testing
:members:
:noindex:


.. automodule:: tfs.tools
:members:
:noindex:
Expand Down
69 changes: 69 additions & 0 deletions tests/test_testing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
import pytest

from tfs.frame import TfsDataFrame
from tfs.testing import assert_tfs_frame_equal


class TestAssertTfsDataFrameEqual:
    """Unit tests for `assert_tfs_frame_equal`, covering data and headers comparison."""

    def test_no_headers_equal(self):
        """A frame built without headers compares equal to itself."""
        df1 = TfsDataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
        assert_tfs_frame_equal(df1, df1)  # must not raise

    def test_no_headers_different_data(self):
        """A single differing value in the data makes the comparison fail."""
        df1 = TfsDataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
        df2 = TfsDataFrame({"a": [1, 2, 2], "b": [4, 5, 6]})
        with pytest.raises(AssertionError):
            assert_tfs_frame_equal(df1, df2)

    def test_no_headers_different_order(self):
        """Column order matters unless `check_like=True` is forwarded to pandas."""
        df1 = TfsDataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
        df2 = TfsDataFrame({"b": [4, 5, 6], "a": [1, 2, 3]})
        with pytest.raises(AssertionError):
            assert_tfs_frame_equal(df1, df2)
        assert_tfs_frame_equal(df1, df2, check_like=True)

    def test_with_headers_equal(self):
        """Identical headers: the outcome is driven by the data comparison only."""
        df1 = TfsDataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, headers={"a": "a", "b": "b"})
        df2 = TfsDataFrame({"b": [4, 5, 6], "a": [1, 2, 3]}, headers={"a": "a", "b": "b"})
        assert_tfs_frame_equal(df1, df1)
        with pytest.raises(AssertionError):
            assert_tfs_frame_equal(df1, df2)
        assert_tfs_frame_equal(df1, df2, check_like=True)

    def test_with_headers_different_data(self):
        """Identical headers do not mask a difference in the data."""
        df1 = TfsDataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, headers={"a": "a", "b": "b"})
        df2 = TfsDataFrame({"a": [1, 2, 2], "b": [4, 5, 6]}, headers={"a": "a", "b": "b"})
        with pytest.raises(AssertionError):
            assert_tfs_frame_equal(df1, df2)

    def test_with_headers_different_datatypes(self):
        """Columns holding the same values with different dtypes are not equal."""
        df1 = TfsDataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, headers={"a": "a", "b": "b"})
        df2 = TfsDataFrame({"a": [1, 2, 3], "b": ["4", "5", "6"]}, headers={"a": "a", "b": "b"})
        with pytest.raises(AssertionError):
            assert_tfs_frame_equal(df1, df2)

        df3 = TfsDataFrame({"a": [1.0, 2.0, 3.0], "b": [4, 5, 6]}, headers={"a": "a", "b": "b"})
        with pytest.raises(AssertionError) as e:
            assert_tfs_frame_equal(df1, df3)
        # Inspect the raised exception itself (`e.value`), not the ExceptionInfo wrapper.
        assert "dtype" in str(e.value)

    def test_with_headers_different_headers_values(self):
        """A differing header value fails regardless of the `compare_keys` setting."""
        df1 = TfsDataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, headers={"a": "a", "b": "b"})
        df2 = TfsDataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, headers={"a": "a", "b": "c"})
        with pytest.raises(AssertionError) as e:
            assert_tfs_frame_equal(df1, df2)
        assert "b != c" in str(e.value)

        with pytest.raises(AssertionError) as e:
            assert_tfs_frame_equal(df1, df2, compare_keys=False)
        assert "b != c" in str(e.value)

    def test_with_headers_different_headers_keys(self):
        """An extra header key in the second frame only fails with `compare_keys=True`."""
        df1 = TfsDataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, headers={"a": "a", "b": "b"})
        df2 = TfsDataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, headers={"a": "a", "b": "b", "c": "c"})
        with pytest.raises(AssertionError):
            assert_tfs_frame_equal(df1, df2)  # `compare_keys=True` is default

        # only the keys of the reference (first) headers are compared ---
        assert_tfs_frame_equal(df1, df2, compare_keys=False)
2 changes: 1 addition & 1 deletion tfs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
__title__ = "tfs-pandas"
__description__ = "Read and write tfs files."
__url__ = "https://github.com/pylhc/tfs"
__version__ = "3.8.2"
__version__ = "3.9.0"
__author__ = "pylhc"
__author_email__ = "[email protected]"
__license__ = "MIT"
Expand Down
63 changes: 63 additions & 0 deletions tfs/testing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
"""
Testing
-------

Testing functionality for TfsDataFrames.
"""

from __future__ import annotations

from typing import TYPE_CHECKING

from pandas._testing import assert_dict_equal
from pandas.testing import assert_frame_equal

if TYPE_CHECKING:
from tfs.frame import TfsDataFrame


# ----- Helpers ----- #


def assert_tfs_frame_equal(
    df1: TfsDataFrame, df2: TfsDataFrame, compare_keys: bool = True, **kwargs
):
    """
    Compare two `TfsDataFrame` objects, with `df1` being the reference
    that `df2` is compared to. This is mostly intended for unit tests.
    Comparison is done on both the data itself (with `pandas`'s
    `assert_frame_equal`) and the contents of the headers dictionaries
    (with `pandas`'s `assert_dict_equal`), in that order.

    .. note::
        The `compare_keys` argument is inherited from `pandas`'s
        `assert_dict_equal` function and is quite unintuitive. It
        means to check that both dictionaries have *the exact same
        set of keys*.

        Whether this is given as `True` or `False`, the values are
        compared anyway for all keys in the first (reference) dict.
        In the case of this helper function, all keys present in
        `df1`'s headers will be checked for in `df2`'s headers and
        their corresponding values compared. A key of `df1`'s headers
        that is absent from `df2`'s headers therefore always fails
        the comparison. If given as `True`, then both headers should
        be the exact same dictionary.

    Args:
        df1 (TfsDataFrame): The first `TfsDataFrame` to compare.
        df2 (TfsDataFrame): The second `TfsDataFrame` to compare.
        compare_keys (bool): If `True`, checks that both headers
            have the exact same set of keys. See the above note
            for exact meaning and caveat. Defaults to `True`.
        **kwargs: Additional keyword arguments are transmitted to
            `pandas.testing.assert_frame_equal` for the comparison of
            the dataframe parts themselves.

    Raises:
        AssertionError: If the data or the headers differ, including
            when a key of `df1`'s headers is missing from `df2`'s.

    Example:
        .. code-block:: python

            reference_df = tfs.read("path/to/file.tfs")
            new_df = some_function(*args, **kwargs)
            assert_tfs_frame_equal(reference_df, new_df)
    """
    assert_frame_equal(df1, df2, **kwargs)

    try:
        assert_dict_equal(df1.headers, df2.headers, compare_keys=compare_keys)
    except KeyError as error:
        # With `compare_keys=False` the pandas helper looks up every key of the
        # reference headers in the other dict; a missing key would escape as a
        # `KeyError`. Report it as a failed assertion, as callers of an assert
        # helper expect.
        raise AssertionError(
            f"Header key {error} of the reference frame is missing from the compared frame's headers"
        ) from error