This repository has been archived by the owner on Jan 12, 2024. It is now read-only.
generated from catalyst-cooperative/cheshire
-
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #9 from catalyst-cooperative/dev
Flesh out integration tests.
- Loading branch information
Showing
9 changed files
with
246 additions
and
56 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,34 +1,71 @@ | ||
"""PyTest configuration module. Defines useful fixtures, command line args.""" | ||
from __future__ import annotations | ||
|
||
import logging | ||
import tempfile | ||
from collections.abc import Generator | ||
from pathlib import Path | ||
|
||
import numpy as np | ||
import pandas as pd | ||
import pytest | ||
import sqlalchemy as sa | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
def pytest_addoption(parser: pytest.Parser) -> None: | ||
"""Add package-specific command line options to pytest. | ||
This is slightly magical -- pytest has a hook that will run this function | ||
automatically, adding any options defined here to the internal pytest options that | ||
already exist. | ||
""" | ||
parser.addoption( | ||
"--sandbox", | ||
action="store_true", | ||
default=False, | ||
help="Flag to indicate that the tests should use a sandbox.", | ||
@pytest.fixture(scope="session") | ||
def df1() -> pd.DataFrame:
    """Build a 100-row dataframe of random values whose index is named ``pk``.

    Columns are ``a`` (random floats), ``b`` (random integers below 100) and
    ``c`` (random picks from the letters ``a`` through ``d``).
    """
    n_rows = 100
    letters = ["a", "b", "c", "d"]
    # Columns are generated in a, b, c order and converted to plain Python
    # lists before being handed to pandas.
    columns = {
        "a": np.random.rand(n_rows).tolist(),
        "b": np.random.randint(100, size=n_rows).tolist(),
        "c": np.random.choice(letters, size=n_rows).tolist(),
    }
    return pd.DataFrame(columns).rename_axis("pk")
|
||
|
||
@pytest.fixture(scope="session") | ||
def test_dir() -> Path: | ||
"""Return the path to the top-level directory containing the tests. | ||
def df2() -> pd.DataFrame:
    """Build a 100-row dataframe of random values with a default, unnamed index.

    Columns are ``d`` (random floats), ``e`` (random integers below 100) and
    ``f`` (random picks from the letters ``a`` through ``d``).
    """
    n_rows = 100
    letters = ["a", "b", "c", "d"]
    columns = {
        "d": np.random.rand(n_rows).tolist(),
        "e": np.random.randint(100, size=n_rows).tolist(),
        "f": np.random.choice(letters, size=n_rows).tolist(),
    }
    return pd.DataFrame(columns)
|
||
This might be useful if there's test data stored under the tests directory that | ||
you need to be able to access from elsewhere within the tests. | ||
|
||
Mostly this is meant as an example of a fixture. | ||
""" | ||
return Path(__file__).parent | ||
@pytest.fixture(scope="session") | ||
def temp_db(
    df1: pd.DataFrame,
    df2: pd.DataFrame,
) -> Generator[tuple[str, str, str], None, None]:
    """Create a temporary SQLite DB for use in testing.

    Two tables are created and populated: ``temp`` (from ``df1``, with its
    index stored as the ``pk`` primary key column) and ``temp_nopk`` (from
    ``df2``, without an index column).

    Args:
        df1: Dataframe whose index is written as the ``pk`` column of ``temp``.
        df2: Dataframe written to ``temp_nopk`` without its index.

    Yields:
        A ``(table_name, table_nopk_name, db_path)`` tuple, i.e.
        ``("temp", "temp_nopk", <path to the database file>)``.
    """
    # Leaving the ``with`` block closes the OS-level handle immediately, while
    # delete=False keeps the (empty) file on disk for SQLite to open.
    with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as tmp:
        urlpath = Path(tmp.name)
    engine = sa.create_engine(f"sqlite:///{urlpath}")
    try:
        # engine.begin() commits on success; sa.text() is required because
        # SQLAlchemy 2.x no longer accepts raw SQL strings in execute().
        with engine.begin() as con:
            con.execute(
                sa.text(
                    """CREATE TABLE temp (
                    pk BIGINT PRIMARY KEY,
                    a REAL NOT NULL,
                    b BIGINT NOT NULL,
                    c TEXT NOT NULL);"""
                )
            )
            con.execute(
                sa.text(
                    """CREATE TABLE temp_nopk (
                    d REAL NOT NULL,
                    e BIGINT NOT NULL,
                    f TEXT NOT NULL);"""
                )
            )
            df1.to_sql("temp", con=con, if_exists="append")
            df2.to_sql("temp_nopk", con=con, if_exists="append", index=False)
        yield "temp", "temp_nopk", str(urlpath)
    finally:
        # Release pooled connections before removing the file, and clean up
        # even when table creation or loading failed part-way through.
        engine.dispose()
        if urlpath.is_file():
            urlpath.unlink()
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
"""SQLite Catalog integration tests.""" | ||
from __future__ import annotations | ||
|
||
import logging | ||
|
||
import intake | ||
import pandas as pd | ||
from pandas.testing import assert_frame_equal | ||
|
||
from intake_sqlite import SQLiteCatalog | ||
|
||
# pytest imports this package last, so plugin is not auto-added | ||
intake.register_driver(name="sqlite_cat", driver=SQLiteCatalog) | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
def test_local_sqlite_catalog(
    temp_db: tuple[str, str, str],
    df1: pd.DataFrame,
    df2: pd.DataFrame,
) -> None:
    """Check that both temp DB tables are listed in the catalog and read back intact."""
    pk_table, nopk_table, db_path = temp_db
    catalog = SQLiteCatalog(db_path)
    # Both tables must be present before any of them is read.
    for name in (pk_table, nopk_table):
        assert name in catalog  # nosec: B101
    for name, expected in ((pk_table, df1), (nopk_table, df2)):
        assert_frame_equal(expected, getattr(catalog, name).read())
|
||
|
||
def test_remote_sqlite_catalog() -> None:
    """Check that a SQLiteCatalog built from an HTTPS URL lists the expected table."""
    remote_url = "https://global-power-plants.datasettes.com/global-power-plants.db"
    catalog = SQLiteCatalog(urlpath=remote_url)
    assert "global-power-plants" in catalog  # nosec: B101
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
"""SQLite Intake Source integration tests.""" | ||
from __future__ import annotations | ||
|
||
import logging | ||
|
||
import intake | ||
import pandas as pd | ||
from pandas.testing import assert_frame_equal | ||
|
||
from intake_sqlite import ( | ||
SQLiteSource, | ||
SQLiteSourceAutoPartition, | ||
SQLiteSourceManualPartition, | ||
) | ||
|
||
# pytest imports this package last, so plugin is not auto-added | ||
intake.register_driver(name="sqlite", driver=SQLiteSource) | ||
intake.register_driver(name="sqlite_auto", driver=SQLiteSourceAutoPartition) | ||
intake.register_driver(name="sqlite_manual", driver=SQLiteSourceManualPartition) | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
def test_temp_db_fixture(temp_db: tuple[str, str, str], df1: pd.DataFrame) -> None:
    """Sanity-check the fixture: reading the table with plain pandas returns df1."""
    table_name, _, db_path = temp_db
    sqlite_url = f"sqlite:///{db_path}"
    roundtripped = pd.read_sql(table_name, sqlite_url, index_col="pk")
    assert_frame_equal(df1, roundtripped)
|
||
|
||
def test_simple_src(temp_db: tuple[str, str, str], df1: pd.DataFrame) -> None:
    """Read the whole primary-key table through SQLiteSource and compare with df1."""
    table_name, _, db_path = temp_db
    source = SQLiteSource(db_path, table_name, sql_kwargs={"index_col": "pk"})
    assert_frame_equal(df1, source.read())
|
||
|
||
def test_auto_src_partition(temp_db: tuple[str, str, str], df1: pd.DataFrame) -> None:
    """Read the table in two automatic partitions and compare the result with df1."""
    table_name, _, db_path = temp_db
    expected_partitions = 2
    source = SQLiteSourceAutoPartition(
        db_path,
        table_name,
        index="pk",
        sql_kwargs={"npartitions": expected_partitions},
    )
    # Both the discovered metadata and the dask graph must report two partitions.
    assert source.discover()["npartitions"] == expected_partitions  # nosec: B101
    assert source.to_dask().npartitions == expected_partitions  # nosec: B101
    assert_frame_equal(df1, source.read())
|
||
|
||
def test_manual_src_partition(temp_db: tuple[str, str, str], df1: pd.DataFrame) -> None:
    """Test manual partitioning of table.

    The table is split into two partitions by explicit ``WHERE`` clauses on
    ``pk`` (below 20, and 20 or above); reading the source must give back
    ``df1``.
    """
    # The table without a primary key is not needed in this test.
    table, _, urlpath = temp_db
    s = SQLiteSourceManualPartition(
        urlpath,
        "SELECT * FROM " + table,  # nosec: B608
        where_values=["WHERE pk < 20", "WHERE pk >= 20"],
        sql_kwargs=dict(index_col="pk"),
    )
    assert s.discover()["npartitions"] == 2  # nosec: B101
    assert s.to_dask().npartitions == 2  # nosec: B101
    actual = s.read()
    assert_frame_equal(df1, actual)
|
||
|
||
def test_remote_sqlite_source() -> None:
    """Read a full table from a remote SQLite DB and check the resulting shape."""
    remote_url = "https://global-power-plants.datasettes.com/global-power-plants.db"
    query = "SELECT * FROM 'global-power-plants'"
    source = SQLiteSource(urlpath=remote_url, sql_expr=query)
    frame = source.read()
    assert frame.shape == (34936, 36)  # nosec: B101
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,17 +1,66 @@ | ||
"""A dummy unit test so pytest has something to do.""" | ||
"""SQLite Intake Catalog unit tests.""" | ||
from __future__ import annotations | ||
|
||
import logging | ||
from pathlib import Path | ||
|
||
import pytest | ||
|
||
from intake_sqlite import urlpath_to_sqliteurl | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
TEST_DIR = Path(__file__).parent.parent.resolve() | ||
DATA_DIR = Path(__file__).resolve().parents[1] / "data" | ||
|
||
# Local file names paired with the exception type the tests expect for them.
BAD_FILES: list[tuple[str, type[Exception]]] = [
    (name, ValueError)
    for name in (
        "database.wtf",
        "dbdump.sql",
        "nonexistent.db",
        "nonexistent.sqlite",
    )
]
|
||
# URLs paired with the exception type the tests expect for them.
BAD_URLS: list[tuple[str, type[Exception]]] = [
    (url, ValueError)
    for url in (
        "https://catalyst.coop/pudl.wtf",
        "s3://catalyst.coop/pudl.dude",
        "gs://catalyst.coop/pudl.sql",
        "wtftp://catalyst.coop/pudl.sqlite",
        "wtftp://catalyst.coop/pudl.db",
    )
]
|
||
|
||
@pytest.mark.parametrize("filename,exc", BAD_FILES) | ||
def test_bad_filenames(filename: str, exc: type[Exception], tmp_path: Path) -> None:
    """Expect ``exc`` for a path under ``tmp_path`` that was never created."""
    candidate = str(tmp_path / filename)
    with pytest.raises(exc):
        urlpath_to_sqliteurl(candidate)
|
||
|
||
def test_urlpath_to_sqliteurl() -> None: | ||
@pytest.mark.parametrize("dirname,exc", BAD_FILES) | ||
def test_bad_dirnames(dirname: str, exc: type[Exception], tmp_path: Path) -> None:
    """Expect ``exc`` when the given path is an existing directory, not a file."""
    directory = tmp_path / dirname
    directory.mkdir()
    candidate = str(directory)
    with pytest.raises(exc):
        urlpath_to_sqliteurl(candidate)
|
||
|
||
@pytest.mark.parametrize("url,exc", BAD_URLS) | ||
def test_bad_urls(url: str, exc: type[Exception]) -> None:
    """Expect ``exc`` for each URL listed in the parametrized bad-URL cases."""
    with pytest.raises(exc):
        # The return value is irrelevant; only the raised exception matters.
        urlpath_to_sqliteurl(url)
|
||
|
||
def test_local_path_to_sqliteurl() -> None:
    """Test our transformation of a local path into an SQL Alchemy URL.

    The expected URL is ``sqlite:///`` followed by the path of ``test.db``
    under ``DATA_DIR``.
    """
    test_db_path = DATA_DIR / "test.db"
    # Derive the expectation from the same path object that is passed in, so
    # the two cannot drift apart.
    expected_local_url = f"sqlite:///{test_db_path}"
    actual_local_url = urlpath_to_sqliteurl(str(test_db_path))
    assert actual_local_url == expected_local_url  # nosec: B101
|
||
|
||
# Note: There's no remote URL unit test for a working input to urlpath_to_sqliteurl() | ||
# because it's exercised in the integration tests, and there's no way to know what the | ||
# local path to the cached file will be since it uses a hash (of the URL?) as the | ||
# filename. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters