Skip to content
This repository has been archived by the owner on Jan 12, 2024. It is now read-only.

Commit

Permalink
Merge pull request #9 from catalyst-cooperative/dev
Browse files Browse the repository at this point in the history
Flesh out integration tests.
  • Loading branch information
zaneselvans authored May 20, 2022
2 parents 492df1a + f8e2ffe commit 4fb428c
Show file tree
Hide file tree
Showing 9 changed files with 246 additions and 56 deletions.
3 changes: 3 additions & 0 deletions .mypy.ini
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ ignore_missing_imports = True
[mypy-fsspec.*]
ignore_missing_imports = True

[mypy-intake.*]
ignore_missing_imports = True

[mypy-intake_sql.*]
ignore_missing_imports = True

Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@
"flake8-use-fstring>=1,<2", # Highlight use of old-style string formatting
"fsspec[http]", # Extras required for our specific test cases.
"mccabe>=0.6,<0.8", # Checks that code isn't overly complicated
"msgpack-numpy>=0.4,<0.5", # Required to serialize Numpy arrays
"mypy>=0.942", # Static type checking
"pep8-naming>=0.12,<0.13", # Require PEP8 compliant variable names
"pre-commit>=2.9,<3", # Allow us to run pre-commit hooks in testing
Expand Down
27 changes: 19 additions & 8 deletions src/intake_sqlite/sqlite_src.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import logging
from pathlib import Path
from typing import Any
from urllib.parse import urlparse

import fsspec
from intake_sql import SQLSource, SQLSourceAutoPartition, SQLSourceManualPartition
Expand Down Expand Up @@ -144,14 +145,24 @@ def __init__(

def urlpath_to_sqliteurl(
    urlpath: str, open_kwargs: dict[str, Any] | None = None
) -> str:
    """Transform a file path or URL into a local SQLite URL.

    Args:
        urlpath: Either a path to a local SQLite file, or a URL whose scheme is
            one of ``fsspec.available_protocols()``.
        open_kwargs: Optional keyword arguments forwarded to
            ``fsspec.open_local()`` when ``urlpath`` has a URL scheme. Not used
            for local paths.

    Returns:
        A ``sqlite:///`` URL built from the resolved local path, or from the
        local path that ``fsspec.open_local()`` returns for the
        ``simplecache::``-prefixed URL.

    Raises:
        ValueError: If the path suffix is not in ``SQLITE_SUFFIXES``, if the URL
            scheme is not in ``fsspec.available_protocols()``, or if a
            scheme-less path does not point at an existing file.
    """
    parsed = urlparse(urlpath)
    p = Path(parsed.path)
    if p.suffix not in SQLITE_SUFFIXES:
        raise ValueError(
            f"Expected a SQLite file path ending in one of: {SQLITE_SUFFIXES} "
            f"but got: {p.name}"
        )
    if parsed.scheme != "" and parsed.scheme not in fsspec.available_protocols():
        raise ValueError(f"URL protocol {parsed.scheme} is not supported by fsspec.")
    if parsed.scheme == "" and not p.is_file():
        raise ValueError(f"Local path {p} is not a file!")
    # At this point we know that EITHER:
    # * urlpath is a URL supported by fsspec that looks like an SQLite file OR
    # * p is a local file that looks like an SQLite file
    if parsed.scheme == "":
        # Absolute path to the local SQLite DB:
        local_db_path = p.resolve()
    else:
        # Absolute path to the locally cached SQLite DB. A None default (rather
        # than a shared ``{}``) avoids the mutable-default-argument pitfall.
        local_db_path = fsspec.open_local(
            "simplecache::" + urlpath, **(open_kwargs or {})
        )
    return f"sqlite:///{local_db_path}"
75 changes: 56 additions & 19 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,34 +1,71 @@
"""PyTest configuration module. Defines useful fixtures, command line args."""
from __future__ import annotations

import logging
import tempfile
from collections.abc import Generator
from pathlib import Path

import numpy as np
import pandas as pd
import pytest
import sqlalchemy as sa

logger = logging.getLogger(__name__)


def pytest_addoption(parser: pytest.Parser) -> None:
"""Add package-specific command line options to pytest.
This is slightly magical -- pytest has a hook that will run this function
automatically, adding any options defined here to the internal pytest options that
already exist.
"""
parser.addoption(
"--sandbox",
action="store_true",
default=False,
help="Flag to indicate that the tests should use a sandbox.",
@pytest.fixture(scope="session")
def df1() -> pd.DataFrame:
    """Build a 100-row dataframe of random data whose index is named ``pk``."""
    # Columns are generated in a, b, c order so the sequence of draws from
    # NumPy's global random state is unchanged.
    columns = {
        "a": np.random.rand(100).tolist(),
        "b": np.random.randint(100, size=100).tolist(),
        "c": np.random.choice(["a", "b", "c", "d"], size=100).tolist(),
    }
    frame = pd.DataFrame(columns)
    frame.index.name = "pk"
    return frame


@pytest.fixture(scope="session")
def test_dir() -> Path:
"""Return the path to the top-level directory containing the tests.
def df2() -> pd.DataFrame:
    """Build a 100-row dataframe of random data with a default, unnamed index."""
    # Columns are generated in d, e, f order so the sequence of draws from
    # NumPy's global random state is unchanged.
    columns = {
        "d": np.random.rand(100).tolist(),
        "e": np.random.randint(100, size=100).tolist(),
        "f": np.random.choice(["a", "b", "c", "d"], size=100).tolist(),
    }
    return pd.DataFrame(columns)

This might be useful if there's test data stored under the tests directory that
you need to be able to access from elsewhere within the tests.

Mostly this is meant as an example of a fixture.
"""
return Path(__file__).parent
@pytest.fixture(scope="session")
def temp_db(
    df1: pd.DataFrame,
    df2: pd.DataFrame,
) -> Generator[tuple[str, str, str], None, None]:
    """Create a temporary SQLite DB for use in testing.

    Two tables are created and filled: ``temp`` (from ``df1``, with ``pk`` as
    its primary key) and ``temp_nopk`` (from ``df2``, written without an index).

    Args:
        df1: Dataframe written to the ``temp`` table, index included.
        df2: Dataframe written to the ``temp_nopk`` table, index excluded.

    Yields:
        A tuple of ``(name of the table with a primary key, name of the table
        without one, path to the SQLite file as a string)``.
    """
    # Only the path is needed. Closing the handle right away avoids leaking the
    # open file descriptor that a bare ``tempfile.mkstemp()`` call leaves behind.
    with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as tmp:
        urlpath = Path(tmp.name)
    engine = sa.create_engine(f"sqlite:///{urlpath}")
    try:
        # engine.begin() commits on success, and sa.text() is needed because
        # SQLAlchemy 2.0 no longer accepts raw SQL strings in execute().
        with engine.begin() as con:
            con.execute(
                sa.text(
                    """CREATE TABLE temp (
                    pk BIGINT PRIMARY KEY,
                    a REAL NOT NULL,
                    b BIGINT NOT NULL,
                    c TEXT NOT NULL);"""
                )
            )
            con.execute(
                sa.text(
                    """CREATE TABLE temp_nopk (
                    d REAL NOT NULL,
                    e BIGINT NOT NULL,
                    f TEXT NOT NULL);"""
                )
            )
            df1.to_sql("temp", con=con, if_exists="append")
            df2.to_sql("temp_nopk", con=con, if_exists="append", index=False)
        yield "temp", "temp_nopk", str(urlpath)
    finally:
        # Release pooled connections before deleting the file they point at.
        engine.dispose()
        if urlpath.is_file():
            urlpath.unlink()
24 changes: 0 additions & 24 deletions tests/integration/intake_sqlite_test.py

This file was deleted.

39 changes: 39 additions & 0 deletions tests/integration/sqlite_cat_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
"""SQLite Catalog integration tests."""
from __future__ import annotations

import logging

import intake
import pandas as pd
from pandas.testing import assert_frame_equal

from intake_sqlite import SQLiteCatalog

# pytest imports this package last, so plugin is not auto-added
intake.register_driver(name="sqlite_cat", driver=SQLiteCatalog)

logger = logging.getLogger(__name__)


def test_local_sqlite_catalog(
    temp_db: tuple[str, str, str],
    df1: pd.DataFrame,
    df2: pd.DataFrame,
) -> None:
    """Check that both temp tables are listed in, and readable from, the catalog."""
    pk_table, nopk_table, db_path = temp_db
    catalog = SQLiteCatalog(db_path)
    assert pk_table in catalog  # nosec: B101
    assert nopk_table in catalog  # nosec: B101
    # Read and compare one table at a time, primary-key table first.
    assert_frame_equal(df1, getattr(catalog, pk_table).read())
    assert_frame_equal(df2, getattr(catalog, nopk_table).read())


def test_remote_sqlite_catalog() -> None:
    """Check that a catalog built from a remote URL lists the expected table."""
    remote_url = "https://global-power-plants.datasettes.com/global-power-plants.db"
    catalog = SQLiteCatalog(urlpath=remote_url)
    assert "global-power-plants" in catalog  # nosec: B101
73 changes: 73 additions & 0 deletions tests/integration/sqlite_src_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
"""SQLite Intake Source integration tests."""
from __future__ import annotations

import logging

import intake
import pandas as pd
from pandas.testing import assert_frame_equal

from intake_sqlite import (
SQLiteSource,
SQLiteSourceAutoPartition,
SQLiteSourceManualPartition,
)

# pytest imports this package last, so plugin is not auto-added
intake.register_driver(name="sqlite", driver=SQLiteSource)
intake.register_driver(name="sqlite_auto", driver=SQLiteSourceAutoPartition)
intake.register_driver(name="sqlite_manual", driver=SQLiteSourceManualPartition)

logger = logging.getLogger(__name__)


def test_temp_db_fixture(temp_db: tuple[str, str, str], df1: pd.DataFrame) -> None:
    """Check that reading the temp DB directly with pandas returns ``df1``."""
    table_name, _, db_path = temp_db
    round_tripped = pd.read_sql(table_name, f"sqlite:///{db_path}", index_col="pk")
    assert_frame_equal(df1, round_tripped)


def test_simple_src(temp_db: tuple[str, str, str], df1: pd.DataFrame) -> None:
    """Check that a plain ``SQLiteSource`` read of the table returns ``df1``."""
    table_name, _, db_path = temp_db
    source = SQLiteSource(db_path, table_name, sql_kwargs={"index_col": "pk"})
    assert_frame_equal(df1, source.read())


def test_auto_src_partition(temp_db: tuple[str, str, str], df1: pd.DataFrame) -> None:
    """Check that an auto-partitioned source has two partitions and returns ``df1``."""
    table_name, _, db_path = temp_db
    source = SQLiteSourceAutoPartition(
        db_path,
        table_name,
        index="pk",
        sql_kwargs={"npartitions": 2},
    )
    assert source.discover()["npartitions"] == 2  # nosec: B101
    assert source.to_dask().npartitions == 2  # nosec: B101
    assert_frame_equal(df1, source.read())


def test_manual_src_partition(temp_db: tuple[str, str, str], df1: pd.DataFrame) -> None:
    """Test manual partitioning of table.

    The table is split into two partitions by the ``where_values`` clauses
    (``pk < 20`` and ``pk >= 20``). The test checks the reported partition
    count and that reading the source gives back ``df1``.
    """
    # The fixture tuple is unpacked once; the no-PK table name is not needed here.
    table, _, urlpath = temp_db
    s = SQLiteSourceManualPartition(
        urlpath,
        f"SELECT * FROM {table}",  # nosec: B608
        where_values=["WHERE pk < 20", "WHERE pk >= 20"],
        sql_kwargs=dict(index_col="pk"),
    )
    assert s.discover()["npartitions"] == 2  # nosec: B101
    assert s.to_dask().npartitions == 2  # nosec: B101
    actual = s.read()
    assert_frame_equal(df1, actual)


def test_remote_sqlite_source() -> None:
    """Check that a remote ``SQLiteSource`` query returns the expected shape."""
    remote_url = "https://global-power-plants.datasettes.com/global-power-plants.db"
    source = SQLiteSource(
        urlpath=remote_url,
        sql_expr="SELECT * FROM 'global-power-plants'",
    )
    assert source.read().shape == (34936, 36)  # nosec: B101
59 changes: 54 additions & 5 deletions tests/unit/intake_sqlite_test.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,66 @@
"""A dummy unit test so pytest has something to do."""
"""SQLite Intake Catalog unit tests."""
from __future__ import annotations

import logging
from pathlib import Path

import pytest

from intake_sqlite import urlpath_to_sqliteurl

logger = logging.getLogger(__name__)

TEST_DIR = Path(__file__).parent.parent.resolve()
DATA_DIR = Path(__file__).resolve().parents[1] / "data"

# (file name, expected exception) pairs used to parametrize test_bad_filenames
# and test_bad_dirnames. Each name is joined onto pytest's tmp_path; the first
# test never creates the path and the second creates it as a directory, so no
# entry ever refers to a real file.
# NOTE(review): ".wtf" and ".sql" are presumably absent from SQLITE_SUFFIXES,
# while ".db" and ".sqlite" are presumably present -- confirm against its definition.
BAD_FILES: list[tuple[str, type[Exception]]] = [
    ("database.wtf", ValueError),
    ("dbdump.sql", ValueError),
    ("nonexistent.db", ValueError),
    ("nonexistent.sqlite", ValueError),
]

# (URL, expected exception) pairs used to parametrize test_bad_urls.
# NOTE(review): the first three look like bad-suffix cases and the two "wtftp"
# entries look like unsupported-protocol cases -- confirm against SQLITE_SUFFIXES
# and fsspec.available_protocols().
BAD_URLS: list[tuple[str, type[Exception]]] = [
    ("https://catalyst.coop/pudl.wtf", ValueError),
    ("s3://catalyst.coop/pudl.dude", ValueError),
    ("gs://catalyst.coop/pudl.sql", ValueError),
    ("wtftp://catalyst.coop/pudl.sqlite", ValueError),
    ("wtftp://catalyst.coop/pudl.db", ValueError),
]


@pytest.mark.parametrize("filename,exc", BAD_FILES)
def test_bad_filenames(filename: str, exc: type[Exception], tmp_path: Path) -> None:
    """Check that a path which was never created under ``tmp_path`` is rejected."""
    candidate = str(tmp_path / filename)
    with pytest.raises(exc):
        urlpath_to_sqliteurl(candidate)


def test_urlpath_to_sqliteurl() -> None:
@pytest.mark.parametrize("dirname,exc", BAD_FILES)
def test_bad_dirnames(dirname: str, exc: type[Exception], tmp_path: Path) -> None:
    """Check that a path which exists but is a directory is rejected."""
    directory = tmp_path / dirname
    directory.mkdir()
    candidate = str(directory)
    with pytest.raises(exc):
        urlpath_to_sqliteurl(candidate)


@pytest.mark.parametrize("url,exc", BAD_URLS)
def test_bad_urls(url: str, exc: type[Exception]) -> None:
    """Check that each URL in ``BAD_URLS`` raises its paired exception."""
    expect_failure = pytest.raises(exc)
    with expect_failure:
        urlpath_to_sqliteurl(url)


def test_local_path_to_sqliteurl() -> None:
    """Test that a local SQLite file path becomes the matching ``sqlite:///`` URL.

    Uses ``test.db`` under ``DATA_DIR`` and expects the result to be
    ``sqlite:///`` followed by that same path.
    """
    # Build the path once and derive the expected URL from it, so the two
    # cannot drift apart.
    test_db_path = DATA_DIR / "test.db"
    expected_local_url = f"sqlite:///{test_db_path}"
    actual_local_url = urlpath_to_sqliteurl(str(test_db_path))
    assert actual_local_url == expected_local_url  # nosec: B101


# Note: There's no remote URL unit test for a working input to urlpath_to_sqliteurl()
# because it's exercised in the integration tests, and there's no way to know what the
# local path to the cached file will be since it uses a hash (of the URL?) as the
# filename.
1 change: 1 addition & 0 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ envlist = ci
allowlist_externals =
bash
coverage
mypy
sphinx-build
twine
# shared directory for re-used packages
Expand Down

0 comments on commit 4fb428c

Please sign in to comment.