Skip to content

Commit

Permalink
refactor(typing): Utilize datasets._typing
Browse files Browse the repository at this point in the history
  • Loading branch information
dangotbanned committed Nov 6, 2024
1 parent 7b0fe29 commit 572d069
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 18 deletions.
28 changes: 12 additions & 16 deletions tools/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import tempfile
from functools import cached_property, partial
from pathlib import Path
from typing import TYPE_CHECKING, Any, Callable, ClassVar, Literal, get_args
from typing import TYPE_CHECKING, Any, Callable, ClassVar, Literal
from urllib.request import urlopen

import polars as pl
Expand All @@ -38,6 +38,7 @@
from typing import TypeAlias
else:
from typing_extensions import TypeAlias
from tools.datasets._typing import DatasetName, Extension, VersionTag

_PathAlias: TypeAlias = Literal["npm_tags", "gh_tags", "gh_trees"]

Expand Down Expand Up @@ -144,11 +145,6 @@ def write_parquet(self, frame: pl.DataFrame | pl.LazyFrame, fp: Path, /) -> None
_OLD_SOURCE_TAG = "v1.29.0" # 5 years ago
_CURRENT_SOURCE_TAG = "v2.9.0"

ExtSupported: TypeAlias = Literal[".csv", ".json", ".tsv", ".arrow"]
"""
- `'flights-200k.(arrow|json)'` key collision using stem
"""


def generate_datasets_typing(application: Application, output: Path, /) -> None:
app = application
Expand Down Expand Up @@ -180,7 +176,7 @@ def generate_datasets_typing(application: Application, output: Path, /) -> None:
ruff.write_lint_format(output, contents)


def is_ext_supported(suffix: str) -> TypeIs[ExtSupported]:
def is_ext_supported(suffix: str) -> TypeIs[Extension]:
return suffix in {".csv", ".json", ".tsv", ".arrow"}


Expand All @@ -193,7 +189,7 @@ def _js_to_py(s: str, /):


class Dataset:
read_fn: ClassVar[dict[ExtSupported, Callable[..., pl.DataFrame]]] = {
read_fn: ClassVar[dict[Extension, Callable[..., pl.DataFrame]]] = {
".csv": pl.read_csv,
".json": pl.read_json,
".tsv": partial(pl.read_csv, separator="\t"),
Expand All @@ -205,7 +201,7 @@ def __init__(self, name: str, /, base_url: str) -> None:
file_name = DATASETS_JSON[_py_to_js(name)]["filename"]
suffix = Path(file_name).suffix
if is_ext_supported(suffix):
self.extension: ExtSupported = suffix
self.extension: Extension = suffix
else:
raise NotImplementedError(suffix, file_name)

Expand Down Expand Up @@ -348,17 +344,17 @@ def __dir__(self) -> list[str]:

def url(
self,
name: str,
ext: ExtSupported | None = None,
name: DatasetName | LiteralString,
ext: Extension | None = None,
/,
tag: LiteralString | Literal["latest"] | None = None,
tag: VersionTag | Literal["latest"] | None = None,
) -> str:
constraints: dict[Literal["tag", "suffix"], str] = {}
if tag == "latest":
raise NotImplementedError(tag)
elif tag is not None:
constraints["tag"] = tag
if name.endswith(get_args(ExtSupported)):
if name.endswith((".csv", ".json", ".tsv", ".arrow")):
name, suffix = name.rsplit(".", maxsplit=1)
suffix = "." + suffix
if not is_ext_supported(suffix):
Expand All @@ -375,10 +371,10 @@ def url(

def __call__(
self,
name: str,
ext: ExtSupported | None = None,
name: DatasetName | LiteralString,
ext: Extension | None = None,
/,
tag: LiteralString | Literal["latest"] | None = None,
tag: VersionTag | Literal["latest"] | None = None,
) -> WorkInProgress:
"""
**WIP** Will be using this *instead of* attribute access.
Expand Down
4 changes: 2 additions & 2 deletions tools/datasets/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
from email.message import Message
from urllib.request import OpenerDirector, Request

from tools.datasets import ExtSupported
from tools.datasets._typing import Extension
from tools.datasets.models import ReParsedTag
from tools.schemapi.utils import OneOrSeq

Expand Down Expand Up @@ -62,7 +62,7 @@
_SUB_DIR = "data"


def is_ext_supported(suffix: str) -> TypeIs[ExtSupported]:
def is_ext_supported(suffix: str) -> TypeIs[Extension]:
return suffix in {".csv", ".json", ".tsv", ".arrow"}


Expand Down

0 comments on commit 572d069

Please sign in to comment.