From 3a2f1a1bff0a8a7667039d5fffd7a7698d2e3f37 Mon Sep 17 00:00:00 2001 From: Jonathan Shi <149419494+sfc-gh-joshi@users.noreply.github.com> Date: Thu, 5 Sep 2024 15:14:32 -0700 Subject: [PATCH] SNOW-1635365: Update snowpark pandas imports in method documentation (#2204) 1. Which Jira issue is this PR addressing? Make sure that there is an accompanying issue to your PR. Fixes SNOW-1635365 (resolves #2139) 2. Fill out the following pre-review checklist: - [ ] I am adding a new automated test(s) to verify correctness of my new code - [ ] If this test skips Local Testing mode, I'm requesting review from @snowflakedb/local-testing - [ ] I am adding new logging messages - [ ] I am adding a new telemetry message - [ ] I am adding new credentials - [ ] I am adding a new dependency - [ ] If this is a new feature/behavior, I'm adding the Local Testing parity changes. 3. Please describe how your code solves the related issue. A few docstrings used an outdated import statement for Snowpark pandas (`import snowflake.snowpark.modin.pandas as pd`). This PR updates those docstrings to use the current form (`import modin.pandas as pd; import snowflake.snowpark.modin.plugin`); the `nunique` examples simply drop the outdated import line. It also removes the vendored `read_json` from `modin/pandas/io.py` in favor of the override in `pd_overrides.py`, and changes the default of `read_json`'s `typ` parameter from `None` to `"frame"` (any other value is still reported as not implemented). 
--- .../snowpark/modin/pandas/__init__.py | 4 +- src/snowflake/snowpark/modin/pandas/io.py | 37 +------------------ .../snowpark/modin/plugin/docstrings/base.py | 1 - .../modin/plugin/docstrings/series.py | 1 - .../modin/plugin/extensions/pd_extensions.py | 6 ++- .../modin/plugin/extensions/pd_overrides.py | 8 ++-- .../snowpark/modin/plugin/io/snow_io.py | 4 +- tests/integ/modin/io/test_read_json.py | 2 +- 8 files changed, 16 insertions(+), 47 deletions(-) diff --git a/src/snowflake/snowpark/modin/pandas/__init__.py b/src/snowflake/snowpark/modin/pandas/__init__.py index 7afd9f57218..4ea950fe1b4 100644 --- a/src/snowflake/snowpark/modin/pandas/__init__.py +++ b/src/snowflake/snowpark/modin/pandas/__init__.py @@ -122,7 +122,7 @@ value_counts, wide_to_long, ) -from snowflake.snowpark.modin.pandas.io import ( +from snowflake.snowpark.modin.pandas.io import ( # read_json is provided by overrides module ExcelFile, HDFStore, json_normalize, @@ -134,7 +134,6 @@ read_gbq, read_hdf, read_html, - read_json, read_orc, read_parquet, read_pickle, @@ -164,6 +163,7 @@ Index, DatetimeIndex, TimedeltaIndex, + read_json, ) # Record which attributes are defined on an upstream object, and which are defined on a vendored diff --git a/src/snowflake/snowpark/modin/pandas/io.py b/src/snowflake/snowpark/modin/pandas/io.py index 366dff06a31..25959212a18 100644 --- a/src/snowflake/snowpark/modin/pandas/io.py +++ b/src/snowflake/snowpark/modin/pandas/io.py @@ -27,6 +27,8 @@ Manually add documentation for methods which are not presented in pandas. 
""" +# TODO SNOW-1650875: remove this file since we define everything in pd_overrides.py + from __future__ import annotations import csv @@ -91,7 +93,6 @@ # below logic is to handle circular imports without errors if TYPE_CHECKING: # pragma: no cover from .dataframe import DataFrame - from .series import Series # TODO: SNOW-1265551: add inherit_docstrings decorators once docstring overrides are available @@ -359,39 +360,6 @@ def read_parquet( ) -# TODO: SNOW-1265551: add inherit_docstrings decorators once docstring overrides are available -@snowpark_pandas_telemetry_standalone_function_decorator -@expanduser_path_arg("path_or_buf") -def read_json( - path_or_buf, - *, - orient: str | None = None, - typ: Literal["frame", "series"] = "frame", - dtype: DtypeArg | None = None, - convert_axes=None, - convert_dates: bool | list[str] = True, - keep_default_dates: bool = True, - precise_float: bool = False, - date_unit: str | None = None, - encoding: str | None = None, - encoding_errors: str | None = "strict", - lines: bool = False, - chunksize: int | None = None, - compression: CompressionOptions = "infer", - nrows: int | None = None, - storage_options: StorageOptions = None, - dtype_backend: DtypeBackend | NoDefault = no_default, - engine="ujson", -) -> DataFrame | Series | pandas.io.json._json.JsonReader: # pragma: no cover: this function is overridden by plugin/pd_overrides.py - _, _, _, kwargs = inspect.getargvalues(inspect.currentframe()) - - from snowflake.snowpark.modin.core.execution.dispatching.factories.dispatcher import ( - FactoryDispatcher, - ) - - return ModinObjects.DataFrame(query_compiler=FactoryDispatcher.read_json(**kwargs)) - - @_inherit_docstrings(pandas.read_gbq, apilink="pandas.read_gbq") @snowpark_pandas_telemetry_standalone_function_decorator def read_gbq( @@ -1145,7 +1113,6 @@ def to_numpy( "read_gbq", "read_hdf", "read_html", - "read_json", "read_orc", "read_parquet", "read_pickle", diff --git 
a/src/snowflake/snowpark/modin/plugin/docstrings/base.py b/src/snowflake/snowpark/modin/plugin/docstrings/base.py index 8a4e2f4d820..4eb1bd1584c 100644 --- a/src/snowflake/snowpark/modin/plugin/docstrings/base.py +++ b/src/snowflake/snowpark/modin/plugin/docstrings/base.py @@ -2043,7 +2043,6 @@ def nunique(): Examples -------- - >>> import snowflake.snowpark.modin.pandas as pd >>> df = pd.DataFrame({'A': [4, 5, 6], 'B': [4, 1, 1]}) >>> df.nunique() A 3 diff --git a/src/snowflake/snowpark/modin/plugin/docstrings/series.py b/src/snowflake/snowpark/modin/plugin/docstrings/series.py index c8318187530..266d4b33206 100644 --- a/src/snowflake/snowpark/modin/plugin/docstrings/series.py +++ b/src/snowflake/snowpark/modin/plugin/docstrings/series.py @@ -3792,7 +3792,6 @@ def nunique(): Examples -------- - >>> import snowflake.snowpark.modin.pandas as pd >>> import numpy as np >>> s = pd.Series([1, 3, 5, 7, 7]) >>> s diff --git a/src/snowflake/snowpark/modin/plugin/extensions/pd_extensions.py b/src/snowflake/snowpark/modin/plugin/extensions/pd_extensions.py index 8f55985f2b7..0f4d512a9d6 100644 --- a/src/snowflake/snowpark/modin/plugin/extensions/pd_extensions.py +++ b/src/snowflake/snowpark/modin/plugin/extensions/pd_extensions.py @@ -99,7 +99,8 @@ def read_snowflake( will have a default index from 0 to n-1, where n is the number of rows in the table, and have all columns in the Snowflake table as data columns. - >>> import snowflake.snowpark.modin.pandas as pd + >>> import modin.pandas as pd + >>> import snowflake.snowpark.modin.plugin >>> pd.read_snowflake(table_name) # doctest: +NORMALIZE_WHITESPACE A B C 0 1 2 3 @@ -171,7 +172,8 @@ def read_snowflake( - When ``index_col`` is not specified, a Snowpark pandas DataFrame will have a default index from 0 to n-1, where n is the number of rows in the table. 
- >>> import snowflake.snowpark.modin.pandas as pd + >>> import modin.pandas as pd + >>> import snowflake.snowpark.modin.plugin >>> pd.read_snowflake(f"SELECT * FROM {table_name}") # doctest: +NORMALIZE_WHITESPACE A B C 0 1 2 3 diff --git a/src/snowflake/snowpark/modin/plugin/extensions/pd_overrides.py b/src/snowflake/snowpark/modin/plugin/extensions/pd_overrides.py index 5d61bc95694..dea98bbb0d3 100644 --- a/src/snowflake/snowpark/modin/plugin/extensions/pd_overrides.py +++ b/src/snowflake/snowpark/modin/plugin/extensions/pd_overrides.py @@ -308,7 +308,8 @@ def read_csv( >>> with open(f'{temp_dir_name}/data.csv', 'w') as f: ... writer = csv.writer(f) ... writer.writerows([['c1','c2','c3'], [1,2,3], [4,5,6], [7,8,9]]) - >>> import snowflake.snowpark.modin.pandas as pd + >>> import modin.pandas as pd + >>> import snowflake.snowpark.modin.plugin >>> df = pd.read_csv(f'{temp_dir_name}/data.csv') >>> df c1 c2 c3 @@ -369,7 +370,7 @@ def read_json( path_or_buf: FilePath, *, orient: Optional[str] = None, - typ: Optional[Literal["frame", "series"]] = None, + typ: Optional[Literal["frame", "series"]] = "frame", dtype: Optional[DtypeArg] = None, convert_axes: Optional[bool] = None, convert_dates: Optional[Union[bool, list[str]]] = None, @@ -489,7 +490,8 @@ def read_json( >>> with open(f'{temp_dir_name}/snowpark_pandas.json', 'w') as f: ... 
json.dump(data, f) - >>> import snowflake.snowpark.modin.pandas as pd + >>> import modin.pandas as pd + >>> import snowflake.snowpark.modin.plugin >>> df = pd.read_json(f'{temp_dir_name}/snowpark_pandas.json') >>> df A B C diff --git a/src/snowflake/snowpark/modin/plugin/io/snow_io.py b/src/snowflake/snowpark/modin/plugin/io/snow_io.py index 1ccf6a8ef15..98441e446d9 100644 --- a/src/snowflake/snowpark/modin/plugin/io/snow_io.py +++ b/src/snowflake/snowpark/modin/plugin/io/snow_io.py @@ -434,7 +434,7 @@ def read_json( path_or_buf: FilePath, *, orient: Optional[str] = None, - typ: Optional[Literal["frame", "series"]] = None, + typ: Optional[Literal["frame", "series"]] = "frame", dtype: Optional[DtypeArg] = None, convert_axes: Optional[bool] = None, convert_dates: Optional[Union[bool, list[str]]] = None, @@ -463,7 +463,7 @@ def read_json( ) error_not_implemented_parameter("orient", orient is not None) - error_not_implemented_parameter("typ", typ is not None) + error_not_implemented_parameter("typ", typ != "frame") error_not_implemented_parameter("dtype", dtype is not None) error_not_implemented_parameter("convert_axes", convert_axes is not None) error_not_implemented_parameter("convert_dates", convert_dates is not None) diff --git a/tests/integ/modin/io/test_read_json.py b/tests/integ/modin/io/test_read_json.py index 74c869ba192..59cbae7455b 100644 --- a/tests/integ/modin/io/test_read_json.py +++ b/tests/integ/modin/io/test_read_json.py @@ -322,7 +322,7 @@ def test_read_json_malformed_file_negative(): "parameter, argument", [ ("orient", "records"), - ("typ", "frame"), + ("typ", "series"), ("dtype", True), ("convert_axes", True), ("convert_dates", True),