diff --git a/apis/python/src/tiledbsoma/_dataframe.py b/apis/python/src/tiledbsoma/_dataframe.py index 9bec309779..01e14f1e3d 100644 --- a/apis/python/src/tiledbsoma/_dataframe.py +++ b/apis/python/src/tiledbsoma/_dataframe.py @@ -24,7 +24,14 @@ from ._read_iters import TableReadIter from ._soma_array import SOMAArray from ._tdb_handles import DataFrameWrapper -from ._types import NPFloating, NPInteger, OpenTimestamp, Slice, is_slice_of +from ._types import ( + NPFloating, + NPInteger, + OpenTimestamp, + Slice, + StatusAndReason, + is_slice_of, +) from .options import SOMATileDBContext from .options._soma_tiledb_context import _validate_soma_tiledb_context from .options._tiledb_create_write_options import ( @@ -417,7 +424,9 @@ def tiledbsoma_has_upgraded_domain(self) -> bool: """ return self._handle.tiledbsoma_has_upgraded_domain - def resize_soma_joinid_shape(self, newshape: int) -> None: + def resize_soma_joinid_shape( + self, newshape: int, check_only: bool = False + ) -> StatusAndReason: """Increases the shape of the dataframe on the ``soma_joinid`` index column, if it indeed is an index column, leaving all other index columns as-is. If the ``soma_joinid`` is not an index column, no change is made. @@ -425,9 +434,36 @@ def resize_soma_joinid_shape(self, newshape: int) -> None: to keystroke, and handles the most common case for dataframe domain expansion. Raises an error if the dataframe doesn't already have a domain: in that case please call ``tiledbsoma_upgrade_domain`` (WIP for - 1.15). + 1.15). If ``check_only`` is ``True``, returns whether the operation + would succeed if attempted, and a reason why it would not. 
+ """ + if check_only: + return cast( + StatusAndReason, + self._handle._handle.can_resize_soma_joinid_shape(newshape), + ) + else: + self._handle._handle.resize_soma_joinid_shape(newshape) + return (True, "") + + def upgrade_soma_joinid_shape( + self, newshape: int, check_only: bool = False + ) -> StatusAndReason: + """This is like ``upgrade_domain``, but it only applies the specified + domain update to the ``soma_joinid`` index column. Any other index + columns have their domain set to match the maxdomain. If the + ``soma_joinid`` column is not an index column at all, then no action is + taken. If ``check_only`` is ``True``, returns whether the operation + would succeed if attempted, and a reason why it would not. """ - self._handle._handle.resize_soma_joinid_shape(newshape) + if check_only: + return cast( + StatusAndReason, + self._handle._handle.can_upgrade_soma_joinid_shape(newshape), + ) + else: + self._handle._handle.upgrade_soma_joinid_shape(newshape) + return (True, "") def __len__(self) -> int: """Returns the number of rows in the dataframe. Same as ``df.count``.""" diff --git a/apis/python/src/tiledbsoma/_sparse_nd_array.py b/apis/python/src/tiledbsoma/_sparse_nd_array.py index c811d320aa..ac27185df9 100644 --- a/apis/python/src/tiledbsoma/_sparse_nd_array.py +++ b/apis/python/src/tiledbsoma/_sparse_nd_array.py @@ -42,7 +42,7 @@ TableReadIter, ) from ._tdb_handles import SparseNDArrayWrapper -from ._types import NTuple, OpenTimestamp +from ._types import NTuple, OpenTimestamp, StatusAndReason from .options._soma_tiledb_context import ( SOMATileDBContext, _validate_soma_tiledb_context, @@ -296,14 +296,22 @@ def read( return SparseNDArrayRead(sr, self, coords) - def resize(self, newshape: Sequence[Union[int, None]]) -> None: + def resize( + self, newshape: Sequence[Union[int, None]], check_only: bool = False + ) -> StatusAndReason: """Increases the shape of the array as specfied. 
Raises an error if the new shape is less than the current shape in any dimension. Raises an error if the new shape exceeds maxshape in any dimension. Raises an error if the array doesn't already have a shape: in that case please call - tiledbsoma_upgrade_shape. + tiledbsoma_upgrade_shape. If ``check_only`` is ``True``, returns + whether the operation would succeed if attempted, and a reason why it + would not. """ - self._handle.resize(newshape) + if check_only: + return self._handle.tiledbsoma_can_resize(newshape) + else: + self._handle.resize(newshape) + return (True, "") def tiledbsoma_upgrade_shape(self, newshape: Sequence[Union[int, None]]) -> None: """Allows the array to have a resizeable shape as described in the TileDB-SOMA diff --git a/apis/python/src/tiledbsoma/_tdb_handles.py b/apis/python/src/tiledbsoma/_tdb_handles.py index a61159df50..a41de4c748 100644 --- a/apis/python/src/tiledbsoma/_tdb_handles.py +++ b/apis/python/src/tiledbsoma/_tdb_handles.py @@ -35,7 +35,7 @@ from . 
import pytiledbsoma as clib from ._exception import DoesNotExistError, SOMAError, is_does_not_exist_error -from ._types import METADATA_TYPES, Metadatum, OpenTimestamp +from ._types import METADATA_TYPES, Metadatum, OpenTimestamp, StatusAndReason from .options._soma_tiledb_context import SOMATileDBContext RawHandle = Union[ @@ -459,6 +459,12 @@ def resize(self, newshape: Sequence[Union[int, None]]) -> None: """Not implemented for DataFrame.""" raise NotImplementedError + def tiledbsoma_can_resize( + self, newshape: Sequence[Union[int, None]] + ) -> StatusAndReason: + """Not implemented for DataFrame.""" + raise NotImplementedError + def tiledbsoma_upgrade_shape(self, newshape: Sequence[Union[int, None]]) -> None: """Not implemented for DataFrame.""" raise NotImplementedError @@ -467,6 +473,18 @@ def resize_soma_joinid_shape(self, newshape: int) -> None: """Only implemented for DataFrame.""" raise NotImplementedError + def can_resize_soma_joinid_shape(self, newshape: int) -> StatusAndReason: + """Only implemented for DataFrame.""" + raise NotImplementedError + + def upgrade_soma_joinid_shape(self, newshape: int) -> None: + """Only implemented for DataFrame.""" + raise NotImplementedError + + def can_upgrade_soma_joinid_shape(self, newshape: int) -> StatusAndReason: + """Only implemented for DataFrame.""" + raise NotImplementedError + class DataFrameWrapper(SOMAArrayWrapper[clib.SOMADataFrame]): """Wrapper around a Pybind11 SOMADataFrame handle.""" @@ -525,6 +543,39 @@ def resize_soma_joinid_shape(self, newshape: int) -> None: """ self._handle.resize_soma_joinid_shape(newshape) + def can_resize_soma_joinid_shape(self, newshape: int) -> StatusAndReason: + """Reports whether ``resize_soma_joinid_shape`` would succeed for the + given ``newshape``, without performing the resize. That operation + increases the shape on the ``soma_joinid`` index column, if it indeed is 
+ an index column, leaving all other index columns as-is. It is a special + case of ``upgrade_domain`` (WIP for 1.15), but simpler to keystroke, and + handles the most common case for dataframe domain expansion. If the + dataframe doesn't already have a domain, please see + ``tiledbsoma_upgrade_domain`` (WIP for 1.15). Returns a ``(status, + reason)`` pair: whether the resize would succeed, and why it would not. + """ + return cast( + StatusAndReason, self._handle.can_resize_soma_joinid_shape(newshape) + ) + + def upgrade_soma_joinid_shape(self, newshape: int) -> None: + """This is like ``upgrade_domain``, but it only applies the specified domain + update to the ``soma_joinid`` index column. Any other index columns have their + domain set to match the maxdomain. If the ``soma_joinid`` column is not an index + column at all, then no action is taken.""" + self._handle.upgrade_soma_joinid_shape(newshape) + + def can_upgrade_soma_joinid_shape(self, newshape: int) -> StatusAndReason: + """This allows you to see if ``upgrade_soma_joinid_shape`` will succeed + before calling it. This is an important test-point and dev-internal + access-point, in particular, for the tiledbsoma-io experiment-level + resizer. Returns a ``(status, reason)`` pair: whether the operation + would succeed if attempted, and a reason why it would not. + """ + return cast( + StatusAndReason, self._handle.can_upgrade_soma_joinid_shape(newshape) + ) + class PointCloudDataFrameWrapper(SOMAArrayWrapper[clib.SOMAPointCloudDataFrame]): """Wrapper around a Pybind11 SOMAPointCloudDataFrame handle.""" @@ -563,6 +614,16 @@ def resize(self, newshape: Sequence[Union[int, None]]) -> None: # https://github.com/single-cell-data/TileDB-SOMA/issues/2955 raise NotImplementedError() + def tiledbsoma_can_resize( + self, newshape: Sequence[Union[int, None]] + ) -> StatusAndReason: + """Supported for ``SparseNDArray``; scheduled for implementation for + ``DenseNDArray`` in TileDB-SOMA 1.15. 
+ """ + # TODO: support current domain for dense arrays once we have core support. + # https://github.com/single-cell-data/TileDB-SOMA/issues/2955 + raise NotImplementedError() + class SparseNDArrayWrapper(SOMAArrayWrapper[clib.SOMASparseNDArray]): """Wrapper around a Pybind11 SparseNDArrayWrapper handle.""" @@ -593,6 +654,17 @@ def resize(self, newshape: Sequence[Union[int, None]]) -> None: """ self._handle.resize(newshape) + def tiledbsoma_can_resize( + self, newshape: Sequence[Union[int, None]] + ) -> StatusAndReason: + """This allows you to see if ``resize`` will succeed before calling it. + This is an important test-point and dev-internal access-point, in + particular, for the tiledbsoma-io experiment-level resizer. Returns a + ``(status, reason)`` pair: whether the operation would succeed if + attempted, and a reason why it would not. + """ + return cast(StatusAndReason, self._handle.can_resize(newshape)) + def tiledbsoma_upgrade_shape(self, newshape: Sequence[Union[int, None]]) -> None: """Allows the array to have a resizeable shape as described in the TileDB-SOMA 1.15 release notes. 
Raises an error if the new shape exceeds maxshape in diff --git a/apis/python/src/tiledbsoma/_types.py b/apis/python/src/tiledbsoma/_types.py index 5dc742c252..4033c0d6fd 100644 --- a/apis/python/src/tiledbsoma/_types.py +++ b/apis/python/src/tiledbsoma/_types.py @@ -78,3 +78,7 @@ Metadatum = Union[bytes, float, int, str] METADATA_TYPES = (bytes, float, int, str) + +StatusAndReason = Tuple[bool, str] +"""Information for whether an upgrade-shape or resize would succeed +if attempted, along with a reason why not.""" diff --git a/apis/python/src/tiledbsoma/soma_dataframe.cc b/apis/python/src/tiledbsoma/soma_dataframe.cc index 43bb11c444..8fa9655b03 100644 --- a/apis/python/src/tiledbsoma/soma_dataframe.cc +++ b/apis/python/src/tiledbsoma/soma_dataframe.cc @@ -167,6 +167,42 @@ void load_soma_dataframe(py::module& m) { throw TileDBSOMAError(e.what()); } }, + "newshape"_a) + + .def( + "can_resize_soma_joinid_shape", + [](SOMADataFrame& sdf, int64_t newshape) { + try { + return sdf.can_resize_soma_joinid_shape( + newshape, "can_resize_soma_joinid_shape"); + } catch (const std::exception& e) { + throw TileDBSOMAError(e.what()); + } + }, + "newshape"_a) + + .def( + "upgrade_soma_joinid_shape", + [](SOMADataFrame& sdf, int64_t newshape) { + try { + sdf.upgrade_soma_joinid_shape( + newshape, "upgrade_soma_joinid_shape"); + } catch (const std::exception& e) { + throw TileDBSOMAError(e.what()); + } + }, + "newshape"_a) + + .def( + "can_upgrade_soma_joinid_shape", + [](SOMADataFrame& sdf, int64_t newshape) { + try { + return sdf.can_upgrade_soma_joinid_shape( + newshape, "can_upgrade_soma_joinid_shape"); + } catch (const std::exception& e) { + throw TileDBSOMAError(e.what()); + } + }, "newshape"_a); } } // namespace libtiledbsomacpp diff --git a/apis/python/src/tiledbsoma/soma_sparse_ndarray.cc b/apis/python/src/tiledbsoma/soma_sparse_ndarray.cc index 5bfbf27b3f..8cceec2b3b 100644 --- a/apis/python/src/tiledbsoma/soma_sparse_ndarray.cc +++ 
b/apis/python/src/tiledbsoma/soma_sparse_ndarray.cc @@ -128,6 +128,17 @@ void load_soma_sparse_ndarray(py::module& m) { }, "newshape"_a) + .def( + "can_resize", + [](SOMAArray& array, const std::vector<int64_t>& newshape) { + try { + return array.can_resize(newshape, "can_resize"); + } catch (const std::exception& e) { + throw TileDBSOMAError(e.what()); + } + }, + "newshape"_a) + .def( "tiledbsoma_upgrade_shape", [](SOMAArray& array, const std::vector<int64_t>& newshape) { diff --git a/apis/python/tests/test_shape.py b/apis/python/tests/test_shape.py index 5374c56426..ef19c167cd 100644 --- a/apis/python/tests/test_shape.py +++ b/apis/python/tests/test_shape.py @@ -118,6 +118,9 @@ def test_sparse_nd_array_basics( # Test resize down new_shape = tuple([arg_shape[i] - 50 for i in range(ndim)]) with tiledbsoma.SparseNDArray.open(uri, "w") as snda: + (ok, msg) = snda.resize(new_shape, check_only=True) + assert not ok + assert msg == "can_resize for soma_dim_0: new 50 < existing shape 100" # TODO: check draft spec # with pytest.raises(ValueError): with pytest.raises(tiledbsoma.SOMAError): @@ -162,6 +165,18 @@ def test_sparse_nd_array_basics( with tiledbsoma.SparseNDArray.open(uri) as snda: assert snda.shape == new_shape + (ok, msg) = snda.resize(new_shape, check_only=True) + assert ok + assert msg == "" + + too_small = tuple(e - 1 for e in new_shape) + (ok, msg) = snda.resize(too_small, check_only=True) + assert not ok + assert msg == "can_resize for soma_dim_0: new 149 < existing shape 150" + + with tiledbsoma.SparseNDArray.open(uri, "w") as snda: + (ok, msg) = snda.resize(new_shape, check_only=True) + ## Pending 2.27 timeframe for dense support for current domain, including resize ## https://github.com/single-cell-data/TileDB-SOMA/issues/2955 @@ -276,12 +291,20 @@ def test_dataframe_basics(tmp_path, soma_joinid_domain, index_column_names): # Test resize down new_shape = 0 with tiledbsoma.DataFrame.open(uri, "w") as sdf: + ok, msg = sdf.resize_soma_joinid_shape(new_shape, check_only=True) 
if has_soma_joinid_dim: # TODO: check draft spec # with pytest.raises(ValueError): + assert not ok + assert ( + "can_resize_soma_joinid_shape: new soma_joinid shape 0 < existing shape" + in msg + ) with pytest.raises(tiledbsoma.SOMAError): sdf.resize_soma_joinid_shape(new_shape) else: + assert ok + assert msg == "" sdf.resize_soma_joinid_shape(new_shape) with tiledbsoma.DataFrame.open(uri) as sdf: