From ae4eb45b965b2949b4e14e1157d8c8e3a575535d Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sun, 6 Oct 2024 11:21:28 -0400 Subject: [PATCH 1/3] [c++] Unit-test dataframe upgraders --- libtiledbsoma/src/soma/soma_array.cc | 162 +++++++++++++++++++--- libtiledbsoma/test/unit_soma_dataframe.cc | 28 +++- 2 files changed, 169 insertions(+), 21 deletions(-) diff --git a/libtiledbsoma/src/soma/soma_array.cc b/libtiledbsoma/src/soma/soma_array.cc index 99138c94d1..3e12478c93 100644 --- a/libtiledbsoma/src/soma/soma_array.cc +++ b/libtiledbsoma/src/soma/soma_array.cc @@ -1470,7 +1470,7 @@ std::pair SOMAArray::_can_set_shape_helper( false, fmt::format( "{}: array currently has no shape: please use " - "tiledbsoma_upgrade_shape.", + "upgrade the array.", function_name_for_messages)); } } else { @@ -1480,8 +1480,7 @@ std::pair SOMAArray::_can_set_shape_helper( return std::pair( false, fmt::format( - "{}: array already has a shape: please use resize rather " - "than tiledbsoma_upgrade_shape.", + "{}: array already has a shape: please use resize", function_name_for_messages)); } } @@ -1723,23 +1722,152 @@ void SOMAArray::_set_soma_joinid_shape_helper( ArraySchema schema = arr_->schema(); Domain domain = schema.domain(); unsigned ndim = domain.ndim(); - auto tctx = ctx_->tiledb_ctx(); - CurrentDomain old_current_domain = ArraySchemaExperimental::current_domain( - *tctx, schema); - NDRectangle ndrect = old_current_domain.ndrectangle(); - - CurrentDomain new_current_domain(*tctx); ArraySchemaEvolution schema_evolution(*tctx); + CurrentDomain new_current_domain(*tctx); + + if (!is_resize) { + // For upgrade: copy from the full/wide/max domain except for the + // soma_joinid restriction. 
+ + NDRectangle ndrect(*tctx, domain); + + for (unsigned i = 0; i < ndim; i++) { + const Dimension& dim = domain.dimension(i); + const std::string dim_name = dim.name(); + if (dim_name == "soma_joinid") { + if (dim.type() != TILEDB_INT64) { + throw TileDBSOMAError(fmt::format( + "{}: expected soma_joinid to be of type {}; got {}", + function_name_for_messages, + tiledb::impl::type_to_str(TILEDB_INT64), + tiledb::impl::type_to_str(dim.type()))); + } + ndrect.set_range(dim_name, 0, newshape - 1); + continue; - for (unsigned i = 0; i < ndim; i++) { - if (domain.dimension(i).name() == "soma_joinid") { - ndrect.set_range( - domain.dimension(i).name(), 0, newshape - 1); + switch (dim.type()) { + case TILEDB_STRING_ASCII: + case TILEDB_STRING_UTF8: + case TILEDB_CHAR: + // TODO: make these named constants b/c they're shared + // with arrow_adapter. + ndrect.set_range(dim_name, "", "\xff"); + break; + + case TILEDB_INT8: + ndrect.set_range( + dim_name, + dim.domain().first, + dim.domain().second); + break; + case TILEDB_BOOL: + case TILEDB_UINT8: + ndrect.set_range( + dim_name, + dim.domain().first, + dim.domain().second); + break; + case TILEDB_INT16: + ndrect.set_range( + dim_name, + dim.domain().first, + dim.domain().second); + break; + case TILEDB_UINT16: + ndrect.set_range( + dim_name, + dim.domain().first, + dim.domain().second); + break; + case TILEDB_INT32: + ndrect.set_range( + dim_name, + dim.domain().first, + dim.domain().second); + break; + case TILEDB_UINT32: + ndrect.set_range( + dim_name, + dim.domain().first, + dim.domain().second); + break; + case TILEDB_INT64: + case TILEDB_DATETIME_YEAR: + case TILEDB_DATETIME_MONTH: + case TILEDB_DATETIME_WEEK: + case TILEDB_DATETIME_DAY: + case TILEDB_DATETIME_HR: + case TILEDB_DATETIME_MIN: + case TILEDB_DATETIME_SEC: + case TILEDB_DATETIME_MS: + case TILEDB_DATETIME_US: + case TILEDB_DATETIME_NS: + case TILEDB_DATETIME_PS: + case TILEDB_DATETIME_FS: + case TILEDB_DATETIME_AS: + case TILEDB_TIME_HR: + case 
TILEDB_TIME_MIN: + case TILEDB_TIME_SEC: + case TILEDB_TIME_MS: + case TILEDB_TIME_US: + case TILEDB_TIME_NS: + case TILEDB_TIME_PS: + case TILEDB_TIME_FS: + case TILEDB_TIME_AS: + ndrect.set_range( + dim_name, + dim.domain().first, + dim.domain().second); + break; + case TILEDB_UINT64: + ndrect.set_range( + dim_name, + dim.domain().first, + dim.domain().second); + break; + case TILEDB_FLOAT32: + ndrect.set_range( + dim_name, + dim.domain().first, + dim.domain().second); + break; + case TILEDB_FLOAT64: + ndrect.set_range( + dim_name, + dim.domain().first, + dim.domain().second); + break; + default: + throw TileDBSOMAError(fmt::format( + "{}: internal error: unhandled type {} for {}.", + function_name_for_messages, + tiledb::impl::type_to_str(dim.type()), + dim_name)); + } + } + } + + new_current_domain.set_ndrectangle(ndrect); + + } else { + // For resize: copy from the existing current domain except for the + // new soma_joinid value. + CurrentDomain + old_current_domain = ArraySchemaExperimental::current_domain( + *tctx, schema); + NDRectangle ndrect = old_current_domain.ndrectangle(); + + for (unsigned i = 0; i < ndim; i++) { + if (domain.dimension(i).name() == "soma_joinid") { + ndrect.set_range( + domain.dimension(i).name(), 0, newshape - 1); + } } + + new_current_domain.set_ndrectangle(ndrect); } - new_current_domain.set_ndrectangle(ndrect); schema_evolution.expand_current_domain(new_current_domain); schema_evolution.array_evolve(uri_); } @@ -1755,7 +1883,8 @@ std::vector SOMAArray::_tiledb_current_domain() { if (current_domain.is_empty()) { throw TileDBSOMAError( - "Internal error: current domain requested for an array which does " + "Internal error: current domain requested for an array which " + "does " "not support it"); } @@ -1868,7 +1997,8 @@ bool SOMAArray::_dims_are_int64() { void SOMAArray::_check_dims_are_int64() { if (!_dims_are_int64()) { throw TileDBSOMAError( - "[SOMAArray] internal coding error: expected all dims to be int64"); + "[SOMAArray] 
internal coding error: expected all dims to be " + "int64"); } } diff --git a/libtiledbsoma/test/unit_soma_dataframe.cc b/libtiledbsoma/test/unit_soma_dataframe.cc index 942e0f8f28..eb9fa04a2a 100644 --- a/libtiledbsoma/test/unit_soma_dataframe.cc +++ b/libtiledbsoma/test/unit_soma_dataframe.cc @@ -563,12 +563,12 @@ TEST_CASE_METHOD( REQUIRE(ned_sjid == std::vector({1, 2})); - REQUIRE(dom_sjid == std::vector({0, 99})); + REQUIRE(dom_sjid == std::vector({0, SOMA_JOINID_DIM_MAX})); REQUIRE(maxdom_sjid.size() == 2); REQUIRE(maxdom_sjid[0] == 0); if (!use_current_domain) { - REQUIRE(maxdom_sjid[1] == 99); + REQUIRE(maxdom_sjid[1] == SOMA_JOINID_DIM_MAX); } else { REQUIRE(maxdom_sjid[1] > 2000000000); } @@ -662,11 +662,14 @@ TEST_CASE_METHOD( if (!use_current_domain) { REQUIRE(ned_sjid == std::vector({1, 10})); - REQUIRE(dom_sjid == std::vector({0, 99})); - REQUIRE(maxdom_sjid == std::vector({0, 99})); + REQUIRE(dom_sjid == std::vector({0, SOMA_JOINID_DIM_MAX})); + REQUIRE( + maxdom_sjid == std::vector({0, SOMA_JOINID_DIM_MAX})); } else { REQUIRE(ned_sjid == std::vector({1, 101})); - REQUIRE(dom_sjid == std::vector({0, 199})); + REQUIRE( + dom_sjid == + std::vector({0, SOMA_JOINID_RESIZE_DIM_MAX})); REQUIRE(maxdom_sjid.size() == 2); REQUIRE(maxdom_sjid[0] == 0); REQUIRE(maxdom_sjid[1] > 2000000000); @@ -680,6 +683,21 @@ TEST_CASE_METHOD( REQUIRE( check.second == "testing: dataframe currently has no domain set."); + + REQUIRE(!sdf->has_current_domain()); + sdf->close(); + + sdf = open(OpenMode::write); + sdf->upgrade_soma_joinid_shape(SOMA_JOINID_DIM_MAX + 1, "testing"); + sdf->close(); + + sdf = open(OpenMode::read); + REQUIRE(sdf->has_current_domain()); + std::optional actual = sdf->maybe_soma_joinid_shape(); + REQUIRE(actual.has_value()); + REQUIRE(actual.value() == SOMA_JOINID_DIM_MAX + 1); + sdf->close(); + } else { // Must fail since this is too small. 
REQUIRE(check.first == false); From 0ffe30b6ec074c79df4c12c1babb397d716bc633 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sun, 6 Oct 2024 19:19:48 -0400 Subject: [PATCH 2/3] [python] Connect resizers to Python API --- apis/python/src/tiledbsoma/_dataframe.py | 29 +++++++- .../python/src/tiledbsoma/_sparse_nd_array.py | 10 ++- apis/python/src/tiledbsoma/_tdb_handles.py | 67 +++++++++++++++++++ apis/python/src/tiledbsoma/soma_dataframe.cc | 36 ++++++++++ .../src/tiledbsoma/soma_sparse_ndarray.cc | 11 +++ apis/python/tests/test_shape.py | 23 +++++++ 6 files changed, 172 insertions(+), 4 deletions(-) diff --git a/apis/python/src/tiledbsoma/_dataframe.py b/apis/python/src/tiledbsoma/_dataframe.py index 9bec309779..2fed32a918 100644 --- a/apis/python/src/tiledbsoma/_dataframe.py +++ b/apis/python/src/tiledbsoma/_dataframe.py @@ -417,7 +417,9 @@ def tiledbsoma_has_upgraded_domain(self) -> bool: """ return self._handle.tiledbsoma_has_upgraded_domain - def resize_soma_joinid_shape(self, newshape: int) -> None: + def resize_soma_joinid_shape( + self, newshape: int, check_only: bool = False + ) -> Tuple[bool, str]: """Increases the shape of the dataframe on the ``soma_joinid`` index column, if it indeed is an index column, leaving all other index columns as-is. If the ``soma_joinid`` is not an index column, no change is made. @@ -427,7 +429,30 @@ def resize_soma_joinid_shape(self, newshape: int) -> None: domain: in that case please call ``tiledbsoma_upgrade_domain`` (WIP for 1.15). """ - self._handle._handle.resize_soma_joinid_shape(newshape) + if check_only: + return cast( + Tuple[bool, str], + self._handle._handle.can_resize_soma_joinid_shape(newshape), + ) + else: + self._handle._handle.resize_soma_joinid_shape(newshape) + return (True, "") + + def upgrade_soma_joinid_shape( + self, newshape: int, check_only: bool = False + ) -> Tuple[bool, str]: + """This is like ``upgrade_domain``, but it only applies the specified domain + update to the ``soma_joinid`` index column. 
Any other index columns have their + domain set to match the maxdomain. If the ``soma_joinid`` column is not an index + column at all, then no actiong is taken.""" + if check_only: + return cast( + Tuple[bool, str], + self._handle._handle.can_upgrade_soma_joinid_shape(newshape), + ) + else: + self._handle._handle.upgrade_soma_joinid_shape(newshape) + return (True, "") def __len__(self) -> int: """Returns the number of rows in the dataframe. Same as ``df.count``.""" diff --git a/apis/python/src/tiledbsoma/_sparse_nd_array.py b/apis/python/src/tiledbsoma/_sparse_nd_array.py index c811d320aa..6ba32af5e6 100644 --- a/apis/python/src/tiledbsoma/_sparse_nd_array.py +++ b/apis/python/src/tiledbsoma/_sparse_nd_array.py @@ -296,14 +296,20 @@ def read( return SparseNDArrayRead(sr, self, coords) - def resize(self, newshape: Sequence[Union[int, None]]) -> None: + def resize( + self, newshape: Sequence[Union[int, None]], check_only: bool = False + ) -> Tuple[bool, str]: """Increases the shape of the array as specfied. Raises an error if the new shape is less than the current shape in any dimension. Raises an error if the new shape exceeds maxshape in any dimension. Raises an error if the array doesn't already have a shape: in that case please call tiledbsoma_upgrade_shape. 
""" - self._handle.resize(newshape) + if check_only: + return self._handle.tiledbsoma_can_resize(newshape) + else: + self._handle.resize(newshape) + return (True, "") def tiledbsoma_upgrade_shape(self, newshape: Sequence[Union[int, None]]) -> None: """Allows the array to have a resizeable shape as described in the TileDB-SOMA diff --git a/apis/python/src/tiledbsoma/_tdb_handles.py b/apis/python/src/tiledbsoma/_tdb_handles.py index a61159df50..bcaf7bd43c 100644 --- a/apis/python/src/tiledbsoma/_tdb_handles.py +++ b/apis/python/src/tiledbsoma/_tdb_handles.py @@ -459,6 +459,12 @@ def resize(self, newshape: Sequence[Union[int, None]]) -> None: """Not implemented for DataFrame.""" raise NotImplementedError + def tiledbsoma_can_resize( + self, newshape: Sequence[Union[int, None]] + ) -> Tuple[bool, str]: + """Not implemented for DataFrame.""" + raise NotImplementedError + def tiledbsoma_upgrade_shape(self, newshape: Sequence[Union[int, None]]) -> None: """Not implemented for DataFrame.""" raise NotImplementedError @@ -467,6 +473,18 @@ def resize_soma_joinid_shape(self, newshape: int) -> None: """Only implemented for DataFrame.""" raise NotImplementedError + def can_resize_soma_joinid_shape(self, newshape: int) -> Tuple[bool, str]: + """Only implemented for DataFrame.""" + raise NotImplementedError + + def upgrade_soma_joinid_shape(self, newshape: int) -> None: + """Only implemented for DataFrame.""" + raise NotImplementedError + + def can_upgrade_soma_joinid_shape(self, newshape: int) -> Tuple[bool, str]: + """Only implemented for DataFrame.""" + raise NotImplementedError + class DataFrameWrapper(SOMAArrayWrapper[clib.SOMADataFrame]): """Wrapper around a Pybind11 SOMADataFrame handle.""" @@ -525,6 +543,36 @@ def resize_soma_joinid_shape(self, newshape: int) -> None: """ self._handle.resize_soma_joinid_shape(newshape) + def can_resize_soma_joinid_shape(self, newshape: int) -> Tuple[bool, str]: + """Increases the shape of the dataframe on the ``soma_joinid`` index + 
column, if it indeed is an index column, leaving all other index columns + as-is. If the ``soma_joinid`` is not an index column, no change is made. + This is a special case of ``upgrade_domain`` (WIP for 1.15), but simpler + to keystroke, and handles the most common case for dataframe domain + expansion. Raises an error if the dataframe doesn't already have a + domain: in that case please call ``tiledbsoma_upgrade_domain`` (WIP for + 1.15). + """ + return cast( + Tuple[bool, str], self._handle.can_resize_soma_joinid_shape(newshape) + ) + + def upgrade_soma_joinid_shape(self, newshape: int) -> None: + """This is like ``upgrade_domain``, but it only applies the specified domain + update to the ``soma_joinid`` index column. Any other index columns have their + domain set to match the maxdomain. If the ``soma_joinid`` column is not an index + column at all, then no actiong is taken.""" + self._handle.upgrade_soma_joinid_shape(newshape) + + def can_upgrade_soma_joinid_shape(self, newshape: int) -> Tuple[bool, str]: + """This allows you to see if ``upgrade_soma_joinid_shape`` will + succeed before calling it. This is an important test-point and + dev-internal access-point, in particular, for the tiledbsoma-io + experiment-level resizer.""" + return cast( + Tuple[bool, str], self._handle.can_upgrade_soma_joinid_shape(newshape) + ) + class PointCloudDataFrameWrapper(SOMAArrayWrapper[clib.SOMAPointCloudDataFrame]): """Wrapper around a Pybind11 SOMAPointCloudDataFrame handle.""" @@ -563,6 +611,16 @@ def resize(self, newshape: Sequence[Union[int, None]]) -> None: # https://github.com/single-cell-data/TileDB-SOMA/issues/2955 raise NotImplementedError() + def tiledbsoma_can_resize( + self, newshape: Sequence[Union[int, None]] + ) -> Tuple[bool, str]: + """Supported for ``SparseNDArray``; scheduled for implementation for + ``DenseNDArray`` in TileDB-SOMA 1.15 + """ + # TODO: support current domain for dense arrays once we have core support. 
+ # https://github.com/single-cell-data/TileDB-SOMA/issues/2955 + raise NotImplementedError() + class SparseNDArrayWrapper(SOMAArrayWrapper[clib.SOMASparseNDArray]): """Wrapper around a Pybind11 SparseNDArrayWrapper handle.""" @@ -593,6 +651,15 @@ def resize(self, newshape: Sequence[Union[int, None]]) -> None: """ self._handle.resize(newshape) + def tiledbsoma_can_resize( + self, newshape: Sequence[Union[int, None]] + ) -> Tuple[bool, str]: + """This allows you to see if ``resize`` will succeed before calling it. + This is an important test-point and dev-internal access-point, in + particular, for the tiledbsoma-io experiment-level resizer. + """ + return cast(Tuple[bool, str], self._handle.can_resize(newshape)) + def tiledbsoma_upgrade_shape(self, newshape: Sequence[Union[int, None]]) -> None: """Allows the array to have a resizeable shape as described in the TileDB-SOMA 1.15 release notes. Raises an error if the new shape exceeds maxshape in diff --git a/apis/python/src/tiledbsoma/soma_dataframe.cc b/apis/python/src/tiledbsoma/soma_dataframe.cc index 43bb11c444..8fa9655b03 100644 --- a/apis/python/src/tiledbsoma/soma_dataframe.cc +++ b/apis/python/src/tiledbsoma/soma_dataframe.cc @@ -167,6 +167,42 @@ void load_soma_dataframe(py::module& m) { throw TileDBSOMAError(e.what()); } }, + "newshape"_a) + + .def( + "can_resize_soma_joinid_shape", + [](SOMADataFrame& sdf, int64_t newshape) { + try { + return sdf.can_resize_soma_joinid_shape( + newshape, "can_resize_soma_joinid_shape"); + } catch (const std::exception& e) { + throw TileDBSOMAError(e.what()); + } + }, + "newshape"_a) + + .def( + "upgrade_soma_joinid_shape", + [](SOMADataFrame& sdf, int64_t newshape) { + try { + sdf.upgrade_soma_joinid_shape( + newshape, "upgrade_soma_joinid_shape"); + } catch (const std::exception& e) { + throw TileDBSOMAError(e.what()); + } + }, + "newshape"_a) + + .def( + "can_upgrade_soma_joinid_shape", + [](SOMADataFrame& sdf, int64_t newshape) { + try { + return 
sdf.can_upgrade_soma_joinid_shape( + newshape, "can_upgrade_soma_joinid_shape"); + } catch (const std::exception& e) { + throw TileDBSOMAError(e.what()); + } + }, "newshape"_a); } } // namespace libtiledbsomacpp diff --git a/apis/python/src/tiledbsoma/soma_sparse_ndarray.cc b/apis/python/src/tiledbsoma/soma_sparse_ndarray.cc index 5bfbf27b3f..8cceec2b3b 100644 --- a/apis/python/src/tiledbsoma/soma_sparse_ndarray.cc +++ b/apis/python/src/tiledbsoma/soma_sparse_ndarray.cc @@ -128,6 +128,17 @@ void load_soma_sparse_ndarray(py::module& m) { }, "newshape"_a) + .def( + "can_resize", + [](SOMAArray& array, const std::vector& newshape) { + try { + return array.can_resize(newshape, "can_resize"); + } catch (const std::exception& e) { + throw TileDBSOMAError(e.what()); + } + }, + "newshape"_a) + .def( "tiledbsoma_upgrade_shape", [](SOMAArray& array, const std::vector& newshape) { diff --git a/apis/python/tests/test_shape.py b/apis/python/tests/test_shape.py index 5374c56426..ef19c167cd 100644 --- a/apis/python/tests/test_shape.py +++ b/apis/python/tests/test_shape.py @@ -118,6 +118,9 @@ def test_sparse_nd_array_basics( # Test resize down new_shape = tuple([arg_shape[i] - 50 for i in range(ndim)]) with tiledbsoma.SparseNDArray.open(uri, "w") as snda: + (ok, msg) = snda.resize(new_shape, check_only=True) + assert not ok + assert msg == "can_resize for soma_dim_0: new 50 < existing shape 100" # TODO: check draft spec # with pytest.raises(ValueError): with pytest.raises(tiledbsoma.SOMAError): @@ -162,6 +165,18 @@ def test_sparse_nd_array_basics( with tiledbsoma.SparseNDArray.open(uri) as snda: assert snda.shape == new_shape + (ok, msg) = snda.resize(new_shape, check_only=True) + assert ok + assert msg == "" + + too_small = tuple(e - 1 for e in new_shape) + (ok, msg) = snda.resize(too_small, check_only=True) + assert not ok + assert msg == "can_resize for soma_dim_0: new 149 < existing shape 150" + + with tiledbsoma.SparseNDArray.open(uri, "w") as snda: + (ok, msg) = 
snda.resize(new_shape, check_only=True) + ## Pending 2.27 timeframe for dense support for current domain, including resize ## https://github.com/single-cell-data/TileDB-SOMA/issues/2955 @@ -276,12 +291,20 @@ def test_dataframe_basics(tmp_path, soma_joinid_domain, index_column_names): # Test resize down new_shape = 0 with tiledbsoma.DataFrame.open(uri, "w") as sdf: + ok, msg = sdf.resize_soma_joinid_shape(new_shape, check_only=True) if has_soma_joinid_dim: # TODO: check draft spec # with pytest.raises(ValueError): + assert not ok + assert ( + "can_resize_soma_joinid_shape: new soma_joinid shape 0 < existing shape" + in msg + ) with pytest.raises(tiledbsoma.SOMAError): sdf.resize_soma_joinid_shape(new_shape) else: + assert ok + assert msg == "" sdf.resize_soma_joinid_shape(new_shape) with tiledbsoma.DataFrame.open(uri) as sdf: From 8c7752a2e21fed9b70b9755010fcd4705d4904fc Mon Sep 17 00:00:00 2001 From: John Kerl Date: Tue, 8 Oct 2024 13:15:23 -0400 Subject: [PATCH 3/3] code-review feedback --- apis/python/src/tiledbsoma/_dataframe.py | 31 ++++++++----- .../python/src/tiledbsoma/_sparse_nd_array.py | 8 ++-- apis/python/src/tiledbsoma/_tdb_handles.py | 43 +++++++++++-------- apis/python/src/tiledbsoma/_types.py | 4 ++ 4 files changed, 54 insertions(+), 32 deletions(-) diff --git a/apis/python/src/tiledbsoma/_dataframe.py b/apis/python/src/tiledbsoma/_dataframe.py index 2fed32a918..01e14f1e3d 100644 --- a/apis/python/src/tiledbsoma/_dataframe.py +++ b/apis/python/src/tiledbsoma/_dataframe.py @@ -24,7 +24,14 @@ from ._read_iters import TableReadIter from ._soma_array import SOMAArray from ._tdb_handles import DataFrameWrapper -from ._types import NPFloating, NPInteger, OpenTimestamp, Slice, is_slice_of +from ._types import ( + NPFloating, + NPInteger, + OpenTimestamp, + Slice, + StatusAndReason, + is_slice_of, +) from .options import SOMATileDBContext from .options._soma_tiledb_context import _validate_soma_tiledb_context from .options._tiledb_create_write_options import 
( @@ -419,7 +426,7 @@ def tiledbsoma_has_upgraded_domain(self) -> bool: def resize_soma_joinid_shape( self, newshape: int, check_only: bool = False - ) -> Tuple[bool, str]: + ) -> StatusAndReason: """Increases the shape of the dataframe on the ``soma_joinid`` index column, if it indeed is an index column, leaving all other index columns as-is. If the ``soma_joinid`` is not an index column, no change is made. @@ -427,11 +434,12 @@ def resize_soma_joinid_shape( to keystroke, and handles the most common case for dataframe domain expansion. Raises an error if the dataframe doesn't already have a domain: in that case please call ``tiledbsoma_upgrade_domain`` (WIP for - 1.15). + 1.15). If ``check_only`` is ``True``, returns whether the operation + would succeed if attempted, and a reason why it would not. """ if check_only: return cast( - Tuple[bool, str], + StatusAndReason, self._handle._handle.can_resize_soma_joinid_shape(newshape), ) else: @@ -440,14 +448,17 @@ def resize_soma_joinid_shape( def upgrade_soma_joinid_shape( self, newshape: int, check_only: bool = False - ) -> Tuple[bool, str]: - """This is like ``upgrade_domain``, but it only applies the specified domain - update to the ``soma_joinid`` index column. Any other index columns have their - domain set to match the maxdomain. If the ``soma_joinid`` column is not an index - column at all, then no actiong is taken.""" + ) -> StatusAndReason: + """This is like ``upgrade_domain``, but it only applies the specified + domain update to the ``soma_joinid`` index column. Any other index + columns have their domain set to match the maxdomain. If the + ``soma_joinid`` column is not an index column at all, then no action is + taken. If ``check_only`` is ``True``, returns whether the operation + would succeed if attempted, and a reason why it would not. 
+ """ if check_only: return cast( - Tuple[bool, str], + StatusAndReason, self._handle._handle.can_upgrade_soma_joinid_shape(newshape), ) else: diff --git a/apis/python/src/tiledbsoma/_sparse_nd_array.py b/apis/python/src/tiledbsoma/_sparse_nd_array.py index 6ba32af5e6..ac27185df9 100644 --- a/apis/python/src/tiledbsoma/_sparse_nd_array.py +++ b/apis/python/src/tiledbsoma/_sparse_nd_array.py @@ -42,7 +42,7 @@ TableReadIter, ) from ._tdb_handles import SparseNDArrayWrapper -from ._types import NTuple, OpenTimestamp +from ._types import NTuple, OpenTimestamp, StatusAndReason from .options._soma_tiledb_context import ( SOMATileDBContext, _validate_soma_tiledb_context, @@ -298,12 +298,14 @@ def read( def resize( self, newshape: Sequence[Union[int, None]], check_only: bool = False - ) -> Tuple[bool, str]: + ) -> StatusAndReason: """Increases the shape of the array as specfied. Raises an error if the new shape is less than the current shape in any dimension. Raises an error if the new shape exceeds maxshape in any dimension. Raises an error if the array doesn't already have a shape: in that case please call - tiledbsoma_upgrade_shape. + tiledbsoma_upgrade_shape. If ``check_only`` is ``True``, returns + whether the operation would succeed if attempted, and a reason why it + would not. """ if check_only: return self._handle.tiledbsoma_can_resize(newshape) diff --git a/apis/python/src/tiledbsoma/_tdb_handles.py b/apis/python/src/tiledbsoma/_tdb_handles.py index bcaf7bd43c..a41de4c748 100644 --- a/apis/python/src/tiledbsoma/_tdb_handles.py +++ b/apis/python/src/tiledbsoma/_tdb_handles.py @@ -35,7 +35,7 @@ from . 
import pytiledbsoma as clib from ._exception import DoesNotExistError, SOMAError, is_does_not_exist_error -from ._types import METADATA_TYPES, Metadatum, OpenTimestamp +from ._types import METADATA_TYPES, Metadatum, OpenTimestamp, StatusAndReason from .options._soma_tiledb_context import SOMATileDBContext RawHandle = Union[ @@ -461,7 +461,7 @@ def resize(self, newshape: Sequence[Union[int, None]]) -> None: def tiledbsoma_can_resize( self, newshape: Sequence[Union[int, None]] - ) -> Tuple[bool, str]: + ) -> StatusAndReason: """Not implemented for DataFrame.""" raise NotImplementedError @@ -473,7 +473,7 @@ def resize_soma_joinid_shape(self, newshape: int) -> None: """Only implemented for DataFrame.""" raise NotImplementedError - def can_resize_soma_joinid_shape(self, newshape: int) -> Tuple[bool, str]: + def can_resize_soma_joinid_shape(self, newshape: int) -> StatusAndReason: """Only implemented for DataFrame.""" raise NotImplementedError @@ -481,7 +481,7 @@ def upgrade_soma_joinid_shape(self, newshape: int) -> None: """Only implemented for DataFrame.""" raise NotImplementedError - def can_upgrade_soma_joinid_shape(self, newshape: int) -> Tuple[bool, str]: + def can_upgrade_soma_joinid_shape(self, newshape: int) -> StatusAndReason: """Only implemented for DataFrame.""" raise NotImplementedError @@ -543,7 +543,7 @@ def resize_soma_joinid_shape(self, newshape: int) -> None: """ self._handle.resize_soma_joinid_shape(newshape) - def can_resize_soma_joinid_shape(self, newshape: int) -> Tuple[bool, str]: + def can_resize_soma_joinid_shape(self, newshape: int) -> StatusAndReason: """Increases the shape of the dataframe on the ``soma_joinid`` index column, if it indeed is an index column, leaving all other index columns as-is. If the ``soma_joinid`` is not an index column, no change is made. @@ -551,26 +551,29 @@ def can_resize_soma_joinid_shape(self, newshape: int) -> Tuple[bool, str]: to keystroke, and handles the most common case for dataframe domain expansion. 
Raises an error if the dataframe doesn't already have a domain: in that case please call ``tiledbsoma_upgrade_domain`` (WIP for - 1.15). + 1.15). Returns whether the operation would succeed if attempted, + and a reason why it would not. """ return cast( - Tuple[bool, str], self._handle.can_resize_soma_joinid_shape(newshape) + StatusAndReason, self._handle.can_resize_soma_joinid_shape(newshape) ) def upgrade_soma_joinid_shape(self, newshape: int) -> None: """This is like ``upgrade_domain``, but it only applies the specified domain update to the ``soma_joinid`` index column. Any other index columns have their domain set to match the maxdomain. If the ``soma_joinid`` column is not an index - column at all, then no actiong is taken.""" + column at all, then no action is taken.""" self._handle.upgrade_soma_joinid_shape(newshape) - def can_upgrade_soma_joinid_shape(self, newshape: int) -> Tuple[bool, str]: - """This allows you to see if ``upgrade_soma_joinid_shape`` will - succeed before calling it. This is an important test-point and - dev-internal access-point, in particular, for the tiledbsoma-io - experiment-level resizer.""" + def can_upgrade_soma_joinid_shape(self, newshape: int) -> StatusAndReason: + """This allows you to see if ``upgrade_soma_joinid_shape`` will succeed + before calling it. This is an important test-point and dev-internal + access-point, in particular, for the tiledbsoma-io experiment-level + resizer. Returns whether the operation would succeed if attempted, and + a reason why it would not. 
+ """ return cast( - Tuple[bool, str], self._handle.can_upgrade_soma_joinid_shape(newshape) + StatusAndReason, self._handle.can_upgrade_soma_joinid_shape(newshape) ) @@ -613,9 +616,9 @@ def resize(self, newshape: Sequence[Union[int, None]]) -> None: def tiledbsoma_can_resize( self, newshape: Sequence[Union[int, None]] - ) -> Tuple[bool, str]: + ) -> StatusAndReason: """Supported for ``SparseNDArray``; scheduled for implementation for - ``DenseNDArray`` in TileDB-SOMA 1.15 + ``DenseNDArray`` in TileDB-SOMA 1.15. """ # TODO: support current domain for dense arrays once we have core support. # https://github.com/single-cell-data/TileDB-SOMA/issues/2955 @@ -653,12 +656,14 @@ def resize(self, newshape: Sequence[Union[int, None]]) -> None: def tiledbsoma_can_resize( self, newshape: Sequence[Union[int, None]] - ) -> Tuple[bool, str]: + ) -> StatusAndReason: """This allows you to see if ``resize`` will succeed before calling it. This is an important test-point and dev-internal access-point, in - particular, for the tiledbsoma-io experiment-level resizer. + particular, for the tiledbsoma-io experiment-level resizer. Returns + whether the operation would succeed if attempted, and a reason why + it would not. 
""" - return cast(Tuple[bool, str], self._handle.can_resize(newshape)) + return cast(StatusAndReason, self._handle.can_resize(newshape)) def tiledbsoma_upgrade_shape(self, newshape: Sequence[Union[int, None]]) -> None: """Allows the array to have a resizeable shape as described in the TileDB-SOMA diff --git a/apis/python/src/tiledbsoma/_types.py b/apis/python/src/tiledbsoma/_types.py index 5dc742c252..4033c0d6fd 100644 --- a/apis/python/src/tiledbsoma/_types.py +++ b/apis/python/src/tiledbsoma/_types.py @@ -78,3 +78,7 @@ Metadatum = Union[bytes, float, int, str] METADATA_TYPES = (bytes, float, int, str) + +StatusAndReason = Tuple[bool, str] +"""Information for whether an upgrade-shape or resize would succeed +if attempted, along with a reason why not."""