Skip to content

Commit

Permalink
[python] Connect resizers to the Python API (#3140)
Browse files Browse the repository at this point in the history
* [c++] Unit-test dataframe upgraders

* [python] Connect resizers to Python API

* code-review feedback
  • Loading branch information
johnkerl authored Oct 8, 2024
1 parent 6b65790 commit 50d63c1
Show file tree
Hide file tree
Showing 7 changed files with 199 additions and 9 deletions.
44 changes: 40 additions & 4 deletions apis/python/src/tiledbsoma/_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,14 @@
from ._read_iters import TableReadIter
from ._soma_array import SOMAArray
from ._tdb_handles import DataFrameWrapper
from ._types import NPFloating, NPInteger, OpenTimestamp, Slice, is_slice_of
from ._types import (
NPFloating,
NPInteger,
OpenTimestamp,
Slice,
StatusAndReason,
is_slice_of,
)
from .options import SOMATileDBContext
from .options._soma_tiledb_context import _validate_soma_tiledb_context
from .options._tiledb_create_write_options import (
Expand Down Expand Up @@ -417,17 +424,46 @@ def tiledbsoma_has_upgraded_domain(self) -> bool:
"""
return self._handle.tiledbsoma_has_upgraded_domain

def resize_soma_joinid_shape(
    self, newshape: int, check_only: bool = False
) -> StatusAndReason:
    """Increases the shape of the dataframe on the ``soma_joinid`` index
    column, if it indeed is an index column, leaving all other index columns
    as-is. If the ``soma_joinid`` is not an index column, no change is made.
    This is a special case of ``upgrade_domain`` (WIP for 1.15), but simpler
    to keystroke, and handles the most common case for dataframe domain
    expansion. Raises an error if the dataframe doesn't already have a
    domain: in that case please call ``tiledbsoma_upgrade_domain`` (WIP for
    1.15).

    Args:
        newshape: The requested new shape for the ``soma_joinid`` column.
        check_only: If ``True``, do not attempt the resize; only report
            whether it would succeed if attempted.

    Returns:
        A ``(status, reason)`` pair. With ``check_only=True`` this is
        whatever the handle's ``can_resize_soma_joinid_shape`` reports: the
        status says whether the operation would succeed, and the reason
        says why it would not. Otherwise the resize is attempted and
        ``(True, "")`` is returned when it completes without raising.
    """
    if check_only:
        # Go through the handle wrapper rather than the raw binding: the
        # wrapper already narrows the result to ``StatusAndReason``.
        return self._handle.can_resize_soma_joinid_shape(newshape)
    self._handle.resize_soma_joinid_shape(newshape)
    return (True, "")

def upgrade_soma_joinid_shape(
    self, newshape: int, check_only: bool = False
) -> StatusAndReason:
    """This is like ``upgrade_domain``, but it only applies the specified
    domain update to the ``soma_joinid`` index column. Any other index
    columns have their domain set to match the maxdomain. If the
    ``soma_joinid`` column is not an index column at all, then no action is
    taken.

    Args:
        newshape: The requested shape for the ``soma_joinid`` column.
        check_only: If ``True``, do not attempt the upgrade; only report
            whether it would succeed if attempted.

    Returns:
        A ``(status, reason)`` pair. With ``check_only=True`` this is
        whatever the handle's ``can_upgrade_soma_joinid_shape`` reports: the
        status says whether the operation would succeed, and the reason
        says why it would not. Otherwise the upgrade is attempted and
        ``(True, "")`` is returned when it completes without raising.
    """
    # Nothing may touch the array before the ``check_only`` test: a dry run
    # must only ask the handle whether the upgrade is possible.
    if check_only:
        return self._handle.can_upgrade_soma_joinid_shape(newshape)
    self._handle.upgrade_soma_joinid_shape(newshape)
    return (True, "")

def __len__(self) -> int:
"""Returns the number of rows in the dataframe. Same as ``df.count``."""
Expand Down
16 changes: 12 additions & 4 deletions apis/python/src/tiledbsoma/_sparse_nd_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
TableReadIter,
)
from ._tdb_handles import SparseNDArrayWrapper
from ._types import NTuple, OpenTimestamp
from ._types import NTuple, OpenTimestamp, StatusAndReason
from .options._soma_tiledb_context import (
SOMATileDBContext,
_validate_soma_tiledb_context,
Expand Down Expand Up @@ -296,14 +296,22 @@ def read(

return SparseNDArrayRead(sr, self, coords)

def resize(
    self, newshape: Sequence[Union[int, None]], check_only: bool = False
) -> StatusAndReason:
    """Increases the shape of the array as specified. Raises an error if the new
    shape is less than the current shape in any dimension. Raises an error if
    the new shape exceeds maxshape in any dimension. Raises an error if the
    array doesn't already have a shape: in that case please call
    tiledbsoma_upgrade_shape.

    Args:
        newshape: The requested new shape, one entry per dimension.
        check_only: If ``True``, do not attempt the resize; only report
            whether it would succeed if attempted.

    Returns:
        A ``(status, reason)`` pair. With ``check_only=True`` this is
        whatever the handle's ``tiledbsoma_can_resize`` reports: the status
        says whether the operation would succeed, and the reason says why
        it would not. Otherwise the resize is attempted and ``(True, "")``
        is returned when it completes without raising.
    """
    # The dry-run branch must come first so that ``check_only=True`` never
    # reaches the mutating ``resize`` call.
    if check_only:
        return self._handle.tiledbsoma_can_resize(newshape)
    self._handle.resize(newshape)
    return (True, "")

def tiledbsoma_upgrade_shape(self, newshape: Sequence[Union[int, None]]) -> None:
"""Allows the array to have a resizeable shape as described in the TileDB-SOMA
Expand Down
74 changes: 73 additions & 1 deletion apis/python/src/tiledbsoma/_tdb_handles.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@

from . import pytiledbsoma as clib
from ._exception import DoesNotExistError, SOMAError, is_does_not_exist_error
from ._types import METADATA_TYPES, Metadatum, OpenTimestamp
from ._types import METADATA_TYPES, Metadatum, OpenTimestamp, StatusAndReason
from .options._soma_tiledb_context import SOMATileDBContext

RawHandle = Union[
Expand Down Expand Up @@ -459,6 +459,12 @@ def resize(self, newshape: Sequence[Union[int, None]]) -> None:
"""Not implemented for DataFrame."""
raise NotImplementedError

def tiledbsoma_can_resize(
    self, newshape: Sequence[Union[int, None]]
) -> StatusAndReason:
    """Placeholder that always raises ``NotImplementedError``.

    Not implemented for DataFrame.
    """
    raise NotImplementedError()

def tiledbsoma_upgrade_shape(self, newshape: Sequence[Union[int, None]]) -> None:
"""Not implemented for DataFrame."""
raise NotImplementedError
Expand All @@ -467,6 +473,18 @@ def resize_soma_joinid_shape(self, newshape: int) -> None:
"""Only implemented for DataFrame."""
raise NotImplementedError

def can_resize_soma_joinid_shape(self, newshape: int) -> StatusAndReason:
    """Placeholder that always raises ``NotImplementedError``.

    Only implemented for DataFrame.
    """
    raise NotImplementedError()

def upgrade_soma_joinid_shape(self, newshape: int) -> None:
    """Placeholder that always raises ``NotImplementedError``.

    Only implemented for DataFrame.
    """
    raise NotImplementedError()

def can_upgrade_soma_joinid_shape(self, newshape: int) -> StatusAndReason:
    """Placeholder that always raises ``NotImplementedError``.

    Only implemented for DataFrame.
    """
    raise NotImplementedError()


class DataFrameWrapper(SOMAArrayWrapper[clib.SOMADataFrame]):
"""Wrapper around a Pybind11 SOMADataFrame handle."""
Expand Down Expand Up @@ -525,6 +543,39 @@ def resize_soma_joinid_shape(self, newshape: int) -> None:
"""
self._handle.resize_soma_joinid_shape(newshape)

def can_resize_soma_joinid_shape(self, newshape: int) -> StatusAndReason:
    """This allows you to see if ``resize_soma_joinid_shape`` will succeed
    before calling it.

    The question is delegated to the underlying handle's
    ``can_resize_soma_joinid_shape``.

    Args:
        newshape: The ``soma_joinid`` shape that a subsequent
            ``resize_soma_joinid_shape`` call would request.

    Returns:
        A ``(status, reason)`` pair: whether the resize would succeed if
        attempted, and a reason why it would not.
    """
    # ``cast`` only narrows the binding's return value for the type checker;
    # it does nothing at runtime.
    return cast(
        StatusAndReason, self._handle.can_resize_soma_joinid_shape(newshape)
    )

def upgrade_soma_joinid_shape(self, newshape: int) -> None:
    """Applies the specified domain update to the ``soma_joinid`` index
    column only, in the manner of ``upgrade_domain``.

    Any other index columns have their domain set to match the maxdomain.
    If the ``soma_joinid`` column is not an index column at all, then no
    action is taken. The work is delegated to the underlying handle.
    """
    underlying = self._handle
    underlying.upgrade_soma_joinid_shape(newshape)

def can_upgrade_soma_joinid_shape(self, newshape: int) -> StatusAndReason:
    """This allows you to see if ``upgrade_soma_joinid_shape`` will succeed
    before calling it. This is an important test-point and dev-internal
    access-point, in particular, for the tiledbsoma-io experiment-level
    resizer.

    Args:
        newshape: The ``soma_joinid`` shape that a subsequent
            ``upgrade_soma_joinid_shape`` call would request.

    Returns:
        A ``(status, reason)`` pair: whether the upgrade would succeed if
        attempted, and a reason why it would not.
    """
    # ``cast`` only narrows the binding's return value for the type checker;
    # it does nothing at runtime.
    return cast(
        StatusAndReason, self._handle.can_upgrade_soma_joinid_shape(newshape)
    )


class PointCloudDataFrameWrapper(SOMAArrayWrapper[clib.SOMAPointCloudDataFrame]):
"""Wrapper around a Pybind11 SOMAPointCloudDataFrame handle."""
Expand Down Expand Up @@ -563,6 +614,16 @@ def resize(self, newshape: Sequence[Union[int, None]]) -> None:
# https://github.com/single-cell-data/TileDB-SOMA/issues/2955
raise NotImplementedError()

def tiledbsoma_can_resize(
    self, newshape: Sequence[Union[int, None]]
) -> StatusAndReason:
    """Always raises ``NotImplementedError`` for now.

    Supported for ``SparseNDArray``; scheduled for implementation for
    ``DenseNDArray`` in TileDB-SOMA 1.15.
    """
    # TODO: support current domain for dense arrays once we have core support.
    # https://github.com/single-cell-data/TileDB-SOMA/issues/2955
    raise NotImplementedError


class SparseNDArrayWrapper(SOMAArrayWrapper[clib.SOMASparseNDArray]):
"""Wrapper around a Pybind11 SparseNDArrayWrapper handle."""
Expand Down Expand Up @@ -593,6 +654,17 @@ def resize(self, newshape: Sequence[Union[int, None]]) -> None:
"""
self._handle.resize(newshape)

def tiledbsoma_can_resize(
    self, newshape: Sequence[Union[int, None]]
) -> StatusAndReason:
    """This allows you to see if ``resize`` will succeed before calling it.
    This is an important test-point and dev-internal access-point, in
    particular, for the tiledbsoma-io experiment-level resizer.

    Args:
        newshape: The shape that a subsequent ``resize`` call would request.

    Returns:
        A ``(status, reason)`` pair: whether the resize would succeed if
        attempted, and a reason why it would not.
    """
    # The underlying handle exposes this check as ``can_resize``; ``cast``
    # only narrows its return value for the type checker.
    return cast(StatusAndReason, self._handle.can_resize(newshape))

def tiledbsoma_upgrade_shape(self, newshape: Sequence[Union[int, None]]) -> None:
"""Allows the array to have a resizeable shape as described in the TileDB-SOMA
1.15 release notes. Raises an error if the new shape exceeds maxshape in
Expand Down
4 changes: 4 additions & 0 deletions apis/python/src/tiledbsoma/_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,3 +78,7 @@

Metadatum = Union[bytes, float, int, str]
METADATA_TYPES = (bytes, float, int, str)

StatusAndReason = Tuple[bool, str]
"""Information for whether an upgrade-shape or resize would succeed
if attempted, along with a reason why not."""
36 changes: 36 additions & 0 deletions apis/python/src/tiledbsoma/soma_dataframe.cc
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,42 @@ void load_soma_dataframe(py::module& m) {
throw TileDBSOMAError(e.what());
}
},
"newshape"_a)

.def(
"can_resize_soma_joinid_shape",
[](SOMADataFrame& sdf, int64_t newshape) {
try {
return sdf.can_resize_soma_joinid_shape(
newshape, "can_resize_soma_joinid_shape");
} catch (const std::exception& e) {
throw TileDBSOMAError(e.what());
}
},
"newshape"_a)

.def(
"upgrade_soma_joinid_shape",
[](SOMADataFrame& sdf, int64_t newshape) {
try {
sdf.upgrade_soma_joinid_shape(
newshape, "upgrade_soma_joinid_shape");
} catch (const std::exception& e) {
throw TileDBSOMAError(e.what());
}
},
"newshape"_a)

.def(
"can_upgrade_soma_joinid_shape",
[](SOMADataFrame& sdf, int64_t newshape) {
try {
return sdf.can_upgrade_soma_joinid_shape(
newshape, "can_upgrade_soma_joinid_shape");
} catch (const std::exception& e) {
throw TileDBSOMAError(e.what());
}
},
"newshape"_a);
}
} // namespace libtiledbsomacpp
11 changes: 11 additions & 0 deletions apis/python/src/tiledbsoma/soma_sparse_ndarray.cc
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,17 @@ void load_soma_sparse_ndarray(py::module& m) {
},
"newshape"_a)

.def(
"can_resize",
[](SOMAArray& array, const std::vector<int64_t>& newshape) {
try {
return array.can_resize(newshape, "can_resize");
} catch (const std::exception& e) {
throw TileDBSOMAError(e.what());
}
},
"newshape"_a)

.def(
"tiledbsoma_upgrade_shape",
[](SOMAArray& array, const std::vector<int64_t>& newshape) {
Expand Down
23 changes: 23 additions & 0 deletions apis/python/tests/test_shape.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,9 @@ def test_sparse_nd_array_basics(
# Test resize down
new_shape = tuple([arg_shape[i] - 50 for i in range(ndim)])
with tiledbsoma.SparseNDArray.open(uri, "w") as snda:
(ok, msg) = snda.resize(new_shape, check_only=True)
assert not ok
assert msg == "can_resize for soma_dim_0: new 50 < existing shape 100"
# TODO: check draft spec
# with pytest.raises(ValueError):
with pytest.raises(tiledbsoma.SOMAError):
Expand Down Expand Up @@ -162,6 +165,18 @@ def test_sparse_nd_array_basics(
with tiledbsoma.SparseNDArray.open(uri) as snda:
assert snda.shape == new_shape

(ok, msg) = snda.resize(new_shape, check_only=True)
assert ok
assert msg == ""

too_small = tuple(e - 1 for e in new_shape)
(ok, msg) = snda.resize(too_small, check_only=True)
assert not ok
assert msg == "can_resize for soma_dim_0: new 149 < existing shape 150"

with tiledbsoma.SparseNDArray.open(uri, "w") as snda:
(ok, msg) = snda.resize(new_shape, check_only=True)


## Pending 2.27 timeframe for dense support for current domain, including resize
## https://github.com/single-cell-data/TileDB-SOMA/issues/2955
Expand Down Expand Up @@ -276,12 +291,20 @@ def test_dataframe_basics(tmp_path, soma_joinid_domain, index_column_names):
# Test resize down
new_shape = 0
with tiledbsoma.DataFrame.open(uri, "w") as sdf:
ok, msg = sdf.resize_soma_joinid_shape(new_shape, check_only=True)
if has_soma_joinid_dim:
# TODO: check draft spec
# with pytest.raises(ValueError):
assert not ok
assert (
"can_resize_soma_joinid_shape: new soma_joinid shape 0 < existing shape"
in msg
)
with pytest.raises(tiledbsoma.SOMAError):
sdf.resize_soma_joinid_shape(new_shape)
else:
assert ok
assert msg == ""
sdf.resize_soma_joinid_shape(new_shape)

with tiledbsoma.DataFrame.open(uri) as sdf:
Expand Down

0 comments on commit 50d63c1

Please sign in to comment.