Skip to content

Commit

Permalink
[python] Connect resizers to Python API
Browse files Browse the repository at this point in the history
  • Loading branch information
johnkerl committed Oct 6, 2024
1 parent ae4eb45 commit 8c2aaaf
Show file tree
Hide file tree
Showing 6 changed files with 178 additions and 4 deletions.
29 changes: 27 additions & 2 deletions apis/python/src/tiledbsoma/_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -417,7 +417,9 @@ def tiledbsoma_has_upgraded_domain(self) -> bool:
"""
return self._handle.tiledbsoma_has_upgraded_domain

def resize_soma_joinid_shape(self, newshape: int) -> None:
def resize_soma_joinid_shape(
self, newshape: int, check_only: bool = False
) -> Tuple[bool, str]:
"""Increases the shape of the dataframe on the ``soma_joinid`` index
column, if it indeed is an index column, leaving all other index columns
as-is. If the ``soma_joinid`` is not an index column, no change is made.
Expand All @@ -427,7 +429,30 @@ def resize_soma_joinid_shape(self, newshape: int) -> None:
domain: in that case please call ``tiledbsoma_upgrade_domain`` (WIP for
1.15).
"""
self._handle._handle.resize_soma_joinid_shape(newshape)
if check_only:
return cast(
Tuple[bool, str],
self._handle._handle.can_resize_soma_joinid_shape(newshape),
)
else:
self._handle._handle.resize_soma_joinid_shape(newshape)
return (True, "")

def upgrade_soma_joinid_shape(
    self, newshape: int, check_only: bool = False
) -> Tuple[bool, str]:
    """Upgrades the domain of the ``soma_joinid`` index column only.

    This is like ``upgrade_domain``, but the specified domain update is
    applied solely to the ``soma_joinid`` index column. Any other index
    columns have their domain set to match the maxdomain. If the
    ``soma_joinid`` column is not an index column at all, then no action is
    taken.

    If ``check_only`` is true, the upgrade itself is not requested: the
    ``(ok, message)`` pair from the underlying handle's
    ``can_upgrade_soma_joinid_shape`` is returned instead. Otherwise the
    upgrade is requested on the underlying handle and ``(True, "")`` is
    returned.
    """
    native = self._handle._handle
    if not check_only:
        native.upgrade_soma_joinid_shape(newshape)
        return (True, "")
    return cast(Tuple[bool, str], native.can_upgrade_soma_joinid_shape(newshape))

def __len__(self) -> int:
"""Returns the number of rows in the dataframe. Same as ``df.count``."""
Expand Down
10 changes: 8 additions & 2 deletions apis/python/src/tiledbsoma/_sparse_nd_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,14 +296,20 @@ def read(

return SparseNDArrayRead(sr, self, coords)

def resize(self, newshape: Sequence[Union[int, None]]) -> None:
def resize(
    self, newshape: Sequence[Union[int, None]], check_only: bool = False
) -> Tuple[bool, str]:
    """Increases the shape of the array as specified.

    Raises an error if the new shape is less than the current shape in any
    dimension. Raises an error if the new shape exceeds maxshape in any
    dimension. Raises an error if the array doesn't already have a shape: in
    that case please call tiledbsoma_upgrade_shape.

    If ``check_only`` is true, the resize is not requested: the
    ``(ok, message)`` pair from the handle's ``tiledbsoma_can_resize`` is
    returned instead. Otherwise the resize is requested on the handle and
    ``(True, "")`` is returned.
    """
    if not check_only:
        self._handle.resize(newshape)
        return (True, "")
    return self._handle.tiledbsoma_can_resize(newshape)

def tiledbsoma_upgrade_shape(self, newshape: Sequence[Union[int, None]]) -> None:
"""Allows the array to have a resizeable shape as described in the TileDB-SOMA
Expand Down
67 changes: 67 additions & 0 deletions apis/python/src/tiledbsoma/_tdb_handles.py
Original file line number Diff line number Diff line change
Expand Up @@ -459,6 +459,12 @@ def resize(self, newshape: Sequence[Union[int, None]]) -> None:
"""Not implemented for DataFrame."""
raise NotImplementedError

def tiledbsoma_can_resize(
    self, newshape: Sequence[Union[int, None]]
) -> Tuple[bool, str]:
    """Placeholder that unconditionally raises ``NotImplementedError``.

    Not implemented for DataFrame.
    """
    raise NotImplementedError()

def tiledbsoma_upgrade_shape(self, newshape: Sequence[Union[int, None]]) -> None:
    """Placeholder that unconditionally raises ``NotImplementedError``.

    Not implemented for DataFrame.
    """
    raise NotImplementedError()
Expand All @@ -467,6 +473,18 @@ def resize_soma_joinid_shape(self, newshape: int) -> None:
"""Only implemented for DataFrame."""
raise NotImplementedError

def can_resize_soma_joinid_shape(self, newshape: int) -> Tuple[bool, str]:
    """Placeholder that unconditionally raises ``NotImplementedError``.

    Only implemented for DataFrame.
    """
    raise NotImplementedError()

def upgrade_soma_joinid_shape(self, newshape: int) -> None:
    """Placeholder that unconditionally raises ``NotImplementedError``.

    Only implemented for DataFrame.
    """
    raise NotImplementedError()

def can_upgrade_soma_joinid_shape(self, newshape: int) -> Tuple[bool, str]:
    """Placeholder that unconditionally raises ``NotImplementedError``.

    Only implemented for DataFrame.
    """
    raise NotImplementedError()


class DataFrameWrapper(SOMAArrayWrapper[clib.SOMADataFrame]):
"""Wrapper around a Pybind11 SOMADataFrame handle."""
Expand Down Expand Up @@ -525,6 +543,36 @@ def resize_soma_joinid_shape(self, newshape: int) -> None:
"""
self._handle.resize_soma_joinid_shape(newshape)

def can_resize_soma_joinid_shape(self, newshape: int) -> Tuple[bool, str]:
    """This allows you to see if ``resize_soma_joinid_shape`` will succeed
    before calling it.

    The request is forwarded to the underlying handle's
    ``can_resize_soma_joinid_shape`` and its result is returned as an
    ``(ok, message)`` pair. This method itself does not call
    ``resize_soma_joinid_shape``.
    """
    verdict = self._handle.can_resize_soma_joinid_shape(newshape)
    return cast(Tuple[bool, str], verdict)

def upgrade_soma_joinid_shape(self, newshape: int) -> None:
    """Upgrades the domain of the ``soma_joinid`` index column only.

    This is like ``upgrade_domain``, but the specified domain update is
    applied solely to the ``soma_joinid`` index column. Any other index
    columns have their domain set to match the maxdomain. If the
    ``soma_joinid`` column is not an index column at all, then no action is
    taken.
    """
    wrapped = self._handle
    wrapped.upgrade_soma_joinid_shape(newshape)

def can_upgrade_soma_joinid_shape(self, newshape: int) -> Tuple[bool, str]:
    """This allows you to see if ``upgrade_soma_joinid_shape`` will succeed
    before calling it.

    This is an important test-point and dev-internal access-point, in
    particular, for the tiledbsoma-io experiment-level resizer. The result of
    the underlying handle's ``can_upgrade_soma_joinid_shape`` is returned as
    an ``(ok, message)`` pair.
    """
    verdict = self._handle.can_upgrade_soma_joinid_shape(newshape)
    return cast(Tuple[bool, str], verdict)


class PointCloudDataFrameWrapper(SOMAArrayWrapper[clib.SOMAPointCloudDataFrame]):
"""Wrapper around a Pybind11 SOMAPointCloudDataFrame handle."""
Expand Down Expand Up @@ -563,6 +611,16 @@ def resize(self, newshape: Sequence[Union[int, None]]) -> None:
# https://github.com/single-cell-data/TileDB-SOMA/issues/2955
raise NotImplementedError()

def tiledbsoma_can_resize(
    self, newshape: Sequence[Union[int, None]]
) -> Tuple[bool, str]:
    """Currently always raises ``NotImplementedError``.

    Supported for ``SparseNDArray``; scheduled for implementation for
    ``DenseNDArray`` in TileDB-SOMA 1.15.
    """
    # TODO: support current domain for dense arrays once we have core support.
    # https://github.com/single-cell-data/TileDB-SOMA/issues/2955
    raise NotImplementedError


class SparseNDArrayWrapper(SOMAArrayWrapper[clib.SOMASparseNDArray]):
"""Wrapper around a Pybind11 SparseNDArrayWrapper handle."""
Expand Down Expand Up @@ -593,6 +651,15 @@ def resize(self, newshape: Sequence[Union[int, None]]) -> None:
"""
self._handle.resize(newshape)

def tiledbsoma_can_resize(
    self, newshape: Sequence[Union[int, None]]
) -> Tuple[bool, str]:
    """This allows you to see if ``resize`` will succeed before calling it.

    This is an important test-point and dev-internal access-point, in
    particular, for the tiledbsoma-io experiment-level resizer. The result of
    the underlying handle's ``can_resize`` is returned as an ``(ok, message)``
    pair.
    """
    verdict = self._handle.can_resize(newshape)
    return cast(Tuple[bool, str], verdict)

def tiledbsoma_upgrade_shape(self, newshape: Sequence[Union[int, None]]) -> None:
"""Allows the array to have a resizeable shape as described in the TileDB-SOMA
1.15 release notes. Raises an error if the new shape exceeds maxshape in
Expand Down
36 changes: 36 additions & 0 deletions apis/python/src/tiledbsoma/soma_dataframe.cc
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,42 @@ void load_soma_dataframe(py::module& m) {
throw TileDBSOMAError(e.what());
}
},
"newshape"_a)

.def(
"can_resize_soma_joinid_shape",
[](SOMADataFrame& sdf, int64_t newshape) {
try {
return sdf.can_resize_soma_joinid_shape(
newshape, "can_resize_soma_joinid_shape");
} catch (const std::exception& e) {
throw TileDBSOMAError(e.what());
}
},
"newshape"_a)

.def(
"upgrade_soma_joinid_shape",
[](SOMADataFrame& sdf, int64_t newshape) {
try {
sdf.upgrade_soma_joinid_shape(
newshape, "upgrade_soma_joinid_shape");
} catch (const std::exception& e) {
throw TileDBSOMAError(e.what());
}
},
"newshape"_a)

.def(
"can_upgrade_soma_joinid_shape",
[](SOMADataFrame& sdf, int64_t newshape) {
try {
return sdf.can_upgrade_soma_joinid_shape(
newshape, "can_upgrade_soma_joinid_shape");
} catch (const std::exception& e) {
throw TileDBSOMAError(e.what());
}
},
"newshape"_a);
}
} // namespace libtiledbsomacpp
11 changes: 11 additions & 0 deletions apis/python/src/tiledbsoma/soma_sparse_ndarray.cc
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,17 @@ void load_soma_sparse_ndarray(py::module& m) {
},
"newshape"_a)

.def(
"can_resize",
[](SOMAArray& array, const std::vector<int64_t>& newshape) {
try {
return array.can_resize(newshape, "can_resize");
} catch (const std::exception& e) {
throw TileDBSOMAError(e.what());
}
},
"newshape"_a)

.def(
"tiledbsoma_upgrade_shape",
[](SOMAArray& array, const std::vector<int64_t>& newshape) {
Expand Down
29 changes: 29 additions & 0 deletions apis/python/tests/test_shape.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@

import tiledbsoma

# ? upgrade_soma_joinid_shape
# ? w/ check_only
#
# ? tiledbsoma_upgrade_shape
# ? w/ check_only


@pytest.mark.parametrize(
"element_dtype",
Expand Down Expand Up @@ -118,6 +124,9 @@ def test_sparse_nd_array_basics(
# Test resize down
new_shape = tuple([arg_shape[i] - 50 for i in range(ndim)])
with tiledbsoma.SparseNDArray.open(uri, "w") as snda:
(ok, msg) = snda.resize(new_shape, check_only=True)
assert not ok
assert msg == "can_resize for soma_dim_0: new 50 < existing shape 100"
# TODO: check draft spec
# with pytest.raises(ValueError):
with pytest.raises(tiledbsoma.SOMAError):
Expand Down Expand Up @@ -162,6 +171,18 @@ def test_sparse_nd_array_basics(
with tiledbsoma.SparseNDArray.open(uri) as snda:
assert snda.shape == new_shape

(ok, msg) = snda.resize(new_shape, check_only=True)
assert ok
assert msg == ""

too_small = tuple(e - 1 for e in new_shape)
(ok, msg) = snda.resize(too_small, check_only=True)
assert not ok
assert msg == "can_resize for soma_dim_0: new 149 < existing shape 150"

with tiledbsoma.SparseNDArray.open(uri, "w") as snda:
(ok, msg) = snda.resize(new_shape, check_only=True)


## Pending 2.27 timeframe for dense support for current domain, including resize
## https://github.com/single-cell-data/TileDB-SOMA/issues/2955
Expand Down Expand Up @@ -276,12 +297,20 @@ def test_dataframe_basics(tmp_path, soma_joinid_domain, index_column_names):
# Test resize down
new_shape = 0
with tiledbsoma.DataFrame.open(uri, "w") as sdf:
ok, msg = sdf.resize_soma_joinid_shape(new_shape, check_only=True)
if has_soma_joinid_dim:
# TODO: check draft spec
# with pytest.raises(ValueError):
assert not ok
assert (
"can_resize_soma_joinid_shape: new soma_joinid shape 0 < existing shape"
in msg
)
with pytest.raises(tiledbsoma.SOMAError):
sdf.resize_soma_joinid_shape(new_shape)
else:
assert ok
assert msg == ""
sdf.resize_soma_joinid_shape(new_shape)

with tiledbsoma.DataFrame.open(uri) as sdf:
Expand Down

0 comments on commit 8c2aaaf

Please sign in to comment.