
Commit

incorporate more PR fb
bkmartinjr committed Nov 15, 2024
1 parent 1986ad5 commit bc866f8
Showing 6 changed files with 49 additions and 54 deletions.
6 changes: 3 additions & 3 deletions apis/python/src/tiledbsoma/_fastercsx.py
@@ -16,7 +16,7 @@
from typing_extensions import TypeAlias

from .options._soma_tiledb_context import SOMATileDBContext
-from .pytiledbsoma.fastercsx import compress_coo, copy_to_dense, sort_indices
+from .pytiledbsoma.fastercsx import compress_coo, copy_csx_to_dense, sort_csx_indices

NDArrayIndex: TypeAlias = npt.NDArray[np.integer[Any]]
NDArrayNumber: TypeAlias = npt.NDArray[Union[np.integer[Any], np.floating[Any]]]
@@ -87,7 +87,7 @@ def from_ijd(
context.native_context, (n_major, n_minor), i, j, d, indptr, indices, data
)
if make_sorted:
-sort_indices(context.native_context, indptr, indices, data)
+sort_csx_indices(context.native_context, indptr, indices, data)
return CompressedMatrix(
indptr, indices, data, shape, format, make_sorted, context
)
@@ -200,7 +200,7 @@ def to_numpy(self, index: slice | None = None) -> NDArrayNumber:
else (self.shape[0], n_major)
)
out = np.zeros(math.prod(out_shape), dtype=self.data.dtype)
-copy_to_dense(
+copy_csx_to_dense(
self.context.native_context,
major_idx_start,
major_idx_end,
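Note: for orientation, a minimal sketch of the renamed low-level bindings in use, mirroring the tests in apis/python/tests/test_libfastercsx.py further down in this commit. The import path follows the module above; the context construction and the buffer dtypes are assumptions for illustration, not documented API.

```python
import numpy as np
from scipy import sparse

import tiledbsoma
from tiledbsoma.pytiledbsoma.fastercsx import compress_coo, sort_csx_indices

# Assumed way to obtain a native context (per _fastercsx.py above).
ctx = tiledbsoma.SOMATileDBContext().native_context

coo = sparse.random(100, 50, density=0.1, format="coo", dtype=np.float32, random_state=0)

# Pre-allocated CSR output buffers; compress_coo fills them in place.
indptr = np.empty(coo.shape[0] + 1, dtype=np.int32)
indices = np.empty(coo.nnz, dtype=np.int32)
data = np.empty(coo.nnz, dtype=coo.dtype)

# COO input arrives as chunks, hence the 1-tuples; multi-chunk input would
# pass longer tuples (see test_multichunk below).
compress_coo(ctx, coo.shape, (coo.row,), (coo.col,), (coo.data,), indptr, indices, data)

# compress_coo leaves the minor dimension unsorted; canonicalize in place.
sort_csx_indices(ctx, indptr, indices, data)

assert np.array_equal(indices, coo.tocsr().indices)
```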
30 changes: 15 additions & 15 deletions apis/python/src/tiledbsoma/fastercsx.cc
@@ -237,7 +237,7 @@ void compress_coo_validate_args_(
3. sum(Ai.size())==sum(Aj.size())==sum(Ad.size())==Bj.size()==Bd.size()
(this is nnz).
4. num chunks/items in Ai/Aj/Ad is same size and type
-5. ensure B* are writable
+5. ensure B* are writeable
6. Ensure each element in A* tuples are same type
etc...
@@ -286,7 +286,7 @@ void compress_coo_validate_args_(
throw std::length_error("Pointer array size does not match n_rows.");

if (!Bp.writeable() || !Bj.writeable() || !Bd.writeable())
throw std::invalid_argument("Output arrays must be writable.");
throw std::invalid_argument("Output arrays must be writeable.");

if (Ai.size() > 0) {
if (!Ai[0].dtype().is(Aj[0].dtype()))
@@ -298,7 +298,7 @@
/**
* @brief python binding for coo_compress, which converts COO chunked array (I,
* J, D) into CSX arrays (P, J, D). The output arrays are NOT sorted on the
-* minor dimension - see `sort_indices` if that is required.
+* minor dimension -- see `sort_csx_indices` if that is required.
*/
void compress_coo(
std::shared_ptr<tiledbsoma::SOMAContext> ctx,
@@ -382,10 +382,10 @@ void compress_coo(
}

/**
-* @brief python binding for sort_indices, which sorts minor dimension of a
+* @brief Python binding for sort_csx_indices, which sorts minor dimension of a
* compressed matrix
*/
-void sort_indices(
+void sort_csx_indices(
std::shared_ptr<tiledbsoma::SOMAContext> ctx,
py::array Bp,
py::array Bj,
@@ -395,7 +395,7 @@ void sort_indices(
if (Bp.ndim() != 1 || Bj.ndim() != 1 || Bd.ndim() != 1)
throw std::length_error("All arrays must be 1D");
if (!Bp.writeable() || !Bj.writeable() || !Bd.writeable())
throw std::invalid_argument("Output arrays must be writable.");
throw std::invalid_argument("Output arrays must be writeable.");

// Get dispatch types
CsxIndexType csx_major_index_type = lookup_dtype_(
@@ -427,7 +427,7 @@
make_mutable_casted_span_<VALUE, remap_value_t<VALUE>>(Bd);

py::gil_scoped_release release;
-return fastercsx::sort_indices(
+return fastercsx::sort_csx_indices(
ctx->thread_pool().get(),
n_row,
nnz,
@@ -443,7 +443,7 @@
/**
* @brief Python binding for parallel slice+to_dense operation.
*/
-void copy_to_dense(
+void copy_csx_to_dense(
std::shared_ptr<tiledbsoma::SOMAContext> ctx,
const uint64_t major_idx_start,
const int64_t major_idx_end,
@@ -469,7 +469,7 @@
if (n_major != Bp.size() - 1)
throw std::length_error("n_rows does not match Bp.size()");
if (!out.writeable())
throw std::invalid_argument("out must be writable");
throw std::invalid_argument("out must be writeable");
if (out.dtype().num() != Bd.dtype().num())
throw pybind11::type_error("out dtype must match Bd dtype");

@@ -498,7 +498,7 @@ void copy_to_dense(
make_mutable_casted_span_<VALUE, remap_value_t<VALUE>>(out);

py::gil_scoped_release release;
-return fastercsx::copy_to_dense(
+return fastercsx::copy_csx_to_dense(
ctx->thread_pool().get(),
major_idx_start,
major_idx_end,
@@ -538,7 +538,7 @@ void count_rows(
throw std::length_error("Pointer array size does not match n_rows.");

if (!Bp.writeable())
throw std::invalid_argument("Output arrays must be writable.");
throw std::invalid_argument("Output arrays must be writeable.");

CooIndexType coo_index_type = lookup_dtype_(
coo_index_type_dispatch, Ai[0].dtype(), "COO index (row, col) arrays");
@@ -583,17 +583,17 @@ void load_fastercsx(py::module& m) {
"Create CSX elements");

fastercsx_m.def(
"sort_indices",
sort_indices,
"sort_csx_indices",
sort_csx_indices,
py::arg("ctx").noconvert(),
py::arg("Bp").noconvert(),
py::arg("Bj").noconvert(),
py::arg("Bd").noconvert(),
"Sort minor axis indices and data");

fastercsx_m.def(
"copy_to_dense",
copy_to_dense,
"copy_csx_to_dense",
copy_csx_to_dense,
py::arg("ctx").noconvert(),
py::arg("major_idx_start"),
py::arg("major_idx_end"),
2 changes: 1 addition & 1 deletion apis/python/src/tiledbsoma/io/conversions.py
@@ -83,7 +83,7 @@ def to_tiledb_supported_array_type(name: str, x: _MT) -> _MT:
def csr_from_coo_table(
tbl: pa.Table, num_rows: int, num_cols: int, context: SOMATileDBContext
) -> sp.csr_matrix:
"""Given an Arrow Table containing COO data, return a ``scipy.sparse.csr_matrx``."""
"""Given an Arrow Table containing COO data, return a ``scipy.sparse.csr_matrix``."""
s = CompressedMatrix.from_soma(
tbl, (num_rows, num_cols), "csr", True, context
).to_scipy()
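Note: a hedged usage sketch for the function whose docstring is fixed above. The SOMA column names (soma_dim_0, soma_dim_1, soma_data) are an assumption inferred from the SOMA data model; they are not shown in this diff.

```python
import pyarrow as pa

import tiledbsoma
from tiledbsoma.io.conversions import csr_from_coo_table

# COO triples (row, col, value) encoded as an Arrow Table.
tbl = pa.table(
    {
        "soma_dim_0": pa.array([0, 1, 1], type=pa.int64()),
        "soma_dim_1": pa.array([2, 0, 3], type=pa.int64()),
        "soma_data": pa.array([1.0, 2.0, 3.0], type=pa.float32()),
    }
)

csr = csr_from_coo_table(tbl, 2, 4, tiledbsoma.SOMATileDBContext())
print(csr.toarray())  # dense 2x4 view of the three triples
```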
7 changes: 0 additions & 7 deletions apis/python/tests/test_fastercsx.py
@@ -361,10 +361,3 @@ def test_bad_shapes(context: soma.SOMATileDBContext, rng: np.random.Generator) -
fastercsx.CompressedMatrix.from_ijd(
sp.row, sp.col, sp.data, shp, "csr", True, context
)


"""
other tests to consider
1. duplicates
"""
34 changes: 17 additions & 17 deletions apis/python/tests/test_libfastercsx.py
@@ -68,7 +68,7 @@ def test_construction(
)

if sp.nnz >= np.iinfo(csr_major_index_dtype).max:
-# only occur if we mess up the test params
+# only occurs if we mess up the test params
pytest.skip(reason="NNZ is too large for index type.")

indptr = np.empty((sp.shape[0] + 1), dtype=csr_major_index_dtype)
@@ -85,7 +85,7 @@
indices,
data,
)
-fastercsx.sort_indices(context, indptr, indices, data)
+fastercsx.sort_csx_indices(context, indptr, indices, data)

# Verify equality with SciPy constructed CSR
csr = sp.tocsr()
@@ -144,7 +144,7 @@ def test_partitioning(
indices,
data,
)
-fastercsx.sort_indices(context, indptr, indices, data)
+fastercsx.sort_csx_indices(context, indptr, indices, data)

# Verify equality with SciPy constructed CSR
csr = sp.tocsr()
@@ -164,7 +164,7 @@
def test_multichunk(
nchunks: int, shape: tuple[int, int], density: float, context: clib.SOMAContext
) -> None:
"""check that multi-chunk COO input functions correctly"""
"""Check that multi-chunk COO input functions correctly"""

rng = np.random.default_rng()
sp = sparse.random(
@@ -194,7 +194,7 @@ def test_multichunk(
indices,
data,
)
-fastercsx.sort_indices(context, indptr, indices, data)
+fastercsx.sort_csx_indices(context, indptr, indices, data)

# Verify equality with SciPy constructed CSR
csr = sp.tocsr()
@@ -203,45 +203,45 @@
assert np.array_equal(data, csr.data)


-def test_sort_indices_bad_args(
+def test_sort_csx_indices_bad_args(
rng: np.random.Generator, context: clib.SOMAContext
) -> None:

sp = sparse.random(2830, 212, density=0.1, dtype=np.int8, random_state=rng).tocsr()
p, j, d = sp.indptr, sp.indices, sp.data

with pytest.raises(TypeError):
-fastercsx.sort_indices(None, p, j, d)
+fastercsx.sort_csx_indices(None, p, j, d)
with pytest.raises(TypeError):
-fastercsx.sort_indices(0, p, j, d)
+fastercsx.sort_csx_indices(0, p, j, d)
with pytest.raises(TypeError):
-fastercsx.sort_indices(context, None, j, d)
+fastercsx.sort_csx_indices(context, None, j, d)
with pytest.raises(TypeError):
-fastercsx.sort_indices(context, p, None, d)
+fastercsx.sort_csx_indices(context, p, None, d)
with pytest.raises(TypeError):
-fastercsx.sort_indices(context, p, j, None)
+fastercsx.sort_csx_indices(context, p, j, None)

with pytest.raises(ValueError):
-fastercsx.sort_indices(context, p[:-1], j, d)
+fastercsx.sort_csx_indices(context, p[:-1], j, d)

with pytest.raises(ValueError):
-fastercsx.sort_indices(context, p, j[1:], d)
+fastercsx.sort_csx_indices(context, p, j[1:], d)

with pytest.raises(ValueError):
-fastercsx.sort_indices(context, p, j, d[1:])
+fastercsx.sort_csx_indices(context, p, j, d[1:])

with pytest.raises(ValueError):
-fastercsx.sort_indices(context, np.zeros_like(p), j, d)
+fastercsx.sort_csx_indices(context, np.zeros_like(p), j, d)

with pytest.raises(OverflowError):
pbad = p.copy()
pbad[1] = sp.nnz + 1000000
-fastercsx.sort_indices(context, pbad, j, d)
+fastercsx.sort_csx_indices(context, pbad, j, d)

with pytest.raises(OverflowError):
pbad = p.copy()
pbad[1] = -1
-fastercsx.sort_indices(context, pbad, j, d)
+fastercsx.sort_csx_indices(context, pbad, j, d)


def test_compress_coo_bad_args(
24 changes: 13 additions & 11 deletions libtiledbsoma/src/utils/fastercsx.h
@@ -88,8 +88,9 @@ size_t sum_over_size_(const std::vector<tcb::span<const T>>& v) noexcept {
* greedy n-way linear partition, which works well enough for the common case
* where most inputs are of similar sizes (which is typically the case).
*
-* TODO: If we sort (descending) first, it will be LPT which would be better in
-* edge cases where there are a lot of variable sized input vectors.
+* TODO: If we sort (descending) first, it will be classic LPT (Largest Processing
+* Time) partitioning which would be better in edge cases where there are a lot
+* of variable sized input vectors.
*/
template <typename COO_IDX>
struct Partition {
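Note: the TODO above refers to LPT scheduling. As a plain-Python illustration (not the library's code), the only difference from the greedy least-loaded rule is visiting the items largest-first:

```python
import heapq


def greedy_partition(sizes: list[int], n: int) -> list[list[int]]:
    """Assign each size, in the order given, to the currently least-loaded bin."""
    heap = [(0, i) for i in range(n)]  # (bin load, bin id)
    bins: list[list[int]] = [[] for _ in range(n)]
    for size in sizes:
        load, i = heapq.heappop(heap)
        bins[i].append(size)
        heapq.heappush(heap, (load + size, i))
    return bins


def lpt_partition(sizes: list[int], n: int) -> list[list[int]]:
    """Classic LPT: the same greedy rule, applied to the sizes sorted descending."""
    return greedy_partition(sorted(sizes, reverse=True), n)


print(greedy_partition([3, 3, 4, 5, 5], 2))  # [[3, 4, 5], [3, 5]] -> loads 12 and 8
print(lpt_partition([3, 3, 4, 5, 5], 2))     # [[5, 4], [5, 3, 3]] -> loads 9 and 11
```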
@@ -140,7 +141,7 @@ std::vector<Partition<COO_IDX>> partition_views_(
}

/**
-* @brief Count occurrences of all values present in Ai - used as the basis
+* @brief Count occurrences of all values present in Ai -- used as the basis
* for constructing the index pointer array.
*/
template <typename COO_IDX, typename CSX_MAJOR_IDX>
@@ -326,7 +327,7 @@ void compress_coo(
assert(Bj.size() == nnz);
assert(Bd.size() == nnz);

-// get major axis counts. Important side effect: range checks the Ai
+// Get major axis counts. Important side effect: range checks the Ai
// values.
count_rows_(tp, n_row, nnz, Ai, Bp);
sum_rows_to_pointers_(Bp);
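Note: the two calls above are the standard count-then-prefix-sum construction of the CSR pointer array; in NumPy terms (illustration only, hypothetical values):

```python
import numpy as np

Ai = np.array([2, 0, 1, 1, 2])          # COO row indices, n_row = 3
counts = np.bincount(Ai, minlength=3)   # per-row occurrence counts -> [1, 2, 2]
Bp = np.zeros(4, dtype=np.int64)
Bp[1:] = np.cumsum(counts)              # prefix sum -> indptr [0, 1, 3, 5]
```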
@@ -412,10 +413,10 @@ bool index_lt_(
}

/**
-* @brief Inplace sort of minor axis, used to canonicalize the CSx ordering.
+* @brief In-place sort of minor axis, used to canonicalize the CSx ordering.
*/
template <class VALUE, class CSX_MINOR_IDX, class CSX_MAJOR_IDX>
-void sort_indices(
+void sort_csx_indices(
ThreadPool* const tp,
uint64_t n_row,
uint64_t nnz,
@@ -460,7 +461,7 @@ void sort_indices(
*
*/
template <class VALUE, class CSX_MINOR_IDX, class CSX_MAJOR_IDX>
-void copy_to_dense(
+void copy_csx_to_dense(
ThreadPool* const tp,
uint64_t major_start,
uint64_t major_end,
@@ -502,14 +503,15 @@ void copy_to_dense(
});
assert(status.ok());
} else {
-// parallelizing this is less beneficial than the CSR case as it
+// Parallelizing this is less beneficial than the CSR case as it
// partitions by columns, but writes in C order. This does not align
// with CPU cache lines, and is therefore quite a bit slower than
// the speedup achieved by the (above) CSR loop.
//
-// We could write a Fortran-ordered output array, but then it would
-// typically need to be converted back to C order, which is even
-// worse as you move (even) more data once it is dense.
+// We could write a Fortran-ordered ("column major") output array, but
+// then it would typically need to be converted back to C order ("row
+// major"), which is even worse as you move (even) more data once it is
+// dense.
//
// If there is ever a requirement for use of F-ordered data, we
// could add the output order as an option.
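Note: to make the cache-locality argument above concrete, a small NumPy illustration (not the library's code):

```python
import numpy as np

out = np.zeros((4, 6))  # C (row-major) order: each row is contiguous in memory
print(out.strides)      # (48, 8): adjacent rows 48 bytes apart, adjacent columns 8

out[1, :] = 1.0  # row write: six adjacent 8-byte slots, cache friendly
out[:, 2] = 2.0  # column write: four slots, each 48 bytes apart, strided

f_out = np.zeros((4, 6), order="F")  # Fortran (column-major): columns contiguous
print(f_out.strides)                 # (8, 32): the locality trade-off flips
```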