From 6aa1ffbf0fcd9f4e0a403294dffbc62c151bb621 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Wed, 21 Feb 2024 10:52:14 -0500 Subject: [PATCH] Python: Scalar FFI import & export (#525) Starts work on accepting scalar input into functions. Implements conversions from `__geo_interface__` to geoarrow scalars. Todo: - [x] Export geoarrow scalars via `__arrow_c_array__` - [x] Import geoarrow scalars via `__arrow_c_array__` - [x] Import geoarrow scalars via `__geo_interface__` (on each scalar class, not just on the `ScalarInput` wrapper) Closes https://github.com/geoarrow/geoarrow-rs/issues/524, closes https://github.com/geoarrow/geoarrow-rs/issues/436 --- .../core/python/geoarrow/rust/core/_rust.pyi | 30 ++ python/core/src/ffi/from_python.rs | 363 ------------------ python/core/src/ffi/from_python/array.rs | 68 ++++ python/core/src/ffi/from_python/chunked.rs | 163 ++++++++ .../ffi_stream.rs} | 0 python/core/src/ffi/from_python/input.rs | 120 ++++++ python/core/src/ffi/from_python/mod.rs | 9 + python/core/src/ffi/from_python/scalar.rs | 109 ++++++ python/core/src/ffi/from_python/table.rs | 44 +++ python/core/src/ffi/from_python/utils.rs | 73 ++++ python/core/src/ffi/mod.rs | 1 - python/core/src/ffi/to_python/array.rs | 24 +- python/core/src/ffi/to_python/mod.rs | 1 + python/core/src/ffi/to_python/scalar.rs | 36 ++ .../src/interop/geopandas/from_geopandas.rs | 2 +- python/core/src/interop/shapely/to_shapely.rs | 2 +- python/core/src/io/ewkb.rs | 2 +- python/core/src/io/wkt.rs | 2 +- src/io/geozero/mod.rs | 12 +- src/io/geozero/scalar/geometry.rs | 17 +- src/io/geozero/scalar/mod.rs | 2 + src/scalar/binary/owned.rs | 9 +- src/scalar/geometry/owned.rs | 9 +- src/scalar/geometrycollection/owned.rs | 10 +- src/scalar/linestring/owned.rs | 11 +- src/scalar/multilinestring/owned.rs | 16 +- src/scalar/multipoint/owned.rs | 10 +- src/scalar/multipolygon/owned.rs | 17 +- src/scalar/point/owned.rs | 10 +- src/scalar/polygon/owned.rs | 16 +- src/scalar/rect/owned.rs | 9 +- 31 files changed, 792 insertions(+), 405 deletions(-) delete mode 100644 python/core/src/ffi/from_python.rs create mode 100644 python/core/src/ffi/from_python/array.rs create mode 100644 python/core/src/ffi/from_python/chunked.rs rename python/core/src/ffi/{stream_chunked.rs => from_python/ffi_stream.rs} (100%) create mode 100644 python/core/src/ffi/from_python/input.rs create mode 100644 python/core/src/ffi/from_python/mod.rs create mode 100644 python/core/src/ffi/from_python/scalar.rs create mode 100644 python/core/src/ffi/from_python/table.rs create mode 100644 python/core/src/ffi/from_python/utils.rs create mode 100644 python/core/src/ffi/to_python/scalar.rs diff --git a/python/core/python/geoarrow/rust/core/_rust.pyi b/python/core/python/geoarrow/rust/core/_rust.pyi index b8dc948a1..5423559c8 100644 --- a/python/core/python/geoarrow/rust/core/_rust.pyi +++ b/python/core/python/geoarrow/rust/core/_rust.pyi @@ -35,57 +35,87 @@ from .types import ( from .enums import AreaMethod, LengthMethod, SimplifyMethod class Point: + def __arrow_c_array__( + self, requested_schema: object | None = None + ) -> Tuple[object, object]: ... def __eq__(self, other: Self) -> bool: ... @property def __geo_interface__(self) -> dict: ... def _repr_svg_(self) -> str: ... class LineString: + def __arrow_c_array__( + self, requested_schema: object | None = None + ) -> Tuple[object, object]: ... def __eq__(self, other: Self) -> bool: ... @property def __geo_interface__(self) -> dict: ... def _repr_svg_(self) -> str: ... class Polygon: + def __arrow_c_array__( + self, requested_schema: object | None = None + ) -> Tuple[object, object]: ... def __eq__(self, other: Self) -> bool: ... @property def __geo_interface__(self) -> dict: ... def _repr_svg_(self) -> str: ... class MultiPoint: + def __arrow_c_array__( + self, requested_schema: object | None = None + ) -> Tuple[object, object]: ... def __eq__(self, other: Self) -> bool: ... @property def __geo_interface__(self) -> dict: ... def _repr_svg_(self) -> str: ... class MultiLineString: + def __arrow_c_array__( + self, requested_schema: object | None = None + ) -> Tuple[object, object]: ... def __eq__(self, other: Self) -> bool: ... @property def __geo_interface__(self) -> dict: ... def _repr_svg_(self) -> str: ... class MultiPolygon: + def __arrow_c_array__( + self, requested_schema: object | None = None + ) -> Tuple[object, object]: ... def __eq__(self, other: Self) -> bool: ... @property def __geo_interface__(self) -> dict: ... def _repr_svg_(self) -> str: ... class Geometry: + # def __arrow_c_array__( + # self, requested_schema: object | None = None + # ) -> Tuple[object, object]: ... def __eq__(self, other: Self) -> bool: ... @property def __geo_interface__(self) -> dict: ... def _repr_svg_(self) -> str: ... class GeometryCollection: + def __arrow_c_array__( + self, requested_schema: object | None = None + ) -> Tuple[object, object]: ... def __eq__(self, other: Self) -> bool: ... @property def __geo_interface__(self) -> dict: ... def _repr_svg_(self) -> str: ... class WKB: + def __arrow_c_array__( + self, requested_schema: object | None = None + ) -> Tuple[object, object]: ... def __eq__(self, other: Self) -> bool: ... class Rect: + def __arrow_c_array__( + self, requested_schema: object | None = None + ) -> Tuple[object, object]: ... def __eq__(self, other: Self) -> bool: ... class PointArray: diff --git a/python/core/src/ffi/from_python.rs b/python/core/src/ffi/from_python.rs deleted file mode 100644 index 92f021032..000000000 --- a/python/core/src/ffi/from_python.rs +++ /dev/null @@ -1,363 +0,0 @@ -use std::sync::Arc; - -use crate::array::*; -use crate::chunked_array::*; -use crate::ffi::stream_chunked::ArrowArrayStreamReader; -use crate::table::GeoTable; -use arrow::datatypes::Field; -use arrow::ffi::{FFI_ArrowArray, FFI_ArrowSchema}; -use arrow::ffi_stream::{ - ArrowArrayStreamReader as ArrowRecordBatchStreamReader, FFI_ArrowArrayStream, -}; -use arrow_array::Array; -use arrow_array::{make_array, ArrayRef, RecordBatchReader}; -use geoarrow::array::from_arrow_array; -use geoarrow::chunked_array::{from_arrow_chunks, ChunkedGeometryArrayTrait}; -use geoarrow::datatypes::GeoDataType; -use geoarrow::GeometryArrayTrait; -use pyo3::exceptions::{PyTypeError, PyValueError}; -use pyo3::prelude::*; -use pyo3::types::{PyCapsule, PyTuple, PyType}; -use pyo3::{PyAny, PyResult}; - -macro_rules! impl_from_py_object { - ($struct_name:ident, $geoarrow_arr:ty) => { - impl<'a> FromPyObject<'a> for $struct_name { - fn extract(ob: &'a PyAny) -> PyResult { - let (array, _field) = import_arrow_c_array(ob)?; - let geo_array = <$geoarrow_arr>::try_from(array.as_ref()) - .map_err(|err| PyTypeError::new_err(err.to_string()))?; - Ok(geo_array.into()) - } - } - }; -} - -impl_from_py_object!(WKBArray, geoarrow::array::WKBArray); -impl_from_py_object!(PointArray, geoarrow::array::PointArray); -impl_from_py_object!(LineStringArray, geoarrow::array::LineStringArray); -impl_from_py_object!(PolygonArray, geoarrow::array::PolygonArray); -impl_from_py_object!(MultiPointArray, geoarrow::array::MultiPointArray); -impl_from_py_object!( - MultiLineStringArray, - geoarrow::array::MultiLineStringArray -); -impl_from_py_object!(MultiPolygonArray, geoarrow::array::MultiPolygonArray); -impl_from_py_object!(MixedGeometryArray, geoarrow::array::MixedGeometryArray); -// impl_from_py_object!(RectArray); -impl_from_py_object!( - GeometryCollectionArray, - geoarrow::array::GeometryCollectionArray -); - -macro_rules! impl_from_arrow { - ($struct_name:ident) => { - #[pymethods] - impl $struct_name { - /// Construct this object from existing Arrow data - /// - /// Args: - /// input: Arrow array to use for constructing this object - /// - /// Returns: - /// Self - #[classmethod] - pub fn from_arrow(_cls: &PyType, input: &PyAny) -> PyResult { - input.extract() - } - } - }; -} - -impl_from_arrow!(WKBArray); -impl_from_arrow!(PointArray); -impl_from_arrow!(LineStringArray); -impl_from_arrow!(PolygonArray); -impl_from_arrow!(MultiPointArray); -impl_from_arrow!(MultiLineStringArray); -impl_from_arrow!(MultiPolygonArray); -impl_from_arrow!(MixedGeometryArray); -// impl_from_arrow!(RectArray); -impl_from_arrow!(GeometryCollectionArray); -impl_from_arrow!(GeoTable); - -macro_rules! impl_from_arrow_chunks { - ($py_chunked_array:ty, $py_array:ty, $rs_chunked_array:ty) => { - #[pymethods] - impl $py_chunked_array { - /// Construct this chunked array from existing Arrow data - /// - /// This is a temporary workaround for [this pyarrow - /// issue](https://github.com/apache/arrow/issues/38717), where it's currently impossible to - /// read a pyarrow [`ChunkedArray`][pyarrow.ChunkedArray] directly without adding a direct - /// dependency on pyarrow. - /// - /// Args: - /// input: Arrow arrays to use for constructing this object - /// - /// Returns: - /// Self - #[classmethod] - fn from_arrow_arrays(_cls: &PyType, input: Vec<&PyAny>) -> PyResult { - let py_arrays = input - .into_iter() - .map(|x| x.extract()) - .collect::>>()?; - Ok(<$rs_chunked_array>::new( - py_arrays.into_iter().map(|py_array| py_array.0).collect(), - ) - .into()) - } - } - }; -} - -impl_from_arrow_chunks!( - ChunkedPointArray, - PointArray, - geoarrow::chunked_array::ChunkedPointArray -); -impl_from_arrow_chunks!( - ChunkedLineStringArray, - LineStringArray, - geoarrow::chunked_array::ChunkedLineStringArray -); -impl_from_arrow_chunks!( - ChunkedPolygonArray, - PolygonArray, - geoarrow::chunked_array::ChunkedPolygonArray -); -impl_from_arrow_chunks!( - ChunkedMultiPointArray, - MultiPointArray, - geoarrow::chunked_array::ChunkedMultiPointArray -); -impl_from_arrow_chunks!( - ChunkedMultiLineStringArray, - MultiLineStringArray, - geoarrow::chunked_array::ChunkedMultiLineStringArray -); -impl_from_arrow_chunks!( - ChunkedMultiPolygonArray, - MultiPolygonArray, - geoarrow::chunked_array::ChunkedMultiPolygonArray -); -impl_from_arrow_chunks!( - ChunkedMixedGeometryArray, - MixedGeometryArray, - geoarrow::chunked_array::ChunkedMixedGeometryArray -); -// impl_from_arrow_chunks!( -// ChunkedRectArray, -// RectArray, -// geoarrow::chunked_array::ChunkedRectArray -// ); -impl_from_arrow_chunks!( - ChunkedGeometryCollectionArray, - GeometryCollectionArray, - geoarrow::chunked_array::ChunkedGeometryCollectionArray -); -impl_from_arrow_chunks!( - ChunkedWKBArray, - WKBArray, - geoarrow::chunked_array::ChunkedWKBArray -); - -impl<'a> FromPyObject<'a> for GeoTable { - fn extract(ob: &'a PyAny) -> PyResult { - let stream = import_arrow_c_stream(ob)?; - let stream_reader = ArrowRecordBatchStreamReader::try_new(stream) - .map_err(|err| PyValueError::new_err(err.to_string()))?; - let schema = stream_reader.schema(); - - let mut batches = vec![]; - for batch in stream_reader { - let batch = batch.map_err(|err| PyTypeError::new_err(err.to_string()))?; - batches.push(batch); - } - - let table = geoarrow::table::GeoTable::from_arrow(batches, schema, None, None) - .map_err(|e| PyValueError::new_err(e.to_string()))?; - - if let Ok(data_type) = table.geometry_data_type() { - match data_type { - GeoDataType::LargeLineString(_) - | GeoDataType::LargePolygon(_) - | GeoDataType::LargeMultiPoint(_) - | GeoDataType::LargeMultiLineString(_) - | GeoDataType::LargeMultiPolygon(_) - | GeoDataType::LargeMixed(_) - | GeoDataType::LargeWKB - | GeoDataType::LargeGeometryCollection(_) => return Err(PyValueError::new_err( - "Unable to downcast from large to small offsets. Are your offsets 2^31 long?", - )), - _ => (), - } - } - - Ok(table.into()) - } -} - -fn validate_pycapsule(capsule: &PyCapsule, expected_name: &str) -> PyResult<()> { - let capsule_name = capsule.name()?; - if let Some(capsule_name) = capsule_name { - let capsule_name = capsule_name.to_str()?; - if capsule_name != expected_name { - return Err(PyValueError::new_err(format!( - "Expected name '{}' in PyCapsule, instead got '{}'", - expected_name, capsule_name - ))); - } - } else { - return Err(PyValueError::new_err( - "Expected schema PyCapsule to have name set.", - )); - } - - Ok(()) -} - -/// Import __arrow_c_array__ -pub(crate) fn import_arrow_c_array(ob: &PyAny) -> PyResult<(ArrayRef, Field)> { - if !ob.hasattr("__arrow_c_array__")? { - return Err(PyValueError::new_err( - "Expected an object with dunder __arrow_c_array__", - )); - } - - let tuple = ob.getattr("__arrow_c_array__")?.call0()?; - if !tuple.is_instance_of::() { - return Err(PyTypeError::new_err( - "Expected __arrow_c_array__ to return a tuple.", - )); - } - - let schema_capsule: &PyCapsule = PyTryInto::try_into(tuple.get_item(0)?)?; - let array_capsule: &PyCapsule = PyTryInto::try_into(tuple.get_item(1)?)?; - - validate_pycapsule(schema_capsule, "arrow_schema")?; - validate_pycapsule(array_capsule, "arrow_array")?; - - let schema_ptr = unsafe { schema_capsule.reference::() }; - let array = unsafe { FFI_ArrowArray::from_raw(array_capsule.pointer() as _) }; - - let array_data = unsafe { arrow::ffi::from_ffi(array, schema_ptr) } - .map_err(|err| PyTypeError::new_err(err.to_string()))?; - let field = Field::try_from(schema_ptr).map_err(|err| PyTypeError::new_err(err.to_string()))?; - Ok((make_array(array_data), field)) -} - -pub(crate) fn import_arrow_c_stream(ob: &PyAny) -> PyResult { - if !ob.hasattr("__arrow_c_stream__")? { - return Err(PyValueError::new_err( - "Expected an object with dunder __arrow_c_stream__", - )); - } - - let capsule: &PyCapsule = PyTryInto::try_into(ob.getattr("__arrow_c_stream__")?.call0()?)?; - validate_pycapsule(capsule, "arrow_array_stream")?; - - let stream = unsafe { FFI_ArrowArrayStream::from_raw(capsule.pointer() as _) }; - Ok(stream) -} - -pub struct ArrayInput(pub Arc); - -impl<'a> FromPyObject<'a> for ArrayInput { - fn extract(ob: &'a PyAny) -> PyResult { - let (array, _field) = import_arrow_c_array(ob)?; - Ok(Self(array)) - } -} - -pub struct ChunkedArrayInput(pub Vec>); - -impl<'a> FromPyObject<'a> for ChunkedArrayInput { - fn extract(ob: &'a PyAny) -> PyResult { - let stream = import_arrow_c_stream(ob)?; - let stream_reader = ArrowArrayStreamReader::try_new(stream) - .map_err(|err| PyValueError::new_err(err.to_string()))?; - - let mut chunks = vec![]; - for batch in stream_reader { - let batch = batch.map_err(|err| PyTypeError::new_err(err.to_string()))?; - chunks.push(batch); - } - Ok(Self(chunks)) - } -} - -pub enum AnyArrayInput { - Array(Arc), - Chunked(Vec>), -} - -impl<'a> FromPyObject<'a> for AnyArrayInput { - fn extract(ob: &'a PyAny) -> PyResult { - if ob.hasattr("__arrow_c_array__")? { - Ok(Self::Array(ArrayInput::extract(ob)?.0)) - } else if ob.hasattr("__arrow_c_stream__")? { - Ok(Self::Chunked(ChunkedArrayInput::extract(ob)?.0)) - } else { - Err(PyValueError::new_err( - "Expected object with __arrow_c_array__ or __arrow_c_stream__ method", - )) - } - } -} - -pub struct GeometryArrayInput(pub Arc); - -impl<'a> FromPyObject<'a> for GeometryArrayInput { - fn extract(ob: &'a PyAny) -> PyResult { - let (array, field) = import_arrow_c_array(ob)?; - let array = from_arrow_array(&array, &field) - .map_err(|err| PyTypeError::new_err(err.to_string()))?; - Ok(Self(array)) - } -} - -pub struct ChunkedGeometryArrayInput(pub Arc); - -impl<'a> FromPyObject<'a> for ChunkedGeometryArrayInput { - fn extract(ob: &'a PyAny) -> PyResult { - let stream = import_arrow_c_stream(ob)?; - let stream_reader = ArrowArrayStreamReader::try_new(stream) - .map_err(|err| PyValueError::new_err(err.to_string()))?; - let field = stream_reader.field(); - - let mut chunks = vec![]; - for batch in stream_reader { - let batch = batch.map_err(|err| PyTypeError::new_err(err.to_string()))?; - chunks.push(batch); - } - - let chunk_refs = chunks - .iter() - .map(|chunk| chunk.as_ref()) - .collect::>(); - let chunked_array = from_arrow_chunks(&chunk_refs, &field) - .map_err(|err| PyValueError::new_err(err.to_string()))?; - Ok(Self(chunked_array)) - } -} - -pub enum AnyGeometryInput { - Array(Arc), - Chunked(Arc), -} - -impl<'a> FromPyObject<'a> for AnyGeometryInput { - fn extract(ob: &'a PyAny) -> PyResult { - if ob.hasattr("__arrow_c_array__")? { - Ok(Self::Array(GeometryArrayInput::extract(ob)?.0)) - } else if ob.hasattr("__arrow_c_stream__")? { - Ok(Self::Chunked(ChunkedGeometryArrayInput::extract(ob)?.0)) - } else { - Err(PyValueError::new_err( - "Expected object with __arrow_c_array__ or __arrow_c_stream__ method", - )) - } - } -} diff --git a/python/core/src/ffi/from_python/array.rs b/python/core/src/ffi/from_python/array.rs new file mode 100644 index 000000000..1fc426c4a --- /dev/null +++ b/python/core/src/ffi/from_python/array.rs @@ -0,0 +1,68 @@ +use crate::array::*; +use crate::ffi::from_python::utils::import_arrow_c_array; +use crate::table::GeoTable; +use pyo3::exceptions::PyTypeError; +use pyo3::prelude::*; +use pyo3::types::PyType; +use pyo3::{PyAny, PyResult}; + +macro_rules! impl_from_py_object { + ($struct_name:ident, $geoarrow_arr:ty) => { + impl<'a> FromPyObject<'a> for $struct_name { + fn extract(ob: &'a PyAny) -> PyResult { + let (array, _field) = import_arrow_c_array(ob)?; + let geo_array = <$geoarrow_arr>::try_from(array.as_ref()) + .map_err(|err| PyTypeError::new_err(err.to_string()))?; + Ok(geo_array.into()) + } + } + }; +} + +impl_from_py_object!(WKBArray, geoarrow::array::WKBArray); +impl_from_py_object!(PointArray, geoarrow::array::PointArray); +impl_from_py_object!(LineStringArray, geoarrow::array::LineStringArray); +impl_from_py_object!(PolygonArray, geoarrow::array::PolygonArray); +impl_from_py_object!(MultiPointArray, geoarrow::array::MultiPointArray); +impl_from_py_object!( + MultiLineStringArray, + geoarrow::array::MultiLineStringArray +); +impl_from_py_object!(MultiPolygonArray, geoarrow::array::MultiPolygonArray); +impl_from_py_object!(MixedGeometryArray, geoarrow::array::MixedGeometryArray); +// impl_from_py_object!(RectArray); +impl_from_py_object!( + GeometryCollectionArray, + geoarrow::array::GeometryCollectionArray +); + +macro_rules! impl_from_arrow { + ($struct_name:ident) => { + #[pymethods] + impl $struct_name { + /// Construct this object from existing Arrow data + /// + /// Args: + /// input: Arrow array to use for constructing this object + /// + /// Returns: + /// Self + #[classmethod] + pub fn from_arrow(_cls: &PyType, input: &PyAny) -> PyResult { + input.extract() + } + } + }; +} + +impl_from_arrow!(WKBArray); +impl_from_arrow!(PointArray); +impl_from_arrow!(LineStringArray); +impl_from_arrow!(PolygonArray); +impl_from_arrow!(MultiPointArray); +impl_from_arrow!(MultiLineStringArray); +impl_from_arrow!(MultiPolygonArray); +impl_from_arrow!(MixedGeometryArray); +// impl_from_arrow!(RectArray); +impl_from_arrow!(GeometryCollectionArray); +impl_from_arrow!(GeoTable); diff --git a/python/core/src/ffi/from_python/chunked.rs b/python/core/src/ffi/from_python/chunked.rs new file mode 100644 index 000000000..3ae4da1ef --- /dev/null +++ b/python/core/src/ffi/from_python/chunked.rs @@ -0,0 +1,163 @@ +use crate::array::*; +use crate::chunked_array::*; +use crate::ffi::from_python::ffi_stream::ArrowArrayStreamReader; +use crate::ffi::from_python::utils::import_arrow_c_stream; +use pyo3::exceptions::{PyTypeError, PyValueError}; +use pyo3::prelude::*; +use pyo3::types::PyType; +use pyo3::{PyAny, PyResult}; + +macro_rules! impl_extract { + ($py_chunked_array:ty, $rs_array:ty, $rs_chunked_array:ty) => { + impl<'a> FromPyObject<'a> for $py_chunked_array { + fn extract(ob: &'a PyAny) -> PyResult { + let stream = import_arrow_c_stream(ob)?; + let stream_reader = ArrowArrayStreamReader::try_new(stream) + .map_err(|err| PyValueError::new_err(err.to_string()))?; + + let mut geo_chunks = vec![]; + for array in stream_reader { + let array = array.map_err(|err| PyTypeError::new_err(err.to_string()))?; + let geo_array = <$rs_array>::try_from(array.as_ref()) + .map_err(|err| PyValueError::new_err(err.to_string()))?; + geo_chunks.push(geo_array); + } + + Ok(Self(<$rs_chunked_array>::new(geo_chunks))) + } + } + }; +} + +impl_extract!( + ChunkedPointArray, + geoarrow::array::PointArray, + geoarrow::chunked_array::ChunkedPointArray +); +impl_extract!( + ChunkedLineStringArray, + geoarrow::array::LineStringArray, + geoarrow::chunked_array::ChunkedLineStringArray +); +impl_extract!( + ChunkedPolygonArray, + geoarrow::array::PolygonArray, + geoarrow::chunked_array::ChunkedPolygonArray +); +impl_extract!( + ChunkedMultiPointArray, + geoarrow::array::MultiPointArray, + geoarrow::chunked_array::ChunkedMultiPointArray +); +impl_extract!( + ChunkedMultiLineStringArray, + geoarrow::array::MultiLineStringArray, + geoarrow::chunked_array::ChunkedMultiLineStringArray +); +impl_extract!( + ChunkedMultiPolygonArray, + geoarrow::array::MultiPolygonArray, + geoarrow::chunked_array::ChunkedMultiPolygonArray +); +impl_extract!( + ChunkedMixedGeometryArray, + geoarrow::array::MixedGeometryArray, + geoarrow::chunked_array::ChunkedMixedGeometryArray +); +// impl_extract!( +// ChunkedRectArray, +// geoarrow::array::RectArray, +// geoarrow::chunked_array::ChunkedRectArray +// ); +impl_extract!( + ChunkedGeometryCollectionArray, + geoarrow::array::GeometryCollectionArray, + geoarrow::chunked_array::ChunkedGeometryCollectionArray +); +impl_extract!( + ChunkedWKBArray, + geoarrow::array::WKBArray, + geoarrow::chunked_array::ChunkedWKBArray +); + +macro_rules! impl_from_arrow_chunks { + ($py_chunked_array:ty, $py_array:ty, $rs_chunked_array:ty) => { + #[pymethods] + impl $py_chunked_array { + /// Construct this chunked array from existing Arrow data + /// + /// This is a temporary workaround for [this pyarrow + /// issue](https://github.com/apache/arrow/issues/38717), where it's currently impossible to + /// read a pyarrow [`ChunkedArray`][pyarrow.ChunkedArray] directly without adding a direct + /// dependency on pyarrow. + /// + /// Args: + /// input: Arrow arrays to use for constructing this object + /// + /// Returns: + /// Self + #[classmethod] + fn from_arrow_arrays(_cls: &PyType, input: Vec<&PyAny>) -> PyResult { + let py_arrays = input + .into_iter() + .map(|x| x.extract()) + .collect::>>()?; + Ok(<$rs_chunked_array>::new( + py_arrays.into_iter().map(|py_array| py_array.0).collect(), + ) + .into()) + } + } + }; +} + +impl_from_arrow_chunks!( + ChunkedPointArray, + PointArray, + geoarrow::chunked_array::ChunkedPointArray +); +impl_from_arrow_chunks!( + ChunkedLineStringArray, + LineStringArray, + geoarrow::chunked_array::ChunkedLineStringArray +); +impl_from_arrow_chunks!( + ChunkedPolygonArray, + PolygonArray, + geoarrow::chunked_array::ChunkedPolygonArray +); +impl_from_arrow_chunks!( + ChunkedMultiPointArray, + MultiPointArray, + geoarrow::chunked_array::ChunkedMultiPointArray +); +impl_from_arrow_chunks!( + ChunkedMultiLineStringArray, + MultiLineStringArray, + geoarrow::chunked_array::ChunkedMultiLineStringArray +); +impl_from_arrow_chunks!( + ChunkedMultiPolygonArray, + MultiPolygonArray, + geoarrow::chunked_array::ChunkedMultiPolygonArray +); +impl_from_arrow_chunks!( + ChunkedMixedGeometryArray, + MixedGeometryArray, + geoarrow::chunked_array::ChunkedMixedGeometryArray +); +// impl_from_arrow_chunks!( +// ChunkedRectArray, +// RectArray, +// geoarrow::chunked_array::ChunkedRectArray +// ); +impl_from_arrow_chunks!( + ChunkedGeometryCollectionArray, + GeometryCollectionArray, + geoarrow::chunked_array::ChunkedGeometryCollectionArray +); +impl_from_arrow_chunks!( + ChunkedWKBArray, + WKBArray, + geoarrow::chunked_array::ChunkedWKBArray +); diff --git a/python/core/src/ffi/stream_chunked.rs b/python/core/src/ffi/from_python/ffi_stream.rs similarity index 100% rename from python/core/src/ffi/stream_chunked.rs rename to python/core/src/ffi/from_python/ffi_stream.rs diff --git a/python/core/src/ffi/from_python/input.rs b/python/core/src/ffi/from_python/input.rs new file mode 100644 index 000000000..491340681 --- /dev/null +++ b/python/core/src/ffi/from_python/input.rs @@ -0,0 +1,120 @@ +use std::sync::Arc; + +use crate::ffi::from_python::ffi_stream::ArrowArrayStreamReader; +use crate::ffi::from_python::utils::{import_arrow_c_array, import_arrow_c_stream}; +use crate::scalar::Geometry; +use arrow_array::Array; +use geoarrow::array::from_arrow_array; +use geoarrow::chunked_array::{from_arrow_chunks, ChunkedGeometryArrayTrait}; +use geoarrow::GeometryArrayTrait; +use pyo3::exceptions::{PyTypeError, PyValueError}; +use pyo3::prelude::*; +use pyo3::{PyAny, PyResult}; + +pub struct ArrayInput(pub Arc); + +impl<'a> FromPyObject<'a> for ArrayInput { + fn extract(ob: &'a PyAny) -> PyResult { + let (array, _field) = import_arrow_c_array(ob)?; + Ok(Self(array)) + } +} + +pub struct ChunkedArrayInput(pub Vec>); + +impl<'a> FromPyObject<'a> for ChunkedArrayInput { + fn extract(ob: &'a PyAny) -> PyResult { + let stream = import_arrow_c_stream(ob)?; + let stream_reader = ArrowArrayStreamReader::try_new(stream) + .map_err(|err| PyValueError::new_err(err.to_string()))?; + + let mut chunks = vec![]; + for batch in stream_reader { + let batch = batch.map_err(|err| PyTypeError::new_err(err.to_string()))?; + chunks.push(batch); + } + Ok(Self(chunks)) + } +} + +pub enum AnyArrayInput { + Array(Arc), + Chunked(Vec>), +} + +impl<'a> FromPyObject<'a> for AnyArrayInput { + fn extract(ob: &'a PyAny) -> PyResult { + if ob.hasattr("__arrow_c_array__")? { + Ok(Self::Array(ArrayInput::extract(ob)?.0)) + } else if ob.hasattr("__arrow_c_stream__")? { + Ok(Self::Chunked(ChunkedArrayInput::extract(ob)?.0)) + } else { + Err(PyValueError::new_err( + "Expected object with __arrow_c_array__ or __arrow_c_stream__ method", + )) + } + } +} + +pub struct GeometryScalarInput(pub geoarrow::scalar::OwnedGeometry); + +impl<'a> FromPyObject<'a> for GeometryScalarInput { + fn extract(ob: &'a PyAny) -> PyResult { + Ok(Self(ob.extract::()?.0)) + } +} + +pub struct GeometryArrayInput(pub Arc); + +impl<'a> FromPyObject<'a> for GeometryArrayInput { + fn extract(ob: &'a PyAny) -> PyResult { + let (array, field) = import_arrow_c_array(ob)?; + let array = from_arrow_array(&array, &field) + .map_err(|err| PyTypeError::new_err(err.to_string()))?; + Ok(Self(array)) + } +} + +pub struct ChunkedGeometryArrayInput(pub Arc); + +impl<'a> FromPyObject<'a> for ChunkedGeometryArrayInput { + fn extract(ob: &'a PyAny) -> PyResult { + let stream = import_arrow_c_stream(ob)?; + let stream_reader = ArrowArrayStreamReader::try_new(stream) + .map_err(|err| PyValueError::new_err(err.to_string()))?; + let field = stream_reader.field(); + + let mut chunks = vec![]; + for batch in stream_reader { + let batch = batch.map_err(|err| PyTypeError::new_err(err.to_string()))?; + chunks.push(batch); + } + + let chunk_refs = chunks + .iter() + .map(|chunk| chunk.as_ref()) + .collect::>(); + let chunked_array = from_arrow_chunks(&chunk_refs, &field) + .map_err(|err| PyValueError::new_err(err.to_string()))?; + Ok(Self(chunked_array)) + } +} + +pub enum AnyGeometryInput { + Array(Arc), + Chunked(Arc), +} + +impl<'a> FromPyObject<'a> for AnyGeometryInput { + fn extract(ob: &'a PyAny) -> PyResult { + if ob.hasattr("__arrow_c_array__")? { + Ok(Self::Array(GeometryArrayInput::extract(ob)?.0)) + } else if ob.hasattr("__arrow_c_stream__")? { + Ok(Self::Chunked(ChunkedGeometryArrayInput::extract(ob)?.0)) + } else { + Err(PyValueError::new_err( + "Expected object with __arrow_c_array__ or __arrow_c_stream__ method", + )) + } + } +} diff --git a/python/core/src/ffi/from_python/mod.rs b/python/core/src/ffi/from_python/mod.rs new file mode 100644 index 000000000..dab754465 --- /dev/null +++ b/python/core/src/ffi/from_python/mod.rs @@ -0,0 +1,9 @@ +pub mod array; +pub mod chunked; +pub mod ffi_stream; +pub mod input; +pub mod scalar; +pub mod table; +pub mod utils; + +pub use input::{AnyGeometryInput, GeometryArrayInput}; diff --git a/python/core/src/ffi/from_python/scalar.rs b/python/core/src/ffi/from_python/scalar.rs new file mode 100644 index 000000000..6eac6d00c --- /dev/null +++ b/python/core/src/ffi/from_python/scalar.rs @@ -0,0 +1,109 @@ +use crate::array::*; +use crate::scalar::*; +use geoarrow::io::geozero::ToGeometry; +use geoarrow::scalar::OwnedGeometry; +use geoarrow::trait_::GeometryArrayAccessor; +use geoarrow::GeometryArrayTrait; +use geozero::geojson::GeoJsonString; +use pyo3::exceptions::PyValueError; +use pyo3::prelude::*; +use pyo3::types::{PyDict, PyTuple}; +use pyo3::{intern, PyAny, PyResult}; + +/// Access Python `__geo_interface__` attribute and encode to JSON string +fn call_geo_interface(py: Python, ob: &PyAny) -> PyResult { + let py_obj = ob.getattr("__geo_interface__")?; + + // Import JSON module + let json_mod = py.import(intern!(py, "json"))?; + + // Prepare json.dumps call + let args = (py_obj,); + let separators = PyTuple::new(py, vec![',', ':']); + let kwargs = PyDict::new(py); + kwargs.set_item("separators", separators)?; + + // Call json.dumps + let json_dumped = json_mod.call_method(intern!(py, "dumps"), args, Some(kwargs))?; + json_dumped.extract() +} + +macro_rules! impl_extract { + ($py_scalar:ty, $py_array:ty, $rs_scalar_variant:path) => { + impl<'a> FromPyObject<'a> for $py_scalar { + fn extract(ob: &'a PyAny) -> PyResult { + if ob.hasattr("__arrow_c_array__")? { + let arr = ob.extract::<$py_array>()?; + if arr.0.len() != 1 { + return Err(PyValueError::new_err( + "Expected scalar input; found != 1 elements in input array.", + )); + } + let scalar = arr.0.value(0); + Ok(Self(scalar.into())) + } else if ob.hasattr("__geo_interface__")? { + let json_string = Python::with_gil(|py| call_geo_interface(py, ob))?; + + // Parse GeoJSON to geometry scalar + let reader = GeoJsonString(json_string); + let geom = ToGeometry::::to_geometry(&reader).map_err(|err| { + PyValueError::new_err(format!("Unable to parse GeoJSON String: {}", err)) + })?; + let geom = match geom { + $rs_scalar_variant(g) => g, + _ => return Err(PyValueError::new_err("Unexpected geometry type.")), + }; + Ok(Self(geom)) + } else { + Err(PyValueError::new_err( + "Expected GeoArrow scalar or object implementing Geo Interface.", + )) + } + } + } + }; +} + +impl_extract!(Point, PointArray, OwnedGeometry::Point); +impl_extract!(LineString, LineStringArray, OwnedGeometry::LineString); +impl_extract!(Polygon, PolygonArray, OwnedGeometry::Polygon); +impl_extract!(MultiPoint, MultiPointArray, OwnedGeometry::MultiPoint); +impl_extract!( + MultiLineString, + MultiLineStringArray, + OwnedGeometry::MultiLineString +); +impl_extract!(MultiPolygon, MultiPolygonArray, OwnedGeometry::MultiPolygon); +impl_extract!( + GeometryCollection, + GeometryCollectionArray, + OwnedGeometry::GeometryCollection +); + +impl<'a> FromPyObject<'a> for Geometry { + fn extract(ob: &'a PyAny) -> PyResult { + if ob.hasattr("__arrow_c_array__")? { + let arr = ob.extract::()?; + if arr.0.len() != 1 { + return Err(PyValueError::new_err( + "Expected scalar input; found != 1 elements in input array.", + )); + } + let scalar = arr.0.value(0); + Ok(Self(scalar.into())) + } else if ob.hasattr("__geo_interface__")? { + let json_string = Python::with_gil(|py| call_geo_interface(py, ob))?; + + // Parse GeoJSON to geometry scalar + let reader = GeoJsonString(json_string); + let geom = ToGeometry::::to_geometry(&reader).map_err(|err| { + PyValueError::new_err(format!("Unable to parse GeoJSON String: {}", err)) + })?; + Ok(Self(geom)) + } else { + Err(PyValueError::new_err( + "Expected GeoArrow scalar or object implementing Geo Interface.", + )) + } + } +} diff --git a/python/core/src/ffi/from_python/table.rs b/python/core/src/ffi/from_python/table.rs new file mode 100644 index 000000000..bd856c241 --- /dev/null +++ b/python/core/src/ffi/from_python/table.rs @@ -0,0 +1,44 @@ +use crate::ffi::from_python::utils::import_arrow_c_stream; +use crate::table::GeoTable; +use arrow::ffi_stream::ArrowArrayStreamReader as ArrowRecordBatchStreamReader; +use arrow_array::RecordBatchReader; +use geoarrow::datatypes::GeoDataType; +use pyo3::exceptions::{PyTypeError, PyValueError}; +use pyo3::prelude::*; +use pyo3::{PyAny, PyResult}; + +impl<'a> FromPyObject<'a> for GeoTable { + fn extract(ob: &'a PyAny) -> PyResult { + let stream = import_arrow_c_stream(ob)?; + let stream_reader = ArrowRecordBatchStreamReader::try_new(stream) + .map_err(|err| PyValueError::new_err(err.to_string()))?; + let schema = stream_reader.schema(); + + let mut batches = vec![]; + for batch in stream_reader { + let batch = batch.map_err(|err| PyTypeError::new_err(err.to_string()))?; + batches.push(batch); + } + + let table = geoarrow::table::GeoTable::from_arrow(batches, schema, None, None) + .map_err(|e| PyValueError::new_err(e.to_string()))?; + + if let Ok(data_type) = table.geometry_data_type() { + match data_type { + GeoDataType::LargeLineString(_) + | GeoDataType::LargePolygon(_) + | GeoDataType::LargeMultiPoint(_) + | GeoDataType::LargeMultiLineString(_) + | GeoDataType::LargeMultiPolygon(_) + | GeoDataType::LargeMixed(_) + | GeoDataType::LargeWKB + | GeoDataType::LargeGeometryCollection(_) => return Err(PyValueError::new_err( + "Unable to downcast from large to small offsets. Are your offsets 2^31 long?", + )), + _ => (), + } + } + + Ok(table.into()) + } +} diff --git a/python/core/src/ffi/from_python/utils.rs b/python/core/src/ffi/from_python/utils.rs new file mode 100644 index 000000000..ee852be31 --- /dev/null +++ b/python/core/src/ffi/from_python/utils.rs @@ -0,0 +1,73 @@ +use arrow::datatypes::Field; +use arrow::ffi::{FFI_ArrowArray, FFI_ArrowSchema}; +use arrow::ffi_stream::FFI_ArrowArrayStream; +use arrow_array::{make_array, ArrayRef}; +use pyo3::exceptions::{PyTypeError, PyValueError}; +use pyo3::prelude::*; +use pyo3::types::{PyCapsule, PyTuple}; +use pyo3::{PyAny, PyResult}; + +/// Validate PyCapsule has provided name +pub fn validate_pycapsule_name(capsule: &PyCapsule, expected_name: &str) -> PyResult<()> { + let capsule_name = capsule.name()?; + if let Some(capsule_name) = capsule_name { + let capsule_name = capsule_name.to_str()?; + if capsule_name != expected_name { + return Err(PyValueError::new_err(format!( + "Expected name '{}' in PyCapsule, instead got '{}'", + expected_name, capsule_name + ))); + } + } else { + return Err(PyValueError::new_err( + "Expected schema PyCapsule to have name set.", + )); + } + + Ok(()) +} + +/// Import `__arrow_c_array__` across Python boundary +pub(crate) fn import_arrow_c_array(ob: &PyAny) -> PyResult<(ArrayRef, Field)> { + if !ob.hasattr("__arrow_c_array__")? { + return Err(PyValueError::new_err( + "Expected an object with dunder __arrow_c_array__", + )); + } + + let tuple = ob.getattr("__arrow_c_array__")?.call0()?; + if !tuple.is_instance_of::() { + return Err(PyTypeError::new_err( + "Expected __arrow_c_array__ to return a tuple.", + )); + } + + let schema_capsule: &PyCapsule = PyTryInto::try_into(tuple.get_item(0)?)?; + let array_capsule: &PyCapsule = PyTryInto::try_into(tuple.get_item(1)?)?; + + validate_pycapsule_name(schema_capsule, "arrow_schema")?; + validate_pycapsule_name(array_capsule, "arrow_array")?; + + let schema_ptr = unsafe { schema_capsule.reference::() }; + let array = unsafe { FFI_ArrowArray::from_raw(array_capsule.pointer() as _) }; + + let array_data = unsafe { arrow::ffi::from_ffi(array, schema_ptr) } + .map_err(|err| PyTypeError::new_err(err.to_string()))?; + let field = Field::try_from(schema_ptr).map_err(|err| PyTypeError::new_err(err.to_string()))?; + Ok((make_array(array_data), field)) +} + +/// Import `__arrow_c_stream__` across Python boundary. +pub(crate) fn import_arrow_c_stream(ob: &PyAny) -> PyResult { + if !ob.hasattr("__arrow_c_stream__")? { + return Err(PyValueError::new_err( + "Expected an object with dunder __arrow_c_stream__", + )); + } + + let capsule: &PyCapsule = PyTryInto::try_into(ob.getattr("__arrow_c_stream__")?.call0()?)?; + validate_pycapsule_name(capsule, "arrow_array_stream")?; + + let stream = unsafe { FFI_ArrowArrayStream::from_raw(capsule.pointer() as _) }; + Ok(stream) +} diff --git a/python/core/src/ffi/mod.rs b/python/core/src/ffi/mod.rs index ad4268175..74667b027 100644 --- a/python/core/src/ffi/mod.rs +++ b/python/core/src/ffi/mod.rs @@ -1,5 +1,4 @@ //! Arrow FFI via the C Data Interface and the Arrow PyCapsule Interface. pub mod from_python; -pub mod stream_chunked; pub mod to_python; diff --git a/python/core/src/ffi/to_python/array.rs b/python/core/src/ffi/to_python/array.rs index 919c3bc1d..de6d5b4c8 100644 --- a/python/core/src/ffi/to_python/array.rs +++ b/python/core/src/ffi/to_python/array.rs @@ -16,7 +16,7 @@ use std::ffi::CString; use std::sync::Arc; /// Implement the __arrow_c_array__ method on a GeometryArray -macro_rules! impl_arrow_c_array_geometry_array { +macro_rules! impl_arrow_c_array { ($struct_name:ident) => { #[pymethods] impl $struct_name { @@ -27,7 +27,7 @@ macro_rules! impl_arrow_c_array_geometry_array { /// /// For example, you can call [`pyarrow.array()`][pyarrow.array] to convert this array /// into a pyarrow array, without copying memory. - fn __arrow_c_array__( + pub fn __arrow_c_array__( &self, _requested_schema: Option, ) -> PyGeoArrowResult { @@ -49,16 +49,16 @@ macro_rules! impl_arrow_c_array_geometry_array { }; } -impl_arrow_c_array_geometry_array!(PointArray); -impl_arrow_c_array_geometry_array!(LineStringArray); -impl_arrow_c_array_geometry_array!(PolygonArray); -impl_arrow_c_array_geometry_array!(MultiPointArray); -impl_arrow_c_array_geometry_array!(MultiLineStringArray); -impl_arrow_c_array_geometry_array!(MultiPolygonArray); -impl_arrow_c_array_geometry_array!(MixedGeometryArray); -impl_arrow_c_array_geometry_array!(GeometryCollectionArray); -impl_arrow_c_array_geometry_array!(WKBArray); -impl_arrow_c_array_geometry_array!(RectArray); +impl_arrow_c_array!(PointArray); +impl_arrow_c_array!(LineStringArray); +impl_arrow_c_array!(PolygonArray); +impl_arrow_c_array!(MultiPointArray); +impl_arrow_c_array!(MultiLineStringArray); +impl_arrow_c_array!(MultiPolygonArray); +impl_arrow_c_array!(MixedGeometryArray); +impl_arrow_c_array!(GeometryCollectionArray); +impl_arrow_c_array!(WKBArray); +impl_arrow_c_array!(RectArray); pub fn geometry_to_pyobject(py: Python, geom: geoarrow::scalar::Geometry<'_, i32>) -> PyObject { match geom { diff --git a/python/core/src/ffi/to_python/mod.rs b/python/core/src/ffi/to_python/mod.rs index 9f51b8ffd..934c85c5b 100644 --- a/python/core/src/ffi/to_python/mod.rs +++ b/python/core/src/ffi/to_python/mod.rs @@ -1,6 +1,7 @@ pub mod array; pub mod chunked; pub mod ffi_stream; +pub mod scalar; pub mod table; pub use array::{chunked_geometry_array_to_pyobject, geometry_array_to_pyobject}; diff --git a/python/core/src/ffi/to_python/scalar.rs b/python/core/src/ffi/to_python/scalar.rs new file mode 100644 index 000000000..a50defaad --- /dev/null +++ b/python/core/src/ffi/to_python/scalar.rs @@ -0,0 +1,36 @@ +use crate::array::*; +use crate::error::PyGeoArrowResult; +use crate::scalar::*; + +use pyo3::prelude::*; + +macro_rules! impl_arrow_c_array { + ($struct_name:ident, $py_array:ident) => { + #[pymethods] + impl $struct_name { + /// An implementation of the [Arrow PyCapsule + /// Interface](https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html). + /// This dunder method should not be called directly, but enables zero-copy + /// data transfer to other Python libraries that understand Arrow memory. + /// + /// For example, you can call [`pyarrow.array()`][pyarrow.array] to convert this array + /// into a pyarrow array, without copying memory. + pub fn __arrow_c_array__( + &self, + requested_schema: Option, + ) -> PyGeoArrowResult { + $py_array(self.0.clone().into()).__arrow_c_array__(requested_schema) + } + } + }; +} + +impl_arrow_c_array!(Point, PointArray); +impl_arrow_c_array!(LineString, LineStringArray); +impl_arrow_c_array!(Polygon, PolygonArray); +impl_arrow_c_array!(MultiPoint, MultiPointArray); +impl_arrow_c_array!(MultiLineString, MultiLineStringArray); +impl_arrow_c_array!(MultiPolygon, MultiPolygonArray); +// impl_arrow_c_array!(Geometry, MixedGeometryArray); +impl_arrow_c_array!(GeometryCollection, GeometryCollectionArray); +impl_arrow_c_array!(Rect, RectArray); diff --git a/python/core/src/interop/geopandas/from_geopandas.rs b/python/core/src/interop/geopandas/from_geopandas.rs index 95af50656..cf59ffad3 100644 --- a/python/core/src/interop/geopandas/from_geopandas.rs +++ b/python/core/src/interop/geopandas/from_geopandas.rs @@ -2,7 +2,7 @@ use std::sync::Arc; use crate::array::*; use crate::error::PyGeoArrowResult; -use crate::ffi::from_python::import_arrow_c_stream; +use crate::ffi::from_python::utils::import_arrow_c_stream; use crate::interop::shapely::from_shapely::from_shapely; use crate::interop::util::import_pyarrow; use crate::table::GeoTable; diff --git a/python/core/src/interop/shapely/to_shapely.rs b/python/core/src/interop/shapely/to_shapely.rs index e93abfff3..75dada65d 100644 --- a/python/core/src/interop/shapely/to_shapely.rs +++ b/python/core/src/interop/shapely/to_shapely.rs @@ -1,7 +1,7 @@ use crate::array::*; use crate::chunked_array::*; use crate::error::PyGeoArrowResult; -use crate::ffi::from_python::import_arrow_c_array; +use crate::ffi::from_python::utils::import_arrow_c_array; use crate::interop::shapely::utils::import_shapely; use arrow_buffer::NullBuffer; use geoarrow::array::{from_arrow_array, AsGeometryArray, CoordBuffer, CoordType}; diff --git a/python/core/src/io/ewkb.rs b/python/core/src/io/ewkb.rs index 8a23e6715..bd1c5de20 100644 --- a/python/core/src/io/ewkb.rs +++ b/python/core/src/io/ewkb.rs @@ -10,7 +10,7 @@ use pyo3::types::PyType; use crate::array::*; use crate::error::PyGeoArrowResult; -use crate::ffi::from_python::import_arrow_c_array; +use crate::ffi::from_python::utils::import_arrow_c_array; use crate::ffi::to_python::geometry_array_to_pyobject; /// Parse an Arrow BinaryArray from EWKB to its GeoArrow-native counterpart. diff --git a/python/core/src/io/wkt.rs b/python/core/src/io/wkt.rs index 3c516fa54..875c4435c 100644 --- a/python/core/src/io/wkt.rs +++ b/python/core/src/io/wkt.rs @@ -11,7 +11,7 @@ use pyo3::types::PyType; use crate::array::*; use crate::error::PyGeoArrowResult; -use crate::ffi::from_python::import_arrow_c_array; +use crate::ffi::from_python::utils::import_arrow_c_array; use crate::ffi::to_python::geometry_array_to_pyobject; /// Parse an Arrow StringArray from WKT to its GeoArrow-native counterpart. diff --git a/src/io/geozero/mod.rs b/src/io/geozero/mod.rs index 5ff96d461..69a139a3c 100644 --- a/src/io/geozero/mod.rs +++ b/src/io/geozero/mod.rs @@ -7,10 +7,8 @@ mod scalar; pub(crate) mod table; pub use api::{FromEWKB, FromWKT}; -pub use array::ToLineStringArray; -pub use array::ToMixedArray; -pub use array::ToMultiLineStringArray; -pub use array::ToMultiPointArray; -pub use array::ToMultiPolygonArray; -pub use array::ToPointArray; -pub use array::ToPolygonArray; +pub use array::{ + ToLineStringArray, ToMixedArray, ToMultiLineStringArray, ToMultiPointArray, + ToMultiPolygonArray, ToPointArray, ToPolygonArray, +}; +pub use scalar::ToGeometry; diff --git a/src/io/geozero/scalar/geometry.rs b/src/io/geozero/scalar/geometry.rs index f9c960e59..2d666c470 100644 --- a/src/io/geozero/scalar/geometry.rs +++ b/src/io/geozero/scalar/geometry.rs @@ -6,7 +6,10 @@ use crate::io::geozero::scalar::multipoint::process_multi_point; use crate::io::geozero::scalar::multipolygon::process_multi_polygon; use crate::io::geozero::scalar::point::process_point; use crate::io::geozero::scalar::polygon::process_polygon; -use crate::scalar::Geometry; +use crate::io::geozero::ToMixedArray; +use crate::scalar::{Geometry, OwnedGeometry}; +use crate::trait_::GeometryArrayAccessor; +use crate::GeometryArrayTrait; use arrow_array::OffsetSizeTrait; use geozero::{GeomProcessor, GeozeroGeometry}; @@ -37,3 +40,15 @@ impl GeozeroGeometry for Geometry<'_, O> { process_geometry(&self, 0, processor) } } + +pub trait ToGeometry { + fn to_geometry(&self) -> geozero::error::Result>; +} + +impl ToGeometry for T { + fn to_geometry(&self) -> geozero::error::Result> { + let arr = self.to_mixed_geometry_array()?; + assert_eq!(arr.len(), 1); + Ok(OwnedGeometry::from(arr.value(0))) + } +} diff --git a/src/io/geozero/scalar/mod.rs b/src/io/geozero/scalar/mod.rs index 128207ce9..f8c1c8eaa 100644 --- a/src/io/geozero/scalar/mod.rs +++ b/src/io/geozero/scalar/mod.rs @@ -16,3 +16,5 @@ pub(crate) use multipoint::process_multi_point; pub(crate) use multipolygon::process_multi_polygon; pub(crate) use point::process_point; pub(crate) use polygon::process_polygon; + +pub use geometry::ToGeometry; diff --git a/src/scalar/binary/owned.rs b/src/scalar/binary/owned.rs index ec90fb284..1dd2c6b58 100644 --- a/src/scalar/binary/owned.rs +++ b/src/scalar/binary/owned.rs @@ -1,7 +1,8 @@ +use crate::array::WKBArray; use crate::scalar::WKB; use arrow_array::{GenericBinaryArray, OffsetSizeTrait}; -#[derive(Debug, PartialEq)] +#[derive(Clone, Debug, PartialEq)] pub struct OwnedWKB { arr: GenericBinaryArray, geom_index: usize, @@ -31,3 +32,9 @@ impl<'a, O: OffsetSizeTrait> From> for OwnedWKB { Self::new(arr, geom_index) } } + +impl From> for WKBArray { + fn from(value: OwnedWKB) -> Self { + Self::new(value.arr, Default::default()) + } +} diff --git a/src/scalar/geometry/owned.rs b/src/scalar/geometry/owned.rs index 8a7e954e7..86899a4fb 100644 --- a/src/scalar/geometry/owned.rs +++ b/src/scalar/geometry/owned.rs @@ -4,7 +4,7 @@ use crate::algorithm::native::eq::geometry_eq; use crate::geo_traits::{GeometryTrait, GeometryType}; use crate::scalar::*; -#[derive(Debug)] +#[derive(Clone, Debug)] // TODO: come back to this in #449 #[allow(clippy::large_enum_variant)] pub enum OwnedGeometry { @@ -57,6 +57,13 @@ impl<'a, O: OffsetSizeTrait> From> for OwnedGeometry { } } +// impl From> for MixedGeometryArray { +// fn from(value: OwnedGeometry) -> Self { +// match value { +// } +// } +// } + impl GeometryTrait for OwnedGeometry { type T = f64; type Point<'b> = OwnedPoint where Self: 'b; diff --git a/src/scalar/geometrycollection/owned.rs b/src/scalar/geometrycollection/owned.rs index c0ce2590a..301584984 100644 --- a/src/scalar/geometrycollection/owned.rs +++ b/src/scalar/geometrycollection/owned.rs @@ -1,11 +1,11 @@ use crate::algorithm::native::eq::geometry_collection_eq; -use crate::array::MixedGeometryArray; +use crate::array::{GeometryCollectionArray, MixedGeometryArray}; use crate::geo_traits::GeometryCollectionTrait; use crate::scalar::{Geometry, GeometryCollection}; use arrow_array::OffsetSizeTrait; use arrow_buffer::OffsetBuffer; -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct OwnedGeometryCollection { array: MixedGeometryArray, @@ -49,6 +49,12 @@ impl<'a, O: OffsetSizeTrait> From> for OwnedGeometryCo } } +impl From> for GeometryCollectionArray { + fn from(value: OwnedGeometryCollection) -> Self { + Self::new(value.array, value.geom_offsets, None, Default::default()) + } +} + impl GeometryCollectionTrait for OwnedGeometryCollection { type T = f64; type ItemType<'b> = Geometry<'b, O> where Self: 'b; diff --git a/src/scalar/linestring/owned.rs b/src/scalar/linestring/owned.rs index 0504fd7f7..ce3d2fede 100644 --- a/src/scalar/linestring/owned.rs +++ b/src/scalar/linestring/owned.rs @@ -1,11 +1,11 @@ use crate::algorithm::native::eq::line_string_eq; -use crate::array::CoordBuffer; +use crate::array::{CoordBuffer, LineStringArray}; use crate::geo_traits::LineStringTrait; use crate::scalar::{LineString, Point}; use arrow_array::OffsetSizeTrait; use arrow_buffer::OffsetBuffer; -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct OwnedLineString { coords: CoordBuffer, @@ -43,6 +43,7 @@ impl From> for geo::LineString { geom.into() } } + impl<'a, O: OffsetSizeTrait> From> for OwnedLineString { fn from(value: LineString<'a, O>) -> Self { let (coords, geom_offsets, geom_index) = value.into_owned_inner(); @@ -50,6 +51,12 @@ impl<'a, O: OffsetSizeTrait> From> for OwnedLineString { } } +impl From> for LineStringArray { + fn from(value: OwnedLineString) -> Self { + Self::new(value.coords, value.geom_offsets, None, Default::default()) + } +} + impl LineStringTrait for OwnedLineString { type T = f64; type ItemType<'b> = Point<'b> where Self: 'b; diff --git a/src/scalar/multilinestring/owned.rs b/src/scalar/multilinestring/owned.rs index 34353d220..ae415b8ec 100644 --- a/src/scalar/multilinestring/owned.rs +++ b/src/scalar/multilinestring/owned.rs @@ -1,11 +1,11 @@ use crate::algorithm::native::eq::multi_line_string_eq; -use crate::array::CoordBuffer; +use crate::array::{CoordBuffer, MultiLineStringArray}; use crate::geo_traits::MultiLineStringTrait; use crate::scalar::{LineString, MultiLineString}; use arrow_array::OffsetSizeTrait; use arrow_buffer::OffsetBuffer; -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct OwnedMultiLineString { coords: CoordBuffer, @@ -69,6 +69,18 @@ impl<'a, O: OffsetSizeTrait> From> for OwnedMultiLineStri } } +impl From> for MultiLineStringArray { + fn from(value: OwnedMultiLineString) -> Self { + Self::new( + value.coords, + value.geom_offsets, + value.ring_offsets, + None, + Default::default(), + ) + } +} + impl MultiLineStringTrait for OwnedMultiLineString { type T = f64; type ItemType<'b> = LineString<'b, O> where Self: 'b; diff --git a/src/scalar/multipoint/owned.rs b/src/scalar/multipoint/owned.rs index e5d2aecc9..b4992c492 100644 --- a/src/scalar/multipoint/owned.rs +++ b/src/scalar/multipoint/owned.rs @@ -1,11 +1,11 @@ use crate::algorithm::native::eq::multi_point_eq; -use crate::array::CoordBuffer; +use crate::array::{CoordBuffer, MultiPointArray}; use crate::geo_traits::MultiPointTrait; use crate::scalar::{MultiPoint, Point}; use arrow_array::OffsetSizeTrait; use arrow_buffer::OffsetBuffer; -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct OwnedMultiPoint { coords: CoordBuffer, @@ -51,6 +51,12 @@ impl<'a, O: OffsetSizeTrait> From> for OwnedMultiPoint { } } +impl From> for MultiPointArray { + fn from(value: OwnedMultiPoint) -> Self { + Self::new(value.coords, value.geom_offsets, None, Default::default()) + } +} + impl MultiPointTrait for OwnedMultiPoint { type T = f64; type ItemType<'b> = Point<'b> where Self: 'b; diff --git a/src/scalar/multipolygon/owned.rs b/src/scalar/multipolygon/owned.rs index 83f0277ac..cd9a02cc2 100644 --- a/src/scalar/multipolygon/owned.rs +++ b/src/scalar/multipolygon/owned.rs @@ -1,11 +1,11 @@ use crate::algorithm::native::eq::multi_polygon_eq; -use crate::array::CoordBuffer; +use crate::array::{CoordBuffer, MultiPolygonArray}; use crate::geo_traits::MultiPolygonTrait; use crate::scalar::{MultiPolygon, Polygon}; use arrow_array::OffsetSizeTrait; use arrow_buffer::OffsetBuffer; -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct OwnedMultiPolygon { coords: CoordBuffer, @@ -82,6 +82,19 @@ impl<'a, O: OffsetSizeTrait> From> for OwnedMultiPolygon } } +impl From> for MultiPolygonArray { + fn from(value: OwnedMultiPolygon) -> Self { + Self::new( + value.coords, + value.geom_offsets, + value.polygon_offsets, + value.ring_offsets, + None, + Default::default(), + ) + } +} + impl MultiPolygonTrait for OwnedMultiPolygon { type T = f64; type ItemType<'b> = Polygon<'b, O> where Self: 'b; diff --git a/src/scalar/point/owned.rs b/src/scalar/point/owned.rs index 4c3a2315e..19569865b 100644 --- a/src/scalar/point/owned.rs +++ b/src/scalar/point/owned.rs @@ -1,10 +1,10 @@ use crate::algorithm::native::eq::point_eq; -use crate::array::CoordBuffer; +use crate::array::{CoordBuffer, PointArray}; use crate::geo_traits::{CoordTrait, PointTrait}; use crate::io::geo::point_to_geo; use crate::scalar::Point; -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct OwnedPoint { coords: CoordBuffer, geom_index: usize, @@ -35,6 +35,12 @@ impl<'a> From> for OwnedPoint { } } +impl From for PointArray { + fn from(value: OwnedPoint) -> Self { + Self::new(value.coords, None, Default::default()) + } +} + impl PointTrait for OwnedPoint { type T = f64; diff --git a/src/scalar/polygon/owned.rs b/src/scalar/polygon/owned.rs index c772dc2af..9520123d9 100644 --- a/src/scalar/polygon/owned.rs +++ b/src/scalar/polygon/owned.rs @@ -1,11 +1,11 @@ use crate::algorithm::native::eq::polygon_eq; -use crate::array::CoordBuffer; +use crate::array::{CoordBuffer, PolygonArray}; use crate::geo_traits::PolygonTrait; use crate::scalar::{LineString, Polygon}; use arrow_array::OffsetSizeTrait; use arrow_buffer::OffsetBuffer; -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct OwnedPolygon { coords: CoordBuffer, @@ -69,6 +69,18 @@ impl<'a, O: OffsetSizeTrait> From> for OwnedPolygon { } } +impl From> for PolygonArray { + fn from(value: OwnedPolygon) -> Self { + Self::new( + value.coords, + value.geom_offsets, + value.ring_offsets, + None, + Default::default(), + ) + } +} + impl PolygonTrait for OwnedPolygon { type T = f64; type ItemType<'b> = LineString<'b, O> where Self: 'b; diff --git a/src/scalar/rect/owned.rs b/src/scalar/rect/owned.rs index 7daa402b8..d586eadc1 100644 --- a/src/scalar/rect/owned.rs +++ b/src/scalar/rect/owned.rs @@ -1,9 +1,10 @@ use crate::algorithm::native::eq::rect_eq; +use crate::array::RectArray; use crate::geo_traits::RectTrait; use crate::scalar::Rect; use arrow_buffer::ScalarBuffer; -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct OwnedRect { values: ScalarBuffer, @@ -35,6 +36,12 @@ impl<'a> From> for OwnedRect { } } +impl From for RectArray { + fn from(value: OwnedRect) -> Self { + Self::new(value.values, None, Default::default()) + } +} + impl RectTrait for OwnedRect { type T = f64; type ItemType<'b> = (Self::T, Self::T) where Self: 'b;