diff --git a/rust/geoarrow/benches/area.rs b/rust/geoarrow/benches/area.rs index ee923912..8b2415a4 100644 --- a/rust/geoarrow/benches/area.rs +++ b/rust/geoarrow/benches/area.rs @@ -4,14 +4,14 @@ use geoarrow::array::{AsChunkedNativeArray, MultiPolygonArray}; use geoarrow::io::flatgeobuf::read_flatgeobuf; use std::fs::File; -fn load_file() -> MultiPolygonArray<2> { +fn load_file() -> MultiPolygonArray { let mut file = File::open("fixtures/flatgeobuf/countries.fgb").unwrap(); let table = read_flatgeobuf(&mut file, Default::default()).unwrap(); table .geometry_column(None) .unwrap() .as_ref() - .as_multi_polygon::<2>() + .as_multi_polygon() .chunks() .first() .unwrap() diff --git a/rust/geoarrow/benches/from_geo.rs b/rust/geoarrow/benches/from_geo.rs index 8f379a4e..ee9bf76c 100644 --- a/rust/geoarrow/benches/from_geo.rs +++ b/rust/geoarrow/benches/from_geo.rs @@ -2,6 +2,7 @@ use geo::polygon; use criterion::{criterion_group, criterion_main, Criterion}; use geoarrow::array::{PolygonArray, PolygonBuilder}; +use geoarrow::datatypes::Dimension; fn create_data() -> Vec { // An L shape @@ -24,9 +25,13 @@ pub fn criterion_benchmark(c: &mut Criterion) { c.bench_function("convert Vec to PolygonArray", |b| { b.iter(|| { - let mut_arr = - PolygonBuilder::<2>::from_polygons(&data, Default::default(), Default::default()); - let _arr: PolygonArray<2> = mut_arr.into(); + let mut_arr = PolygonBuilder::from_polygons( + &data, + Dimension::XY, + Default::default(), + Default::default(), + ); + let _arr: PolygonArray = mut_arr.into(); }) }); } diff --git a/rust/geoarrow/benches/geos_buffer.rs b/rust/geoarrow/benches/geos_buffer.rs index 4c6962b6..00257939 100644 --- a/rust/geoarrow/benches/geos_buffer.rs +++ b/rust/geoarrow/benches/geos_buffer.rs @@ -3,7 +3,7 @@ use geoarrow::algorithm::geos::Buffer; use geoarrow::array::{CoordBuffer, InterleavedCoordBuffer, PointArray, PolygonArray}; use geoarrow::datatypes::Dimension; -fn generate_data() -> PointArray<2> { +fn generate_data() -> PointArray { let coords = vec![0.0; 100_000]; let coord_buffer = CoordBuffer::Interleaved(InterleavedCoordBuffer::new(coords.into(), Dimension::XY)); @@ -15,7 +15,7 @@ pub fn criterion_benchmark(c: &mut Criterion) { c.bench_function("buffer", |b| { b.iter(|| { - let _buffered: PolygonArray<2> = point_array.buffer(1.0, 8).unwrap(); + let _buffered: PolygonArray = point_array.buffer(1.0, 8).unwrap(); }) }); } diff --git a/rust/geoarrow/benches/nybb.rs b/rust/geoarrow/benches/nybb.rs index 8b8b4ed0..4a37ef6c 100644 --- a/rust/geoarrow/benches/nybb.rs +++ b/rust/geoarrow/benches/nybb.rs @@ -4,9 +4,10 @@ use arrow_ipc::reader::FileReader; use criterion::{criterion_group, criterion_main, Criterion}; use geoarrow::algorithm::geo::EuclideanDistance; use geoarrow::array::{MultiPolygonArray, PointArray}; +use geoarrow::datatypes::Dimension; use geoarrow::trait_::ArrayAccessor; -fn load_nybb() -> MultiPolygonArray<2> { +fn load_nybb() -> MultiPolygonArray { let file = File::open("fixtures/nybb.arrow").unwrap(); let reader = FileReader::try_new(file, None).unwrap(); @@ -20,7 +21,7 @@ fn load_nybb() -> MultiPolygonArray<2> { .position(|field| field.name() == "geometry") .unwrap(); let arr = record_batch.column(geom_idx); - let multi_poly_arr: MultiPolygonArray<2> = arr.as_ref().try_into().unwrap(); + let multi_poly_arr: MultiPolygonArray = (arr.as_ref(), Dimension::XY).try_into().unwrap(); arrays.push(multi_poly_arr); } @@ -39,7 +40,7 @@ pub fn criterion_benchmark(c: &mut Criterion) { c.bench_function("euclidean distance to scalar point", |b| { b.iter(|| { let point = geo::Point::new(0.0f64, 0.0f64); - let point_array = PointArray::from(vec![point].as_slice()); + let point_array = PointArray::from((vec![point].as_slice(), Dimension::XY)); let _distances = array.euclidean_distance(&point_array.value(0)); }) diff --git a/rust/geoarrow/benches/translate.rs b/rust/geoarrow/benches/translate.rs index fd860036..bbcef5ac 100644 --- a/rust/geoarrow/benches/translate.rs +++ b/rust/geoarrow/benches/translate.rs @@ -3,8 +3,9 @@ use geo::polygon; use criterion::{criterion_group, criterion_main, Criterion}; use geoarrow::algorithm::geo::Translate; use geoarrow::array::PolygonArray; +use geoarrow::datatypes::Dimension; -fn create_data() -> PolygonArray<2> { +fn create_data() -> PolygonArray { // An L shape // https://github.com/georust/geo/blob/7cb7d0ffa6bf1544c5ca9922bd06100c36f815d7/README.md?plain=1#L40 let poly = polygon![ @@ -17,7 +18,7 @@ fn create_data() -> PolygonArray<2> { (x: 0.0, y: 0.0), ]; let v = vec![poly; 1000]; - v.as_slice().into() + (v.as_slice(), Dimension::XY).into() } pub fn criterion_benchmark(c: &mut Criterion) { diff --git a/rust/geoarrow/src/algorithm/native/concatenate.rs b/rust/geoarrow/src/algorithm/native/concatenate.rs index 50507875..1973ea1c 100644 --- a/rust/geoarrow/src/algorithm/native/concatenate.rs +++ b/rust/geoarrow/src/algorithm/native/concatenate.rs @@ -1,5 +1,8 @@ +use std::collections::HashSet; + use crate::array::*; use crate::chunked_array::*; +use crate::datatypes::Dimension; use crate::error::Result; use crate::trait_::ArrayAccessor; @@ -13,8 +16,10 @@ impl Concatenate for &[PointArray] { type Output = Result; fn concatenate(&self) -> Self::Output { + let common_dimension = infer_common_dimension(self.iter().map(|arr| arr.dimension())); + let output_capacity = self.iter().fold(0, |sum, val| sum + val.buffer_lengths()); - let mut builder = PointBuilder::with_capacity(output_capacity); + let mut builder = PointBuilder::with_capacity(common_dimension, output_capacity); self.iter() .for_each(|chunk| chunk.iter().for_each(|p| builder.push_point(p.as_ref()))); Ok(builder.finish()) @@ -27,10 +32,13 @@ macro_rules! impl_concatenate { type Output = Result<$array>; fn concatenate(&self) -> Self::Output { + let common_dimension = + infer_common_dimension(self.iter().map(|arr| arr.dimension())); + let output_capacity = self.iter().fold(<$capacity>::new_empty(), |sum, val| { sum + val.buffer_lengths() }); - let mut builder = <$builder>::with_capacity(output_capacity); + let mut builder = <$builder>::with_capacity(common_dimension, output_capacity); for chunk in self.iter() { for geom in chunk.iter() { builder.$push_func(geom.as_ref())?; @@ -80,6 +88,12 @@ impl_concatenate!( push_geometry_collection ); +fn infer_common_dimension(dimensions: impl Iterator) -> Dimension { + let dimensions: HashSet = HashSet::from_iter(dimensions); + assert_eq!(dimensions.len(), 1); + dimensions.into_iter().next().unwrap() +} + impl Concatenate for ChunkedPointArray { type Output = Result; diff --git a/rust/geoarrow/src/array/mixed/array.rs b/rust/geoarrow/src/array/mixed/array.rs index db39061c..e5609020 100644 --- a/rust/geoarrow/src/array/mixed/array.rs +++ b/rust/geoarrow/src/array/mixed/array.rs @@ -659,12 +659,12 @@ impl TryFrom<&UnionArray> for MixedGeometryArray { Ok(Self::new( type_ids, offsets, - points.get(0).cloned().unwrap_or_default(), - line_strings.get(0).cloned().unwrap_or_default(), - polygons.get(0).cloned().unwrap_or_default(), - multi_points.get(0).cloned().unwrap_or_default(), - multi_line_strings.get(0).cloned().unwrap_or_default(), - multi_polygons.get(0).cloned().unwrap_or_default(), + points.first().cloned().unwrap_or_default(), + line_strings.first().cloned().unwrap_or_default(), + polygons.first().cloned().unwrap_or_default(), + multi_points.first().cloned().unwrap_or_default(), + multi_line_strings.first().cloned().unwrap_or_default(), + multi_polygons.first().cloned().unwrap_or_default(), Default::default(), )) } diff --git a/rust/geoarrow/src/io/parquet/writer/metadata.rs b/rust/geoarrow/src/io/parquet/writer/metadata.rs index 91d2c830..ac7df611 100644 --- a/rust/geoarrow/src/io/parquet/writer/metadata.rs +++ b/rust/geoarrow/src/io/parquet/writer/metadata.rs @@ -79,34 +79,32 @@ impl ColumnInfo { let array_ref = array.as_ref(); // We only have to do this for mixed arrays because other arrays are statically known - match array_ref.data_type() { - NativeType::Mixed(_, _) => { - let mixed_arr = array_ref.as_mixed(); - if mixed_arr.has_points() { - self.geometry_types.insert(GeoParquetGeometryType::Point); - } - if mixed_arr.has_line_strings() { - self.geometry_types - .insert(GeoParquetGeometryType::LineString); - } - if mixed_arr.has_polygons() { - self.geometry_types.insert(GeoParquetGeometryType::Polygon); - } - if mixed_arr.has_multi_points() { - self.geometry_types - .insert(GeoParquetGeometryType::MultiPoint); - } - if mixed_arr.has_multi_line_strings() { - self.geometry_types - .insert(GeoParquetGeometryType::MultiLineString); - } - if mixed_arr.has_multi_polygons() { - self.geometry_types - .insert(GeoParquetGeometryType::MultiPolygon); - } + if let NativeType::Mixed(_, _) = array_ref.data_type() { + let mixed_arr = array_ref.as_mixed(); + if mixed_arr.has_points() { + self.geometry_types.insert(GeoParquetGeometryType::Point); + } + if mixed_arr.has_line_strings() { + self.geometry_types + .insert(GeoParquetGeometryType::LineString); + } + if mixed_arr.has_polygons() { + self.geometry_types.insert(GeoParquetGeometryType::Polygon); + } + if mixed_arr.has_multi_points() { + self.geometry_types + .insert(GeoParquetGeometryType::MultiPoint); + } + if mixed_arr.has_multi_line_strings() { + self.geometry_types + .insert(GeoParquetGeometryType::MultiLineString); + } + if mixed_arr.has_multi_polygons() { + self.geometry_types + .insert(GeoParquetGeometryType::MultiPolygon); } - _ => (), } + Ok(()) } diff --git a/rust/geoarrow/src/test/geoarrow_data/mod.rs b/rust/geoarrow/src/test/geoarrow_data/mod.rs index b73cac74..2e229dee 100644 --- a/rust/geoarrow/src/test/geoarrow_data/mod.rs +++ b/rust/geoarrow/src/test/geoarrow_data/mod.rs @@ -17,12 +17,22 @@ macro_rules! geoarrow_data_impl { .unwrap() } }; + ($fn_name:ident, $file_part:tt, $return_type:ty, "WKB") => { + pub(crate) fn $fn_name() -> $return_type { + let path = format!( + "fixtures/geoarrow-data/example/example-{}.arrow", + $file_part + ); + let geometry_dyn_column = read_geometry_column(&path); + geometry_dyn_column.as_ref().try_into().unwrap() + } + }; } // Point geoarrow_data_impl!(example_point_interleaved, "point-interleaved", PointArray); geoarrow_data_impl!(example_point_separated, "point", PointArray); -geoarrow_data_impl!(example_point_wkb, "point-wkb", WKBArray); +geoarrow_data_impl!(example_point_wkb, "point-wkb", WKBArray, "WKB"); // LineString geoarrow_data_impl!( @@ -31,7 +41,12 @@ geoarrow_data_impl!( LineStringArray ); geoarrow_data_impl!(example_linestring_separated, "linestring", LineStringArray); -geoarrow_data_impl!(example_linestring_wkb, "linestring-wkb", WKBArray); +geoarrow_data_impl!( + example_linestring_wkb, + "linestring-wkb", + WKBArray, + "WKB" +); // Polygon geoarrow_data_impl!( @@ -40,7 +55,7 @@ geoarrow_data_impl!( PolygonArray ); geoarrow_data_impl!(example_polygon_separated, "polygon", PolygonArray); -geoarrow_data_impl!(example_polygon_wkb, "polygon-wkb", WKBArray); +geoarrow_data_impl!(example_polygon_wkb, "polygon-wkb", WKBArray, "WKB"); // MultiPoint geoarrow_data_impl!( @@ -49,7 +64,12 @@ geoarrow_data_impl!( MultiPointArray ); geoarrow_data_impl!(example_multipoint_separated, "multipoint", MultiPointArray); -geoarrow_data_impl!(example_multipoint_wkb, "multipoint-wkb", WKBArray); +geoarrow_data_impl!( + example_multipoint_wkb, + "multipoint-wkb", + WKBArray, + "WKB" +); // MultiLineString geoarrow_data_impl!( @@ -65,7 +85,8 @@ geoarrow_data_impl!( geoarrow_data_impl!( example_multilinestring_wkb, "multilinestring-wkb", - WKBArray + WKBArray, + "WKB" ); // MultiPolygon @@ -79,4 +100,9 @@ geoarrow_data_impl!( "multipolygon", MultiPolygonArray ); -geoarrow_data_impl!(example_multipolygon_wkb, "multipolygon-wkb", WKBArray); +geoarrow_data_impl!( + example_multipolygon_wkb, + "multipolygon-wkb", + WKBArray, + "WKB" +); diff --git a/rust/geoarrow/src/test/point.rs b/rust/geoarrow/src/test/point.rs index 708e3695..9e396e05 100644 --- a/rust/geoarrow/src/test/point.rs +++ b/rust/geoarrow/src/test/point.rs @@ -31,7 +31,7 @@ pub(crate) fn p2() -> Point { } pub(crate) fn point_array() -> PointArray { - let geoms = vec![p0(), p1(), p2()]; + let geoms = [p0(), p1(), p2()]; PointBuilder::from_points( geoms.iter(), Dimension::XY, diff --git a/rust/geoarrow/src/trait_.rs b/rust/geoarrow/src/trait_.rs index dc95d8a4..ab50ddb9 100644 --- a/rust/geoarrow/src/trait_.rs +++ b/rust/geoarrow/src/trait_.rs @@ -265,6 +265,7 @@ pub trait NativeArray: ArrayBase { /// ``` fn coord_type(&self) -> CoordType; + /// The dimension of this array. fn dimension(&self) -> Dimension { self.data_type().dimension() }