Skip to content

Commit

Permalink
progress
Browse files Browse the repository at this point in the history
  • Loading branch information
kylebarron committed Nov 16, 2024
1 parent 0a43ff0 commit b4bce05
Show file tree
Hide file tree
Showing 11 changed files with 99 additions and 53 deletions.
4 changes: 2 additions & 2 deletions rust/geoarrow/benches/area.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,14 @@ use geoarrow::array::{AsChunkedNativeArray, MultiPolygonArray};
use geoarrow::io::flatgeobuf::read_flatgeobuf;
use std::fs::File;

fn load_file() -> MultiPolygonArray<2> {
fn load_file() -> MultiPolygonArray {
let mut file = File::open("fixtures/flatgeobuf/countries.fgb").unwrap();
let table = read_flatgeobuf(&mut file, Default::default()).unwrap();
table
.geometry_column(None)
.unwrap()
.as_ref()
.as_multi_polygon::<2>()
.as_multi_polygon()
.chunks()
.first()
.unwrap()
Expand Down
11 changes: 8 additions & 3 deletions rust/geoarrow/benches/from_geo.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use geo::polygon;

use criterion::{criterion_group, criterion_main, Criterion};
use geoarrow::array::{PolygonArray, PolygonBuilder};
use geoarrow::datatypes::Dimension;

fn create_data() -> Vec<geo::Polygon> {
// An L shape
Expand All @@ -24,9 +25,13 @@ pub fn criterion_benchmark(c: &mut Criterion) {

c.bench_function("convert Vec<geo::Polygon> to PolygonArray", |b| {
b.iter(|| {
let mut_arr =
PolygonBuilder::<2>::from_polygons(&data, Default::default(), Default::default());
let _arr: PolygonArray<2> = mut_arr.into();
let mut_arr = PolygonBuilder::from_polygons(
&data,
Dimension::XY,
Default::default(),
Default::default(),
);
let _arr: PolygonArray = mut_arr.into();
})
});
}
Expand Down
4 changes: 2 additions & 2 deletions rust/geoarrow/benches/geos_buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use geoarrow::algorithm::geos::Buffer;
use geoarrow::array::{CoordBuffer, InterleavedCoordBuffer, PointArray, PolygonArray};
use geoarrow::datatypes::Dimension;

fn generate_data() -> PointArray<2> {
fn generate_data() -> PointArray {
let coords = vec![0.0; 100_000];
let coord_buffer =
CoordBuffer::Interleaved(InterleavedCoordBuffer::new(coords.into(), Dimension::XY));
Expand All @@ -15,7 +15,7 @@ pub fn criterion_benchmark(c: &mut Criterion) {

c.bench_function("buffer", |b| {
b.iter(|| {
let _buffered: PolygonArray<2> = point_array.buffer(1.0, 8).unwrap();
let _buffered: PolygonArray = point_array.buffer(1.0, 8).unwrap();
})
});
}
Expand Down
7 changes: 4 additions & 3 deletions rust/geoarrow/benches/nybb.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@ use arrow_ipc::reader::FileReader;
use criterion::{criterion_group, criterion_main, Criterion};
use geoarrow::algorithm::geo::EuclideanDistance;
use geoarrow::array::{MultiPolygonArray, PointArray};
use geoarrow::datatypes::Dimension;
use geoarrow::trait_::ArrayAccessor;

fn load_nybb() -> MultiPolygonArray<2> {
fn load_nybb() -> MultiPolygonArray {
let file = File::open("fixtures/nybb.arrow").unwrap();
let reader = FileReader::try_new(file, None).unwrap();

Expand All @@ -20,7 +21,7 @@ fn load_nybb() -> MultiPolygonArray<2> {
.position(|field| field.name() == "geometry")
.unwrap();
let arr = record_batch.column(geom_idx);
let multi_poly_arr: MultiPolygonArray<2> = arr.as_ref().try_into().unwrap();
let multi_poly_arr: MultiPolygonArray = (arr.as_ref(), Dimension::XY).try_into().unwrap();
arrays.push(multi_poly_arr);
}

Expand All @@ -39,7 +40,7 @@ pub fn criterion_benchmark(c: &mut Criterion) {
c.bench_function("euclidean distance to scalar point", |b| {
b.iter(|| {
let point = geo::Point::new(0.0f64, 0.0f64);
let point_array = PointArray::from(vec![point].as_slice());
let point_array = PointArray::from((vec![point].as_slice(), Dimension::XY));

let _distances = array.euclidean_distance(&point_array.value(0));
})
Expand Down
5 changes: 3 additions & 2 deletions rust/geoarrow/benches/translate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@ use geo::polygon;
use criterion::{criterion_group, criterion_main, Criterion};
use geoarrow::algorithm::geo::Translate;
use geoarrow::array::PolygonArray;
use geoarrow::datatypes::Dimension;

fn create_data() -> PolygonArray<2> {
fn create_data() -> PolygonArray {
// An L shape
// https://github.com/georust/geo/blob/7cb7d0ffa6bf1544c5ca9922bd06100c36f815d7/README.md?plain=1#L40
let poly = polygon![
Expand All @@ -17,7 +18,7 @@ fn create_data() -> PolygonArray<2> {
(x: 0.0, y: 0.0),
];
let v = vec![poly; 1000];
v.as_slice().into()
(v.as_slice(), Dimension::XY).into()
}

pub fn criterion_benchmark(c: &mut Criterion) {
Expand Down
18 changes: 16 additions & 2 deletions rust/geoarrow/src/algorithm/native/concatenate.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
use std::collections::HashSet;

use crate::array::*;
use crate::chunked_array::*;
use crate::datatypes::Dimension;
use crate::error::Result;
use crate::trait_::ArrayAccessor;

Expand All @@ -13,8 +16,10 @@ impl Concatenate for &[PointArray] {
type Output = Result<PointArray>;

fn concatenate(&self) -> Self::Output {
let common_dimension = infer_common_dimension(self.iter().map(|arr| arr.dimension()));

let output_capacity = self.iter().fold(0, |sum, val| sum + val.buffer_lengths());
let mut builder = PointBuilder::with_capacity(output_capacity);
let mut builder = PointBuilder::with_capacity(common_dimension, output_capacity);
self.iter()
.for_each(|chunk| chunk.iter().for_each(|p| builder.push_point(p.as_ref())));
Ok(builder.finish())
Expand All @@ -27,10 +32,13 @@ macro_rules! impl_concatenate {
type Output = Result<$array>;

fn concatenate(&self) -> Self::Output {
let common_dimension =
infer_common_dimension(self.iter().map(|arr| arr.dimension()));

let output_capacity = self.iter().fold(<$capacity>::new_empty(), |sum, val| {
sum + val.buffer_lengths()
});
let mut builder = <$builder>::with_capacity(output_capacity);
let mut builder = <$builder>::with_capacity(common_dimension, output_capacity);
for chunk in self.iter() {
for geom in chunk.iter() {
builder.$push_func(geom.as_ref())?;
Expand Down Expand Up @@ -80,6 +88,12 @@ impl_concatenate!(
push_geometry_collection
);

fn infer_common_dimension(dimensions: impl Iterator<Item = Dimension>) -> Dimension {
let dimensions: HashSet<Dimension> = HashSet::from_iter(dimensions);
assert_eq!(dimensions.len(), 1);
dimensions.into_iter().next().unwrap()
}

impl Concatenate for ChunkedPointArray {
type Output = Result<PointArray>;

Expand Down
12 changes: 6 additions & 6 deletions rust/geoarrow/src/array/mixed/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -659,12 +659,12 @@ impl TryFrom<&UnionArray> for MixedGeometryArray {
Ok(Self::new(
type_ids,
offsets,
points.get(0).cloned().unwrap_or_default(),
line_strings.get(0).cloned().unwrap_or_default(),
polygons.get(0).cloned().unwrap_or_default(),
multi_points.get(0).cloned().unwrap_or_default(),
multi_line_strings.get(0).cloned().unwrap_or_default(),
multi_polygons.get(0).cloned().unwrap_or_default(),
points.first().cloned().unwrap_or_default(),
line_strings.first().cloned().unwrap_or_default(),
polygons.first().cloned().unwrap_or_default(),
multi_points.first().cloned().unwrap_or_default(),
multi_line_strings.first().cloned().unwrap_or_default(),
multi_polygons.first().cloned().unwrap_or_default(),
Default::default(),
))
}
Expand Down
50 changes: 24 additions & 26 deletions rust/geoarrow/src/io/parquet/writer/metadata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,34 +79,32 @@ impl ColumnInfo {
let array_ref = array.as_ref();

// We only have to do this for mixed arrays because other arrays are statically known
match array_ref.data_type() {
NativeType::Mixed(_, _) => {
let mixed_arr = array_ref.as_mixed();
if mixed_arr.has_points() {
self.geometry_types.insert(GeoParquetGeometryType::Point);
}
if mixed_arr.has_line_strings() {
self.geometry_types
.insert(GeoParquetGeometryType::LineString);
}
if mixed_arr.has_polygons() {
self.geometry_types.insert(GeoParquetGeometryType::Polygon);
}
if mixed_arr.has_multi_points() {
self.geometry_types
.insert(GeoParquetGeometryType::MultiPoint);
}
if mixed_arr.has_multi_line_strings() {
self.geometry_types
.insert(GeoParquetGeometryType::MultiLineString);
}
if mixed_arr.has_multi_polygons() {
self.geometry_types
.insert(GeoParquetGeometryType::MultiPolygon);
}
if let NativeType::Mixed(_, _) = array_ref.data_type() {
let mixed_arr = array_ref.as_mixed();
if mixed_arr.has_points() {
self.geometry_types.insert(GeoParquetGeometryType::Point);
}
if mixed_arr.has_line_strings() {
self.geometry_types
.insert(GeoParquetGeometryType::LineString);
}
if mixed_arr.has_polygons() {
self.geometry_types.insert(GeoParquetGeometryType::Polygon);
}
if mixed_arr.has_multi_points() {
self.geometry_types
.insert(GeoParquetGeometryType::MultiPoint);
}
if mixed_arr.has_multi_line_strings() {
self.geometry_types
.insert(GeoParquetGeometryType::MultiLineString);
}
if mixed_arr.has_multi_polygons() {
self.geometry_types
.insert(GeoParquetGeometryType::MultiPolygon);
}
_ => (),
}

Ok(())
}

Expand Down
38 changes: 32 additions & 6 deletions rust/geoarrow/src/test/geoarrow_data/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,22 @@ macro_rules! geoarrow_data_impl {
.unwrap()
}
};
// WKB variant, selected by a trailing `"WKB"` literal argument: generates a
// `pub(crate)` fixture loader named `$fn_name` that returns `$return_type`.
($fn_name:ident, $file_part:tt, $return_type:ty, "WKB") => {
pub(crate) fn $fn_name() -> $return_type {
// The fixture file name is derived from `$file_part`.
let path = format!(
"fixtures/geoarrow-data/example/example-{}.arrow",
$file_part
);
let geometry_dyn_column = read_geometry_column(&path);
// Converts with a plain `try_into` and no extra argument (presumably because
// WKB arrays need no dimension to be supplied; TODO confirm). Panics if the
// conversion fails.
geometry_dyn_column.as_ref().try_into().unwrap()
}
};
}

// Point
geoarrow_data_impl!(example_point_interleaved, "point-interleaved", PointArray);
geoarrow_data_impl!(example_point_separated, "point", PointArray);
geoarrow_data_impl!(example_point_wkb, "point-wkb", WKBArray<i64>);
geoarrow_data_impl!(example_point_wkb, "point-wkb", WKBArray<i64>, "WKB");

// LineString
geoarrow_data_impl!(
Expand All @@ -31,7 +41,12 @@ geoarrow_data_impl!(
LineStringArray
);
geoarrow_data_impl!(example_linestring_separated, "linestring", LineStringArray);
geoarrow_data_impl!(example_linestring_wkb, "linestring-wkb", WKBArray<i64>);
geoarrow_data_impl!(
example_linestring_wkb,
"linestring-wkb",
WKBArray<i64>,
"WKB"
);

// Polygon
geoarrow_data_impl!(
Expand All @@ -40,7 +55,7 @@ geoarrow_data_impl!(
PolygonArray
);
geoarrow_data_impl!(example_polygon_separated, "polygon", PolygonArray);
geoarrow_data_impl!(example_polygon_wkb, "polygon-wkb", WKBArray<i64>);
geoarrow_data_impl!(example_polygon_wkb, "polygon-wkb", WKBArray<i64>, "WKB");

// MultiPoint
geoarrow_data_impl!(
Expand All @@ -49,7 +64,12 @@ geoarrow_data_impl!(
MultiPointArray
);
geoarrow_data_impl!(example_multipoint_separated, "multipoint", MultiPointArray);
geoarrow_data_impl!(example_multipoint_wkb, "multipoint-wkb", WKBArray<i64>);
geoarrow_data_impl!(
example_multipoint_wkb,
"multipoint-wkb",
WKBArray<i64>,
"WKB"
);

// MultiLineString
geoarrow_data_impl!(
Expand All @@ -65,7 +85,8 @@ geoarrow_data_impl!(
geoarrow_data_impl!(
example_multilinestring_wkb,
"multilinestring-wkb",
WKBArray<i64>
WKBArray<i64>,
"WKB"
);

// MultiPolygon
Expand All @@ -79,4 +100,9 @@ geoarrow_data_impl!(
"multipolygon",
MultiPolygonArray
);
geoarrow_data_impl!(example_multipolygon_wkb, "multipolygon-wkb", WKBArray<i64>);
geoarrow_data_impl!(
example_multipolygon_wkb,
"multipolygon-wkb",
WKBArray<i64>,
"WKB"
);
2 changes: 1 addition & 1 deletion rust/geoarrow/src/test/point.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ pub(crate) fn p2() -> Point {
}

pub(crate) fn point_array() -> PointArray {
let geoms = vec![p0(), p1(), p2()];
let geoms = [p0(), p1(), p2()];
PointBuilder::from_points(
geoms.iter(),
Dimension::XY,
Expand Down
1 change: 1 addition & 0 deletions rust/geoarrow/src/trait_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,7 @@ pub trait NativeArray: ArrayBase {
/// ```
fn coord_type(&self) -> CoordType;

/// Returns the [`Dimension`] of this array.
///
/// The default implementation reads it from the array's data type.
fn dimension(&self) -> Dimension {
    let native_type = self.data_type();
    native_type.dimension()
}
Expand Down

0 comments on commit b4bce05

Please sign in to comment.