diff --git a/geozero/src/feature_processor.rs b/geozero/src/feature_processor.rs index 2b8554a9..ba82c02a 100644 --- a/geozero/src/feature_processor.rs +++ b/geozero/src/feature_processor.rs @@ -6,22 +6,46 @@ use crate::property_processor::PropertyProcessor; #[allow(unused_variables)] pub trait FeatureProcessor: GeomProcessor + PropertyProcessor { /// Begin of dataset processing + /// + /// ## Invariants + /// + /// - `dataset_begin` is called _only once_ for an entire dataset. + /// - `dataset_begin` is called before all other methods, including `feature_begin`, + /// `properties_begin`, `geometry_begin`, and all methods from [`GeomProcessor`] and + /// [`PropertyProcessor`] fn dataset_begin(&mut self, name: Option<&str>) -> Result<()> { Ok(()) } /// End of dataset processing + /// + /// ## Invariants + /// + /// - `dataset_end` is called _only once_ for an entire dataset. + /// - No other methods may be called after `dataset_end`. fn dataset_end(&mut self) -> Result<()> { Ok(()) } /// Begin of feature processing + /// + /// - `idx`: the positional row index in the dataset. For the `n`th row, `idx` will be + /// `n`. + /// - `feature_begin` will be called before both `properties_begin` and `geometry_begin`. fn feature_begin(&mut self, idx: u64) -> Result<()> { Ok(()) } /// End of feature processing + /// + /// - `idx`: the positional row index in the dataset. For the `n`th row, `idx` will be + /// `n`. + /// - `feature_end` will be called after both `properties_end` and `geometry_end`. fn feature_end(&mut self, idx: u64) -> Result<()> { Ok(()) } /// Begin of feature property processing + /// + /// ## Invariants + /// + /// - `properties_begin` will not be called a second time before `properties_end` is called. 
fn properties_begin(&mut self) -> Result<()> { Ok(()) } @@ -30,6 +54,10 @@ pub trait FeatureProcessor: GeomProcessor + PropertyProcessor { Ok(()) } /// Begin of feature geometry processing + /// + /// ## Following events + /// + /// - Relevant methods from [`GeomProcessor`] will be called for each geometry. fn geometry_begin(&mut self) -> Result<()> { Ok(()) } diff --git a/geozero/src/geometry_processor.rs b/geozero/src/geometry_processor.rs index 56873716..c7a736a7 100644 --- a/geozero/src/geometry_processor.rs +++ b/geozero/src/geometry_processor.rs @@ -103,6 +103,9 @@ pub trait GeomProcessor { } /// Process empty coordinates, like WKT's `POINT EMPTY` + /// + /// - `idx` is the positional index inside this geometry. `idx` will usually be 0 except in the + /// case of a MultiPoint or GeometryCollection. fn empty_point(&mut self, idx: usize) -> Result<()> { Err(GeozeroError::Geometry( "The input was an empty Point, but the output doesn't support empty Points".to_string(), @@ -123,21 +126,47 @@ pub trait GeomProcessor { /// Begin of MultiPoint processing /// - /// Next: size * xy/coordinate + /// Next: `size` calls to [`xy()`][`Self::xy()`] or [`coordinate()`][`Self::coordinate()`] + /// + /// ## Parameters + /// + /// - `size`: the number of Points in this MultiPoint + /// - `idx`: the positional index of this MultiPoint. This will be 0 except in the case of a + /// GeometryCollection. + /// + /// ## Following events + /// + /// - `size` calls to [`xy()`][`Self::xy()`] or [`coordinate()`][`Self::coordinate()`] for each point. + /// - [`multipoint_end`][Self::multipoint_end()] to end this MultiPoint + /// + /// As of v0.12, `point_begin` and `point_end` are **not** called for each point in a + /// MultiPoint. See also discussion in [#184](https://github.com/georust/geozero/issues/184). fn multipoint_begin(&mut self, size: usize, idx: usize) -> Result<()> { Ok(()) } /// End of MultiPoint processing + /// + /// - `idx`: the positional index of this MultiPoint. 
This will be 0 except in the case of a + /// GeometryCollection. fn multipoint_end(&mut self, idx: usize) -> Result<()> { Ok(()) } /// Begin of `LineString` processing /// - /// An untagged `LineString` is either a Polygon ring or part of a `MultiLineString` + /// ## Parameters /// - /// Next: size * xy/coordinate + /// - `tagged`: if `false`, this `LineString` is either a Polygon ring or part of a `MultiLineString` + /// - `size`: the number of coordinates in this LineString + /// - `idx`: the positional index of this LineString. This will be 0 for a tagged LineString + /// except in the case of a GeometryCollection. This can be non-zero for an untagged + /// LineString for MultiLineStrings or Polygons with multiple interiors. + /// + /// ## Following events + /// + /// - `size` calls to [`xy()`][`Self::xy()`] or [`coordinate()`][`Self::coordinate()`] for each coordinate. + /// - [`linestring_end`][Self::linestring_end()] to end this LineString fn linestring_begin(&mut self, tagged: bool, size: usize, idx: usize) -> Result<()> { Ok(()) } @@ -150,6 +179,14 @@ pub trait GeomProcessor { /// Begin of `MultiLineString` processing /// /// Next: size * LineString (untagged) + /// + /// ## Following events + /// + /// - `size` calls to: + /// - [`linestring_begin`][Self::linestring_begin] (with `tagged` set to `false`). + /// - one or more calls to [`xy()`][`Self::xy()`] or [`coordinate()`][`Self::coordinate()`] for each coordinate in the LineString. 
+ /// - [`linestring_end`][Self::linestring_end] + /// - [`multilinestring_end`][Self::multilinestring_end()] to end this MultiLineString fn multilinestring_begin(&mut self, size: usize, idx: usize) -> Result<()> { Ok(()) } @@ -159,11 +196,23 @@ pub trait GeomProcessor { Ok(()) } - /// Begin of Polygon processing + /// Begin of `Polygon` processing /// - /// An untagged Polygon is part of a `MultiPolygon` + /// ## Parameters /// - /// Next: size * LineString (untagged) = rings + /// - `tagged`: if `false`, this `Polygon` is part of a `MultiPolygon`. + /// - `size`: the number of rings in this Polygon, _including_ the exterior ring. + /// - `idx`: the positional index of this Polygon. This will be 0 for a tagged Polygon + /// except in the case of a GeometryCollection. This can be non-zero for an untagged + /// Polygon for a MultiPolygon with multiple Polygons. + /// + /// ## Following events + /// + /// - `size` calls to: + /// - [`linestring_begin`][Self::linestring_begin] (with `tagged` set to `false`). + /// - one or more calls to [`xy()`][`Self::xy()`] or [`coordinate()`][`Self::coordinate()`] for each coordinate in the ring. + /// - [`linestring_end`][Self::linestring_end] + /// - [`polygon_end`][Self::polygon_end()] to end this Polygon fn polygon_begin(&mut self, tagged: bool, size: usize, idx: usize) -> Result<()> { Ok(()) } @@ -175,7 +224,19 @@ pub trait GeomProcessor { /// Begin of `MultiPolygon` processing /// - /// Next: size * Polygon (untagged) + /// ## Parameters + /// + /// - `size`: the number of Polygons in this MultiPolygon. + /// - `idx`: the positional index of this MultiPolygon. This will be 0 except in the case of a + /// GeometryCollection. + /// + /// ## Following events + /// + /// - `size` calls to: + /// - [`polygon_begin`][Self::polygon_begin] (with `tagged` set to `false`). + /// - See [`polygon_begin`][Self::polygon_begin] for its internal calls. 
+ /// - [`polygon_end`][Self::polygon_end] + /// - [`multipolygon_end`][Self::multipolygon_end()] to end this MultiPolygon fn multipolygon_begin(&mut self, size: usize, idx: usize) -> Result<()> { Ok(()) } @@ -186,6 +247,19 @@ pub trait GeomProcessor { } /// Begin of `GeometryCollection` processing + /// + /// ## Parameters + /// + /// - `size`: the number of geometries in this GeometryCollection. + /// - `idx`: the positional index of this GeometryCollection. This can be greater than 0 for + /// nested geometry collections but also when using `GeomProcessor` to process a + /// `Feature` whose geometry is a `GeometryCollection`. For an example of this see [this + /// comment](https://github.com/georust/geozero/pull/183#discussion_r1454319662). + /// + /// ## Following events + /// + /// - `size` calls to one of the internal geometry `begin` and `end` methods, called in pairs. + /// - [`geometrycollection_end`][Self::geometrycollection_end()] to end this GeometryCollection fn geometrycollection_begin(&mut self, size: usize, idx: usize) -> Result<()> { Ok(()) } diff --git a/geozero/src/property_processor.rs b/geozero/src/property_processor.rs index e0cba015..3b32fc67 100644 --- a/geozero/src/property_processor.rs +++ b/geozero/src/property_processor.rs @@ -18,7 +18,9 @@ pub enum ColumnValue<'a> { Float(f32), Double(f64), String(&'a str), + /// A JSON-formatted string Json(&'a str), + /// A datetime stored as an ISO8601-formatted string DateTime(&'a str), Binary(&'a [u8]), } @@ -42,6 +44,19 @@ pub enum ColumnValue<'a> { #[allow(unused_variables)] pub trait PropertyProcessor { /// Process property value. Abort processing, if return value is true. + /// + /// - `idx`: the positional index of the property. + /// - `name` is the name of the column + /// - `value` is the value of this field + /// + /// ## Notes: + /// + /// - It is not guaranteed that `idx` is consistent across rows, nor is it + /// guaranteed that the set of names in each row is the same. 
Some input formats, like + /// GeoJSON, are schema-less and properties may change in every row. For this reason, it is + /// suggested to use the `name` parameter for matching across rows. + /// - It is not guaranteed that the data type of `value` is consistent across rows. For a given + /// `name`, it may be numeric in one row and a string in the next. fn property(&mut self, idx: usize, name: &str, value: &ColumnValue) -> Result<bool> { Ok(true) }