Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add endpoint_attrs param to simplify_graph to flexibly relax strictness #1117

Merged
merged 5 commits into from
Jan 30, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ repos:
types_or: [markdown, yaml]

- repo: https://github.com/astral-sh/ruff-pre-commit
rev: "v0.1.9"
rev: "v0.1.15"
hooks:
- id: ruff
args: [--fix]
Expand Down
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@

## 1.9.0 (Unreleased)

- add endpoint_attrs argument to simplification.simplify_graph function to flexibly relax strictness (#1117)
- fix a bug in the features module's polygon handling (#1104)
- update obsolete numpy random number generation (#1108)
- update warning messages to note that deprecated code will be removed in v2.0.0 (#1111)
- deprecate strict argument in simplification.simplify_graph function in favor of new endpoint_attrs argument (#1117)
gboeing marked this conversation as resolved.
Show resolved Hide resolved
- deprecate north, south, east, west arguments throughout package in favor of bbox tuple argument (#1112)
- deprecate return_coords argument in graph.graph_from_address function (#1105)
- deprecate return_hex argument in plot.get_colors function (#1109)
Expand Down
154 changes: 95 additions & 59 deletions osmnx/simplification.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,28 +15,37 @@
from ._errors import GraphSimplificationError


def _is_endpoint(G, node, strict=True):
def _is_endpoint(G, node, endpoint_attrs):
"""
Determine if a node is a true endpoint of an edge.

Return True if the node is a "real" endpoint of an edge in the network,
otherwise False. OSM data includes lots of nodes that exist only as points
to help streets bend around curves. An end point is a node that either:
1) is its own neighbor, ie, it self-loops.
2) or, has no incoming edges or no outgoing edges, ie, all its incident
edges point inward or all its incident edges point outward.
3) or, it does not have exactly two neighbors and degree of 2 or 4.
4) or, if strict mode is false, if its edges have different OSM IDs.
Return True if the node is a "true" endpoint of an edge in the network,
otherwise False. OpenStreetMap data includes many nodes that exist only as
geometric vertices to allow ways to curve. A true edge endpoint is a node
that satisfies at least 1 of the following 4 rules:

1) It is its own neighbor (ie, it self-loops).

2) Or, it has no incoming edges or no outgoing edges (ie, all its incident
edges are inbound or all its incident edges are outbound).

3) Or, it does not have exactly two neighbors and degree of 2 or 4.

4) Or, if `endpoint_attrs` is not None, and its incident edges have
different values than each other for any of the edge attributes in
`endpoint_attrs`.

Parameters
----------
G : networkx.MultiDiGraph
input graph
node : int
the node to examine
strict : bool
if False, allow nodes to be end points even if they fail all other rules
but have edges with different OSM IDs
endpoint_attrs : iterable
An iterable of edge attribute names for relaxing the strictness of
endpoint determination. If not None, a node is an endpoint if its
incident edges have different values than each other for any of the
edge attributes in `endpoint_attrs`.

Returns
-------
Expand All @@ -46,36 +55,37 @@ def _is_endpoint(G, node, strict=True):
n = len(neighbors)
d = G.degree(node)

# rule 1
# RULE 1
# if the node appears in its list of neighbors, it self-loops: this is
# always an endpoint
if node in neighbors:
# if the node appears in its list of neighbors, it self-loops
# this is always an endpoint.
return True

# rule 2
# RULE 2
# if node has no incoming edges or no outgoing edges, it is an endpoint
if G.out_degree(node) == 0 or G.in_degree(node) == 0:
# if node has no incoming edges or no outgoing edges, it is an endpoint
return True

# rule 3
# RULE 3
# else, if it does NOT have 2 neighbors AND either 2 or 4 directed edges,
# it is an endpoint. either it has 1 or 3+ neighbors, in which case it is
# a dead-end or an intersection of multiple streets or it has 2 neighbors
# but 3 degree (indicating a change from oneway to twoway) or more than 4
# degree (indicating a parallel edge) and thus is an endpoint
if not ((n == 2) and (d in {2, 4})): # noqa: PLR2004
# else, if it does NOT have 2 neighbors AND either 2 or 4 directed
# edges, it is an endpoint. either it has 1 or 3+ neighbors, in which
# case it is a dead-end or an intersection of multiple streets or it has
# 2 neighbors but 3 degree (indicating a change from oneway to twoway)
# or more than 4 degree (indicating a parallel edge) and thus is an
# endpoint
return True

# rule 4
if not strict:
# non-strict mode: do its incident edges have different OSM IDs?
# first collect all the OSM way IDs for incoming edges
# then collect all the OSM way IDs for outgoing edges
# if there is more than 1 OSM ID then it is an endpoint, otherwise not
incoming = [G.edges[u, node, k]["osmid"] for u in G.predecessors(node) for k in G[u][node]]
outgoing = [G.edges[node, v, k]["osmid"] for v in G.successors(node) for k in G[node][v]]
return len(set(incoming + outgoing)) > 1
# RULE 4
# non-strict mode: do its incident edges have different attr values? for
# each attribute to check, collect the attribute's values in all inbound
and outbound edges. if there is more than 1 unique value then this
# node is an endpoint
if endpoint_attrs is not None:
for attr in endpoint_attrs:
in_values = {v for _, _, v in G.in_edges(node, data=attr, keys=False)}
out_values = {v for _, _, v in G.out_edges(node, data=attr, keys=False)}
if len(in_values | out_values) > 1:
return True

# if none of the preceding rules passed, then it is not an endpoint
return False
Expand Down Expand Up @@ -151,7 +161,7 @@ def _build_path(G, endpoint, endpoint_successor, endpoints):
return path


def _get_paths_to_simplify(G, strict=True):
def _get_paths_to_simplify(G, endpoint_attrs):
"""
Generate all the paths to be simplified between endpoint nodes.

Expand All @@ -162,17 +172,19 @@ def _get_paths_to_simplify(G, strict=True):
----------
G : networkx.MultiDiGraph
input graph
strict : bool
if False, allow nodes to be end points even if they fail all other rules
but have edges with different OSM IDs
endpoint_attrs : iterable
An iterable of edge attribute names for relaxing the strictness of
endpoint determination. If not None, a node is an endpoint if its
incident edges have different values than each other for any of the
edge attributes in `endpoint_attrs`.

Yields
------
path_to_simplify : list
a generator of paths to simplify
"""
# first identify all the nodes that are endpoints
endpoints = {n for n in G.nodes if _is_endpoint(G, n, strict=strict)}
endpoints = {n for n in G.nodes if _is_endpoint(G, n, endpoint_attrs)}
utils.log(f"Identified {len(endpoints):,} edge endpoints")

# for each endpoint node, look at each of its successor nodes
Expand All @@ -185,7 +197,7 @@ def _get_paths_to_simplify(G, strict=True):
yield _build_path(G, endpoint, successor, endpoints)


def _remove_rings(G):
def _remove_rings(G, endpoint_attrs):
"""
Remove all self-contained rings from a graph.

Expand All @@ -196,6 +208,11 @@ def _remove_rings(G):
----------
G : networkx.MultiDiGraph
input graph
endpoint_attrs : iterable
An iterable of edge attribute names for relaxing the strictness of
endpoint determination. If not None, a node is an endpoint if its
incident edges have different values than each other for any of the
edge attributes in `endpoint_attrs`.

Returns
-------
Expand All @@ -204,35 +221,45 @@ def _remove_rings(G):
"""
nodes_in_rings = set()
for wcc in nx.weakly_connected_components(G):
if not any(_is_endpoint(G, n) for n in wcc):
if not any(_is_endpoint(G, n, endpoint_attrs) for n in wcc):
nodes_in_rings.update(wcc)
G.remove_nodes_from(nodes_in_rings)
return G


def simplify_graph(G, strict=True, remove_rings=True, track_merged=False):
def simplify_graph(G, strict=None, endpoint_attrs=None, remove_rings=True, track_merged=False): # noqa: C901
"""
Simplify a graph's topology by removing interstitial nodes.

Simplifies graph topology by removing all nodes that are not intersections
or dead-ends. Create an edge directly between the end points that
encapsulate them, but retain the geometry of the original edges, saved as
a new `geometry` attribute on the new edge. Note that only simplified
edges receive a `geometry` attribute. Some of the resulting consolidated
edges may comprise multiple OSM ways, and if so, their multiple attribute
values are stored as a list. Optionally, the simplified edges can receive
a `merged_edges` attribute that contains a list of all the (u, v) node
pairs that were merged together.
This simplifies graph topology by removing all nodes that are not
intersections or dead-ends, by creating an edge directly between the end
points that encapsulate them while retaining the full geometry of the
original edges, saved as a new `geometry` attribute on the new edge.

Note that only simplified edges receive a `geometry` attribute. Some of
the resulting consolidated edges may comprise multiple OSM ways, and if
so, their multiple attribute values are stored as a list. Optionally, the
simplified edges can receive a `merged_edges` attribute that contains a
list of all the (u, v) node pairs that were merged together.

Use the `endpoint_attrs` parameter to relax simplification strictness. For
example, `endpoint_attrs=['osmid']` will retain every node whose incident
edges have different OSM IDs. This lets you keep nodes at elbow two-way
intersections (but be aware that sometimes individual blocks have multiple
OSM IDs within them too). You could also use this parameter to retain
nodes where sidewalks or bike lanes begin/end in the middle of a block.

Parameters
----------
G : networkx.MultiDiGraph
input graph
strict : bool
if False, allow nodes to be end points even if they fail all other
rules but have incident edges with different OSM IDs. Lets you keep
nodes at elbow two-way intersections, but sometimes individual blocks
have multiple OSM IDs within them too.
deprecated, do not use
endpoint_attrs : iterable
An iterable of edge attribute names for relaxing the strictness of
endpoint determination. If not None, a node is an endpoint if its
incident edges have different values than each other for any of the
edge attributes in `endpoint_attrs`.
remove_rings : bool
if True, remove isolated self-contained rings that have no endpoints
track_merged : bool
Expand All @@ -245,6 +272,17 @@ def simplify_graph(G, strict=True, remove_rings=True, track_merged=False):
topologically simplified graph, with a new `geometry` attribute on
each simplified edge
"""
if strict is not None:
msg = (
"The `strict` parameter has been deprecated and will be removed in "
"the v2.0.0 release. Use the `endpoint_attrs` parameter instead to "
"relax simplification strictness. For example, `endpoint_attrs=None` "
"reproduces the old `strict=True` behvavior and `endpoint_attrs=['osmid']` "
"reproduces the old `strict=False` behavior."
gboeing marked this conversation as resolved.
Show resolved Hide resolved
)
# maintain old behavior if strict is passed during deprecation
endpoint_attrs = None if strict else ["osmid"]

if "simplified" in G.graph and G.graph["simplified"]: # pragma: no cover
msg = "This graph has already been simplified, cannot simplify it again."
raise GraphSimplificationError(msg)
Expand All @@ -262,7 +300,7 @@ def simplify_graph(G, strict=True, remove_rings=True, track_merged=False):
all_edges_to_add = []

# generate each path that needs to be simplified
for path in _get_paths_to_simplify(G, strict=strict):
for path in _get_paths_to_simplify(G, endpoint_attrs):
# add the interstitial edges we're removing to a list so we can retain
# their spatial geometry
merged_edges = []
Expand Down Expand Up @@ -332,7 +370,7 @@ def simplify_graph(G, strict=True, remove_rings=True, track_merged=False):
G.remove_nodes_from(set(all_nodes_to_remove))

if remove_rings:
G = _remove_rings(G)
G = _remove_rings(G, endpoint_attrs)

# mark the graph as having been simplified
G.graph["simplified"] = True
Expand Down Expand Up @@ -523,16 +561,14 @@ def _consolidate_intersections_rebuild_graph(G, tolerance=10, reconnect_edges=Tr
wccs = list(nx.weakly_connected_components(G.subgraph(nodes_subset.index)))
if len(wccs) > 1:
# if there are multiple components in this cluster
suffix = 0
for wcc in wccs:
for suffix, wcc in enumerate(wccs):
# set subcluster xy to the centroid of just these nodes
idx = list(wcc)
subcluster_centroid = node_points.loc[idx].unary_union.centroid
gdf.loc[idx, "x"] = subcluster_centroid.x
gdf.loc[idx, "y"] = subcluster_centroid.y
# move to subcluster by appending suffix to cluster label
gdf.loc[idx, "cluster"] = f"{cluster_label}-{suffix}"
suffix += 1

# give nodes unique integer IDs (subclusters with suffixes are strings)
gdf["cluster"] = gdf["cluster"].factorize()[0]
Expand Down