From ed95c94aac1a13b634dbe210f568301ceb546082 Mon Sep 17 00:00:00 2001 From: Alexey Zakharenkov Date: Fri, 2 Jun 2023 14:26:57 +0300 Subject: [PATCH 01/13] Unify tests --- subway_structure.py | 2 +- tests/README.md | 13 + tests/assets/kuntsevskaya_centers.json | 28 -- tests/assets/tiny_world.osm | 217 +++++++++++ tests/assets/tiny_world_gtfs.zip | Bin 0 -> 4775 bytes tests/sample_data_for_build_tracks.py | 154 ++++++-- ... => sample_data_for_center_calculation.py} | 18 +- tests/sample_data_for_error_messages.py | 49 ++- tests/sample_data_for_outputs.py | 345 ++++++++++++++++++ tests/test_build_tracks.py | 71 ++-- tests/test_center_calculation.py | 29 +- tests/test_error_messages.py | 17 +- tests/test_gtfs_processor.py | 73 +++- tests/test_storage.py | 26 ++ tests/util.py | 215 +++++++++-- 15 files changed, 1086 insertions(+), 171 deletions(-) create mode 100644 tests/README.md delete mode 100644 tests/assets/kuntsevskaya_centers.json create mode 100644 tests/assets/tiny_world.osm create mode 100644 tests/assets/tiny_world_gtfs.zip rename tests/{assets/kuntsevskaya_transfer.osm => sample_data_for_center_calculation.py} (84%) create mode 100644 tests/sample_data_for_outputs.py create mode 100644 tests/test_storage.py diff --git a/subway_structure.py b/subway_structure.py index 823aea61..bb38f857 100644 --- a/subway_structure.py +++ b/subway_structure.py @@ -673,7 +673,7 @@ def __init__(self, relation, city, master=None): self.stops = [] # List of RouteStop # Would be a list of (lon, lat) for the longest stretch. Can be empty. 
self.tracks = None - # Index of the fist stop that is located on/near the self.tracks + # Index of the first stop that is located on/near the self.tracks self.first_stop_on_rails_index = None # Index of the last stop that is located on/near the self.tracks self.last_stop_on_rails_index = None diff --git a/tests/README.md b/tests/README.md new file mode 100644 index 00000000..d6da4668 --- /dev/null +++ b/tests/README.md @@ -0,0 +1,13 @@ +To perform tests manually, run this command from the top directory +of the repository: + +```bash +python -m unittest discover tests +``` + +or simply + +```bash +python -m unittest +``` + diff --git a/tests/assets/kuntsevskaya_centers.json b/tests/assets/kuntsevskaya_centers.json deleted file mode 100644 index 36317ec5..00000000 --- a/tests/assets/kuntsevskaya_centers.json +++ /dev/null @@ -1,28 +0,0 @@ -{ - "w38836456": { - "lat": 55.73064775, - "lon": 37.446065950000005 - }, - "w489951237": { - "lat": 55.730760724999996, - "lon": 37.44602055 - }, - "r7588527": { - "lat": 55.73066371666667, - "lon": 37.44604881666667 - }, - "r7588528": { - "lat": 55.73075192499999, - "lon": 37.44609837 - }, - "r7588561": { - "lat": 55.73070782083333, - "lon": 37.44607359333334 - }, - "r13426423": { - "lat": 55.730760724999996, - "lon": 37.44602055 - }, - "r100": null, - "r101": null -} diff --git a/tests/assets/tiny_world.osm b/tests/assets/tiny_world.osm new file mode 100644 index 00000000..6ee20965 --- /dev/null +++ b/tests/assets/tiny_world.osm @@ -0,0 +1,217 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/assets/tiny_world_gtfs.zip 
b/tests/assets/tiny_world_gtfs.zip new file mode 100644 index 0000000000000000000000000000000000000000..ef7a66a7a36f903560f0d85b419ab67255481f21 GIT binary patch literal 4775 zcma)AOOG2x5Oy9Q#ljGYoREuyKi?|z=&zkX>jpho)I zd|#S^^ku50E)Y`AzLDDEqiqgbOmNpt%f1-T+eU$Fa(H$&N7$^^))rz(Zb67vbT%him zxLL7U(VBV%@$cNjf3reQ0v%|#qHDJozrQ$pA2y?Q4R$MSvg?|yF!m&gSYKk2-rR4D zj*DcS$*5(_II;9X^Nf~0O!MfNh+?C6EaY@!Od4%;VwXCKOi?5cQzJsetSwvbbP4|u zLCLWxmO4%IQxl7^bb6}K3T0=!Gk$g%b7t^UY*!)tfH~6jSYu&vM=W1es0DNQ*=6`* zed$KSu7wvr+3u`m&A8S0}vHi~E1A(>+T+oif(<#sD8>QS=_d;!9J z6&<)*rjBC^`wExbHZ>3sTM&afp48-p^q_#! zlGM#vKQ6w1@HKK3wKvcrotiAxmg=ciJ+RF&d8W#}DX2$cM&ZUYo4sXXWfK|~y4+hP z_KYY>jp``YrBRT}#AxW!ER{g5zs1a>K&Hl2*E`Rjz5C$uU_k9nmtL1tdvX<1 zRV>=t znT6a{9Es_i$^6Nb-3=`ACl~cQ!gyZt8B3W59Dg0ceSKdTS4;?#~Mqal}R6kg$qZuCm|=GUCXV!V*TPN)DJ z4$fYmL(+uPF9t~(bNE|H0@=E%SO_O$nWNQkR#uSL)MKe|i0=xuU^3=86mL@;4w59C za2&!@=_`V8G7WIHAd_ooWS5C-$PdN=&<)2M`_)#bQwQq4p69oI`Qiy)3$+m%LRVI- za22Nrzq0ujrGwl~%Df@nA_;ukB%_GOFm_tFrIe+dAU)-KQ%p&YXGS$89&Er#Er3?eEZ0L_G^$o;neXv @@ -37,7 +38,11 @@ """, - "num_stations": 2, + "cities_info": [ + { + "num_stations": 2, + }, + ], "tracks": [], "extended_tracks": [ (0.0, 0.0), @@ -55,7 +60,8 @@ "positions_on_rails": [], }, }, - "Only 2 stations connected with rails": { + { + "name": "Only 2 stations connected with rails", "xml": """ @@ -100,7 +106,11 @@ """, - "num_stations": 2, + "cities_info": [ + { + "num_stations": 2, + }, + ], "tracks": [ (0.0, 0.0), (1.0, 0.0), @@ -124,7 +134,8 @@ "positions_on_rails": [[0], [1]], }, }, - "Only 6 stations, no rails": { + { + "name": "Only 6 stations, no rails", "xml": """ @@ -190,7 +201,11 @@ """, - "num_stations": 6, + "cities_info": [ + { + "num_stations": 6, + }, + ], "tracks": [], "extended_tracks": [ (0.0, 0.0), @@ -212,7 +227,8 @@ "positions_on_rails": [], }, }, - "One rail line connecting all stations": { + { + "name": "One rail line connecting all stations", "xml": """ @@ -289,7 +305,11 @@ """, - "num_stations": 6, + "cities_info": [ + { + 
"num_stations": 6, + }, + ], "tracks": [ (0.0, 0.0), (1.0, 0.0), @@ -325,7 +345,8 @@ "positions_on_rails": [[0], [1], [2], [3], [4], [5]], }, }, - "One rail line connecting all stations except the last": { + { + "name": "One rail line connecting all stations except the last", "xml": """ @@ -401,7 +422,11 @@ """, - "num_stations": 6, + "cities_info": [ + { + "num_stations": 6, + }, + ], "tracks": [ (0.0, 0.0), (1.0, 0.0), @@ -435,7 +460,8 @@ "positions_on_rails": [[0], [1], [2], [3], [4]], }, }, - "One rail line connecting all stations except the fist": { + { + "name": "One rail line connecting all stations except the first", "xml": """ @@ -511,7 +537,11 @@ """, - "num_stations": 6, + "cities_info": [ + { + "num_stations": 6, + }, + ], "tracks": [ (1.0, 0.0), (2.0, 0.0), @@ -545,7 +575,11 @@ "positions_on_rails": [[0], [1], [2], [3], [4]], }, }, - "One rail line connecting all stations except the fist and the last": { + { + "name": ( + "One rail line connecting all stations " + "except the first and the last", + ), "xml": """ @@ -620,7 +654,11 @@ """, - "num_stations": 6, + "cities_info": [ + { + "num_stations": 6, + }, + ], "tracks": [ (1.0, 0.0), (2.0, 0.0), @@ -652,7 +690,8 @@ "positions_on_rails": [[0], [1], [2], [3]], }, }, - "One rail line connecting only 2 first stations": { + { + "name": "One rail line connecting only 2 first stations", "xml": """ @@ -725,7 +764,11 @@ """, - "num_stations": 6, + "cities_info": [ + { + "num_stations": 6, + }, + ], "tracks": [ (0.0, 0.0), (1.0, 0.0), @@ -753,7 +796,8 @@ "positions_on_rails": [[0], [1]], }, }, - "One rail line connecting only 2 last stations": { + { + "name": "One rail line connecting only 2 last stations", "xml": """ @@ -826,7 +870,11 @@ """, - "num_stations": 6, + "cities_info": [ + { + "num_stations": 6, + }, + ], "tracks": [ (4.0, 0.0), (5.0, 0.0), @@ -854,7 +902,8 @@ "positions_on_rails": [[0], [1]], }, }, - "One rail connecting all stations and protruding at both ends": { + { + "name": "One rail 
connecting all stations and protruding at both ends", "xml": """ @@ -937,7 +986,11 @@ """, - "num_stations": 6, + "cities_info": [ + { + "num_stations": 6, + }, + ], "tracks": [ (-1.0, 0.0), (0.0, 0.0), @@ -977,10 +1030,11 @@ "positions_on_rails": [[1], [2], [3], [4], [5], [6]], }, }, - ( - "Several rails with reversed order for backward route, " - "connecting all stations and protruding at both ends" - ): { + { + "name": ( + "Several rails with reversed order for backward route, " + "connecting all stations and protruding at both ends" + ), "xml": """ @@ -1069,7 +1123,11 @@ """, - "num_stations": 6, + "cities_info": [ + { + "num_stations": 6, + }, + ], "tracks": [ (-1.0, 0.0), (0.0, 0.0), @@ -1109,10 +1167,11 @@ "positions_on_rails": [[1], [2], [3], [4], [5], [6]], }, }, - ( - "One rail laying near all stations requiring station projecting, " - "protruding at both ends" - ): { + { + "name": ( + "One rail laying near all stations requiring station projecting, " + "protruding at both ends" + ), "xml": """ @@ -1189,7 +1248,11 @@ """, - "num_stations": 6, + "cities_info": [ + { + "num_stations": 6, + }, + ], "tracks": [ (-1.0, 0.0), (6.0, 0.0), @@ -1227,7 +1290,8 @@ ], }, }, - "One rail laying near all stations except the first and last": { + { + "name": "One rail laying near all stations except the first and last", "xml": """ @@ -1304,7 +1368,11 @@ """, - "num_stations": 6, + "cities_info": [ + { + "num_stations": 6, + }, + ], "tracks": [ (1.0, 0.0), (4.0, 0.0), @@ -1330,7 +1398,8 @@ "positions_on_rails": [[0], [1 / 3], [2 / 3], [1]], }, }, - "Circle route without rails": { + { + "name": "Circle route without rails", "xml": """ @@ -1377,7 +1446,11 @@ """, - "num_stations": 4, + "cities_info": [ + { + "num_stations": 4, + }, + ], "tracks": [], "extended_tracks": [ (0.0, 0.0), @@ -1398,7 +1471,8 @@ "positions_on_rails": [], }, }, - "Circle route with closed rail line connecting all stations": { + { + "name": "Circle route with closed rail line connecting all stations", 
"xml": """ @@ -1455,7 +1529,11 @@ """, - "num_stations": 4, + "cities_info": [ + { + "num_stations": 4, + }, + ], "tracks": [ (0.0, 0.0), (0.0, 1.0), @@ -1488,4 +1566,4 @@ "positions_on_rails": [[0, 4], [1], [2], [3], [0, 4]], }, }, -} +] diff --git a/tests/assets/kuntsevskaya_transfer.osm b/tests/sample_data_for_center_calculation.py similarity index 84% rename from tests/assets/kuntsevskaya_transfer.osm rename to tests/sample_data_for_center_calculation.py index 48bf044e..49ab3688 100644 --- a/tests/assets/kuntsevskaya_transfer.osm +++ b/tests/sample_data_for_center_calculation.py @@ -1,4 +1,7 @@ - +metro_samples = [ + { + "name": "Transfer at Kuntsevskaya", + "xml": """ @@ -80,3 +83,16 @@ +""", # noqa: E501 + "expected_centers": { + "w38836456": {"lat": 55.73064775, "lon": 37.446065950000005}, + "w489951237": {"lat": 55.730760724999996, "lon": 37.44602055}, + "r7588527": {"lat": 55.73066371666667, "lon": 37.44604881666667}, + "r7588528": {"lat": 55.73075192499999, "lon": 37.44609837}, + "r7588561": {"lat": 55.73070782083333, "lon": 37.44607359333334}, + "r13426423": {"lat": 55.730760724999996, "lon": 37.44602055}, + "r100": None, + "r101": None, + }, + }, +] diff --git a/tests/sample_data_for_error_messages.py b/tests/sample_data_for_error_messages.py index 9d5c5fcd..9bea1c72 100644 --- a/tests/sample_data_for_error_messages.py +++ b/tests/sample_data_for_error_messages.py @@ -1,5 +1,6 @@ -sample_networks = { - "No errors": { +metro_samples = [ + { + "name": "No errors", "xml": """ @@ -38,7 +39,11 @@ """, - "num_stations": 2, + "cities_info": [ + { + "num_stations": 2, + }, + ], "num_lines": 1, "num_light_lines": 0, "num_interchanges": 0, @@ -46,7 +51,8 @@ "warnings": [], "notices": [], }, - "Bad station order": { + { + "name": "Bad station order", "xml": """ @@ -99,7 +105,11 @@ """, - "num_stations": 4, + "cities_info": [ + { + "num_stations": 4, + }, + ], "num_lines": 1, "num_light_lines": 0, "num_interchanges": 0, @@ -112,7 +122,8 @@ "warnings": [], 
"notices": [], }, - "Angle < 20 degrees": { + { + "name": "Angle < 20 degrees", "xml": """ @@ -159,7 +170,11 @@ """, - "num_stations": 3, + "cities_info": [ + { + "num_stations": 3, + }, + ], "num_lines": 1, "num_light_lines": 0, "num_interchanges": 0, @@ -172,7 +187,8 @@ "warnings": [], "notices": [], }, - "Angle between 20 and 45 degrees": { + { + "name": "Angle between 20 and 45 degrees", "xml": """ @@ -219,7 +235,11 @@ """, - "num_stations": 3, + "cities_info": [ + { + "num_stations": 3, + }, + ], "num_lines": 1, "num_light_lines": 0, "num_interchanges": 0, @@ -232,7 +252,8 @@ 'is too narrow, 27 degrees (relation 2, "Backward")', ], }, - "Stops unordered along tracks provided each angle > 45 degrees": { + { + "name": "Unordered stops provided each angle > 45 degrees", "xml": """ @@ -300,7 +321,11 @@ """, - "num_stations": 4, + "cities_info": [ + { + "num_stations": 4, + }, + ], "num_lines": 1, "num_light_lines": 0, "num_interchanges": 0, @@ -313,4 +338,4 @@ "warnings": [], "notices": [], }, -} +] diff --git a/tests/sample_data_for_outputs.py b/tests/sample_data_for_outputs.py new file mode 100644 index 00000000..3c2a590f --- /dev/null +++ b/tests/sample_data_for_outputs.py @@ -0,0 +1,345 @@ +metro_samples = [ + { + "name": "tiny_world", + "xml_file": """assets/tiny_world.osm""", + "cities_info": [ + { + "id": 1, + "name": "Intersecting 2 metro lines", + "country": "World", + "continent": "Africa", + "num_stations": 6, + "num_lines": 2, + "num_light_lines": 0, + "num_interchanges": 1, + "bbox": "-179, -89, 179, 89", + "networks": "network-1", + }, + { + "id": 2, + "name": "One light rail line", + "country": "World", + "continent": "Africa", + "num_stations": 2, + "num_lines": 0, + "num_light_lines": 1, + "num_interchanges": 0, + "bbox": "-179, -89, 179, 89", + "networks": "network-2", + }, + ], + "gtfs_file": "assets/tiny_world_gtfs.zip", + "json_dump": """ +{ + "stopareas": { + "n1": { + "id": "n1", + "center": [ + 0, + 0 + ], + "name": "Station 1", + 
"entrances": [] + }, + "r1": { + "id": "r1", + "center": [ + 0.00470373068, + 0.0047037307 + ], + "name": "Station 2", + "entrances": [] + }, + "r3": { + "id": "r3", + "center": [ + 0.01012040581, + 0.0097589171 + ], + "name": "Station 3", + "entrances": [] + }, + "n4": { + "id": "n4", + "center": [ + 0, + 0.01 + ], + "name": "Station 4", + "entrances": [] + }, + "r2": { + "id": "r2", + "center": [ + 0.0047718624, + 0.00514739839 + ], + "name": "Station 5", + "entrances": [] + }, + "n6": { + "id": "n6", + "center": [ + 0.01, + 0 + ], + "name": "Station 6", + "entrances": [] + }, + "r4": { + "id": "r4", + "center": [ + 0.009716854315, + 0.010286367745 + ], + "name": "Station 7", + "entrances": [] + }, + "r16": { + "id": "r16", + "center": [ + 0.012405493905, + 0.014377764559999999 + ], + "name": "Station 8", + "entrances": [] + } + }, + "networks": { + "Intersecting 2 metro lines": { + "id": 1, + "name": "Intersecting 2 metro lines", + "routes": [ + { + "id": "r15", + "mode": "subway", + "ref": "1", + "name": "Blue Line", + "colour": "#0000ff", + "infill": null, + "itineraries": [ + { + "id": "r7", + "tracks": [ + [ + 0, + 0 + ], + [ + 0.00470373068, + 0.0047037307 + ], + [ + 0.009939661455227341, + 0.009939661455455193 + ] + ], + "start_time": null, + "end_time": null, + "interval": null, + "stops": [ + { + "stoparea_id": "n1", + "distance": 0 + }, + { + "stoparea_id": "r1", + "distance": 741 + }, + { + "stoparea_id": "r3", + "distance": 1565 + } + ] + }, + { + "id": "r8", + "tracks": [ + [ + 0.009939661455227341, + 0.009939661455455193 + ], + [ + 0.00470373068, + 0.0047037307 + ], + [ + 0, + 0 + ] + ], + "start_time": null, + "end_time": null, + "interval": null, + "stops": [ + { + "stoparea_id": "r3", + "distance": 0 + }, + { + "stoparea_id": "r1", + "distance": 824 + }, + { + "stoparea_id": "n1", + "distance": 1565 + } + ] + } + ] + }, + { + "id": "r14", + "mode": "subway", + "ref": "2", + "name": "Red Line", + "colour": "#ff0000", + "infill": null, + 
"itineraries": [ + { + "id": "r12", + "tracks": [ + [ + 0, + 0.01 + ], + [ + 0.01, + 0 + ] + ], + "start_time": null, + "end_time": null, + "interval": null, + "stops": [ + { + "stoparea_id": "n4", + "distance": 0 + }, + { + "stoparea_id": "r2", + "distance": 758 + }, + { + "stoparea_id": "n6", + "distance": 1575 + } + ] + }, + { + "id": "r13", + "tracks": [ + [ + 0.01, + 0 + ], + [ + 0, + 0.01 + ] + ], + "start_time": null, + "end_time": null, + "interval": null, + "stops": [ + { + "stoparea_id": "n6", + "distance": 0 + }, + { + "stoparea_id": "r2", + "distance": 817 + }, + { + "stoparea_id": "n4", + "distance": 1575 + } + ] + } + ] + } + ] + }, + "One light rail line": { + "id": 2, + "name": "One light rail line", + "routes": [ + { + "id": "r11", + "mode": "light_rail", + "ref": "LR", + "name": "LR Line", + "colour": "#a52a2a", + "infill": "#ffffff", + "itineraries": [ + { + "id": "r9", + "tracks": [ + [ + 0.00976752835, + 0.01025306758 + ], + [ + 0.01245616794, + 0.01434446439 + ] + ], + "start_time": null, + "end_time": null, + "interval": null, + "stops": [ + { + "stoparea_id": "r4", + "distance": 0 + }, + { + "stoparea_id": "r16", + "distance": 545 + } + ] + }, + { + "id": "r10", + "tracks": [ + [ + 0.012321033122529725, + 0.014359650255679167 + ], + [ + 0.00966618028, + 0.01031966791 + ] + ], + "start_time": null, + "end_time": null, + "interval": null, + "stops": [ + { + "stoparea_id": "r16", + "distance": 0 + }, + { + "stoparea_id": "r4", + "distance": 538 + } + ] + } + ] + } + ] + } + }, + "transfers": [ + [ + "r1", + "r2" + ], + [ + "r3", + "r4" + ] + ] +} +""", + }, +] diff --git a/tests/test_build_tracks.py b/tests/test_build_tracks.py index 14ea86b5..a1b6a6c6 100644 --- a/tests/test_build_tracks.py +++ b/tests/test_build_tracks.py @@ -1,24 +1,13 @@ -""" -To perform tests manually, run this command from the top directory -of the repository: - -> python -m unittest discover tests - -or simply - -> python -m unittest -""" - - -from 
tests.sample_data_for_build_tracks import sample_networks +from tests.sample_data_for_build_tracks import metro_samples from tests.util import TestCase class TestOneRouteTracks(TestCase): """Test tracks extending and truncating on one-route networks""" - def prepare_city_routes(self, network) -> tuple: - city = self.validate_city(network) + def prepare_city_routes(self, metro_sample: dict) -> tuple: + cities, transfers = self.prepare_cities(metro_sample) + city = cities[0] self.assertTrue(city.is_good) @@ -30,56 +19,56 @@ def prepare_city_routes(self, network) -> tuple: return fwd_route, bwd_route - def _test_tracks_extending_for_network(self, network_data): - fwd_route, bwd_route = self.prepare_city_routes(network_data) + def _test_tracks_extending_for_network(self, metro_sample: dict) -> None: + fwd_route, bwd_route = self.prepare_city_routes(metro_sample) self.assertEqual( fwd_route.tracks, - network_data["tracks"], + metro_sample["tracks"], "Wrong tracks", ) extended_tracks = fwd_route.get_extended_tracks() self.assertEqual( extended_tracks, - network_data["extended_tracks"], + metro_sample["extended_tracks"], "Wrong tracks after extending", ) self.assertEqual( bwd_route.tracks, - network_data["tracks"][::-1], + metro_sample["tracks"][::-1], "Wrong backward tracks", ) extended_tracks = bwd_route.get_extended_tracks() self.assertEqual( extended_tracks, - network_data["extended_tracks"][::-1], + metro_sample["extended_tracks"][::-1], "Wrong backward tracks after extending", ) - def _test_tracks_truncating_for_network(self, network_data): - fwd_route, bwd_route = self.prepare_city_routes(network_data) + def _test_tracks_truncating_for_network(self, metro_sample: dict) -> None: + fwd_route, bwd_route = self.prepare_city_routes(metro_sample) truncated_tracks = fwd_route.get_truncated_tracks(fwd_route.tracks) self.assertEqual( truncated_tracks, - network_data["truncated_tracks"], + metro_sample["truncated_tracks"], "Wrong tracks after truncating", ) truncated_tracks 
= bwd_route.get_truncated_tracks(bwd_route.tracks) self.assertEqual( truncated_tracks, - network_data["truncated_tracks"][::-1], + metro_sample["truncated_tracks"][::-1], "Wrong backward tracks after truncating", ) - def _test_stop_positions_on_rails_for_network(self, network_data): - fwd_route, bwd_route = self.prepare_city_routes(network_data) + def _test_stop_positions_on_rails_for_network(self, sample: dict) -> None: + fwd_route, bwd_route = self.prepare_city_routes(sample) for route, route_label in zip( (fwd_route, bwd_route), ("forward", "backward") ): - route_data = network_data[route_label] + route_data = sample[route_label] for attr in ( "first_stop_on_rails_index", @@ -97,21 +86,27 @@ def _test_stop_positions_on_rails_for_network(self, network_data): rs.positions_on_rails for rs in route.stops[first_ind : last_ind + 1] # noqa E203 ] - self.assertListAlmostEqual( + self.assertSequenceAlmostEqual( positions_on_rails, route_data["positions_on_rails"] ) def test_tracks_extending(self) -> None: - for network_name, network_data in sample_networks.items(): - with self.subTest(msg=network_name): - self._test_tracks_extending_for_network(network_data) + for sample in metro_samples: + sample_name = sample["name"] + sample["cities_info"][0]["name"] = sample_name + with self.subTest(msg=sample_name): + self._test_tracks_extending_for_network(sample) def test_tracks_truncating(self) -> None: - for network_name, network_data in sample_networks.items(): - with self.subTest(msg=network_name): - self._test_tracks_truncating_for_network(network_data) + for sample in metro_samples: + sample_name = sample["name"] + sample["cities_info"][0]["name"] = sample_name + with self.subTest(msg=sample_name): + self._test_tracks_truncating_for_network(sample) def test_stop_position_on_rails(self) -> None: - for network_name, network_data in sample_networks.items(): - with self.subTest(msg=network_name): - self._test_stop_positions_on_rails_for_network(network_data) + for sample in 
metro_samples: + sample_name = sample["name"] + sample["cities_info"][0]["name"] = sample_name + with self.subTest(msg=sample_name): + self._test_stop_positions_on_rails_for_network(sample) diff --git a/tests/test_center_calculation.py b/tests/test_center_calculation.py index 4f01a3cf..0e423605 100644 --- a/tests/test_center_calculation.py +++ b/tests/test_center_calculation.py @@ -1,28 +1,28 @@ -import json -from pathlib import Path +import io from unittest import TestCase from process_subways import calculate_centers from subway_io import load_xml +from tests.sample_data_for_center_calculation import metro_samples class TestCenterCalculation(TestCase): """Test center calculation. Test data [should] contain among others the following edge cases: - - an empty relation. It's element should not obtain "center" key. - - relation as member of relation, the child relation following the parent - in the OSM XML file. + - an empty relation. Its element should not obtain "center" key. + - relation as member of another relation, the child relation following + the parent in the OSM XML. - relation with incomplete members (broken references). - relations with cyclic references. 
""" - ASSETS_PATH = Path(__file__).resolve().parent / "assets" - OSM_DATA = str(ASSETS_PATH / "kuntsevskaya_transfer.osm") - CORRECT_CENTERS = str(ASSETS_PATH / "kuntsevskaya_centers.json") - - def test__calculate_centers(self) -> None: - elements = load_xml(self.OSM_DATA) + def test_calculate_centers(self) -> None: + for sample in metro_samples: + with self.subTest(msg=sample["name"]): + self._test_calculate_centers_for_sample(sample) + def _test_calculate_centers_for_sample(self, metro_sample: dict) -> None: + elements = load_xml(io.BytesIO(metro_sample["xml"].encode())) calculate_centers(elements) elements_dict = { @@ -36,12 +36,11 @@ def test__calculate_centers(self) -> None: if "center" in el } - with open(self.CORRECT_CENTERS) as f: - correct_centers = json.load(f) + expected_centers = metro_sample["expected_centers"] - self.assertTrue(set(calculated_centers).issubset(correct_centers)) + self.assertTrue(set(calculated_centers).issubset(expected_centers)) - for k, correct_center in correct_centers.items(): + for k, correct_center in expected_centers.items(): if correct_center is None: self.assertNotIn("center", elements_dict[k]) else: diff --git a/tests/test_error_messages.py b/tests/test_error_messages.py index 12a5583d..aee6f48d 100644 --- a/tests/test_error_messages.py +++ b/tests/test_error_messages.py @@ -1,4 +1,4 @@ -from tests.sample_data_for_error_messages import sample_networks +from tests.sample_data_for_error_messages import metro_samples from tests.util import TestCase @@ -7,16 +7,19 @@ class TestValidationMessages(TestCase): on different types of errors in input OSM data. 
""" - def _test_validation_messages_for_network(self, network_data): - city = self.validate_city(network_data) + def _test_validation_messages_for_network( + self, metro_sample: dict + ) -> None: + cities, transfers = self.prepare_cities(metro_sample) + city = cities[0] for err_level in ("errors", "warnings", "notices"): self.assertListEqual( sorted(getattr(city, err_level)), - sorted(network_data[err_level]), + sorted(metro_sample[err_level]), ) def test_validation_messages(self) -> None: - for network_name, network_data in sample_networks.items(): - with self.subTest(msg=network_name): - self._test_validation_messages_for_network(network_data) + for sample in metro_samples: + with self.subTest(msg=sample["name"]): + self._test_validation_messages_for_network(sample) diff --git a/tests/test_gtfs_processor.py b/tests/test_gtfs_processor.py index 5a234e86..86d1cac6 100644 --- a/tests/test_gtfs_processor.py +++ b/tests/test_gtfs_processor.py @@ -1,9 +1,13 @@ -from unittest import TestCase +import codecs +import csv +from functools import partial +from pathlib import Path +from zipfile import ZipFile -from processors.gtfs import ( - dict_to_row, - GTFS_COLUMNS, -) +from processors._common import transit_to_dict +from processors.gtfs import dict_to_row, GTFS_COLUMNS, transit_data_to_gtfs +from tests.util import TestCase +from tests.sample_data_for_outputs import metro_samples class TestGTFS(TestCase): @@ -94,3 +98,62 @@ def test__dict_to_row__numeric_values(self) -> None: self.assertListEqual( dict_to_row(shape["shape_data"], "shapes"), shape["answer"] ) + + def test__transit_data_to_gtfs(self) -> None: + for metro_sample in metro_samples: + cities, transfers = self.prepare_cities(metro_sample) + calculated_transit_data = transit_to_dict(cities, transfers) + calculated_gtfs_data = transit_data_to_gtfs( + calculated_transit_data + ) + + control_gtfs_data = self._readGtfs( + Path(__file__).resolve().parent / metro_sample["gtfs_file"] + ) + 
self._compareGtfs(calculated_gtfs_data, control_gtfs_data) + + @staticmethod + def _readGtfs(filepath: str) -> dict: + gtfs_data = dict() + with ZipFile(filepath) as zf: + for gtfs_feature in GTFS_COLUMNS: + with zf.open(f"{gtfs_feature}.txt") as f: + reader = csv.reader(codecs.iterdecode(f, "utf-8")) + next(reader) # read header + rows = list(reader) + gtfs_data[gtfs_feature] = rows + return gtfs_data + + def _compareGtfs( + self, calculated_gtfs_data: dict, control_gtfs_data: dict + ) -> None: + for gtfs_feature in GTFS_COLUMNS: + calculated_rows = sorted( + map( + partial(dict_to_row, record_type=gtfs_feature), + calculated_gtfs_data[gtfs_feature], + ) + ) + control_rows = sorted(control_gtfs_data[gtfs_feature]) + + self.assertEqual(len(calculated_rows), len(control_rows)) + + for i, (calculated_row, control_row) in enumerate( + zip(calculated_rows, control_rows) + ): + self.assertEqual( + len(calculated_row), + len(control_row), + f"Different length of {i}-th row of {gtfs_feature}", + ) + for calculated_value, control_value in zip( + calculated_row, control_row + ): + if calculated_value is None: + self.assertEqual(control_value, "", f"in {i}-th row") + else: # convert str to float/int/str + self.assertAlmostEqual( + calculated_value, + type(calculated_value)(control_value), + places=10, + ) diff --git a/tests/test_storage.py b/tests/test_storage.py new file mode 100644 index 00000000..978529f0 --- /dev/null +++ b/tests/test_storage.py @@ -0,0 +1,26 @@ +import json + +from processors._common import transit_to_dict +from tests.sample_data_for_outputs import metro_samples +from tests.util import TestCase, TestTransitDataMixin + + +class TestStorage(TestCase, TestTransitDataMixin): + def test_storage(self) -> None: + for sample in metro_samples: + with self.subTest(msg=sample["name"]): + self._test_storage_for_sample(sample) + + def _test_storage_for_sample(self, metro_sample: dict) -> None: + cities, transfers = self.prepare_cities(metro_sample) + + 
calculated_transit_data = transit_to_dict(cities, transfers) + + control_transit_data = json.loads(metro_sample["json_dump"]) + control_transit_data["transfers"] = set( + map(tuple, control_transit_data["transfers"]) + ) + + self.compare_transit_data( + calculated_transit_data, control_transit_data + ) diff --git a/tests/util.py b/tests/util.py index efab8c22..56b1962e 100644 --- a/tests/util.py +++ b/tests/util.py @@ -1,15 +1,23 @@ import io +from collections.abc import Sequence, Mapping +from operator import itemgetter +from pathlib import Path +from typing import Any from unittest import TestCase as unittestTestCase +from process_subways import ( + add_osm_elements_to_cities, + validate_cities, + calculate_centers, +) from subway_io import load_xml -from subway_structure import City +from subway_structure import City, find_transfers class TestCase(unittestTestCase): """TestCase class for testing the Subway Validator""" CITY_TEMPLATE = { - "id": 1, "name": "Null Island", "country": "World", "continent": "Africa", @@ -21,29 +29,184 @@ class TestCase(unittestTestCase): "num_interchanges": 0, } - def validate_city(self, network) -> City: - city_data = self.CITY_TEMPLATE.copy() - for attr in self.CITY_TEMPLATE.keys(): - if attr in network: - city_data[attr] = network[attr] - - city = City(city_data) - elements = load_xml(io.BytesIO(network["xml"].encode("utf-8"))) - for el in elements: - city.add(el) - city.extract_routes() - city.validate() - return city - - def assertListAlmostEqual(self, list1, list2, places=10) -> None: - if not (isinstance(list1, list) and isinstance(list2, list)): + @classmethod + def setUpClass(cls) -> None: + cls.city_class = City + + def prepare_cities(self, metro_sample: dict) -> tuple: + """Load cities from file/string, validate them and return cities + and transfers. + """ + + def assign_unique_id(city_info: dict, cities_info: list[dict]) -> None: + """city_info - newly added city, cities_info - already added + cities. 
Check city id uniqueness / assign unique id to the city. + """ + occupied_ids = set(c["id"] for c in cities_info) + if "id" in city_info: + if city_info["id"] in occupied_ids: + raise RuntimeError("Not unique city ids in test data") + else: + city_info["id"] = max(occupied_ids, default=1) + 1 + + cities_given_info = metro_sample["cities_info"] + cities_info = list() + for city_given_info in cities_given_info: + city_info = self.CITY_TEMPLATE.copy() + for attr in city_given_info.keys(): + city_info[attr] = city_given_info[attr] + assign_unique_id(city_info, cities_info) + cities_info.append(city_info) + + if len(set(ci["name"] for ci in cities_info)) < len(cities_info): + raise RuntimeError("Not unique city names in test data") + + cities = list(map(self.city_class, cities_info)) + if "xml" in metro_sample: + xml_file = io.BytesIO(metro_sample["xml"].encode()) + else: + xml_file = ( + Path(__file__).resolve().parent / metro_sample["xml_file"] + ) + elements = load_xml(xml_file) + calculate_centers(elements) + add_osm_elements_to_cities(elements, cities) + validate_cities(cities) + transfers = find_transfers(elements, cities) + return cities, transfers + + def _assertAnyAlmostEqual( + self, + first: Any, + second: Any, + places: int = 10, + ignore_keys: set = None, + ) -> None: + """Dispatcher method to other "...AlmostEqual" methods + depending on argument types. + """ + if isinstance(first, Mapping): + self.assertMappingAlmostEqual(first, second, places, ignore_keys) + elif isinstance(first, Sequence) and not isinstance( + first, (str, bytes) + ): + self.assertSequenceAlmostEqual(first, second, places, ignore_keys) + else: + self.assertAlmostEqual(first, second, places) + + def assertSequenceAlmostEqual( + self, + seq1: Sequence, + seq2: Sequence, + places: int = 10, + ignore_keys: set = None, + ) -> None: + """Compare two sequences, items of numeric types being compared + approximately, containers being approx-compared recursively. 
+ + :param: seq1 a sequence of values of any types, including collections + :param: seq2 a sequence of values of any types, including collections + :param: places number of fractional digits (passed to + assertAlmostEqual() method of parent class) + :param: ignore_keys a set of strs with keys in dictionaries + that should be ignored during recursive comparison + :return: None + """ + if not (isinstance(seq1, Sequence) and isinstance(seq2, Sequence)): raise RuntimeError( - f"Not lists passed to the '{self.__class__.__name__}." - "assertListAlmostEqual' method" + f"Not a sequence passed to the '{self.__class__.__name__}." + "assertSequenceAlmostEqual' method" ) - self.assertEqual(len(list1), len(list2)) - for a, b in zip(list1, list2): - if isinstance(a, list) and isinstance(b, list): - self.assertListAlmostEqual(a, b, places) - else: - self.assertAlmostEqual(a, b, places) + self.assertEqual(len(seq1), len(seq2)) + for a, b in zip(seq1, seq2): + self._assertAnyAlmostEqual(a, b, places, ignore_keys) + + def assertMappingAlmostEqual( + self, + d1: Mapping, + d2: Mapping, + places: int = 10, + ignore_keys: set = None, + ) -> None: + """Compare dictionaries recursively, numeric values being compared + approximately. + + :param: d1 a mapping of arbitrary key/value types, + including collections + :param: d1 a mapping of arbitrary key/value types, + including collections + :param: places number of fractional digits (passed to + assertAlmostEqual() method of parent class) + :param: ignore_keys a set of strs with keys in dictionaries + that should be ignored during recursive comparison + :return: None + """ + if not (isinstance(d1, Mapping) and isinstance(d2, Mapping)): + raise RuntimeError( + f"Not a dictionary passed to the '{self.__class__.__name__}." 
+ "assertMappingAlmostEqual' method" + ) + + d1_keys = set(d1.keys()) + d2_keys = set(d2.keys()) + if ignore_keys: + d1_keys -= ignore_keys + d2_keys -= ignore_keys + self.assertSetEqual(d1_keys, d2_keys) + for k in d1_keys: + v1 = d1[k] + v2 = d2[k] + self._assertAnyAlmostEqual(v1, v2, places, ignore_keys) + + +class TestTransitDataMixin: + def compare_transit_data(self, td1: dict, td2: dict) -> None: + """Compare transit data td1 and td2 remembering that: + - arrays that represent sets ("routes", "itineraries", "entrances") + should be compared without order; + - all floating-point values (coordinates) should be compared + approximately. + """ + self.assertMappingAlmostEqual( + td1, + td2, + ignore_keys={"stopareas", "routes", "itineraries"}, + ) + + networks1 = td1["networks"] + networks2 = td2["networks"] + + id_cmp = itemgetter("id") + + for network_name, network_data1 in networks1.items(): + network_data2 = networks2[network_name] + routes1 = sorted(network_data1["routes"], key=id_cmp) + routes2 = sorted(network_data2["routes"], key=id_cmp) + self.assertEqual(len(routes1), len(routes2)) + for r1, r2 in zip(routes1, routes2): + self.assertMappingAlmostEqual( + r1, r2, ignore_keys={"itineraries"} + ) + its1 = sorted(r1["itineraries"], key=id_cmp) + its2 = sorted(r2["itineraries"], key=id_cmp) + self.assertEqual(len(its1), len(its2)) + for it1, it2 in zip(its1, its2): + self.assertMappingAlmostEqual(it1, it2) + + transfers1 = td1["transfers"] + transfers2 = td2["transfers"] + self.assertSetEqual(transfers1, transfers2) + + stopareas1 = td1["stopareas"] + stopareas2 = td2["stopareas"] + self.assertMappingAlmostEqual( + stopareas1, stopareas2, ignore_keys={"entrances"} + ) + + for sa_id, sa1_data in stopareas1.items(): + sa2_data = stopareas2[sa_id] + entrances1 = sorted(sa1_data["entrances"], key=id_cmp) + entrances2 = sorted(sa2_data["entrances"], key=id_cmp) + self.assertEqual(len(entrances1), len(entrances2)) + for e1, e2 in zip(entrances1, entrances2): + 
self.assertMappingAlmostEqual(e1, e2) From 6c796ac8c18794b80bb9d16b6784eb6fe1efdd1f Mon Sep 17 00:00:00 2001 From: Alexey Zakharenkov Date: Thu, 23 Nov 2023 10:07:54 +0300 Subject: [PATCH 02/13] Check if stations are missing/differ in backward direction of some route --- subway_structure.py | 320 +++++++-- tests/assets/route_masters.osm | 527 ++++++++++++++ tests/assets/twin_routes.osm | 578 ++++++++++++++++ tests/assets/twin_routes_with_divergence.osm | 680 +++++++++++++++++++ tests/sample_data_for_error_messages.py | 76 ++- tests/sample_data_for_outputs.py | 10 +- tests/sample_data_for_twin_routes.py | 78 +++ tests/test_error_messages.py | 15 +- tests/test_route_master.py | 26 + 9 files changed, 2215 insertions(+), 95 deletions(-) create mode 100644 tests/assets/route_masters.osm create mode 100644 tests/assets/twin_routes.osm create mode 100644 tests/assets/twin_routes_with_divergence.osm create mode 100644 tests/sample_data_for_twin_routes.py create mode 100644 tests/test_route_master.py diff --git a/subway_structure.py b/subway_structure.py index bb38f857..e79d2130 100644 --- a/subway_structure.py +++ b/subway_structure.py @@ -12,27 +12,26 @@ ALLOWED_TRANSFERS_MISMATCH = 0.07 # part of total interchanges count ALLOWED_ANGLE_BETWEEN_STOPS = 45 # in degrees DISALLOWED_ANGLE_BETWEEN_STOPS = 20 # in degrees +SUGGEST_TRANSFER_MIN_DISTANCE = 100 # in meters # If an object was moved not too far compared to previous script run, # it is likely the same object DISPLACEMENT_TOLERANCE = 300 # in meters -MODES_RAPID = set(("subway", "light_rail", "monorail", "train")) -MODES_OVERGROUND = set(("tram", "bus", "trolleybus", "aerialway", "ferry")) -DEFAULT_MODES_RAPID = set(("subway", "light_rail")) -DEFAULT_MODES_OVERGROUND = set(("tram",)) # TODO: bus and trolleybus? 
+MODES_RAPID = {"subway", "light_rail", "monorail", "train"} +MODES_OVERGROUND = {"tram", "bus", "trolleybus", "aerialway", "ferry"} +DEFAULT_MODES_RAPID = {"subway", "light_rail"} +DEFAULT_MODES_OVERGROUND = {"tram"} # TODO: bus and trolleybus? ALL_MODES = MODES_RAPID | MODES_OVERGROUND -RAILWAY_TYPES = set( - ( - "rail", - "light_rail", - "subway", - "narrow_gauge", - "funicular", - "monorail", - "tram", - ) -) +RAILWAY_TYPES = { + "rail", + "light_rail", + "subway", + "narrow_gauge", + "funicular", + "monorail", + "tram", +} CONSTRUCTION_KEYS = ( "construction", "proposed", @@ -49,7 +48,7 @@ def get_start_end_times(opening_hours): """Very simplified method to parse OSM opening_hours tag. We simply take the first HH:MM-HH:MM substring which is the most probable - opening hours interval for the most of weekdays. + opening hours interval for the most of the weekdays. """ start_time, end_time = None, None m = START_END_TIMES_RE.match(opening_hours) @@ -102,9 +101,9 @@ def el_center(el): if not el: return None if "lat" in el: - return (el["lon"], el["lat"]) + return el["lon"], el["lat"] elif "center" in el: - return (el["center"]["lon"], el["center"]["lat"]) + return el["center"]["lon"], el["center"]["lat"] return None @@ -485,7 +484,7 @@ def __init__(self, station, city, stop_area=None): self.center[i] /= len(self.stops) + len(self.platforms) def get_elements(self): - result = set([self.id, self.station.id]) + result = {self.id, self.station.id} result.update(self.entrances) result.update(self.exits) result.update(self.stops) @@ -1156,6 +1155,12 @@ def get_truncated_tracks(self, tracks): return tracks + def are_tracks_complete(self) -> bool: + return ( + self.first_stop_on_rails_index == 0 + and self.last_stop_on_rails_index == len(self) - 1 + ) + def get_tracks_geometry(self): tracks = self.get_extended_tracks() tracks = self.get_truncated_tracks(tracks) @@ -1350,6 +1355,36 @@ def try_resort_stops(self): ] return True + def get_end_transfers(self) -> tuple[str, 
str]: + """Using transfer ids because a train can arrive at different + stations within a transfer. But disregard transfer that may give + an impression of a circular route (for example, + Simonis / Elisabeth station and route 2 in Brussels). + """ + return ( + (self[0].stoparea.id, self[-1].stoparea.id) + if ( + self[0].stoparea.transfer is not None + and self[0].stoparea.transfer == self[-1].stoparea.transfer + ) + else ( + self[0].stoparea.transfer or self[0].stoparea.id, + self[-1].stoparea.transfer or self[-1].stoparea.id, + ) + ) + + def get_transfers_sequence(self) -> list[str]: + """Return a list of stoparea or transfer (if not None) ids.""" + transfer_seq = [ + stop.stoparea.transfer or stop.stoparea.id for stop in self + ] + if ( + self[0].stoparea.transfer is not None + and self[0].stoparea.transfer == self[-1].stoparea.transfer + ): + transfer_seq[0], transfer_seq[-1] = self.get_end_transfers() + return transfer_seq + def __len__(self): return len(self.stops) @@ -1479,13 +1514,75 @@ def add(self, route, city): else: self.interval = min(self.interval, route.interval) + # Choose minimal id for determinancy if not self.has_master and (not self.id or self.id > route.id): self.id = route.id self.routes.append(route) - if not self.best or len(route.stops) > len(self.best.stops): + if ( + not self.best + or len(route.stops) > len(self.best.stops) + or ( + # Choose route with minimal id for determinancy + len(route.stops) == len(self.best.stops) + and route.element["id"] < self.best.element["id"] + ) + ): self.best = route + def get_meaningful_routes(self) -> list[Route]: + return [route for route in self if len(route) >= 2] + + def find_twin_routes(self) -> dict[Route, Route]: + """Two routes are twins if they have the same end stations + and opposite directions, and the number of stations is + the same or almost the same. We'll then find stops that are present + in one direction and is missing in another direction - to warn. 
+ """ + + twin_routes = {} # route => "twin" route + + for route in self.get_meaningful_routes(): + if route.is_circular: + continue # Difficult to calculate. TODO(?) in the future + if route in twin_routes: + continue + if len(route) < 2: + continue + + route_transfer_ids = set(route.get_transfers_sequence()) + ends = route.get_end_transfers() + ends_reversed = ends[::-1] + + twin_candidates = [ + r + for r in self + if not r.is_circular + and r not in twin_routes + and r.get_end_transfers() == ends_reversed + # If absolute or relative difference in station count is large, + # possibly it's an express version of a route - skip it. + and ( + abs(len(r) - len(route)) <= 2 + or abs(len(r) - len(route)) / max(len(r), len(route)) + <= 0.2 + ) + ] + + if not twin_candidates: + continue + + twin_route = min( + twin_candidates, + key=lambda r: len( + route_transfer_ids ^ set(r.get_transfers_sequence()) + ), + ) + twin_routes[route] = twin_route + twin_routes[twin_route] = route + + return twin_routes + def stop_areas(self): """Returns a list of all stations on all route variants.""" seen_ids = set() @@ -1521,6 +1618,7 @@ def __init__(self, city_data, overground=False): self.errors = [] self.warnings = [] self.notices = [] + self.id = None self.try_fill_int_attribute(city_data, "id") self.name = city_data["name"] self.country = city_data["country"] @@ -1555,7 +1653,7 @@ def __init__(self, city_data, overground=False): else: self.modes = DEFAULT_MODES_RAPID else: - self.modes = set([x.strip() for x in networks[0].split(",")]) + self.modes = {x.strip() for x in networks[0].split(",")} # Reversing bbox so it is (xmin, ymin, xmax, ymax) bbox = city_data["bbox"].split(",") @@ -1627,7 +1725,7 @@ def warn(self, message, el=None): self.warnings.append(msg) def error(self, message, el=None): - """Error if a critical problem that invalidates the city""" + """Error is a critical problem that invalidates the city.""" msg = City.log_message(message, el) self.errors.append(msg) @@ 
-1914,37 +2012,18 @@ def count_unused_entrances(self): f"relations: {format_elid_list(not_in_sa)}" ) - def check_return_routes(self, rmaster): - variants = {} - have_return = set() - for variant in rmaster: - if len(variant) < 2: - continue - # Using transfer ids because a train can arrive at different - # stations within a transfer. But disregard transfer that may give - # an impression of a circular route (for example, - # Simonis / Elisabeth station and route 2 in Brussels) - if variant[0].stoparea.transfer == variant[-1].stoparea.transfer: - t = (variant[0].stoparea.id, variant[-1].stoparea.id) - else: - t = ( - variant[0].stoparea.transfer or variant[0].stoparea.id, - variant[-1].stoparea.transfer or variant[-1].stoparea.id, - ) - if t in variants: - continue - variants[t] = variant.element - tr = (t[1], t[0]) - if tr in variants: - have_return.add(t) - have_return.add(tr) + def check_return_routes(self, rmaster: RouteMaster) -> None: + """Check if a route has return direction, and if twin routes + miss stations. + """ + meaningful_routes = rmaster.get_meaningful_routes() - if len(variants) == 0: + if len(meaningful_routes) == 0: self.error( - "An empty route master {}. Please set construction:route " - "if it is under construction".format(rmaster.id) + f"An empty route master {rmaster.id}. 
" + "Please set construction:route if it is under construction" ) - elif len(variants) == 1: + elif len(meaningful_routes) == 1: log_function = ( self.error if not rmaster.best.is_circular else self.notice ) @@ -1954,9 +2033,144 @@ def check_return_routes(self, rmaster): rmaster.best.element, ) else: - for t, rel in variants.items(): - if t not in have_return: - self.notice("Route does not have a return direction", rel) + all_ends = { + route.get_end_transfers(): route for route in meaningful_routes + } + for route in meaningful_routes: + ends = route.get_end_transfers() + if ends[::-1] not in all_ends: + self.notice( + "Route does not have a return direction", route.element + ) + + twin_routes = rmaster.find_twin_routes() + for route1, route2 in twin_routes.items(): + if route1.id > route2.id: + continue # to process a pair of routes only once + # and to ensure the order of routes in the pair + self.alert_twin_routes_differ(route1, route2) + + def alert_twin_routes_differ(self, route1: Route, route2: Route) -> None: + """Arguments are that route1.id < route2.id""" + ( + stops_missing_from_route1, + stops_missing_from_route2, + stops_that_dont_match, + ) = self.calculate_twin_routes_diff(route1, route2) + + for st in stops_missing_from_route1: + if ( + not route1.are_tracks_complete() + or ( + projected_point := project_on_line( + st.stoparea.center, route1.tracks + )["projected_point"] + ) + is not None + and distance(st.stoparea.center, projected_point) + <= MAX_DISTANCE_STOP_TO_LINE + ): + self.notice( + f"Stop {st.stoparea.station.name} {st.stop} is included " + f"into the {route2.id} but not included into {route1.id}", + route1.element, + ) + + for st in stops_missing_from_route2: + if ( + not route2.are_tracks_complete() + or ( + projected_point := project_on_line( + st.stoparea.center, route2.tracks + )["projected_point"] + ) + is not None + and distance(st.stoparea.center, projected_point) + <= MAX_DISTANCE_STOP_TO_LINE + ): + self.notice( + f"Stop 
{st.stoparea.station.name} {st.stop} is included " + f"into the {route1.id} but not included into {route2.id}", + route2.element, + ) + + for st1, st2 in stops_that_dont_match: + if ( + st1.stoparea.station == st2.stoparea.station + or distance(st1.stop, st2.stop) < SUGGEST_TRANSFER_MIN_DISTANCE + ): + self.notice( + "Should there be one stoparea or a transfer between " + f"{st1.stoparea.station.name} {st1.stop} and " + f"{st2.stoparea.station.name} {st2.stop}?", + route1.element, + ) + + @staticmethod + def calculate_twin_routes_diff(route1: Route, route2: Route) -> tuple: + """Wagner–Fischer algorithm for stops diff in two twin routes.""" + + stops1 = route1.stops + stops2 = route2.stops[::-1] + + def stops_match(stop1: RouteStop, stop2: RouteStop) -> bool: + return ( + stop1.stoparea == stop2.stoparea + or stop1.stoparea.transfer is not None + and stop1.stoparea.transfer == stop2.stoparea.transfer + ) + + d = [[0] * (len(stops2) + 1) for _ in range(len(stops1) + 1)] + d[0] = list(range(len(stops2) + 1)) + for i in range(len(stops1) + 1): + d[i][0] = i + + for i in range(1, len(stops1) + 1): + for j in range(1, len(stops2) + 1): + d[i][j] = ( + d[i - 1][j - 1] + if stops_match(stops1[i - 1], stops2[j - 1]) + else min((d[i - 1][j], d[i][j - 1], d[i - 1][j - 1])) + 1 + ) + + stops_missing_from_route1: list[RouteStop] = [] + stops_missing_from_route2: list[RouteStop] = [] + stops_that_dont_match: list[tuple[RouteStop, RouteStop]] = [] + + i = len(stops1) + j = len(stops2) + while not (i == 0 and j == 0): + action = None + if i > 0 and j > 0: + match = stops_match(stops1[i - 1], stops2[j - 1]) + if match and d[i - 1][j - 1] == d[i][j]: + action = "no" + elif not match and d[i - 1][j - 1] + 1 == d[i][j]: + action = "change" + if not action and i > 0 and d[i - 1][j] + 1 == d[i][j]: + action = "add_2" + if not action and j > 0 and d[i][j - 1] + 1 == d[i][j]: + action = "add_1" + + match action: + case "add_1": + stops_missing_from_route1.append(stops2[j - 1]) + j -= 1 + 
case "add_2": + stops_missing_from_route2.append(stops1[i - 1]) + i -= 1 + case _: + if action == "change": + stops_that_dont_match.append( + (stops1[i - 1], stops2[j - 1]) + ) + i -= 1 + j -= 1 + return ( + stops_missing_from_route1, + stops_missing_from_route2, + stops_that_dont_match, + ) def validate_lines(self): self.found_light_lines = len( diff --git a/tests/assets/route_masters.osm b/tests/assets/route_masters.osm new file mode 100644 index 00000000..0635a2bb --- /dev/null +++ b/tests/assets/route_masters.osm @@ -0,0 +1,527 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/assets/twin_routes.osm b/tests/assets/twin_routes.osm new file mode 100644 index 00000000..e2e7f428 --- /dev/null +++ b/tests/assets/twin_routes.osm @@ -0,0 +1,578 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/assets/twin_routes_with_divergence.osm b/tests/assets/twin_routes_with_divergence.osm new file mode 100644 index 00000000..057cca36 --- /dev/null +++ b/tests/assets/twin_routes_with_divergence.osm @@ -0,0 +1,680 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/sample_data_for_error_messages.py b/tests/sample_data_for_error_messages.py index 9bea1c72..245cfbbe 100644 --- a/tests/sample_data_for_error_messages.py +++ b/tests/sample_data_for_error_messages.py @@ -42,11 +42,11 @@ "cities_info": [ { "num_stations": 2, + "num_lines": 1, + "num_light_lines": 0, + "num_interchanges": 0, }, ], - "num_lines": 1, - "num_light_lines": 0, - "num_interchanges": 0, "errors": [], "warnings": [], "notices": [], @@ -110,14 +110,9 @@ "num_stations": 4, }, ], - "num_lines": 1, - "num_light_lines": 0, - "num_interchanges": 0, "errors": [ - 'Angle between stops around "Station 3" (2.0, 0.0) ' - 'is too narrow, 0 degrees (relation 1, "Forward")', - 'Angle between stops around "Station 2" (1.0, 0.0) ' - 'is too narrow, 0 degrees (relation 1, "Forward")', + 'Angle between stops around "Station 3" (2.0, 0.0) is too narrow, 0 degrees (relation 1, "Forward")', # noqa: E501 + 'Angle between stops around "Station 2" (1.0, 0.0) is too narrow, 0 degrees (relation 1, "Forward")', # noqa: E501 ], "warnings": [], "notices": [], @@ -175,14 +170,9 @@ "num_stations": 3, }, ], - "num_lines": 1, - "num_light_lines": 0, - "num_interchanges": 0, "errors": [ - 'Angle between stops around "Station 2" (1.0, 0.0) ' - 'is too narrow, 11 degrees (relation 1, "Forward")', - 'Angle between stops around "Station 2" (1.0, 0.0) ' - 'is too narrow, 11 degrees (relation 2, "Backward")', + 'Angle between stops around "Station 2" (1.0, 
0.0) is too narrow, 11 degrees (relation 1, "Forward")', # noqa: E501 + 'Angle between stops around "Station 2" (1.0, 0.0) is too narrow, 11 degrees (relation 2, "Backward")', # noqa: E501 ], "warnings": [], "notices": [], @@ -240,16 +230,11 @@ "num_stations": 3, }, ], - "num_lines": 1, - "num_light_lines": 0, - "num_interchanges": 0, "errors": [], "warnings": [], "notices": [ - 'Angle between stops around "Station 2" (1.0, 0.0) ' - 'is too narrow, 27 degrees (relation 1, "Forward")', - 'Angle between stops around "Station 2" (1.0, 0.0) ' - 'is too narrow, 27 degrees (relation 2, "Backward")', + 'Angle between stops around "Station 2" (1.0, 0.0) is too narrow, 27 degrees (relation 1, "Forward")', # noqa: E501 + 'Angle between stops around "Station 2" (1.0, 0.0) is too narrow, 27 degrees (relation 2, "Backward")', # noqa: E501 ], }, { @@ -326,16 +311,45 @@ "num_stations": 4, }, ], - "num_lines": 1, - "num_light_lines": 0, - "num_interchanges": 0, "errors": [ - 'Stops on tracks are unordered near "Station 2" (1.0, 0.0) ' - '(relation 1, "Forward")', - 'Stops on tracks are unordered near "Station 3" (0.0, 0.5) ' - '(relation 2, "Backward")', + 'Stops on tracks are unordered near "Station 2" (1.0, 0.0) (relation 1, "Forward")', # noqa: E501 + 'Stops on tracks are unordered near "Station 3" (0.0, 0.5) (relation 2, "Backward")', # noqa: E501 ], "warnings": [], "notices": [], }, + { + "name": ( + "Many different route masters, both on naked stations and " + "stop_positions/stop_areas/transfers, both linear and circular" + ), + "xml_file": "assets/route_masters.osm", + "cities_info": [ + { + "num_stations": (3 + 3 + 3 + 5 + 3 + 3 + 4) + + (3 + 3 + 3 + 3 + 3 + 3 + 4), + "num_lines": 7 + 7, + "num_interchanges": 0 + 1, + }, + ], + "errors": [ + 'Only one route in route_master. 
Please check if it needs a return route (relation 162, "03: 1-2-3")' # noqa: E501 + ], + "warnings": [], + "notices": [ + 'Route does not have a return direction (relation 155, "02: 1-2-3")', # noqa: E501 + 'Route does not have a return direction (relation 158, "02: 1-3 (2)")', # noqa: E501 + 'Only one route in route_master. Please check if it needs a return route (relation 159, "C: 1-3-5-1")', # noqa: E501 + 'Route does not have a return direction (relation 163, "04: 1-2-3")', # noqa: E501 + 'Route does not have a return direction (relation 164, "04: 2-1")', # noqa: E501 + 'Stop Station 2 (1.0, 0.0) is included into the r203 but not included into r204 (relation 204, "2: 3-1")', # noqa: E501 + 'Route does not have a return direction (relation 205, "3: 1-2-3")', # noqa: E501 + 'Route does not have a return direction (relation 206, "3: 1-2-3")', # noqa: E501 + 'Route does not have a return direction (relation 207, "4: 4-3-2-1")', # noqa: E501 + 'Route does not have a return direction (relation 208, "4: 1-2-3-4")', # noqa: E501 + 'Route does not have a return direction (relation 209, "5: 1-2-3")', # noqa: E501 + 'Route does not have a return direction (relation 210, "5: 2-1")', # noqa: E501 + 'Only one route in route_master. 
Please check if it needs a return route (relation 213, "C3: 1-2-3-8-1")', # noqa: E501 + ], + }, ] diff --git a/tests/sample_data_for_outputs.py b/tests/sample_data_for_outputs.py index 3c2a590f..54193537 100644 --- a/tests/sample_data_for_outputs.py +++ b/tests/sample_data_for_outputs.py @@ -6,25 +6,17 @@ { "id": 1, "name": "Intersecting 2 metro lines", - "country": "World", - "continent": "Africa", - "num_stations": 6, + "num_stations": 4 + 2, "num_lines": 2, - "num_light_lines": 0, "num_interchanges": 1, - "bbox": "-179, -89, 179, 89", "networks": "network-1", }, { "id": 2, "name": "One light rail line", - "country": "World", - "continent": "Africa", "num_stations": 2, "num_lines": 0, "num_light_lines": 1, - "num_interchanges": 0, - "bbox": "-179, -89, 179, 89", "networks": "network-2", }, ], diff --git a/tests/sample_data_for_twin_routes.py b/tests/sample_data_for_twin_routes.py new file mode 100644 index 00000000..58b9e17f --- /dev/null +++ b/tests/sample_data_for_twin_routes.py @@ -0,0 +1,78 @@ +metro_samples = [ + { + "name": ( + "Many different routes, both on naked stations and stop_positions/stop_areas/transfers, both linear and circular" # noqa: E501 + ), + "xml_file": "assets/twin_routes.osm", + "cities_info": [ + { + "num_stations": (3 + 4 + 5 + 5) + (3 + 6 + 7 + 5 + 6 + 7 + 7), + "num_lines": 4 + 7, + "num_interchanges": 0 + 2, + }, + ], + "twin_routes": { # route master => twin routes + "r10021": {"r151": "r153", "r153": "r151"}, + "r10022": {}, + "r10023": {}, + "C": {}, + "r10001": {"r201": "r202", "r202": "r201"}, + "r10002": {}, + "r10003": {"r205": "r206", "r206": "r205"}, + "r10004": {}, + "r10005": {}, + "r10006": {}, + "C3": {}, + }, + "errors": [], + "warnings": [], + "notices": [ + 'Route does not have a return direction (relation 154, "02: 4-3")', + 'Route does not have a return direction (relation 155, "02: 1-3")', + 'Route does not have a return direction (relation 156, "02: 2-4")', + 'Route does not have a return direction (relation 
157, "02: 4-1")', + 'Route does not have a return direction (relation 158, "02: 1-3 (2)")', # noqa: E501 + 'Only one route in route_master. Please check if it needs a return route (relation 159, "C: 1-2-3-4-5-1")', # noqa: E501 + 'Stop Station 4 (3.0, 0.0) is included into the r205 but not included into r206 (relation 206, "3: 7-6-5-3-2-1")', # noqa: E501 + 'Route does not have a return direction (relation 207, "4: 4-3-2-1")', # noqa: E501 + 'Route does not have a return direction (relation 208, "4: 1-2-3-4")', # noqa: E501 + 'Route does not have a return direction (relation 209, "5: 1-2-3-5-6-7")', # noqa: E501 + 'Route does not have a return direction (relation 210, "5: 6-5-3-2-1")', # noqa: E501 + 'Only one route in route_master. Please check if it needs a return route (relation 213, "C3: 1-2-3-5-6-7-8-1")', # noqa: E501 + ], + }, + { + "name": "Twin routes diverging for some extent", + "xml_file": "assets/twin_routes_with_divergence.osm", + "cities_info": [ + { + "num_stations": (22 + 22 + 21 + 21) * 2, + "num_lines": 4 * 2, + "num_interchanges": 0, + }, + ], + "twin_routes": { # route master => twin routes + "r1101": {"r101": "r102", "r102": "r101"}, + "r1102": {"r103": "r104", "r104": "r103"}, + "r1103": {"r105": "r106", "r106": "r105"}, + "r1104": {"r107": "r108", "r108": "r107"}, + "r1201": {"r201": "r202", "r202": "r201"}, + "r1202": {"r203": "r204", "r204": "r203"}, + "r1203": {"r205": "r206", "r206": "r205"}, + "r1204": {"r207": "r208", "r208": "r207"}, + }, + "errors": [], + "warnings": [], + "notices": [ + 'Should there be one stoparea or a transfer between Station 11 (0.1, 0.0) and Station 11(1) (0.1, 0.0003)? (relation 101, "1: 1-...-9-10-11-...-20")', # noqa: E501 + 'Should there be one stoparea or a transfer between Station 10 (0.09, 0.0) and Station 10(1) (0.09, 0.0003)? 
(relation 101, "1: 1-...-9-10-11-...-20")', # noqa: E501 + 'Stop Station 10 (0.09, 0.0) is included into the r105 but not included into r106 (relation 106, "3: 20-...-12-11(1)-9-...-1")', # noqa: E501 + 'Should there be one stoparea or a transfer between Station 11 (0.1, 0.0) and Station 11(1) (0.1, 0.0003)? (relation 105, "3: 1-...-9-10-11-...-20")', # noqa: E501 + 'Stop Station 10 (0.09, 0.0) is included into the r107 but not included into r108 (relation 108, "4: 20-...12-11(2)-9-...-1")', # noqa: E501 + 'Should there be one stoparea or a transfer between Station 11 (0.1, 0.0) and Station 11(1) (0.1, 0.0003)? (relation 201, "11: 1-...-9-10-11-...-20")', # noqa: E501 + 'Should there be one stoparea or a transfer between Station 10 (0.09, 0.0) and Station 10(1) (0.09, 0.0003)? (relation 201, "11: 1-...-9-10-11-...-20")', # noqa: E501 + 'Stop Station 10 (0.09, 0.0) is included into the r205 but not included into r206 (relation 206, "13: 20-...-12-11(1)-9-...-1")', # noqa: E501 + 'Should there be one stoparea or a transfer between Station 11 (0.1, 0.0) and Station 11(1) (0.1, 0.0003)? 
(relation 205, "13: 1-...-9-10-11-...-20")', # noqa: E501 + ], + }, +] diff --git a/tests/test_error_messages.py b/tests/test_error_messages.py index aee6f48d..c8330015 100644 --- a/tests/test_error_messages.py +++ b/tests/test_error_messages.py @@ -1,4 +1,11 @@ -from tests.sample_data_for_error_messages import metro_samples +import itertools + +from tests.sample_data_for_error_messages import ( + metro_samples as metro_samples_error, +) +from tests.sample_data_for_twin_routes import ( + metro_samples as metro_samples_route_masters, +) from tests.util import TestCase @@ -20,6 +27,10 @@ def _test_validation_messages_for_network( ) def test_validation_messages(self) -> None: - for sample in metro_samples: + for sample in itertools.chain( + metro_samples_error, metro_samples_route_masters + ): + if "errors" not in sample: + continue with self.subTest(msg=sample["name"]): self._test_validation_messages_for_network(sample) diff --git a/tests/test_route_master.py b/tests/test_route_master.py new file mode 100644 index 00000000..1bab6173 --- /dev/null +++ b/tests/test_route_master.py @@ -0,0 +1,26 @@ +from tests.util import TestCase + +from tests.sample_data_for_twin_routes import metro_samples + + +class TestRouteMaster(TestCase): + def _test_find_twin_routes_for_network(self, metro_sample: dict) -> None: + cities, transfers = self.prepare_cities(metro_sample) + city = cities[0] + + self.assertTrue(city.is_good) + + for route_master_id, expected_twin_ids in metro_sample[ + "twin_routes" + ].items(): + route_master = city.routes[route_master_id] + calculated_twins = route_master.find_twin_routes() + calculated_twin_ids = { + r1.id: r2.id for r1, r2 in calculated_twins.items() + } + self.assertDictEqual(expected_twin_ids, calculated_twin_ids) + + def test_find_twin_routes(self) -> None: + for sample in metro_samples: + with self.subTest(msg=sample["name"]): + self._test_find_twin_routes_for_network(sample) From 970b4a51ee25a6a03e70f3fbcadd974205597c79 Mon Sep 17 00:00:00 
2001 From: Alexey Zakharenkov Date: Wed, 29 Nov 2023 12:35:37 +0300 Subject: [PATCH 03/13] process train_station_entrance similar to subway_entrance --- process_subways.py | 31 ++-- scripts/download_all_subways.sh | 6 - scripts/filter_all_subways.sh | 6 - scripts/process_subways.sh | 2 +- subway_structure.py | 157 ++++++++++-------- tests/assets/tiny_world.osm | 25 +++ tests/assets/tiny_world_gtfs.zip | Bin 4775 -> 0 bytes tests/assets/tiny_world_gtfs/agency.txt | 3 + tests/assets/tiny_world_gtfs/calendar.txt | 2 + tests/assets/tiny_world_gtfs/frequencies.txt | 7 + tests/assets/tiny_world_gtfs/routes.txt | 4 + tests/assets/tiny_world_gtfs/shapes.txt | 15 ++ tests/assets/tiny_world_gtfs/stop_times.txt | 17 ++ tests/assets/tiny_world_gtfs/stops.txt | 27 +++ tests/assets/tiny_world_gtfs/transfers.txt | 5 + tests/assets/tiny_world_gtfs/trips.txt | 7 + tests/sample_data_for_outputs.py | 41 ++++- tests/test_gtfs_processor.py | 21 +-- tests/test_overpass.py | 163 +++++++++++++++++++ tests/test_station.py | 46 ++++++ 20 files changed, 474 insertions(+), 111 deletions(-) delete mode 100755 scripts/download_all_subways.sh delete mode 100755 scripts/filter_all_subways.sh delete mode 100644 tests/assets/tiny_world_gtfs.zip create mode 100644 tests/assets/tiny_world_gtfs/agency.txt create mode 100644 tests/assets/tiny_world_gtfs/calendar.txt create mode 100644 tests/assets/tiny_world_gtfs/frequencies.txt create mode 100644 tests/assets/tiny_world_gtfs/routes.txt create mode 100644 tests/assets/tiny_world_gtfs/shapes.txt create mode 100644 tests/assets/tiny_world_gtfs/stop_times.txt create mode 100644 tests/assets/tiny_world_gtfs/stops.txt create mode 100644 tests/assets/tiny_world_gtfs/transfers.txt create mode 100644 tests/assets/tiny_world_gtfs/trips.txt create mode 100644 tests/test_overpass.py create mode 100644 tests/test_station.py diff --git a/process_subways.py b/process_subways.py index 6f7e846f..1fd22621 100755 --- a/process_subways.py +++ b/process_subways.py @@ 
-24,7 +24,7 @@ City, CriticalValidationError, find_transfers, - get_unused_entrances_geojson, + get_unused_subway_entrances_geojson, MODES_OVERGROUND, MODES_RAPID, ) @@ -38,26 +38,37 @@ Point = tuple[float, float] -def overpass_request( - overground: bool, overpass_api: str, bboxes: list[list[float]] -) -> list[dict]: +def compose_overpass_request( + overground: bool, bboxes: list[list[float]] +) -> str: + if not bboxes: + raise RuntimeError("No bboxes given for overpass request") + query = "[out:json][timeout:1000];(" modes = MODES_OVERGROUND if overground else MODES_RAPID for bbox in bboxes: - bbox_part = "({})".format(",".join(str(coord) for coord in bbox)) + bbox_part = f"({','.join(str(coord) for coord in bbox)})" query += "(" - for mode in modes: - query += 'rel[route="{}"]{};'.format(mode, bbox_part) + for mode in sorted(modes): + query += f'rel[route="{mode}"]{bbox_part};' query += ");" query += "rel(br)[type=route_master];" if not overground: - query += "node[railway=subway_entrance]{};".format(bbox_part) - query += "rel[public_transport=stop_area]{};".format(bbox_part) + query += f"node[railway=subway_entrance]{bbox_part};" + query += f"node[railway=train_station_entrance]{bbox_part};" + query += f"rel[public_transport=stop_area]{bbox_part};" query += ( "rel(br)[type=public_transport][public_transport=stop_area_group];" ) query += ");(._;>>;);out body center qt;" logging.debug("Query: %s", query) + return query + + +def overpass_request( + overground: bool, overpass_api: str, bboxes: list[list[float]] +) -> list[dict]: + query = compose_overpass_request(overground, bboxes) url = "{}?data={}".format(overpass_api, urllib.parse.quote(query)) response = urllib.request.urlopen(url, timeout=1000) if (r_code := response.getcode()) != 200: @@ -489,7 +500,7 @@ def main() -> None: write_recovery_data(options.recovery_path, recovery_data, cities) if options.entrances: - json.dump(get_unused_entrances_geojson(osm), options.entrances) + 
json.dump(get_unused_subway_entrances_geojson(osm), options.entrances) if options.dump: if os.path.isdir(options.dump): diff --git a/scripts/download_all_subways.sh b/scripts/download_all_subways.sh deleted file mode 100755 index 2797520c..00000000 --- a/scripts/download_all_subways.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -# Still times out, do not use unless you want to be blocked for some hours on Overpass API -TIMEOUT=2000 -QUERY='[out:json][timeout:'$TIMEOUT'];(rel["route"="subway"];rel["route"="light_rail"];rel["public_transport"="stop_area"];rel["public_transport"="stop_area_group"];node["station"="subway"];node["station"="light_rail"];node["railway"="subway_entrance"];);(._;>;);out body center qt;' -http http://overpass-api.de/api/interpreter "data==$QUERY" --timeout $TIMEOUT > subways-$(date +%y%m%d).json -http https://overpass-api.de/api/status | grep available diff --git a/scripts/filter_all_subways.sh b/scripts/filter_all_subways.sh deleted file mode 100755 index 5627f10b..00000000 --- a/scripts/filter_all_subways.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -[ $# -lt 1 ] && echo 'Usage: $0 [] []' && exit 1 -OSMFILTER=${2-./osmfilter} -QRELATIONS="route=subway =light_rail =monorail route_master=subway =light_rail =monorail public_transport=stop_area =stop_area_group" -QNODES="station=subway =light_rail =monorail railway=subway_entrance subway=yes light_rail=yes monorail=yes" -"$OSMFILTER" "$1" --keep= --keep-relations="$QRELATIONS" --keep-nodes="$QNODES" --drop-author -o="${3:-subways-$(date +%y%m%d).osm}" diff --git a/scripts/process_subways.sh b/scripts/process_subways.sh index a27f283c..241b3c1b 100755 --- a/scripts/process_subways.sh +++ b/scripts/process_subways.sh @@ -217,7 +217,7 @@ if [ -n "${NEED_FILTER-}" ]; then check_osmctools mkdir -p $TMPDIR/osmfilter_temp/ QRELATIONS="route=subway =light_rail =monorail =train route_master=subway =light_rail =monorail =train public_transport=stop_area =stop_area_group" - QNODES="railway=station 
station=subway =light_rail =monorail railway=subway_entrance subway=yes light_rail=yes monorail=yes train=yes" + QNODES="railway=station =subway_entrance =train_station_entrance station=subway =light_rail =monorail subway=yes light_rail=yes monorail=yes train=yes" "$OSMCTOOLS/osmfilter" "$PLANET_METRO" \ --keep= \ --keep-relations="$QRELATIONS" \ diff --git a/subway_structure.py b/subway_structure.py index e79d2130..e55bf715 100644 --- a/subway_structure.py +++ b/subway_structure.py @@ -1,11 +1,12 @@ +from __future__ import annotations + import math import re from collections import Counter, defaultdict -from itertools import islice +from itertools import chain, islice from css_colours import normalize_colour - MAX_DISTANCE_TO_ENTRANCES = 300 # in meters MAX_DISTANCE_STOP_TO_LINE = 50 # in meters ALLOWED_STATIONS_MISMATCH = 0.02 # part of total station count @@ -283,13 +284,11 @@ def format_elid_list(ids): class Station: @staticmethod - def get_modes(el): - mode = el["tags"].get("station") - modes = [] if not mode else [mode] - for m in ALL_MODES: - if el["tags"].get(m) == "yes": - modes.append(m) - return set(modes) + def get_modes(el: dict) -> set[str]: + modes = {m for m in ALL_MODES if el["tags"].get(m) == "yes"} + if mode := el["tags"].get("station"): + modes.add(mode) + return modes @staticmethod def is_station(el, modes): @@ -367,7 +366,9 @@ def is_track(el): return False return el["tags"].get("railway") in RAILWAY_TYPES - def __init__(self, station, city, stop_area=None): + def __init__( + self, station: Station, city: City, stop_area: StopArea | None = None + ) -> None: """Call this with a Station object.""" self.element = stop_area or station.element @@ -375,9 +376,10 @@ def __init__(self, station, city, stop_area=None): self.station = station self.stops = set() # set of el_ids of stop_positions self.platforms = set() # set of el_ids of platforms - self.exits = set() # el_id of subway_entrance for leaving the platform - self.entrances = set() # el_id of 
subway_entrance for entering - # the platform + self.exits = set() # el_id of subway_entrance/train_station_entrance + # for leaving the platform + self.entrances = set() # el_id of subway/train_station entrance + # for entering the platform self.center = None # lon, lat of the station centre point self.centers = {} # el_id -> (lon, lat) for all elements self.transfer = None # el_id of a transfer relation @@ -400,62 +402,9 @@ def __init__(self, station, city, stop_area=None): except ValueError as e: city.warn(str(e), stop_area) - # If we have a stop area, add all elements from it - warned_about_tracks = False - for m in stop_area["members"]: - k = el_id(m) - m_el = city.elements.get(k) - if m_el and "tags" in m_el: - if Station.is_station(m_el, city.modes): - if k != station.id: - city.error( - "Stop area has multiple stations", stop_area - ) - elif StopArea.is_stop(m_el): - self.stops.add(k) - elif StopArea.is_platform(m_el): - self.platforms.add(k) - elif m_el["tags"].get("railway") == "subway_entrance": - if m_el["type"] != "node": - city.warn("Subway entrance is not a node", m_el) - if ( - m_el["tags"].get("entrance") != "exit" - and m["role"] != "exit_only" - ): - self.entrances.add(k) - if ( - m_el["tags"].get("entrance") != "entrance" - and m["role"] != "entry_only" - ): - self.exits.add(k) - elif StopArea.is_track(m_el): - if not warned_about_tracks: - city.warn( - "Tracks in a stop_area relation", stop_area - ) - warned_about_tracks = True + self._process_members(station, city, stop_area) else: - # Otherwise add nearby entrances - center = station.center - for c_el in city.elements.values(): - if c_el.get("tags", {}).get("railway") == "subway_entrance": - c_id = el_id(c_el) - if c_id not in city.stop_areas: - c_center = el_center(c_el) - if ( - c_center - and distance(center, c_center) - <= MAX_DISTANCE_TO_ENTRANCES - ): - if c_el["type"] != "node": - city.warn( - "Subway entrance is not a node", c_el - ) - etag = c_el["tags"].get("entrance") - if etag != 
"exit": - self.entrances.add(c_id) - if etag != "entrance": - self.exits.add(c_id) + self._add_nearby_entrances(station, city) if self.exits and not self.entrances: city.warn( @@ -476,13 +425,77 @@ def __init__(self, station, city, stop_area=None): self.center = station.center else: self.center = [0, 0] - for sp in self.stops | self.platforms: + for sp in chain(self.stops, self.platforms): spc = self.centers[sp] for i in range(2): self.center[i] += spc[i] for i in range(2): self.center[i] /= len(self.stops) + len(self.platforms) + def _process_members( + self, station: Station, city: City, stop_area: dict + ) -> None: + # If we have a stop area, add all elements from it + tracks_detected = False + for m in stop_area["members"]: + k = el_id(m) + m_el = city.elements.get(k) + if not m_el or "tags" not in m_el: + continue + if Station.is_station(m_el, city.modes): + if k != station.id: + city.error("Stop area has multiple stations", stop_area) + elif StopArea.is_stop(m_el): + self.stops.add(k) + elif StopArea.is_platform(m_el): + self.platforms.add(k) + elif (entrance_type := m_el["tags"].get("railway")) in ( + "subway_entrance", + "train_station_entrance", + ): + if m_el["type"] != "node": + city.warn(f"{entrance_type} is not a node", m_el) + if ( + m_el["tags"].get("entrance") != "exit" + and m["role"] != "exit_only" + ): + self.entrances.add(k) + if ( + m_el["tags"].get("entrance") != "entrance" + and m["role"] != "entry_only" + ): + self.exits.add(k) + elif StopArea.is_track(m_el): + tracks_detected = True + + if tracks_detected: + city.warn("Tracks in a stop_area relation", stop_area) + + def _add_nearby_entrances(self, station: Station, city: City) -> None: + center = station.center + for entrance_el in ( + el + for el in city.elements.values() + if "tags" in el + and (entrance_type := el["tags"].get("railway")) + in ("subway_entrance", "train_station_entrance") + ): + entrance_id = el_id(entrance_el) + if entrance_id in city.stop_areas: + continue # This 
entrance belongs to some stop_area + c_center = el_center(entrance_el) + if ( + c_center + and distance(center, c_center) <= MAX_DISTANCE_TO_ENTRANCES + ): + if entrance_el["type"] != "node": + city.warn(f"{entrance_type} is not a node", entrance_el) + etag = entrance_el["tags"].get("entrance") + if etag != "exit": + self.entrances.add(entrance_id) + if etag != "entrance": + self.exits.add(entrance_id) + def get_elements(self): result = {self.id, self.station.id} result.update(self.entrances) @@ -1816,7 +1829,7 @@ def make_transfer(self, sag): if len(transfer) > 1: self.transfers.append(transfer) - def extract_routes(self): + def extract_routes(self) -> None: # Extract stations processed_stop_areas = set() for el in self.elements.values(): @@ -1850,7 +1863,7 @@ def extract_routes(self): # Check that stops and platforms belong to # a single stop_area - for sp in station.stops | station.platforms: + for sp in chain(station.stops, station.platforms): if sp in self.stops_and_platforms: self.notice( f"A stop or a platform {sp} belongs to " @@ -2328,7 +2341,7 @@ def find_transfers(elements, cities): return transfers -def get_unused_entrances_geojson(elements): +def get_unused_subway_entrances_geojson(elements: list[dict]) -> dict: global used_entrances features = [] for el in elements: diff --git a/tests/assets/tiny_world.osm b/tests/assets/tiny_world.osm index 6ee20965..276fb804 100644 --- a/tests/assets/tiny_world.osm +++ b/tests/assets/tiny_world.osm @@ -56,6 +56,27 @@ + + + + + + + + + + + + + + + + + + + + + @@ -95,6 +116,8 @@ + + @@ -102,6 +125,8 @@ + + diff --git a/tests/assets/tiny_world_gtfs.zip b/tests/assets/tiny_world_gtfs.zip deleted file mode 100644 index ef7a66a7a36f903560f0d85b419ab67255481f21..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 4775 zcma)AOOG2x5Oy9Q#ljGYoREuyKi?|z=&zkX>jpho)I zd|#S^^ku50E)Y`AzLDDEqiqgbOmNpt%f1-T+eU$Fa(H$&N7$^^))rz(Zb67vbT%him 
zxLL7U(VBV%@$cNjf3reQ0v%|#qHDJozrQ$pA2y?Q4R$MSvg?|yF!m&gSYKk2-rR4D zj*DcS$*5(_II;9X^Nf~0O!MfNh+?C6EaY@!Od4%;VwXCKOi?5cQzJsetSwvbbP4|u zLCLWxmO4%IQxl7^bb6}K3T0=!Gk$g%b7t^UY*!)tfH~6jSYu&vM=W1es0DNQ*=6`* zed$KSu7wvr+3u`m&A8S0}vHi~E1A(>+T+oif(<#sD8>QS=_d;!9J z6&<)*rjBC^`wExbHZ>3sTM&afp48-p^q_#! zlGM#vKQ6w1@HKK3wKvcrotiAxmg=ciJ+RF&d8W#}DX2$cM&ZUYo4sXXWfK|~y4+hP z_KYY>jp``YrBRT}#AxW!ER{g5zs1a>K&Hl2*E`Rjz5C$uU_k9nmtL1tdvX<1 zRV>=t znT6a{9Es_i$^6Nb-3=`ACl~cQ!gyZt8B3W59Dg0ceSKdTS4;?#~Mqal}R6kg$qZuCm|=GUCXV!V*TPN)DJ z4$fYmL(+uPF9t~(bNE|H0@=E%SO_O$nWNQkR#uSL)MKe|i0=xuU^3=86mL@;4w59C za2&!@=_`V8G7WIHAd_ooWS5C-$PdN=&<)2M`_)#bQwQq4p69oI`Qiy)3$+m%LRVI- za22Nrzq0ujrGwl~%Df@nA_;ukB%_GOFm_tFrIe+dAU)-KQ%p&YXGS$89&Er#Er3?eEZ0L_G^$o;neXv None: ) control_gtfs_data = self._readGtfs( - Path(__file__).resolve().parent / metro_sample["gtfs_file"] + Path(__file__).resolve().parent / metro_sample["gtfs_dir"] ) self._compareGtfs(calculated_gtfs_data, control_gtfs_data) @staticmethod - def _readGtfs(filepath: str) -> dict: + def _readGtfs(gtfs_dir: Path) -> dict: gtfs_data = dict() - with ZipFile(filepath) as zf: - for gtfs_feature in GTFS_COLUMNS: - with zf.open(f"{gtfs_feature}.txt") as f: - reader = csv.reader(codecs.iterdecode(f, "utf-8")) - next(reader) # read header - rows = list(reader) - gtfs_data[gtfs_feature] = rows + for gtfs_feature in GTFS_COLUMNS: + with open(gtfs_dir / f"{gtfs_feature}.txt") as f: + reader = csv.reader(f) + next(reader) # read header + rows = list(reader) + gtfs_data[gtfs_feature] = rows return gtfs_data def _compareGtfs( diff --git a/tests/test_overpass.py b/tests/test_overpass.py new file mode 100644 index 00000000..2b0afa3d --- /dev/null +++ b/tests/test_overpass.py @@ -0,0 +1,163 @@ +from unittest import TestCase, mock + +from process_subways import compose_overpass_request, overpass_request + + +class TestOverpassQuery(TestCase): + def test__compose_overpass_request__no_bboxes(self) -> None: + bboxes = [] + for overground in (True, False): + with 
self.subTest(msg=f"{overground=}"): + with self.assertRaises(RuntimeError): + compose_overpass_request(overground, bboxes) + + def test__compose_overpass_request__one_bbox(self) -> None: + bboxes = [[1, 2, 3, 4]] + + expected = { + False: ( + "[out:json][timeout:1000];" + "(" + "(" + 'rel[route="light_rail"](1,2,3,4);' + 'rel[route="monorail"](1,2,3,4);' + 'rel[route="subway"](1,2,3,4);' + 'rel[route="train"](1,2,3,4);' + ");" + "rel(br)[type=route_master];" + "node[railway=subway_entrance](1,2,3,4);" + "node[railway=train_station_entrance](1,2,3,4);" + "rel[public_transport=stop_area](1,2,3,4);" + "rel(br)[type=public_transport]" + "[public_transport=stop_area_group];" + ");" + "(._;>>;);" + "out body center qt;" + ), + True: ( + "[out:json][timeout:1000];" + "(" + "(" + 'rel[route="aerialway"](1,2,3,4);' + 'rel[route="bus"](1,2,3,4);' + 'rel[route="ferry"](1,2,3,4);' + 'rel[route="tram"](1,2,3,4);' + 'rel[route="trolleybus"](1,2,3,4);' + ");" + "rel(br)[type=route_master];" + "rel[public_transport=stop_area](1,2,3,4);" + "rel(br)[type=public_transport]" + "[public_transport=stop_area_group];" + ");" + "(._;>>;);" + "out body center qt;" + ), + } + + for overground, expected_answer in expected.items(): + with self.subTest(msg=f"{overground=}"): + self.assertEqual( + expected_answer, + compose_overpass_request(overground, bboxes), + ) + + def test__compose_overpass_request__several_bboxes(self) -> None: + bboxes = [[1, 2, 3, 4], [5, 6, 7, 8]] + + expected = { + False: ( + "[out:json][timeout:1000];" + "(" + "(" + 'rel[route="light_rail"](1,2,3,4);' + 'rel[route="monorail"](1,2,3,4);' + 'rel[route="subway"](1,2,3,4);' + 'rel[route="train"](1,2,3,4);' + ");" + "rel(br)[type=route_master];" + "node[railway=subway_entrance](1,2,3,4);" + "node[railway=train_station_entrance](1,2,3,4);" + "rel[public_transport=stop_area](1,2,3,4);" + "rel(br)[type=public_transport][public_transport=stop_area_group];" # noqa E501 + "(" + 'rel[route="light_rail"](5,6,7,8);' + 
'rel[route="monorail"](5,6,7,8);' + 'rel[route="subway"](5,6,7,8);' + 'rel[route="train"](5,6,7,8);' + ");" + "rel(br)[type=route_master];" + "node[railway=subway_entrance](5,6,7,8);" + "node[railway=train_station_entrance](5,6,7,8);" + "rel[public_transport=stop_area](5,6,7,8);" + "rel(br)[type=public_transport][public_transport=stop_area_group];" # noqa E501 + ");" + "(._;>>;);" + "out body center qt;" + ), + True: ( + "[out:json][timeout:1000];" + "(" + "(" + 'rel[route="aerialway"](1,2,3,4);' + 'rel[route="bus"](1,2,3,4);' + 'rel[route="ferry"](1,2,3,4);' + 'rel[route="tram"](1,2,3,4);' + 'rel[route="trolleybus"](1,2,3,4);' + ");" + "rel(br)[type=route_master];" + "rel[public_transport=stop_area](1,2,3,4);" + "rel(br)[type=public_transport][public_transport=stop_area_group];" # noqa E501 + "(" + 'rel[route="aerialway"](5,6,7,8);' + 'rel[route="bus"](5,6,7,8);' + 'rel[route="ferry"](5,6,7,8);' + 'rel[route="tram"](5,6,7,8);' + 'rel[route="trolleybus"](5,6,7,8);' + ");" + "rel(br)[type=route_master];" + "rel[public_transport=stop_area](5,6,7,8);" + "rel(br)[type=public_transport][public_transport=stop_area_group];" # noqa E501 + ");" + "(._;>>;);" + "out body center qt;" + ), + } + + for overground, expected_answer in expected.items(): + with self.subTest(msg=f"{overground=}"): + self.assertEqual( + expected_answer, + compose_overpass_request(overground, bboxes), + ) + + def test__overpass_request(self) -> None: + overpass_api = "http://overpass.example/" + overground = False + bboxes = [[1, 2, 3, 4]] + expected_url = ( + "http://overpass.example/?data=" + "%5Bout%3Ajson%5D%5Btimeout%3A1000%5D%3B%28%28" + "rel%5Broute%3D%22light_rail%22%5D%281%2C2%2C3%2C4" + "%29%3Brel%5Broute%3D%22monorail%22%5D%281%2C2%2C3%2C4%29%3B" + "rel%5Broute%3D%22subway%22%5D%281%2C2%2C3%2C4%29%3B" + "rel%5Broute%3D%22train%22%5D%281%2C2%2C3%2C4%29%3B%29%3B" + "rel%28br%29%5Btype%3Droute_master%5D%3B" + "node%5Brailway%3Dsubway_entrance%5D%281%2C2%2C3%2C4%29%3B" + 
"node%5Brailway%3Dtrain_station_entrance%5D%281%2C2%2C3%2C4%29%3B" + "rel%5Bpublic_transport%3Dstop_area%5D%281%2C2%2C3%2C4%29%3B" + "rel%28br%29%5Btype%3Dpublic_transport%5D%5Bpublic_transport%3D" + "stop_area_group%5D%3B%29%3B" + "%28._%3B%3E%3E%3B%29%3Bout%20body%20center%20qt%3B" + ) + + with mock.patch("process_subways.json.load") as load_mock: + load_mock.return_value = {"elements": []} + + with mock.patch( + "process_subways.urllib.request.urlopen" + ) as urlopen_mock: + urlopen_mock.return_value.getcode.return_value = 200 + + overpass_request(overground, overpass_api, bboxes) + + urlopen_mock.assert_called_once_with(expected_url, timeout=1000) diff --git a/tests/test_station.py b/tests/test_station.py new file mode 100644 index 00000000..2081aaa5 --- /dev/null +++ b/tests/test_station.py @@ -0,0 +1,46 @@ +from unittest import TestCase + +from subway_structure import Station + + +class TestStation(TestCase): + def test__get_modes(self) -> None: + cases = [ + {"element": {"tags": {"railway": "station"}}, "modes": set()}, + { + "element": { + "tags": {"railway": "station", "station": "train"} + }, + "modes": {"train"}, + }, + { + "element": {"tags": {"railway": "station", "train": "yes"}}, + "modes": {"train"}, + }, + { + "element": { + "tags": { + "railway": "station", + "station": "subway", + "train": "yes", + } + }, + "modes": {"subway", "train"}, + }, + { + "element": { + "tags": { + "railway": "station", + "subway": "yes", + "train": "yes", + "light_rail": "yes", + "monorail": "yes", + } + }, + "modes": {"subway", "train", "light_rail", "monorail"}, + }, + ] + for case in cases: + element = case["element"] + expected_modes = case["modes"] + self.assertSetEqual(expected_modes, Station.get_modes(element)) From 1e4e434d49f2c4023456ea43d0611621a5952600 Mon Sep 17 00:00:00 2001 From: Alexey Zakharenkov Date: Tue, 19 Dec 2023 13:44:04 +0300 Subject: [PATCH 04/13] Add --dump-city-list option to untie cities.txt formation from mapsme.json --- README.md | 13 
++++++++++++- mapsme_json_to_cities.py | 12 ++++++++++-- process_subways.py | 21 +++++++++++++++++++-- scripts/process_subways.sh | 6 +++++- subway_structure.py | 6 +++--- tests/sample_data_for_error_messages.py | 2 +- tests/sample_data_for_twin_routes.py | 8 ++++---- 7 files changed, 54 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index e259087a..b987e5f6 100644 --- a/README.md +++ b/README.md @@ -79,13 +79,24 @@ if you allow the `process_subway.py` to fetch data from Overpass API. Here are t python3 ./validation_to_html.py validation.log html ``` +## Publishing validation reports to the Web + +Expose a directory with static contents via a web-server and put into it: +- HTML files from the directory specified in the 2nd parameter of `validation_to_html.py` +- To vitalize "Y" (YAML), "J" (GeoJSON) and "M" (Map) links beside each city name: + - The contents of `render` directory from the repository + - `cities.txt` file generated with `--dump-city-list` parameter of `process_subways.py` + - YAML files created due to -d option of `process_subways.py` + - GeoJSON files created due to -j option of `process_subways.py` + + ## Related external resources Summary information about all metro networks that are monitored is gathered in the [Google Spreadsheet](https://docs.google.com/spreadsheets/d/1SEW1-NiNOnA2qDwievcxYV1FOaQl1mb1fdeyqAxHu3k). Regular updates of validation results are available at -[this website](https://maps.mail.ru/osm/tools/subways/latest/). +[this website](https://maps.vk.com/osm/tools/subways/latest/). You can find more info about this validator instance in [OSM Wiki](https://wiki.openstreetmap.org/wiki/Quality_assurance#subway-preprocessor). diff --git a/mapsme_json_to_cities.py b/mapsme_json_to_cities.py index 1c69a77e..736b74b7 100644 --- a/mapsme_json_to_cities.py +++ b/mapsme_json_to_cities.py @@ -1,7 +1,15 @@ +""" +Generate sorted list of all cities, with [bad] mark for bad cities. + +!!! 
Deprecated for use in validation cycle. +Use "process_subways.py --dump-city-list " instead. +""" + + import argparse import json -from process_subways import DEFAULT_CITIES_INFO_URL, get_cities_info +from process_subways import BAD_MARK, DEFAULT_CITIES_INFO_URL, get_cities_info if __name__ == "__main__": @@ -56,7 +64,7 @@ if ci["name"] in good_cities: lines.append(f"{ci['name']}, {ci['country']}") elif with_bad: - lines.append(f"{ci['name']}, {ci['country']} (Bad)") + lines.append(f"{ci['name']}, {ci['country']} {BAD_MARK}") for line in sorted(lines): print(line) diff --git a/process_subways.py b/process_subways.py index 1fd22621..ca71ed14 100755 --- a/process_subways.py +++ b/process_subways.py @@ -34,6 +34,7 @@ "https://docs.google.com/spreadsheets/d/" f"{DEFAULT_SPREADSHEET_ID}/export?format=csv" ) +BAD_MARK = "[bad]" Point = tuple[float, float] @@ -69,7 +70,7 @@ def overpass_request( overground: bool, overpass_api: str, bboxes: list[list[float]] ) -> list[dict]: query = compose_overpass_request(overground, bboxes) - url = "{}?data={}".format(overpass_api, urllib.parse.quote(query)) + url = f"{overpass_api}?data={urllib.parse.quote(query)}" response = urllib.request.urlopen(url, timeout=1000) if (r_code := response.getcode()) != 200: raise Exception(f"Failed to query Overpass API: HTTP {r_code}") @@ -82,7 +83,7 @@ def multi_overpass( SLICE_SIZE = 10 INTERREQUEST_WAIT = 5 # in seconds result = [] - for i in range(0, len(bboxes) + SLICE_SIZE - 1, SLICE_SIZE): + for i in range(0, len(bboxes), SLICE_SIZE): if i > 0: time.sleep(INTERREQUEST_WAIT) bboxes_i = bboxes[i : i + SLICE_SIZE] # noqa E203 @@ -383,6 +384,14 @@ def main() -> None: type=argparse.FileType("w", encoding="utf-8"), help="Validation JSON file name", ) + parser.add_argument( + "--dump-city-list", + type=argparse.FileType("w", encoding="utf-8"), + help=( + "Dump sorted list of all city names, possibly with " + f"{BAD_MARK} mark" + ), + ) for processor_name, processor in inspect.getmembers( processors, 
inspect.ismodule @@ -496,6 +505,14 @@ def main() -> None: ", ".join(sorted(bad_city_names)), ) + if options.dump_city_list: + lines = sorted( + f"{city.name}, {city.country}" + f"{' ' + BAD_MARK if city.name in bad_city_names else ''}\n" + for city in cities + ) + options.dump_city_list.writelines(lines) + if options.recovery_path: write_recovery_data(options.recovery_path, recovery_data, cities) diff --git a/scripts/process_subways.sh b/scripts/process_subways.sh index 241b3c1b..345dd2de 100755 --- a/scripts/process_subways.sh +++ b/scripts/process_subways.sh @@ -53,6 +53,7 @@ Environment variable reference: - GIT_PULL: set to 1 to update the scripts - TMPDIR: path to temporary files - HTML_DIR: target path for generated HTML files + - DUMP_CITY_LIST: file name to save sorted list of cities, with [bad] mark for bad cities - SERVER: server name and path to upload HTML files (e.g. ilya@osmz.ru:/var/www/) - SERVER_KEY: rsa key to supply for uploading the files - REMOVE_HTML: set to 1 to remove \$HTML_DIR after uploading @@ -246,7 +247,10 @@ VALIDATION="$TMPDIR/validation.json" ${CITIES_INFO_URL:+--cities-info-url "$CITIES_INFO_URL"} \ ${MAPSME:+--output-mapsme "$MAPSME"} \ ${GTFS:+--output-gtfs "$GTFS"} \ - ${CITY:+-c "$CITY"} ${DUMP:+-d "$DUMP"} ${GEOJSON:+-j "$GEOJSON"} \ + ${CITY:+-c "$CITY"} \ + ${DUMP:+-d "$DUMP"} \ + ${GEOJSON:+-j "$GEOJSON"} \ + ${DUMP_CITY_LIST:+--dump-city-list "$DUMP_CITY_LIST"} \ ${ELEMENTS_CACHE:+-i "$ELEMENTS_CACHE"} \ ${CITY_CACHE:+--cache "$CITY_CACHE"} \ ${RECOVERY_PATH:+-r "$RECOVERY_PATH"} diff --git a/subway_structure.py b/subway_structure.py index e55bf715..7946e475 100644 --- a/subway_structure.py +++ b/subway_structure.py @@ -1024,7 +1024,7 @@ def process_stop_members(self): continue if Station.is_station(el, self.city.modes): - # A station may be not included into this route due to previous + # A station may be not included in this route due to previous # 'stop area has multiple stations' error. 
No other error # message is needed. pass @@ -2085,7 +2085,7 @@ def alert_twin_routes_differ(self, route1: Route, route2: Route) -> None: ): self.notice( f"Stop {st.stoparea.station.name} {st.stop} is included " - f"into the {route2.id} but not included into {route1.id}", + f"in the {route2.id} but not included in {route1.id}", route1.element, ) @@ -2103,7 +2103,7 @@ def alert_twin_routes_differ(self, route1: Route, route2: Route) -> None: ): self.notice( f"Stop {st.stoparea.station.name} {st.stop} is included " - f"into the {route1.id} but not included into {route2.id}", + f"in the {route1.id} but not included in {route2.id}", route2.element, ) diff --git a/tests/sample_data_for_error_messages.py b/tests/sample_data_for_error_messages.py index 245cfbbe..2e20c732 100644 --- a/tests/sample_data_for_error_messages.py +++ b/tests/sample_data_for_error_messages.py @@ -342,7 +342,7 @@ 'Only one route in route_master. Please check if it needs a return route (relation 159, "C: 1-3-5-1")', # noqa: E501 'Route does not have a return direction (relation 163, "04: 1-2-3")', # noqa: E501 'Route does not have a return direction (relation 164, "04: 2-1")', # noqa: E501 - 'Stop Station 2 (1.0, 0.0) is included into the r203 but not included into r204 (relation 204, "2: 3-1")', # noqa: E501 + 'Stop Station 2 (1.0, 0.0) is included in the r203 but not included in r204 (relation 204, "2: 3-1")', # noqa: E501 'Route does not have a return direction (relation 205, "3: 1-2-3")', # noqa: E501 'Route does not have a return direction (relation 206, "3: 1-2-3")', # noqa: E501 'Route does not have a return direction (relation 207, "4: 4-3-2-1")', # noqa: E501 diff --git a/tests/sample_data_for_twin_routes.py b/tests/sample_data_for_twin_routes.py index 58b9e17f..5847632a 100644 --- a/tests/sample_data_for_twin_routes.py +++ b/tests/sample_data_for_twin_routes.py @@ -33,7 +33,7 @@ 'Route does not have a return direction (relation 157, "02: 4-1")', 'Route does not have a return direction 
(relation 158, "02: 1-3 (2)")', # noqa: E501 'Only one route in route_master. Please check if it needs a return route (relation 159, "C: 1-2-3-4-5-1")', # noqa: E501 - 'Stop Station 4 (3.0, 0.0) is included into the r205 but not included into r206 (relation 206, "3: 7-6-5-3-2-1")', # noqa: E501 + 'Stop Station 4 (3.0, 0.0) is included in the r205 but not included in r206 (relation 206, "3: 7-6-5-3-2-1")', # noqa: E501 'Route does not have a return direction (relation 207, "4: 4-3-2-1")', # noqa: E501 'Route does not have a return direction (relation 208, "4: 1-2-3-4")', # noqa: E501 'Route does not have a return direction (relation 209, "5: 1-2-3-5-6-7")', # noqa: E501 @@ -66,12 +66,12 @@ "notices": [ 'Should there be one stoparea or a transfer between Station 11 (0.1, 0.0) and Station 11(1) (0.1, 0.0003)? (relation 101, "1: 1-...-9-10-11-...-20")', # noqa: E501 'Should there be one stoparea or a transfer between Station 10 (0.09, 0.0) and Station 10(1) (0.09, 0.0003)? (relation 101, "1: 1-...-9-10-11-...-20")', # noqa: E501 - 'Stop Station 10 (0.09, 0.0) is included into the r105 but not included into r106 (relation 106, "3: 20-...-12-11(1)-9-...-1")', # noqa: E501 + 'Stop Station 10 (0.09, 0.0) is included in the r105 but not included in r106 (relation 106, "3: 20-...-12-11(1)-9-...-1")', # noqa: E501 'Should there be one stoparea or a transfer between Station 11 (0.1, 0.0) and Station 11(1) (0.1, 0.0003)? (relation 105, "3: 1-...-9-10-11-...-20")', # noqa: E501 - 'Stop Station 10 (0.09, 0.0) is included into the r107 but not included into r108 (relation 108, "4: 20-...12-11(2)-9-...-1")', # noqa: E501 + 'Stop Station 10 (0.09, 0.0) is included in the r107 but not included in r108 (relation 108, "4: 20-...12-11(2)-9-...-1")', # noqa: E501 'Should there be one stoparea or a transfer between Station 11 (0.1, 0.0) and Station 11(1) (0.1, 0.0003)? 
(relation 201, "11: 1-...-9-10-11-...-20")', # noqa: E501 'Should there be one stoparea or a transfer between Station 10 (0.09, 0.0) and Station 10(1) (0.09, 0.0003)? (relation 201, "11: 1-...-9-10-11-...-20")', # noqa: E501 - 'Stop Station 10 (0.09, 0.0) is included into the r205 but not included into r206 (relation 206, "13: 20-...-12-11(1)-9-...-1")', # noqa: E501 + 'Stop Station 10 (0.09, 0.0) is included in the r205 but not included in r206 (relation 206, "13: 20-...-12-11(1)-9-...-1")', # noqa: E501 'Should there be one stoparea or a transfer between Station 11 (0.1, 0.0) and Station 11(1) (0.1, 0.0003)? (relation 205, "13: 1-...-9-10-11-...-20")', # noqa: E501 ], }, From e449c98a7f13bc7ad8969d2fb93ebda5c6641ce9 Mon Sep 17 00:00:00 2001 From: Alexey Zakharenkov Date: Tue, 26 Dec 2023 12:18:52 +0300 Subject: [PATCH 05/13] Mixin for comparison of json-like python structures --- tests/test_build_tracks.py | 4 +- tests/test_storage.py | 22 ++- tests/util.py | 265 +++++++++++++++++++++++++------------ 3 files changed, 202 insertions(+), 89 deletions(-) diff --git a/tests/test_build_tracks.py b/tests/test_build_tracks.py index a1b6a6c6..b694bbef 100644 --- a/tests/test_build_tracks.py +++ b/tests/test_build_tracks.py @@ -1,8 +1,8 @@ from tests.sample_data_for_build_tracks import metro_samples -from tests.util import TestCase +from tests.util import JsonLikeComparisonMixin, TestCase -class TestOneRouteTracks(TestCase): +class TestOneRouteTracks(JsonLikeComparisonMixin, TestCase): """Test tracks extending and truncating on one-route networks""" def prepare_city_routes(self, metro_sample: dict) -> tuple: diff --git a/tests/test_storage.py b/tests/test_storage.py index 978529f0..042f4284 100644 --- a/tests/test_storage.py +++ b/tests/test_storage.py @@ -1,11 +1,12 @@ import json +from operator import itemgetter from processors._common import transit_to_dict from tests.sample_data_for_outputs import metro_samples -from tests.util import TestCase, TestTransitDataMixin 
+from tests.util import JsonLikeComparisonMixin, TestCase -class TestStorage(TestCase, TestTransitDataMixin): +class TestStorage(JsonLikeComparisonMixin, TestCase): def test_storage(self) -> None: for sample in metro_samples: with self.subTest(msg=sample["name"]): @@ -21,6 +22,21 @@ def _test_storage_for_sample(self, metro_sample: dict) -> None: map(tuple, control_transit_data["transfers"]) ) - self.compare_transit_data( + self._compare_transit_data( calculated_transit_data, control_transit_data ) + + def _compare_transit_data( + self, transit_data1: dict, transit_data2: dict + ) -> None: + id_cmp = itemgetter("id") + + self.assertMappingAlmostEqual( + transit_data1, + transit_data2, + unordered_lists={ + "routes": id_cmp, + "itineraries": id_cmp, + "entrances": id_cmp, + }, + ) diff --git a/tests/util.py b/tests/util.py index 56b1962e..b8e29a95 100644 --- a/tests/util.py +++ b/tests/util.py @@ -1,8 +1,7 @@ import io -from collections.abc import Sequence, Mapping -from operator import itemgetter +from collections.abc import Callable, Mapping, Sequence from pathlib import Path -from typing import Any +from typing import Any, TypeAlias, Self from unittest import TestCase as unittestTestCase from process_subways import ( @@ -13,6 +12,8 @@ from subway_io import load_xml from subway_structure import City, find_transfers +TestCaseMixin: TypeAlias = Self | unittestTestCase + class TestCase(unittestTestCase): """TestCase class for testing the Subway Validator""" @@ -75,41 +76,82 @@ def assign_unique_id(city_info: dict, cities_info: list[dict]) -> None: transfers = find_transfers(elements, cities) return cities, transfers + +class JsonLikeComparisonMixin: + """Contains auxiliary methods for the TestCase class that allow + to compare json-like structures where some lists do not imply order + and actually represent sets. + Also, all collections compare floats with given precision to any nesting + depth. 
+ """ + def _assertAnyAlmostEqual( - self, + self: TestCaseMixin, first: Any, second: Any, places: int = 10, - ignore_keys: set = None, + *, + unordered_lists: dict[str, Callable] | None = None, + ignore_keys: set[str] | None = None, ) -> None: """Dispatcher method to other "...AlmostEqual" methods depending on argument types. + + Compare dictionaries/lists recursively, numeric values being compared + approximately. + + :param: first a value of arbitrary type, including collections + :param: second a value of arbitrary type, including collections + :param: places number of fractional digits. Is passed to + the self.assertAlmostEqual() method. + :param: unordered_lists a dict whose keys are names of lists + to be compared without order, values - comparators for + the lists to sort them in an unambiguous order. If a comparator + is None, then the lists are compared as sets. + :param: ignore_keys a set of strs with keys that should be ignored + during recursive comparison of dictionaries. May be used to + elaborate a custom comparison mechanism for some substructures. 
+ :return: None """ - if isinstance(first, Mapping): - self.assertMappingAlmostEqual(first, second, places, ignore_keys) - elif isinstance(first, Sequence) and not isinstance( - first, (str, bytes) + if all(isinstance(x, Mapping) for x in (first, second)): + self.assertMappingAlmostEqual( + first, + second, + places, + unordered_lists=unordered_lists, + ignore_keys=ignore_keys, + ) + elif all( + isinstance(x, Sequence) and not isinstance(x, (str, bytes)) + for x in (first, second) ): - self.assertSequenceAlmostEqual(first, second, places, ignore_keys) - else: + self.assertSequenceAlmostEqual( + first, + second, + places, + unordered_lists=unordered_lists, + ignore_keys=ignore_keys, + ) + elif isinstance(first, float) and isinstance(second, float): self.assertAlmostEqual(first, second, places) + else: + self.assertEqual(first, second) def assertSequenceAlmostEqual( - self, + self: TestCaseMixin, seq1: Sequence, seq2: Sequence, places: int = 10, - ignore_keys: set = None, + *, + unordered_lists: dict[str, Callable] | None = None, + ignore_keys: set[str] | None = None, ) -> None: """Compare two sequences, items of numeric types being compared approximately, containers being approx-compared recursively. 
- :param: seq1 a sequence of values of any types, including collections - :param: seq2 a sequence of values of any types, including collections - :param: places number of fractional digits (passed to - assertAlmostEqual() method of parent class) - :param: ignore_keys a set of strs with keys in dictionaries - that should be ignored during recursive comparison + :param: places see _assertAnyAlmostEqual() method + :param: unordered_lists see _assertAnyAlmostEqual() method + :param: ignore_keys see _assertAnyAlmostEqual() method :return: None """ if not (isinstance(seq1, Sequence) and isinstance(seq2, Sequence)): @@ -119,26 +161,99 @@ def assertSequenceAlmostEqual( ) self.assertEqual(len(seq1), len(seq2)) for a, b in zip(seq1, seq2): - self._assertAnyAlmostEqual(a, b, places, ignore_keys) + self._assertAnyAlmostEqual( + a, + b, + places, + unordered_lists=unordered_lists, + ignore_keys=ignore_keys, + ) + + def assertSequenceAlmostEqualIgnoreOrder( + self: TestCaseMixin, + seq1: Sequence, + seq2: Sequence, + cmp: Callable | None, + places: int = 10, + *, + unordered_lists: dict[str, Callable] | None = None, + ignore_keys: set[str] | None = None, + ) -> None: + """Compares two sequences as sets, i.e. ignoring order. Nested + lists determined with unordered_lists parameter are also compared + without order. + + :param: cmp if None then compare sequences as sets. If elements are + not hashable then this method is inapplicable and the + sorted (with the comparator) sequences are compared. 
+ :param: places see _assertAnyAlmostEqual() method + :param: unordered_lists see _assertAnyAlmostEqual() method + :param: ignore_keys see _assertAnyAlmostEqual() method + :return: None + """ + if cmp is not None: + v1 = sorted(seq1, key=cmp) + v2 = sorted(seq2, key=cmp) + self.assertSequenceAlmostEqual( + v1, + v2, + places, + unordered_lists=unordered_lists, + ignore_keys=ignore_keys, + ) + else: + self.assertEqual(len(seq1), len(seq2)) + v1 = set(seq1) + v2 = set(seq2) + self.assertSetEqual(v1, v2) def assertMappingAlmostEqual( - self, + self: TestCaseMixin, d1: Mapping, d2: Mapping, places: int = 10, - ignore_keys: set = None, + *, + unordered_lists: dict[str, Callable] | None = None, + ignore_keys: set[str] | None = None, ) -> None: """Compare dictionaries recursively, numeric values being compared - approximately. + approximately, some lists being compared without order. + + :param: places see _assertAnyAlmostEqual() method + :param: unordered_lists see _assertAnyAlmostEqual() method + Example 1: + d1 = { + "name_from_unordered_list": [a1, b1, c1], + "some_other_name": [e1, f1, g1], + } + d2 = { + "name_from_unordered_list": [a2, b2, c2], + "some_other_name": [e2, f2, g2], + } + Lists [a1, b1, c1] and [a2, b2, c2] will be compared + without order, lists [e1, f1, g1] and [e2, f2, g2] - + considering the order. 
- :param: d1 a mapping of arbitrary key/value types, - including collections - :param: d1 a mapping of arbitrary key/value types, - including collections - :param: places number of fractional digits (passed to - assertAlmostEqual() method of parent class) - :param: ignore_keys a set of strs with keys in dictionaries - that should be ignored during recursive comparison + Example 2: + d1 = { + "name_from_unordered_list": { + "key1": [a1, b1, c1], + "key2": [e1, f1, g1], + }, + "some_other_name": [h1, i1, k1], + } + d2 = { + "name_from_unordered_list": { + "key1": [a2, b2, c2], + "key2": [e2, f2, g2], + }, + "some_other_name": [h2, i2, k2], + } + Lists [a1, b1, c1] and [a2, b2, c2] will be compared + without order, as well as [e1, f1, g1] and + [e2, f2, g2]; lists [h1, i1, k1] and [h2, i2, k2] - + considering the order. + :param: ignore_keys see _assertAnyAlmostEqual() method :return: None """ if not (isinstance(d1, Mapping) and isinstance(d2, Mapping)): @@ -153,60 +268,42 @@ def assertMappingAlmostEqual( d1_keys -= ignore_keys d2_keys -= ignore_keys self.assertSetEqual(d1_keys, d2_keys) + + if unordered_lists is None: + unordered_lists = {} + for k in d1_keys: v1 = d1[k] v2 = d2[k] - self._assertAnyAlmostEqual(v1, v2, places, ignore_keys) - - -class TestTransitDataMixin: - def compare_transit_data(self, td1: dict, td2: dict) -> None: - """Compare transit data td1 and td2 remembering that: - - arrays that represent sets ("routes", "itineraries", "entrances") - should be compared without order; - - all floating-point values (coordinates) should be compared - approximately. 
- """ - self.assertMappingAlmostEqual( - td1, - td2, - ignore_keys={"stopareas", "routes", "itineraries"}, - ) - - networks1 = td1["networks"] - networks2 = td2["networks"] - - id_cmp = itemgetter("id") - - for network_name, network_data1 in networks1.items(): - network_data2 = networks2[network_name] - routes1 = sorted(network_data1["routes"], key=id_cmp) - routes2 = sorted(network_data2["routes"], key=id_cmp) - self.assertEqual(len(routes1), len(routes2)) - for r1, r2 in zip(routes1, routes2): - self.assertMappingAlmostEqual( - r1, r2, ignore_keys={"itineraries"} + if (cmp := unordered_lists.get(k, "")) == "" or not isinstance( + v1, (Sequence, Mapping) + ): + self._assertAnyAlmostEqual( + v1, + v2, + places, + unordered_lists=unordered_lists, + ignore_keys=ignore_keys, + ) + elif isinstance(v1, Sequence): + self.assertSequenceAlmostEqualIgnoreOrder( + v1, + v2, + cmp, + places, + unordered_lists=unordered_lists, + ignore_keys=ignore_keys, ) - its1 = sorted(r1["itineraries"], key=id_cmp) - its2 = sorted(r2["itineraries"], key=id_cmp) - self.assertEqual(len(its1), len(its2)) - for it1, it2 in zip(its1, its2): - self.assertMappingAlmostEqual(it1, it2) - - transfers1 = td1["transfers"] - transfers2 = td2["transfers"] - self.assertSetEqual(transfers1, transfers2) - - stopareas1 = td1["stopareas"] - stopareas2 = td2["stopareas"] - self.assertMappingAlmostEqual( - stopareas1, stopareas2, ignore_keys={"entrances"} - ) - - for sa_id, sa1_data in stopareas1.items(): - sa2_data = stopareas2[sa_id] - entrances1 = sorted(sa1_data["entrances"], key=id_cmp) - entrances2 = sorted(sa2_data["entrances"], key=id_cmp) - self.assertEqual(len(entrances1), len(entrances2)) - for e1, e2 in zip(entrances1, entrances2): - self.assertMappingAlmostEqual(e1, e2) + else: + self.assertSetEqual(set(v1.keys()), set(v2.keys())) + for ik in v1.keys(): + iv1 = v1[ik] + iv2 = v2[ik] + self.assertSequenceAlmostEqualIgnoreOrder( + iv1, + iv2, + cmp, + places, + unordered_lists=unordered_lists, + 
ignore_keys=ignore_keys, + ) From f7087a0c25c44e74f3c57a2f54cfa2cd94afe5c0 Mon Sep 17 00:00:00 2001 From: Alexey Zakharenkov Date: Fri, 2 Feb 2024 10:14:47 +0300 Subject: [PATCH 06/13] Use stoparea ids instead of instances in transfers; save transfers only for good cities --- process_subways.py | 2 +- processors/_common.py | 15 +- processors/mapsme.py | 48 +++-- subway_structure.py | 122 +++++++----- tests/sample_data_for_outputs.py | 316 +++++++++++++++++++++++++++++++ tests/test_find_transfers.py | 30 +++ tests/test_mapsme_processor.py | 53 ++++++ tests/util.py | 2 +- 8 files changed, 520 insertions(+), 68 deletions(-) create mode 100644 tests/test_find_transfers.py create mode 100644 tests/test_mapsme_processor.py diff --git a/process_subways.py b/process_subways.py index ca71ed14..6f184532 100755 --- a/process_subways.py +++ b/process_subways.py @@ -490,7 +490,7 @@ def main() -> None: good_cities = validate_cities(cities) logging.info("Finding transfer stations") - transfers = find_transfers(osm, cities) + transfers = find_transfers(osm, good_cities) good_city_names = set(c.name for c in good_cities) logging.info( diff --git a/processors/_common.py b/processors/_common.py index e9337190..edb19f43 100644 --- a/processors/_common.py +++ b/processors/_common.py @@ -91,18 +91,17 @@ def transit_to_dict( # transfers pairwise_transfers = set() - for stoparea_set in transfers: - stoparea_list = list(stoparea_set) - for first_i in range(len(stoparea_list) - 1): - for second_i in range(first_i + 1, len(stoparea_list)): - stoparea1_id = stoparea_list[first_i].id - stoparea2_id = stoparea_list[second_i].id + for stoparea_id_set in transfers: + stoparea_ids = sorted(stoparea_id_set) + for first_i in range(len(stoparea_ids) - 1): + for second_i in range(first_i + 1, len(stoparea_ids)): + stoparea1_id = stoparea_ids[first_i] + stoparea2_id = stoparea_ids[second_i] if all( st_id in data["stopareas"] for st_id in (stoparea1_id, stoparea2_id) ): - id1, id2 = 
sorted([stoparea1_id, stoparea2_id]) - pairwise_transfers.add((id1, id2)) + pairwise_transfers.add((stoparea1_id, stoparea2_id)) data["transfers"] = pairwise_transfers return data diff --git a/processors/mapsme.py b/processors/mapsme.py index b8818ea5..2f3ec6f2 100755 --- a/processors/mapsme.py +++ b/processors/mapsme.py @@ -4,10 +4,12 @@ from collections import defaultdict from subway_structure import ( + City, DISPLACEMENT_TOLERANCE, distance, el_center, Station, + TransfersT, ) from ._common import ( DEFAULT_INTERVAL, @@ -180,11 +182,12 @@ def save(self): logging.warning("Failed to save cache: %s", str(e)) -def process(cities, transfers, filename, cache_path): +def transit_data_to_mapsme( + cities: list[City], transfers: TransfersT, cache_path: str | None +) -> dict: """Generate all output and save to file. :param cities: List of City instances :param transfers: List of sets of StopArea.id - :param filename: Path to file to save the result :param cache_path: Path to json-file with good cities cache or None. 
""" @@ -362,18 +365,21 @@ def find_exits_for_platform(center, nodes): pairwise_transfers = ( {} ) # (stoparea1_uid, stoparea2_uid) -> time; uid1 < uid2 - for t_set in transfers: - t = list(t_set) - for t_first in range(len(t) - 1): - for t_second in range(t_first + 1, len(t)): - stoparea1 = t[t_first] - stoparea2 = t[t_second] - if stoparea1.id in stops and stoparea2.id in stops: - uid1 = uid(stoparea1.id) - uid2 = uid(stoparea2.id) + for stoparea_id_set in transfers: + stoparea_ids = list(stoparea_id_set) + for i_first in range(len(stoparea_ids) - 1): + for i_second in range(i_first + 1, len(stoparea_ids)): + stoparea1_id = stoparea_ids[i_first] + stoparea2_id = stoparea_ids[i_second] + if stoparea1_id in stops and stoparea2_id in stops: + uid1 = uid(stoparea1_id) + uid2 = uid(stoparea2_id) uid1, uid2 = sorted([uid1, uid2]) transfer_time = TRANSFER_PENALTY + round( - distance(stoparea1.center, stoparea2.center) + distance( + stop_areas[stoparea1_id].center, + stop_areas[stoparea2_id].center, + ) / SPEED_ON_TRANSFER ) pairwise_transfers[(uid1, uid2)] = transfer_time @@ -392,13 +398,29 @@ def find_exits_for_platform(center, nodes): "transfers": pairwise_transfers, "networks": networks, } + return result + +def process( + cities: list[City], + transfers: TransfersT, + filename: str, + cache_path: str | None, +): + """Generate all output and save to file. + :param cities: List of City instances + :param transfers: List of sets of StopArea.id + :param filename: Path to file to save the result + :param cache_path: Path to json-file with good cities cache or None. 
+ """ if not filename.lower().endswith("json"): filename = f"{filename}.json" + mapsme_transit = transit_data_to_mapsme(cities, transfers, cache_path) + with open(filename, "w", encoding="utf-8") as f: json.dump( - result, + mapsme_transit, f, indent=1, ensure_ascii=False, diff --git a/subway_structure.py b/subway_structure.py index 7946e475..c7e73275 100644 --- a/subway_structure.py +++ b/subway_structure.py @@ -3,6 +3,7 @@ import math import re from collections import Counter, defaultdict +from collections.abc import Collection, Iterator from itertools import chain, islice from css_colours import normalize_colour @@ -45,6 +46,10 @@ START_END_TIMES_RE = re.compile(r".*?(\d{2}):(\d{2})-(\d{2}):(\d{2}).*") +IdT = str # Type of feature ids +TransferT = set[IdT] # A transfer is a set of StopArea IDs +TransfersT = Collection[TransferT] + def get_start_end_times(opening_hours): """Very simplified method to parse OSM opening_hours tag. @@ -664,6 +669,14 @@ def get_interval(tags): return None return osm_interval_to_seconds(v) + def stopareas(self) -> Iterator[StopArea]: + yielded_stopareas = set() + for route_stop in self: + stoparea = route_stop.stoparea + if stoparea not in yielded_stopareas: + yield stoparea + yielded_stopareas.add(stoparea) + def __init__(self, relation, city, master=None): assert Route.is_route( relation, city.modes @@ -1465,6 +1478,14 @@ def __init__(self, master=None): self.name = None self.interval = None + def stopareas(self) -> Iterator[StopArea]: + yielded_stopareas = set() + for route in self: + for stoparea in route.stopareas(): + if stoparea not in yielded_stopareas: + yield stoparea + yielded_stopareas.add(stoparea) + def add(self, route, city): if not self.network: self.network = route.network @@ -1682,7 +1703,7 @@ def __init__(self, city_data, overground=False): self.stop_areas = defaultdict( list ) # El_id → list of stop_area elements it belongs to - self.transfers = [] # List of lists of stop areas + self.transfers: TransfersT = [] # 
List of sets of stop areas self.station_ids = set() # Set of stations' uid self.stops_and_platforms = set() # Set of stops and platforms el_id self.recovery_data = None @@ -1787,18 +1808,19 @@ def add(self, el): else: stop_areas.append(el) - def make_transfer(self, sag): + def make_transfer(self, stoparea_group: dict) -> None: transfer = set() - for m in sag["members"]: + for m in stoparea_group["members"]: k = el_id(m) el = self.elements.get(k) if not el: - # A sag member may validly not belong to the city while - # the sag does - near the city bbox boundary + # A stoparea_group member may validly not belong to the city + # while the stoparea_group does - near the city bbox boundary continue if "tags" not in el: self.warn( - "An untagged object {} in a stop_area_group".format(k), sag + "An untagged object {} in a stop_area_group".format(k), + stoparea_group, ) continue if ( @@ -1825,7 +1847,7 @@ def make_transfer(self, sag): k ) ) - stoparea.transfer = el_id(sag) + stoparea.transfer = el_id(stoparea_group) if len(transfer) > 1: self.transfers.append(transfer) @@ -1918,20 +1940,28 @@ def extract_routes(self) -> None: self.make_transfer(el) # Filter transfers, leaving only stations that belong to routes - used_stop_areas = set() - for rmaster in self.routes.values(): - for route in rmaster: - used_stop_areas.update([s.stoparea for s in route.stops]) - new_transfers = [] - for transfer in self.transfers: - new_tr = [s for s in transfer if s in used_stop_areas] - if len(new_tr) > 1: - new_transfers.append(new_tr) - self.transfers = new_transfers + own_stopareas = set(self.stopareas()) + + self.transfers = [ + inner_transfer + for inner_transfer in ( + own_stopareas.intersection(transfer) + for transfer in self.transfers + ) + if len(inner_transfer) > 1 + ] def __iter__(self): return iter(self.routes.values()) + def stopareas(self) -> Iterator[StopArea]: + yielded_stopareas = set() + for route_master in self: + for stoparea in route_master.stopareas(): + if stoparea 
not in yielded_stopareas: + yield stoparea + yielded_stopareas.add(stoparea) + @property def is_good(self): if not (self.errors or self.validate_called): @@ -2306,36 +2336,38 @@ def calculate_distances(self) -> None: route.calculate_distances() -def find_transfers(elements, cities): +def find_transfers( + elements: list[dict], cities: Collection[City] +) -> TransfersT: + """As for now, two Cities may contain the same stoparea, but those + StopArea instances would have different python id. So we don't store + references to StopAreas, but only their ids. This is important at + inter-city interchanges. + """ + stop_area_groups = [ + el + for el in elements + if el["type"] == "relation" + and "members" in el + and el.get("tags", {}).get("public_transport") == "stop_area_group" + ] + + stopareas_in_cities_ids = set( + stoparea.id + for city in cities + if city.is_good + for stoparea in city.stopareas() + ) + transfers = [] - stop_area_groups = [] - for el in elements: - if ( - el["type"] == "relation" - and "members" in el - and el.get("tags", {}).get("public_transport") == "stop_area_group" - ): - stop_area_groups.append(el) - - # StopArea.id uniquely identifies a StopArea. We must ensure StopArea - # uniqueness since one stop_area relation may result in - # several StopArea instances at inter-city interchanges. 
- stop_area_ids = defaultdict(set) # el_id -> set of StopArea.id - stop_area_objects = dict() # StopArea.id -> one of StopArea instances - for city in cities: - for el, st in city.stations.items(): - stop_area_ids[el].update(sa.id for sa in st) - stop_area_objects.update((sa.id, sa) for sa in st) - - for sag in stop_area_groups: - transfer = set() - for m in sag["members"]: - k = el_id(m) - if k not in stop_area_ids: - continue - transfer.update( - stop_area_objects[sa_id] for sa_id in stop_area_ids[k] + for stop_area_group in stop_area_groups: + transfer: TransferT = set( + member_id + for member_id in ( + el_id(member) for member in stop_area_group["members"] ) + if member_id in stopareas_in_cities_ids + ) if len(transfer) > 1: transfers.append(transfer) return transfers diff --git a/tests/sample_data_for_outputs.py b/tests/sample_data_for_outputs.py index 29012d5c..b50ddbe2 100644 --- a/tests/sample_data_for_outputs.py +++ b/tests/sample_data_for_outputs.py @@ -21,6 +21,7 @@ }, ], "gtfs_dir": "assets/tiny_world_gtfs", + "transfers": [{"r1", "r2"}, {"r3", "r4"}], "json_dump": """ { "stopareas": { @@ -366,5 +367,320 @@ ] } """, + "mapsme_output": { + "stops": [ + { + "name": "Station 1", + "int_name": None, + "lat": 0.0, + "lon": 0.0, + "osm_type": "node", + "osm_id": 1, + "id": 8, + "entrances": [ + { + "osm_type": "node", + "osm_id": 1, + "lon": 0.0, + "lat": 0.0, + "distance": 60, + } + ], + "exits": [ + { + "osm_type": "node", + "osm_id": 1, + "lon": 0.0, + "lat": 0.0, + "distance": 60, + } + ], + }, + { + "name": "Station 2", + "int_name": None, + "lat": 0.0047037307, + "lon": 0.00470373068, + "osm_type": "node", + "osm_id": 2, + "id": 14, + "entrances": [ + { + "osm_type": "node", + "osm_id": 2, + "lon": 0.0047209447, + "lat": 0.004686516680000001, + "distance": 60, + } + ], + "exits": [ + { + "osm_type": "node", + "osm_id": 2, + "lon": 0.0047209447, + "lat": 0.004686516680000001, + "distance": 60, + } + ], + }, + { + "name": "Station 3", + "int_name": None, 
+ "lat": 0.0097589171, + "lon": 0.01012040581, + "osm_type": "node", + "osm_id": 3, + "id": 30, + "entrances": [ + { + "osm_type": "node", + "osm_id": 201, + "lon": 0.01007169217, + "lat": 0.00967473055, + "distance": 68, + }, + { + "osm_type": "node", + "osm_id": 202, + "lon": 0.01018702716, + "lat": 0.00966936613, + "distance": 69, + }, + ], + "exits": [ + { + "osm_type": "node", + "osm_id": 201, + "lon": 0.01007169217, + "lat": 0.00967473055, + "distance": 68, + }, + { + "osm_type": "node", + "osm_id": 202, + "lon": 0.01018702716, + "lat": 0.00966936613, + "distance": 69, + }, + ], + }, + { + "name": "Station 4", + "int_name": None, + "lat": 0.01, + "lon": 0.0, + "osm_type": "node", + "osm_id": 4, + "id": 32, + "entrances": [ + { + "osm_type": "node", + "osm_id": 205, + "lon": 0.000201163, + "lat": 0.01015484596, + "distance": 80, + } + ], + "exits": [ + { + "osm_type": "node", + "osm_id": 205, + "lon": 0.000201163, + "lat": 0.01015484596, + "distance": 80, + } + ], + }, + { + "name": "Station 5", + "int_name": None, + "lat": 0.00514739839, + "lon": 0.0047718624, + "osm_type": "node", + "osm_id": 5, + "id": 22, + "entrances": [ + { + "osm_type": "node", + "osm_id": 5, + "lon": 0.0047718624, + "lat": 0.00514739839, + "distance": 60, + } + ], + "exits": [ + { + "osm_type": "node", + "osm_id": 5, + "lon": 0.0047718624, + "lat": 0.00514739839, + "distance": 60, + } + ], + }, + { + "name": "Station 6", + "int_name": None, + "lat": 0.0, + "lon": 0.01, + "osm_type": "node", + "osm_id": 6, + "id": 48, + "entrances": [ + { + "osm_type": "node", + "osm_id": 6, + "lon": 0.01, + "lat": 0.0, + "distance": 60, + } + ], + "exits": [ + { + "osm_type": "node", + "osm_id": 6, + "lon": 0.01, + "lat": 0.0, + "distance": 60, + } + ], + }, + { + "name": "Station 7", + "int_name": None, + "lat": 0.010286367745, + "lon": 0.009716854315, + "osm_type": "node", + "osm_id": 7, + "id": 38, + "entrances": [ + { + "osm_type": "node", + "osm_id": 203, + "lon": 0.00959962338, + "lat": 
0.01042574907, + "distance": 75, + }, + { + "osm_type": "node", + "osm_id": 204, + "lon": 0.00952183932, + "lat": 0.01034796501, + "distance": 76, + }, + ], + "exits": [ + { + "osm_type": "node", + "osm_id": 203, + "lon": 0.00959962338, + "lat": 0.01042574907, + "distance": 75, + }, + { + "osm_type": "node", + "osm_id": 204, + "lon": 0.00952183932, + "lat": 0.01034796501, + "distance": 76, + }, + ], + }, + { + "name": "Station 8", + "int_name": None, + "lat": 0.014377764559999999, + "lon": 0.012405493905, + "osm_type": "node", + "osm_id": 8, + "id": 134, + "entrances": [ + { + "osm_type": "node", + "osm_id": 8, + "lon": 0.012391026016666667, + "lat": 0.01436273297, + "distance": 60, + } + ], + "exits": [ + { + "osm_type": "node", + "osm_id": 8, + "lon": 0.012391026016666667, + "lat": 0.01436273297, + "distance": 60, + } + ], + }, + ], + "transfers": [(14, 22, 81), (30, 38, 106)], + "networks": [ + { + "network": "Intersecting 2 metro lines", + "routes": [ + { + "type": "subway", + "ref": "1", + "name": "Blue Line", + "colour": "0000ff", + "route_id": 30, + "itineraries": [ + { + "stops": [[8, 0], [14, 67], [30, 141]], + "interval": 150, + }, + { + "stops": [[30, 0], [14, 74], [8, 141]], + "interval": 150, + }, + ], + }, + { + "type": "subway", + "ref": "2", + "name": "Red Line", + "colour": "ff0000", + "route_id": 28, + "itineraries": [ + { + "stops": [[32, 0], [22, 68], [48, 142]], + "interval": 150, + }, + { + "stops": [[48, 0], [22, 74], [32, 142]], + "interval": 150, + }, + ], + }, + ], + "agency_id": 1, + }, + { + "network": "One light rail line", + "routes": [ + { + "type": "light_rail", + "ref": "LR", + "name": "LR Line", + "colour": "ffffff", + "route_id": 22, + "itineraries": [ + { + "stops": [[38, 0], [134, 49]], + "interval": 150, + }, + { + "stops": [[134, 0], [38, 48]], + "interval": 150, + }, + ], + "casing": "a52a2a", + } + ], + "agency_id": 2, + }, + ], + }, }, ] diff --git a/tests/test_find_transfers.py b/tests/test_find_transfers.py new file mode 
100644 index 00000000..bb46dc36 --- /dev/null +++ b/tests/test_find_transfers.py @@ -0,0 +1,30 @@ +from copy import deepcopy + +from tests.sample_data_for_outputs import metro_samples +from tests.util import TestCase, JsonLikeComparisonMixin + + +class TestTransfers(JsonLikeComparisonMixin, TestCase): + """Test that the validator provides expected set of transfers.""" + + def _test__find_transfers__for_sample(self, metro_sample: dict) -> None: + cities, transfers = self.prepare_cities(metro_sample) + expected_transfers = metro_sample["transfers"] + + self.assertSequenceAlmostEqualIgnoreOrder( + expected_transfers, + transfers, + cmp=lambda transfer_as_set: sorted(transfer_as_set), + ) + + def test__find_transfers(self) -> None: + sample1 = metro_samples[0] + + sample2 = deepcopy(metro_samples[0]) + # Make the second city invalid and thus exclude the inter-city transfer + sample2["cities_info"][1]["num_stations"] += 1 + sample2["transfers"] = [{"r1", "r2"}] + + for sample in sample1, sample2: + with self.subTest(msg=sample["name"]): + self._test__find_transfers__for_sample(sample) diff --git a/tests/test_mapsme_processor.py b/tests/test_mapsme_processor.py new file mode 100644 index 00000000..64eb9cbd --- /dev/null +++ b/tests/test_mapsme_processor.py @@ -0,0 +1,53 @@ +from operator import itemgetter + +from processors.mapsme import transit_data_to_mapsme +from tests.sample_data_for_outputs import metro_samples +from tests.util import JsonLikeComparisonMixin, TestCase + + +class TestMapsme(JsonLikeComparisonMixin, TestCase): + """Test processors/mapsme.py""" + + def test__transit_data_to_mapsme(self) -> None: + for sample in metro_samples: + with self.subTest(msg=sample["name"]): + self._test__transit_data_to_mapsme__for_sample(sample) + + def _test__transit_data_to_mapsme__for_sample( + self, metro_sample: dict + ) -> None: + cities, transfers = self.prepare_cities(metro_sample) + calculated_mapsme_data = transit_data_to_mapsme( + cities, transfers, cache_path=None 
+ ) + control_mapsme_data = metro_sample["mapsme_output"] + + self.assertSetEqual( + set(control_mapsme_data.keys()), + set(calculated_mapsme_data.keys()), + ) + + self.assertSequenceAlmostEqualIgnoreOrder( + control_mapsme_data["stops"], + calculated_mapsme_data["stops"], + cmp=itemgetter("id"), + unordered_lists={ + "entrances": lambda e: (e["osm_type"], e["osm_id"]), + "exits": lambda e: (e["osm_type"], e["osm_id"]), + }, + ) + + self.assertSequenceAlmostEqualIgnoreOrder( + control_mapsme_data["transfers"], + calculated_mapsme_data["transfers"], + ) + + self.assertSequenceAlmostEqualIgnoreOrder( + control_mapsme_data["networks"], + calculated_mapsme_data["networks"], + cmp=itemgetter("network"), + unordered_lists={ + "routes": itemgetter("route_id"), + "itineraries": lambda it: (it["stops"], it["interval"]), + }, + ) diff --git a/tests/util.py b/tests/util.py index b8e29a95..bfc3fd89 100644 --- a/tests/util.py +++ b/tests/util.py @@ -173,7 +173,7 @@ def assertSequenceAlmostEqualIgnoreOrder( self: TestCaseMixin, seq1: Sequence, seq2: Sequence, - cmp: Callable | None, + cmp: Callable | None = None, places: int = 10, *, unordered_lists: dict[str, Callable] | None = None, From 28f4c0d139891bb5a4e4a8ae31d6301206222808 Mon Sep 17 00:00:00 2001 From: Alexey Zakharenkov Date: Wed, 21 Feb 2024 23:33:07 +0300 Subject: [PATCH 07/13] Special searching for backward counterparts for circular routes --- .gitignore | 1 + subway_io.py | 2 +- subway_structure.py | 456 ++++++++++++++---------- tests/assets/route_masters.osm | 45 ++- tests/assets/twin_routes.osm | 6 +- tests/sample_data_for_error_messages.py | 6 +- tests/test_route_master.py | 92 ++++- 7 files changed, 407 insertions(+), 201 deletions(-) diff --git a/.gitignore b/.gitignore index f2fb32fb..129911ab 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ tmp_html/ html/ .idea .DS_Store +.venv *.log *.json *.geojson diff --git a/subway_io.py b/subway_io.py index cbd252a0..4b025965 100644 --- a/subway_io.py +++ 
b/subway_io.py @@ -88,7 +88,7 @@ def write_yaml(data, f, indent=""): routes = [] for route in city: stations = OrderedDict( - [(sa.transfer or sa.id, sa.name) for sa in route.stop_areas()] + [(sa.transfer or sa.id, sa.name) for sa in route.stopareas()] ) rte = { "type": route.mode, diff --git a/subway_structure.py b/subway_structure.py index c7e73275..d486d90e 100644 --- a/subway_structure.py +++ b/subway_structure.py @@ -5,6 +5,7 @@ from collections import Counter, defaultdict from collections.abc import Collection, Iterator from itertools import chain, islice +from typing import TypeVar from css_colours import normalize_colour @@ -49,6 +50,7 @@ IdT = str # Type of feature ids TransferT = set[IdT] # A transfer is a set of StopArea IDs TransfersT = Collection[TransferT] +T = TypeVar("T") def get_start_end_times(opening_hours): @@ -626,7 +628,7 @@ def __repr__(self): class Route: - """The longest route for a city with a unique ref.""" + """Corresponds to OSM "type=route" relation""" @staticmethod def is_route(el, modes): @@ -677,7 +679,12 @@ def stopareas(self) -> Iterator[StopArea]: yield stoparea yielded_stopareas.add(stoparea) - def __init__(self, relation, city, master=None): + def __init__( + self, + relation: dict, + city: City, + master: dict | None = None, + ) -> None: assert Route.is_route( relation, city.modes ), f"The relation does not seem to be a route: {relation}" @@ -1440,7 +1447,8 @@ def __repr__(self): class RouteMaster: - def __init__(self, master=None): + def __init__(self, city: City, master: dict = None) -> None: + self.city = city self.routes = [] self.best = None self.id = el_id(master) @@ -1486,11 +1494,11 @@ def stopareas(self) -> Iterator[StopArea]: yield stoparea yielded_stopareas.add(stoparea) - def add(self, route, city): + def add(self, route: Route) -> None: if not self.network: self.network = route.network elif route.network and route.network != self.network: - city.error( + self.city.error( 'Route has different network ("{}") from 
master "{}"'.format( route.network, self.network ), @@ -1500,7 +1508,7 @@ def add(self, route, city): if not self.colour: self.colour = route.colour elif route.colour and route.colour != self.colour: - city.notice( + self.city.notice( 'Route "{}" has different colour from master "{}"'.format( route.colour, self.colour ), @@ -1510,7 +1518,7 @@ def add(self, route, city): if not self.infill: self.infill = route.infill elif route.infill and route.infill != self.infill: - city.notice( + self.city.notice( ( f'Route "{route.infill}" has different infill colour ' f'from master "{self.infill}"' @@ -1521,7 +1529,7 @@ def add(self, route, city): if not self.ref: self.ref = route.ref elif route.ref != self.ref: - city.notice( + self.city.notice( 'Route "{}" has different ref from master "{}"'.format( route.ref, self.ref ), @@ -1534,7 +1542,7 @@ def add(self, route, city): if not self.mode: self.mode = route.mode elif route.mode != self.mode: - city.error( + self.city.error( "Incompatible PT mode: master has {} and route has {}".format( self.mode, route.mode ), @@ -1568,8 +1576,8 @@ def get_meaningful_routes(self) -> list[Route]: return [route for route in self if len(route) >= 2] def find_twin_routes(self) -> dict[Route, Route]: - """Two routes are twins if they have the same end stations - and opposite directions, and the number of stations is + """Two non-circular routes are twins if they have the same end + stations and opposite directions, and the number of stations is the same or almost the same. We'll then find stops that are present in one direction and is missing in another direction - to warn. """ @@ -1581,8 +1589,6 @@ def find_twin_routes(self) -> dict[Route, Route]: continue # Difficult to calculate. TODO(?) 
in the future if route in twin_routes: continue - if len(route) < 2: - continue route_transfer_ids = set(route.get_transfers_sequence()) ends = route.get_end_transfers() @@ -1617,15 +1623,253 @@ def find_twin_routes(self) -> dict[Route, Route]: return twin_routes - def stop_areas(self): - """Returns a list of all stations on all route variants.""" - seen_ids = set() - for route in self.routes: - for stop in route: - st = stop.stoparea - if st.id not in seen_ids: - seen_ids.add(st.id) - yield st + def check_return_routes(self) -> None: + """Check if a route has return direction, and if twin routes + miss stations. + """ + meaningful_routes = self.get_meaningful_routes() + + if len(meaningful_routes) == 0: + self.city.error( + f"An empty route master {self.id}. " + "Please set construction:route if it is under construction" + ) + elif len(meaningful_routes) == 1: + log_function = ( + self.city.error + if not self.best.is_circular + else self.city.notice + ) + log_function( + "Only one route in route_master. 
" + "Please check if it needs a return route", + self.best.element, + ) + else: + self.check_return_circular_routes() + self.check_return_noncircular_routes() + + def check_return_noncircular_routes(self) -> None: + routes = [ + route + for route in self.get_meaningful_routes() + if not route.is_circular + ] + all_ends = {route.get_end_transfers(): route for route in routes} + for route in routes: + ends = route.get_end_transfers() + if ends[::-1] not in all_ends: + self.city.notice( + "Route does not have a return direction", route.element + ) + + twin_routes = self.find_twin_routes() + for route1, route2 in twin_routes.items(): + if route1.id > route2.id: + continue # to process a pair of routes only once + # and to ensure the order of routes in the pair + self.alert_twin_routes_differ(route1, route2) + + def check_return_circular_routes(self) -> None: + routes = { + route + for route in self.get_meaningful_routes() + if route.is_circular + } + routes_having_backward = set() + + for route in routes: + if route in routes_having_backward: + continue + transfer_sequence1 = [ + stop.stoparea.transfer or stop.stoparea.id for stop in route + ] + transfer_sequence1.pop() + for potential_backward_route in routes - {route}: + transfer_sequence2 = [ + stop.stoparea.transfer or stop.stoparea.id + for stop in potential_backward_route + ][ + -2::-1 + ] # truncate repeated first stop and reverse + common_subsequence = self.find_common_circular_subsequence( + transfer_sequence1, transfer_sequence2 + ) + if len(common_subsequence) >= 0.8 * min( + len(transfer_sequence1), len(transfer_sequence2) + ): + routes_having_backward.add(route) + routes_having_backward.add(potential_backward_route) + break + + for route in routes - routes_having_backward: + self.city.notice( + "Route does not have a return direction", route.element + ) + + @staticmethod + def find_common_circular_subsequence( + seq1: list[T], seq2: list[T] + ) -> list[T]: + """seq1 and seq2 are supposed to be stops of 
some circular routes. + Prerequisites to rely on the result: + - elements of each sequence are not repeated + - the order of stations is not violated. + Under these conditions we don't need LCS algorithm. Linear scan is + sufficient. + """ + i1, i2 = -1, -1 + for i1, x in enumerate(seq1): + try: + i2 = seq2.index(x) + except ValueError: + continue + else: + # x is found both in seq1 and seq2 + break + + if i2 == -1: + return [] + + # Shift cyclically so that the common element takes the first position + # both in seq1 and seq2 + seq1 = seq1[i1:] + seq1[:i1] + seq2 = seq2[i2:] + seq2[:i2] + + common_subsequence = [] + i2 = 0 + for x in seq1: + try: + i2 = seq2.index(x, i2) + except ValueError: + continue + common_subsequence.append(x) + i2 += 1 + if i2 >= len(seq2): + break + return common_subsequence + + def alert_twin_routes_differ(self, route1: Route, route2: Route) -> None: + """Arguments are that route1.id < route2.id""" + ( + stops_missing_from_route1, + stops_missing_from_route2, + stops_that_dont_match, + ) = self.calculate_twin_routes_diff(route1, route2) + + for st in stops_missing_from_route1: + if ( + not route1.are_tracks_complete() + or ( + projected_point := project_on_line( + st.stoparea.center, route1.tracks + )["projected_point"] + ) + is not None + and distance(st.stoparea.center, projected_point) + <= MAX_DISTANCE_STOP_TO_LINE + ): + self.city.notice( + f"Stop {st.stoparea.station.name} {st.stop} is included " + f"in the {route2.id} but not included in {route1.id}", + route1.element, + ) + + for st in stops_missing_from_route2: + if ( + not route2.are_tracks_complete() + or ( + projected_point := project_on_line( + st.stoparea.center, route2.tracks + )["projected_point"] + ) + is not None + and distance(st.stoparea.center, projected_point) + <= MAX_DISTANCE_STOP_TO_LINE + ): + self.city.notice( + f"Stop {st.stoparea.station.name} {st.stop} is included " + f"in the {route1.id} but not included in {route2.id}", + route2.element, + ) + + for st1, 
st2 in stops_that_dont_match: + if ( + st1.stoparea.station == st2.stoparea.station + or distance(st1.stop, st2.stop) < SUGGEST_TRANSFER_MIN_DISTANCE + ): + self.city.notice( + "Should there be one stoparea or a transfer between " + f"{st1.stoparea.station.name} {st1.stop} and " + f"{st2.stoparea.station.name} {st2.stop}?", + route1.element, + ) + + @staticmethod + def calculate_twin_routes_diff(route1: Route, route2: Route) -> tuple: + """Wagner–Fischer algorithm for stops diff in two twin routes.""" + + stops1 = route1.stops + stops2 = route2.stops[::-1] + + def stops_match(stop1: RouteStop, stop2: RouteStop) -> bool: + return ( + stop1.stoparea == stop2.stoparea + or stop1.stoparea.transfer is not None + and stop1.stoparea.transfer == stop2.stoparea.transfer + ) + + d = [[0] * (len(stops2) + 1) for _ in range(len(stops1) + 1)] + d[0] = list(range(len(stops2) + 1)) + for i in range(len(stops1) + 1): + d[i][0] = i + + for i in range(1, len(stops1) + 1): + for j in range(1, len(stops2) + 1): + d[i][j] = ( + d[i - 1][j - 1] + if stops_match(stops1[i - 1], stops2[j - 1]) + else min((d[i - 1][j], d[i][j - 1], d[i - 1][j - 1])) + 1 + ) + + stops_missing_from_route1: list[RouteStop] = [] + stops_missing_from_route2: list[RouteStop] = [] + stops_that_dont_match: list[tuple[RouteStop, RouteStop]] = [] + + i = len(stops1) + j = len(stops2) + while not (i == 0 and j == 0): + action = None + if i > 0 and j > 0: + match = stops_match(stops1[i - 1], stops2[j - 1]) + if match and d[i - 1][j - 1] == d[i][j]: + action = "no" + elif not match and d[i - 1][j - 1] + 1 == d[i][j]: + action = "change" + if not action and i > 0 and d[i - 1][j] + 1 == d[i][j]: + action = "add_2" + if not action and j > 0 and d[i][j - 1] + 1 == d[i][j]: + action = "add_1" + + match action: + case "add_1": + stops_missing_from_route1.append(stops2[j - 1]) + j -= 1 + case "add_2": + stops_missing_from_route2.append(stops1[i - 1]) + i -= 1 + case _: + if action == "change": + stops_that_dont_match.append( + 
(stops1[i - 1], stops2[j - 1]) + ) + i -= 1 + j -= 1 + return ( + stops_missing_from_route1, + stops_missing_from_route2, + stops_that_dont_match, + ) def __len__(self): return len(self.routes) @@ -1923,8 +2167,8 @@ def extract_routes(self) -> None: k = el_id(master) if master else route.ref if k not in self.routes: - self.routes[k] = RouteMaster(master) - self.routes[k].add(route, self) + self.routes[k] = RouteMaster(self, master) + self.routes[k].add(route) # Sometimes adding a route to a newly initialized RouteMaster # can fail @@ -2055,166 +2299,6 @@ def count_unused_entrances(self): f"relations: {format_elid_list(not_in_sa)}" ) - def check_return_routes(self, rmaster: RouteMaster) -> None: - """Check if a route has return direction, and if twin routes - miss stations. - """ - meaningful_routes = rmaster.get_meaningful_routes() - - if len(meaningful_routes) == 0: - self.error( - f"An empty route master {rmaster.id}. " - "Please set construction:route if it is under construction" - ) - elif len(meaningful_routes) == 1: - log_function = ( - self.error if not rmaster.best.is_circular else self.notice - ) - log_function( - "Only one route in route_master. 
" - "Please check if it needs a return route", - rmaster.best.element, - ) - else: - all_ends = { - route.get_end_transfers(): route for route in meaningful_routes - } - for route in meaningful_routes: - ends = route.get_end_transfers() - if ends[::-1] not in all_ends: - self.notice( - "Route does not have a return direction", route.element - ) - - twin_routes = rmaster.find_twin_routes() - for route1, route2 in twin_routes.items(): - if route1.id > route2.id: - continue # to process a pair of routes only once - # and to ensure the order of routes in the pair - self.alert_twin_routes_differ(route1, route2) - - def alert_twin_routes_differ(self, route1: Route, route2: Route) -> None: - """Arguments are that route1.id < route2.id""" - ( - stops_missing_from_route1, - stops_missing_from_route2, - stops_that_dont_match, - ) = self.calculate_twin_routes_diff(route1, route2) - - for st in stops_missing_from_route1: - if ( - not route1.are_tracks_complete() - or ( - projected_point := project_on_line( - st.stoparea.center, route1.tracks - )["projected_point"] - ) - is not None - and distance(st.stoparea.center, projected_point) - <= MAX_DISTANCE_STOP_TO_LINE - ): - self.notice( - f"Stop {st.stoparea.station.name} {st.stop} is included " - f"in the {route2.id} but not included in {route1.id}", - route1.element, - ) - - for st in stops_missing_from_route2: - if ( - not route2.are_tracks_complete() - or ( - projected_point := project_on_line( - st.stoparea.center, route2.tracks - )["projected_point"] - ) - is not None - and distance(st.stoparea.center, projected_point) - <= MAX_DISTANCE_STOP_TO_LINE - ): - self.notice( - f"Stop {st.stoparea.station.name} {st.stop} is included " - f"in the {route1.id} but not included in {route2.id}", - route2.element, - ) - - for st1, st2 in stops_that_dont_match: - if ( - st1.stoparea.station == st2.stoparea.station - or distance(st1.stop, st2.stop) < SUGGEST_TRANSFER_MIN_DISTANCE - ): - self.notice( - "Should there be one stoparea or a 
transfer between " - f"{st1.stoparea.station.name} {st1.stop} and " - f"{st2.stoparea.station.name} {st2.stop}?", - route1.element, - ) - - @staticmethod - def calculate_twin_routes_diff(route1: Route, route2: Route) -> tuple: - """Wagner–Fischer algorithm for stops diff in two twin routes.""" - - stops1 = route1.stops - stops2 = route2.stops[::-1] - - def stops_match(stop1: RouteStop, stop2: RouteStop) -> bool: - return ( - stop1.stoparea == stop2.stoparea - or stop1.stoparea.transfer is not None - and stop1.stoparea.transfer == stop2.stoparea.transfer - ) - - d = [[0] * (len(stops2) + 1) for _ in range(len(stops1) + 1)] - d[0] = list(range(len(stops2) + 1)) - for i in range(len(stops1) + 1): - d[i][0] = i - - for i in range(1, len(stops1) + 1): - for j in range(1, len(stops2) + 1): - d[i][j] = ( - d[i - 1][j - 1] - if stops_match(stops1[i - 1], stops2[j - 1]) - else min((d[i - 1][j], d[i][j - 1], d[i - 1][j - 1])) + 1 - ) - - stops_missing_from_route1: list[RouteStop] = [] - stops_missing_from_route2: list[RouteStop] = [] - stops_that_dont_match: list[tuple[RouteStop, RouteStop]] = [] - - i = len(stops1) - j = len(stops2) - while not (i == 0 and j == 0): - action = None - if i > 0 and j > 0: - match = stops_match(stops1[i - 1], stops2[j - 1]) - if match and d[i - 1][j - 1] == d[i][j]: - action = "no" - elif not match and d[i - 1][j - 1] + 1 == d[i][j]: - action = "change" - if not action and i > 0 and d[i - 1][j] + 1 == d[i][j]: - action = "add_2" - if not action and j > 0 and d[i][j - 1] + 1 == d[i][j]: - action = "add_1" - - match action: - case "add_1": - stops_missing_from_route1.append(stops2[j - 1]) - j -= 1 - case "add_2": - stops_missing_from_route2.append(stops1[i - 1]) - i -= 1 - case _: - if action == "change": - stops_that_dont_match.append( - (stops1[i - 1], stops2[j - 1]) - ) - i -= 1 - j -= 1 - return ( - stops_missing_from_route1, - stops_missing_from_route2, - stops_that_dont_match, - ) - def validate_lines(self): self.found_light_lines = len( [x 
for x in self.routes.values() if x.mode != "subway"] @@ -2267,9 +2351,9 @@ def validate(self): for rmaster in self.routes.values(): networks[str(rmaster.network)] += 1 if not self.overground: - self.check_return_routes(rmaster) + rmaster.check_return_routes() route_stations = set() - for sa in rmaster.stop_areas(): + for sa in rmaster.stopareas(): route_stations.add(sa.transfer or sa.id) unused_stations.discard(sa.station.id) self.found_stations += len(route_stations) diff --git a/tests/assets/route_masters.osm b/tests/assets/route_masters.osm index 0635a2bb..1d466c8f 100644 --- a/tests/assets/route_masters.osm +++ b/tests/assets/route_masters.osm @@ -194,7 +194,7 @@ - + @@ -202,8 +202,8 @@ - + @@ -272,12 +272,27 @@ - - - + + + + + - - + + + + + + + + + + + + + + + @@ -524,4 +539,20 @@ + + + + + + + + + + + + + + + + diff --git a/tests/assets/twin_routes.osm b/tests/assets/twin_routes.osm index e2e7f428..38cbe6c6 100644 --- a/tests/assets/twin_routes.osm +++ b/tests/assets/twin_routes.osm @@ -288,10 +288,10 @@ - - - + + + diff --git a/tests/sample_data_for_error_messages.py b/tests/sample_data_for_error_messages.py index 2e20c732..0f5a434a 100644 --- a/tests/sample_data_for_error_messages.py +++ b/tests/sample_data_for_error_messages.py @@ -326,9 +326,9 @@ "xml_file": "assets/route_masters.osm", "cities_info": [ { - "num_stations": (3 + 3 + 3 + 5 + 3 + 3 + 4) + "num_stations": (3 + 3 + 3 + 5 + 3 + 3 + 4 + 3) + (3 + 3 + 3 + 3 + 3 + 3 + 4), - "num_lines": 7 + 7, + "num_lines": 8 + 7, "num_interchanges": 0 + 1, }, ], @@ -350,6 +350,8 @@ 'Route does not have a return direction (relation 209, "5: 1-2-3")', # noqa: E501 'Route does not have a return direction (relation 210, "5: 2-1")', # noqa: E501 'Only one route in route_master. 
Please check if it needs a return route (relation 213, "C3: 1-2-3-8-1")', # noqa: E501 + 'Route does not have a return direction (relation 168, "C5: 1-3-5-1")', # noqa: E501 + 'Route does not have a return direction (relation 169, "C5: 3-5-1-3")', # noqa: E501 ], }, ] diff --git a/tests/test_route_master.py b/tests/test_route_master.py index 1bab6173..22d2f8bd 100644 --- a/tests/test_route_master.py +++ b/tests/test_route_master.py @@ -1,9 +1,97 @@ -from tests.util import TestCase - +from subway_structure import RouteMaster from tests.sample_data_for_twin_routes import metro_samples +from tests.util import TestCase class TestRouteMaster(TestCase): + def test__find_common_circular_subsequence(self) -> None: + cases = [ + { # the 1st sequence is empty + "sequence1": [], + "sequence2": [1, 2, 3, 4], + "answer": [], + }, + { # the 2nd sequence is empty + "sequence1": [1, 2, 3, 4], + "sequence2": [], + "answer": [], + }, + { # equal sequences + "sequence1": [1, 2, 3, 4], + "sequence2": [1, 2, 3, 4], + "answer": [1, 2, 3, 4], + }, + { # one sequence is a cyclic shift of the other + "sequence1": [1, 2, 3, 4], + "sequence2": [4, 1, 2, 3], + "answer": [1, 2, 3, 4], + }, + { # the 2nd sequence is a subsequence of the 1st; equal ends + "sequence1": [1, 2, 3, 4], + "sequence2": [1, 2, 4], + "answer": [1, 2, 4], + }, + { # the 1st sequence is a subsequence of the 2nd; equal ends + "sequence1": [1, 2, 4], + "sequence2": [1, 2, 3, 4], + "answer": [1, 2, 4], + }, + { # the 2nd sequence is an inner subsequence of the 1st + "sequence1": [1, 2, 3, 4], + "sequence2": [2, 3], + "answer": [2, 3], + }, + { # the 1st sequence is an inner subsequence of the 2nd + "sequence1": [2, 3], + "sequence2": [1, 2, 3, 4], + "answer": [2, 3], + }, + { # the 2nd sequence is a continuation of the 1st + "sequence1": [1, 2, 3, 4], + "sequence2": [4, 5, 6], + "answer": [4], + }, + { # the 1st sequence is a continuation of the 2nd + "sequence1": [4, 5, 6], + "sequence2": [1, 2, 3, 4], + "answer": [4], + 
}, + { # no common elements + "sequence1": [1, 2, 3, 4], + "sequence2": [5, 6, 7], + "answer": [], + }, + { # one sequence is the reversed other + "sequence1": [1, 2, 3, 4], + "sequence2": [4, 3, 2, 1], + "answer": [1, 2], + }, + { # the 2nd is a subsequence of shifted 1st + "sequence1": [1, 2, 3, 4], + "sequence2": [2, 4, 1], + "answer": [1, 2, 4], + }, + { # the 1st is a subsequence of shifted 2nd + "sequence1": [2, 4, 1], + "sequence2": [1, 2, 3, 4], + "answer": [2, 4, 1], + }, + { # mixed case: few common elements + "sequence1": [1, 2, 4], + "sequence2": [2, 3, 4], + "answer": [2, 4], + }, + ] + + for i, case in enumerate(cases): + with self.subTest(f"case#{i}"): + self.assertListEqual( + case["answer"], + RouteMaster.find_common_circular_subsequence( + case["sequence1"], case["sequence2"] + ), + ) + def _test_find_twin_routes_for_network(self, metro_sample: dict) -> None: cities, transfers = self.prepare_cities(metro_sample) city = cities[0] From c2f2956da1131ac51bf253b0eecb747c3560bba9 Mon Sep 17 00:00:00 2001 From: Alexey Zakharenkov Date: Tue, 27 Feb 2024 14:59:51 +0300 Subject: [PATCH 08/13] Add type aliases, declarations and annotations --- css_colours.py | 2 +- process_subways.py | 58 +++++----- processors/_common.py | 10 +- processors/gtfs.py | 23 ++-- processors/mapsme.py | 71 +++++++----- subway_io.py | 37 +++--- subway_structure.py | 256 +++++++++++++++++++++++------------------- validation_to_html.py | 7 +- 8 files changed, 249 insertions(+), 215 deletions(-) diff --git a/css_colours.py b/css_colours.py index 72180547..170d3900 100644 --- a/css_colours.py +++ b/css_colours.py @@ -152,7 +152,7 @@ } -def normalize_colour(c): +def normalize_colour(c: str | None) -> str | None: if not c: return None c = c.strip().lower() diff --git a/process_subways.py b/process_subways.py index 6f184532..3726f3ad 100755 --- a/process_subways.py +++ b/process_subways.py @@ -25,8 +25,10 @@ CriticalValidationError, find_transfers, get_unused_subway_entrances_geojson, + 
LonLat, MODES_OVERGROUND, MODES_RAPID, + OsmElementT, ) DEFAULT_SPREADSHEET_ID = "1SEW1-NiNOnA2qDwievcxYV1FOaQl1mb1fdeyqAxHu3k" @@ -36,8 +38,6 @@ ) BAD_MARK = "[bad]" -Point = tuple[float, float] - def compose_overpass_request( overground: bool, bboxes: list[list[float]] @@ -68,7 +68,7 @@ def compose_overpass_request( def overpass_request( overground: bool, overpass_api: str, bboxes: list[list[float]] -) -> list[dict]: +) -> list[OsmElementT]: query = compose_overpass_request(overground, bboxes) url = f"{overpass_api}?data={urllib.parse.quote(query)}" response = urllib.request.urlopen(url, timeout=1000) @@ -79,7 +79,7 @@ def overpass_request( def multi_overpass( overground: bool, overpass_api: str, bboxes: list[list[float]] -) -> list[dict]: +) -> list[OsmElementT]: SLICE_SIZE = 10 INTERREQUEST_WAIT = 5 # in seconds result = [] @@ -96,8 +96,8 @@ def slugify(name: str) -> str: def get_way_center( - element: dict, node_centers: dict[int, Point] -) -> Point | None: + element: OsmElementT, node_centers: dict[int, LonLat] +) -> LonLat | None: """ :param element: dict describing OSM element :param node_centers: osm_id => (lat, lon) @@ -107,7 +107,7 @@ def get_way_center( # If elements have been queried via overpass-api with # 'out center;' clause then ways already have 'center' attribute if "center" in element: - return element["center"]["lat"], element["center"]["lon"] + return element["center"]["lon"], element["center"]["lat"] if "nodes" not in element: return None @@ -131,22 +131,22 @@ def get_way_center( count += 1 if count == 0: return None - element["center"] = {"lat": center[0] / count, "lon": center[1] / count} - return element["center"]["lat"], element["center"]["lon"] + element["center"] = {"lat": center[1] / count, "lon": center[0] / count} + return element["center"]["lon"], element["center"]["lat"] def get_relation_center( - element: dict, - node_centers: dict[int, Point], - way_centers: dict[int, Point], - relation_centers: dict[int, Point], + element: 
OsmElementT, + node_centers: dict[int, LonLat], + way_centers: dict[int, LonLat], + relation_centers: dict[int, LonLat], ignore_unlocalized_child_relations: bool = False, -) -> Point | None: +) -> LonLat | None: """ :param element: dict describing OSM element - :param node_centers: osm_id => (lat, lon) - :param way_centers: osm_id => (lat, lon) - :param relation_centers: osm_id => (lat, lon) + :param node_centers: osm_id => LonLat + :param way_centers: osm_id => LonLat + :param relation_centers: osm_id => LonLat :param ignore_unlocalized_child_relations: if a member that is a relation has no center, skip it and calculate center based on member nodes, ways and other, "localized" (with known centers), relations @@ -159,7 +159,7 @@ def get_relation_center( # of other relations (e.g., route_master, stop_area_group or # stop_area with only members that are multipolygons) if "center" in element: - return element["center"]["lat"], element["center"]["lon"] + return element["center"]["lon"], element["center"]["lat"] center = [0, 0] count = 0 @@ -186,25 +186,25 @@ def get_relation_center( count += 1 if count == 0: return None - element["center"] = {"lat": center[0] / count, "lon": center[1] / count} - return element["center"]["lat"], element["center"]["lon"] + element["center"] = {"lat": center[1] / count, "lon": center[0] / count} + return element["center"]["lon"], element["center"]["lat"] -def calculate_centers(elements: list[dict]) -> None: +def calculate_centers(elements: list[OsmElementT]) -> None: """Adds 'center' key to each way/relation in elements, except for empty ways or relations. Relies on nodes-ways-relations order in the elements list. 
""" - nodes: dict[int, Point] = {} # id => (lat, lon) - ways: dict[int, Point] = {} # id => (lat, lon) - relations: dict[int, Point] = {} # id => (lat, lon) + nodes: dict[int, LonLat] = {} # id => LonLat + ways: dict[int, LonLat] = {} # id => approx center LonLat + relations: dict[int, LonLat] = {} # id => approx center LonLat - unlocalized_relations = [] # 'unlocalized' means the center of the - # relation has not been calculated yet + unlocalized_relations: list[OsmElementT] = [] # 'unlocalized' means + # the center of the relation has not been calculated yet for el in elements: if el["type"] == "node": - nodes[el["id"]] = (el["lat"], el["lon"]) + nodes[el["id"]] = (el["lon"], el["lat"]) elif el["type"] == "way": if center := get_way_center(el, nodes): ways[el["id"]] = center @@ -216,7 +216,7 @@ def calculate_centers(elements: list[dict]) -> None: def iterate_relation_centers_calculation( ignore_unlocalized_child_relations: bool, - ) -> list[dict]: + ) -> list[OsmElementT]: unlocalized_relations_upd = [] for rel in unlocalized_relations: if center := get_relation_center( @@ -244,7 +244,7 @@ def iterate_relation_centers_calculation( def add_osm_elements_to_cities( - osm_elements: list[dict], cities: list[City] + osm_elements: list[OsmElementT], cities: list[City] ) -> None: for el in osm_elements: for c in cities: diff --git a/processors/_common.py b/processors/_common.py index edb19f43..d60ff070 100644 --- a/processors/_common.py +++ b/processors/_common.py @@ -1,6 +1,4 @@ -from typing import List, Set - -from subway_structure import City, el_center, StopArea +from subway_structure import City, el_center, TransfersT DEFAULT_INTERVAL = 2.5 * 60 # seconds KMPH_TO_MPS = 1 / 3.6 # km/h to m/s conversion multiplier @@ -8,14 +6,12 @@ TRANSFER_PENALTY = 30 # seconds -def format_colour(colour): +def format_colour(colour: str | None) -> str | None: """Truncate leading # sign.""" return colour[1:] if colour else None -def transit_to_dict( - cities: List[City], transfers: 
List[Set[StopArea]] -) -> dict: +def transit_to_dict(cities: list[City], transfers: TransfersT) -> dict: """Get data for good cities as a dictionary.""" data = { "stopareas": {}, # stoparea id => stoparea data diff --git a/processors/gtfs.py b/processors/gtfs.py index 5dc39526..463443ec 100644 --- a/processors/gtfs.py +++ b/processors/gtfs.py @@ -3,7 +3,6 @@ from io import BytesIO, StringIO from itertools import permutations from tarfile import TarFile, TarInfo -from typing import List, Optional, Set from zipfile import ZipFile from ._common import ( @@ -16,7 +15,7 @@ from subway_structure import ( City, distance, - StopArea, + TransfersT, ) @@ -133,13 +132,13 @@ } -def round_coords(coords_tuple): +def round_coords(coords_tuple: tuple) -> tuple: return tuple( map(lambda coord: round(coord, COORDINATE_PRECISION), coords_tuple) ) -def transit_data_to_gtfs(data): +def transit_data_to_gtfs(data: dict) -> dict: # Keys correspond GTFS file names gtfs_data = {key: [] for key in GTFS_COLUMNS.keys()} @@ -313,14 +312,14 @@ def transit_data_to_gtfs(data): def process( - cities: List[City], - transfers: List[Set[StopArea]], + cities: list[City], + transfers: TransfersT, filename: str, - cache_path: str, -): + cache_path: str | None, +) -> None: """Generate all output and save to file. - :param cities: List of City instances - :param transfers: List of sets of StopArea.id + :param cities: list of City instances + :param transfers: all collected transfers in the world :param filename: Path to file to save the result :param cache_path: Path to json-file with good cities cache or None. 
""" @@ -344,9 +343,7 @@ def dict_to_row(dict_data: dict, record_type: str) -> list: ] -def make_gtfs( - filename: str, gtfs_data: dict, fmt: Optional[str] = None -) -> None: +def make_gtfs(filename: str, gtfs_data: dict, fmt: str | None = None) -> None: if not fmt: fmt = "tar" if filename.endswith(".tar") else "zip" diff --git a/processors/mapsme.py b/processors/mapsme.py index 2f3ec6f2..e87ffe0e 100755 --- a/processors/mapsme.py +++ b/processors/mapsme.py @@ -2,13 +2,19 @@ import logging import os from collections import defaultdict +from collections.abc import Callable +from typing import Any, TypeAlias from subway_structure import ( City, DISPLACEMENT_TOLERANCE, distance, el_center, + IdT, + LonLat, + OsmElementT, Station, + StopArea, TransfersT, ) from ._common import ( @@ -19,14 +25,16 @@ TRANSFER_PENALTY, ) - OSM_TYPES = {"n": (0, "node"), "w": (2, "way"), "r": (3, "relation")} ENTRANCE_PENALTY = 60 # seconds SPEED_TO_ENTRANCE = 5 * KMPH_TO_MPS # m/s SPEED_ON_LINE = 40 * KMPH_TO_MPS # m/s +# (stoparea1_uid, stoparea2_uid) -> seconds; stoparea1_uid < stoparea2_uid +TransferTimesT: TypeAlias = dict[tuple[int, int], int] + -def uid(elid, typ=None): +def uid(elid: IdT, typ: str | None = None) -> int: t = elid[0] osm_id = int(elid[1:]) if not typ: @@ -39,24 +47,24 @@ def uid(elid, typ=None): class DummyCache: """This class may be used when you need to omit all cache processing""" - def __init__(self, cache_path, cities): + def __init__(self, cache_path: str, cities: list[City]) -> None: pass - def __getattr__(self, name): + def __getattr__(self, name: str) -> Callable[..., None]: """This results in that a call to any method effectively does nothing and does not generate exceptions.""" - def method(*args, **kwargs): + def method(*args, **kwargs) -> None: return None return method -def if_object_is_used(method): +def if_object_is_used(method: Callable) -> Callable: """Decorator to skip method execution under certain condition. 
Relies on "is_used" object property.""" - def inner(self, *args, **kwargs): + def inner(self, *args, **kwargs) -> Any: if not self.is_used: return return method(self, *args, **kwargs) @@ -65,7 +73,7 @@ def inner(self, *args, **kwargs): class MapsmeCache: - def __init__(self, cache_path, cities): + def __init__(self, cache_path: str, cities: list[City]) -> None: if not cache_path: # Cache is not used, # all actions with cache must be silently skipped @@ -90,7 +98,7 @@ def __init__(self, cache_path, cities): self.city_dict = {c.name: c for c in cities} self.good_city_names = {c.name for c in cities if c.is_good} - def _is_cached_city_usable(self, city): + def _is_cached_city_usable(self, city: City) -> bool: """Check if cached stations still exist in osm data and not moved far away. """ @@ -105,8 +113,9 @@ def _is_cached_city_usable(self, city): ): return False station_coords = el_center(city_station) - cached_station_coords = tuple( - cached_stoparea[coord] for coord in ("lon", "lat") + cached_station_coords = ( + cached_stoparea["lon"], + cached_stoparea["lat"], ) displacement = distance(station_coords, cached_station_coords) if displacement > DISPLACEMENT_TOLERANCE: @@ -115,7 +124,9 @@ def _is_cached_city_usable(self, city): return True @if_object_is_used - def provide_stops_and_networks(self, stops, networks): + def provide_stops_and_networks( + self, stops: dict, networks: list[dict] + ) -> None: """Put stops and networks for bad cities into containers passed as arguments.""" for city in self.city_dict.values(): @@ -128,7 +139,7 @@ def provide_stops_and_networks(self, stops, networks): self.recovered_city_names.add(city.name) @if_object_is_used - def provide_transfers(self, transfers): + def provide_transfers(self, transfers: TransferTimesT) -> None: """Add transfers from usable cached cities to 'transfers' dict passed as argument.""" for city_name in self.recovered_city_names: @@ -138,7 +149,7 @@ def provide_transfers(self, transfers): transfers[(stop1_uid, 
stop2_uid)] = transfer_time @if_object_is_used - def initialize_good_city(self, city_name, network): + def initialize_good_city(self, city_name: str, network: dict) -> None: """Create/replace one cache element with new data container. This should be done for each good city.""" self.cache[city_name] = { @@ -149,20 +160,22 @@ def initialize_good_city(self, city_name, network): } @if_object_is_used - def link_stop_with_city(self, stoparea_id, city_name): + def link_stop_with_city(self, stoparea_id: IdT, city_name: str) -> None: """Remember that some stop_area is used in a city.""" stoparea_uid = uid(stoparea_id) self.stop_cities[stoparea_uid].add(city_name) @if_object_is_used - def add_stop(self, stoparea_id, st): + def add_stop(self, stoparea_id: IdT, st: dict) -> None: """Add stoparea to the cache of each city the stoparea is in.""" stoparea_uid = uid(stoparea_id) for city_name in self.stop_cities[stoparea_uid]: self.cache[city_name]["stops"][stoparea_id] = st @if_object_is_used - def add_transfer(self, stoparea1_uid, stoparea2_uid, transfer_time): + def add_transfer( + self, stoparea1_uid: int, stoparea2_uid: int, transfer_time: int + ) -> None: """If a transfer is inside a good city, add it to the city's cache.""" for city_name in ( self.good_city_names @@ -174,7 +187,7 @@ def add_transfer(self, stoparea1_uid, stoparea2_uid, transfer_time): ) @if_object_is_used - def save(self): + def save(self) -> None: try: with open(self.cache_path, "w", encoding="utf-8") as f: json.dump(self.cache, f, ensure_ascii=False) @@ -191,7 +204,9 @@ def transit_data_to_mapsme( :param cache_path: Path to json-file with good cities cache or None. 
""" - def find_exits_for_platform(center, nodes): + def find_exits_for_platform( + center: LonLat, nodes: list[OsmElementT] + ) -> list[OsmElementT]: exits = [] min_distance = None for n in nodes: @@ -212,8 +227,8 @@ def find_exits_for_platform(center, nodes): cache = MapsmeCache(cache_path, cities) - stop_areas = {} # stoparea el_id -> StopArea instance - stops = {} # stoparea el_id -> stop jsonified data + stop_areas: dict[IdT, StopArea] = {} + stops: dict[IdT, dict] = {} # stoparea el_id -> stop jsonified data networks = [] good_cities = [c for c in cities if c.is_good] platform_nodes = {} @@ -362,9 +377,7 @@ def find_exits_for_platform(center, nodes): stops[stop_id] = st cache.add_stop(stop_id, st) - pairwise_transfers = ( - {} - ) # (stoparea1_uid, stoparea2_uid) -> time; uid1 < uid2 + pairwise_transfers: TransferTimesT = {} for stoparea_id_set in transfers: stoparea_ids = list(stoparea_id_set) for i_first in range(len(stoparea_ids) - 1): @@ -388,14 +401,14 @@ def find_exits_for_platform(center, nodes): cache.provide_transfers(pairwise_transfers) cache.save() - pairwise_transfers = [ + pairwise_transfers_list = [ (stop1_uid, stop2_uid, transfer_time) for (stop1_uid, stop2_uid), transfer_time in pairwise_transfers.items() ] result = { "stops": list(stops.values()), - "transfers": pairwise_transfers, + "transfers": pairwise_transfers_list, "networks": networks, } return result @@ -406,10 +419,10 @@ def process( transfers: TransfersT, filename: str, cache_path: str | None, -): +) -> None: """Generate all output and save to file. - :param cities: List of City instances - :param transfers: List of sets of StopArea.id + :param cities: list of City instances + :param transfers: all collected transfers in the world :param filename: Path to file to save the result :param cache_path: Path to json-file with good cities cache or None. 
""" diff --git a/subway_io.py b/subway_io.py index 4b025965..8ef5f6ff 100644 --- a/subway_io.py +++ b/subway_io.py @@ -1,15 +1,18 @@ import json import logging from collections import OrderedDict +from typing import Any, TextIO +from subway_structure import City, OsmElementT, StopArea -def load_xml(f): + +def load_xml(f: TextIO | str) -> list[OsmElementT]: try: from lxml import etree except ImportError: import xml.etree.ElementTree as etree - elements = [] + elements: list[OsmElementT] = [] for event, element in etree.iterparse(f): if element.tag in ("node", "way", "relation"): @@ -49,7 +52,7 @@ def load_xml(f): _YAML_SPECIAL_SEQUENCES = ("- ", ": ", "? ") -def _get_yaml_compatible_string(scalar): +def _get_yaml_compatible_string(scalar: Any) -> str: """Enclose string in single quotes in some cases""" string = str(scalar) if string and ( @@ -62,8 +65,8 @@ def _get_yaml_compatible_string(scalar): return string -def dump_yaml(city, f): - def write_yaml(data, f, indent=""): +def dump_yaml(city: City, f: TextIO) -> None: + def write_yaml(data: dict, f: TextIO, indent: str = "") -> None: if isinstance(data, (set, list)): f.write("\n") for i in data: @@ -138,10 +141,10 @@ def write_yaml(data, f, indent=""): write_yaml(result, f) -def make_geojson(city, include_tracks_geometry=True): - transfers = set() +def make_geojson(city: City, include_tracks_geometry: bool = True) -> dict: + stopareas_in_transfers: set[StopArea] = set() for t in city.transfers: - transfers.update(t) + stopareas_in_transfers.update(t) features = [] stopareas = set() stops = set() @@ -196,7 +199,7 @@ def make_geojson(city, include_tracks_geometry=True): "name": stoparea.name, "marker-size": "small", "marker-color": "#ff2600" - if stoparea in transfers + if stoparea in stopareas_in_transfers else "#797979", }, } @@ -204,7 +207,7 @@ def make_geojson(city, include_tracks_geometry=True): return {"type": "FeatureCollection", "features": features} -def _dumps_route_id(route_id): +def 
_dumps_route_id(route_id: tuple[str | None, str | None]) -> str: """Argument is a route_id that depends on route colour and ref. Name can be taken from route_master or can be route's own, we don't take it into consideration. Some of route attributes can be None. The function makes @@ -212,13 +215,13 @@ def _dumps_route_id(route_id): return json.dumps(route_id, ensure_ascii=False) -def _loads_route_id(route_id_dump): +def _loads_route_id(route_id_dump: str) -> tuple[str | None, str | None]: """Argument is a json-encoded identifier of a route. Return a tuple (colour, ref).""" return tuple(json.loads(route_id_dump)) -def read_recovery_data(path): +def read_recovery_data(path: str) -> dict: """Recovery data is a json with data from previous transport builds. It helps to recover cities from some errors, e.g. by resorting shuffled stations in routes.""" @@ -246,11 +249,15 @@ def read_recovery_data(path): return data -def write_recovery_data(path, current_data, cities): +def write_recovery_data( + path: str, current_data: dict, cities: list[City] +) -> None: """Updates recovery data with good cities data and writes to file.""" - def make_city_recovery_data(city): - routes = {} + def make_city_recovery_data( + city: City, + ) -> dict[tuple[str | None, str | None], list[dict]]: + routes: dict[tuple(str | None, str | None), list[dict]] = {} for route in city: # Recovery is based primarily on route/station names/refs. # If route's ref/colour changes, the route won't be used. 
diff --git a/subway_structure.py b/subway_structure.py index d486d90e..94c6f479 100644 --- a/subway_structure.py +++ b/subway_structure.py @@ -3,9 +3,9 @@ import math import re from collections import Counter, defaultdict -from collections.abc import Collection, Iterator +from collections.abc import Callable, Collection, Iterator from itertools import chain, islice -from typing import TypeVar +from typing import TypeAlias, TypeVar from css_colours import normalize_colour @@ -47,13 +47,18 @@ START_END_TIMES_RE = re.compile(r".*?(\d{2}):(\d{2})-(\d{2}):(\d{2}).*") -IdT = str # Type of feature ids -TransferT = set[IdT] # A transfer is a set of StopArea IDs -TransfersT = Collection[TransferT] +OsmElementT: TypeAlias = dict +IdT: TypeAlias = str # Type of feature ids +TransferT: TypeAlias = set[IdT] # A transfer is a set of StopArea IDs +TransfersT: TypeAlias = list[TransferT] +LonLat: TypeAlias = tuple[float, float] +RailT: TypeAlias = list[LonLat] T = TypeVar("T") -def get_start_end_times(opening_hours): +def get_start_end_times( + opening_hours: str, +) -> tuple[tuple[int, int], tuple[int, int]] | tuple[None, None]: """Very simplified method to parse OSM opening_hours tag. We simply take the first HH:MM-HH:MM substring which is the most probable opening hours interval for the most of the weekdays. 
@@ -67,7 +72,7 @@ def get_start_end_times(opening_hours): return start_time, end_time -def osm_interval_to_seconds(interval_str): +def osm_interval_to_seconds(interval_str: str) -> int | None: """Convert to int an OSM value for 'interval'/'headway' tag which may be in these formats: HH:MM:SS, @@ -97,7 +102,7 @@ class CriticalValidationError(Exception): that prevents further validation of a city.""" -def el_id(el): +def el_id(el: OsmElementT) -> IdT | None: if not el: return None if "type" not in el: @@ -105,7 +110,7 @@ def el_id(el): return el["type"][0] + str(el.get("id", el.get("ref", ""))) -def el_center(el): +def el_center(el: OsmElementT) -> LonLat | None: if not el: return None if "lat" in el: @@ -115,7 +120,7 @@ def el_center(el): return None -def distance(p1, p2): +def distance(p1: LonLat, p2: LonLat) -> float: if p1 is None or p2 is None: raise Exception( "One of arguments to distance({}, {}) is None".format(p1, p2) @@ -127,14 +132,14 @@ def distance(p1, p2): return 6378137 * math.sqrt(dx * dx + dy * dy) -def is_near(p1, p2): +def is_near(p1: LonLat, p2: LonLat) -> bool: return ( p1[0] - 1e-8 <= p2[0] <= p1[0] + 1e-8 and p1[1] - 1e-8 <= p2[1] <= p1[1] + 1e-8 ) -def project_on_segment(p, p1, p2): +def project_on_segment(p: LonLat, p1: LonLat, p2: LonLat) -> float | None: """Given three points, return u - the position of projection of point p onto segment p1p2 regarding point p1 and (p2-p1) direction vector """ @@ -148,7 +153,7 @@ def project_on_segment(p, p1, p2): return u -def project_on_line(p, line): +def project_on_line(p: LonLat, line: RailT) -> dict: result = { # In the first approximation, position on rails is the index of the # closest vertex of line to the point p. 
Fractional value means that @@ -212,7 +217,9 @@ def project_on_line(p, line): return result -def find_segment(p, line, start_vertex=0): +def find_segment( + p: LonLat, line: RailT, start_vertex: int = 0 +) -> tuple[int, float] | tuple[None, None]: """Returns index of a segment and a position inside it.""" EPS = 1e-9 for seg in range(start_vertex, len(line) - 1): @@ -237,7 +244,9 @@ def find_segment(p, line, start_vertex=0): return None, None -def distance_on_line(p1, p2, line, start_vertex=0): +def distance_on_line( + p1: LonLat, p2: LonLat, line: RailT, start_vertex: int = 0 +) -> tuple[float, int] | None: """Calculates distance via line between projections of points p1 and p2. Returns a TUPLE of (d, vertex): d is the distance and vertex is the number of the second @@ -270,7 +279,7 @@ def distance_on_line(p1, p2, line, start_vertex=0): return d, seg2 % line_len -def angle_between(p1, c, p2): +def angle_between(p1: LonLat, c: LonLat, p2: LonLat) -> float: a = round( abs( math.degrees( @@ -282,7 +291,7 @@ def angle_between(p1, c, p2): return a if a <= 180 else 360 - a -def format_elid_list(ids): +def format_elid_list(ids: Collection[IdT]) -> str: msg = ", ".join(sorted(ids)[:20]) if len(ids) > 20: msg += ", ..." @@ -291,14 +300,14 @@ def format_elid_list(ids): class Station: @staticmethod - def get_modes(el: dict) -> set[str]: + def get_modes(el: OsmElementT) -> set[str]: modes = {m for m in ALL_MODES if el["tags"].get(m) == "yes"} if mode := el["tags"].get("station"): modes.add(mode) return modes @staticmethod - def is_station(el, modes): + def is_station(el: OsmElementT, modes: set[str]) -> bool: # public_transport=station is too ambiguous and unspecific to use, # so we expect for it to be backed by railway=station. 
if ( @@ -316,7 +325,7 @@ def is_station(el, modes): return False return True - def __init__(self, el, city): + def __init__(self, el: OsmElementT, city: City) -> None: """Call this with a railway=station node.""" if not Station.is_station(el, city.modes): raise Exception( @@ -324,8 +333,8 @@ def __init__(self, el, city): "Got: {}".format(el) ) - self.id = el_id(el) - self.element = el + self.id: IdT = el_id(el) + self.element: OsmElementT = el self.modes = Station.get_modes(el) self.name = el["tags"].get("name", "?") self.int_name = el["tags"].get( @@ -340,7 +349,7 @@ def __init__(self, el, city): if self.center is None: raise Exception("Could not find center of {}".format(el)) - def __repr__(self): + def __repr__(self) -> str: return "Station(id={}, modes={}, name={}, center={})".format( self.id, ",".join(self.modes), self.name, self.center ) @@ -348,7 +357,7 @@ def __repr__(self): class StopArea: @staticmethod - def is_stop(el): + def is_stop(el: OsmElementT) -> bool: if "tags" not in el: return False if el["tags"].get("railway") == "stop": @@ -358,7 +367,7 @@ def is_stop(el): return False @staticmethod - def is_platform(el): + def is_platform(el: OsmElementT) -> bool: if "tags" not in el: return False if el["tags"].get("railway") in ("platform", "platform_edge"): @@ -368,19 +377,22 @@ def is_platform(el): return False @staticmethod - def is_track(el): + def is_track(el: OsmElementT) -> bool: if el["type"] != "way" or "tags" not in el: return False return el["tags"].get("railway") in RAILWAY_TYPES def __init__( - self, station: Station, city: City, stop_area: StopArea | None = None + self, + station: Station, + city: City, + stop_area: OsmElementT | None = None, ) -> None: """Call this with a Station object.""" - self.element = stop_area or station.element - self.id = el_id(self.element) - self.station = station + self.element: OsmElementT = stop_area or station.element + self.id: IdT = el_id(self.element) + self.station: Station = station self.stops = set() # 
set of el_ids of stop_positions self.platforms = set() # set of el_ids of platforms self.exits = set() # el_id of subway_entrance/train_station_entrance @@ -440,7 +452,7 @@ def __init__( self.center[i] /= len(self.stops) + len(self.platforms) def _process_members( - self, station: Station, city: City, stop_area: dict + self, station: Station, city: City, stop_area: OsmElementT ) -> None: # If we have a stop area, add all elements from it tracks_detected = False @@ -503,7 +515,7 @@ def _add_nearby_entrances(self, station: Station, city: City) -> None: if etag != "entrance": self.exits.add(entrance_id) - def get_elements(self): + def get_elements(self) -> set[IdT]: result = {self.id, self.station.id} result.update(self.entrances) result.update(self.exits) @@ -511,7 +523,7 @@ def get_elements(self): result.update(self.platforms) return result - def __repr__(self): + def __repr__(self) -> str: return ( f"StopArea(id={self.id}, name={self.name}, station={self.station}," f" transfer={self.transfer}, center={self.center})" @@ -519,9 +531,9 @@ def __repr__(self): class RouteStop: - def __init__(self, stoparea): - self.stoparea = stoparea - self.stop = None # Stop position (lon, lat), possibly projected + def __init__(self, stoparea: StopArea) -> None: + self.stoparea: StopArea = stoparea + self.stop: LonLat = None # Stop position, possibly projected self.distance = 0 # In meters from the start of the route self.platform_entry = None # Platform el_id self.platform_exit = None # Platform el_id @@ -533,11 +545,13 @@ def __init__(self, stoparea): self.seen_station = False @property - def seen_platform(self): + def seen_platform(self) -> bool: return self.seen_platform_entry or self.seen_platform_exit @staticmethod - def get_actual_role(el, role, modes): + def get_actual_role( + el: OsmElementT, role: str, modes: set[str] + ) -> str | None: if StopArea.is_stop(el): return "stop" elif StopArea.is_platform(el): @@ -549,7 +563,7 @@ def get_actual_role(el, role, modes): return 
"stop" return None - def add(self, member, relation, city): + def add(self, member: dict, relation: OsmElementT, city: City) -> None: el = city.elements[el_id(member)] role = member["role"] @@ -616,7 +630,7 @@ def add(self, member, relation, city): relation, ) - def __repr__(self): + def __repr__(self) -> str: return ( "RouteStop(stop={}, pl_entry={}, pl_exit={}, stoparea={})".format( self.stop, @@ -628,10 +642,10 @@ def __repr__(self): class Route: - """Corresponds to OSM "type=route" relation""" + """The longest route for a city with a unique ref.""" @staticmethod - def is_route(el, modes): + def is_route(el: OsmElementT, modes: set[str]) -> bool: if ( el["type"] != "relation" or el.get("tags", {}).get("type") != "route" @@ -649,14 +663,14 @@ def is_route(el, modes): return True @staticmethod - def get_network(relation): + def get_network(relation: OsmElementT) -> str | None: for k in ("network:metro", "network", "operator"): if k in relation["tags"]: return relation["tags"][k] return None @staticmethod - def get_interval(tags): + def get_interval(tags: dict) -> int | None: v = None for k in ("interval", "headway"): if k in tags: @@ -681,16 +695,16 @@ def stopareas(self) -> Iterator[StopArea]: def __init__( self, - relation: dict, + relation: OsmElementT, city: City, - master: dict | None = None, + master: OsmElementT | None = None, ) -> None: assert Route.is_route( relation, city.modes ), f"The relation does not seem to be a route: {relation}" self.city = city - self.element = relation - self.id = el_id(relation) + self.element: OsmElementT = relation + self.id: IdT = el_id(relation) self.ref = None self.name = None @@ -702,7 +716,7 @@ def __init__( self.start_time = None self.end_time = None self.is_circular = False - self.stops = [] # List of RouteStop + self.stops: list[RouteStop] = [] # Would be a list of (lon, lat) for the longest stretch. Can be empty. 
self.tracks = None # Index of the first stop that is located on/near the self.tracks @@ -714,10 +728,10 @@ def __init__( stop_position_elements = self.process_stop_members() self.process_tracks(stop_position_elements) - def build_longest_line(self): - line_nodes = set() - last_track = [] - track = [] + def build_longest_line(self) -> tuple[list[IdT], set[IdT]]: + line_nodes: set[IdT] = set() + last_track: list[IdT] = [] + track: list[IdT] = [] warned_about_holes = False for m in self.element["members"]: el = self.city.elements.get(el_id(m), None) @@ -726,7 +740,7 @@ def build_longest_line(self): if "nodes" not in el or len(el["nodes"]) < 2: self.city.error("Cannot find nodes in a railway", el) continue - nodes = ["n{}".format(n) for n in el["nodes"]] + nodes: list[IdT] = ["n{}".format(n) for n in el["nodes"]] if m["role"] == "backward": nodes.reverse() line_nodes.update(nodes) @@ -773,10 +787,10 @@ def build_longest_line(self): ] return last_track, line_nodes - def get_stop_projections(self): + def get_stop_projections(self) -> tuple[list[dict], Callable[[int], bool]]: projected = [project_on_line(x.stop, self.tracks) for x in self.stops] - def stop_near_tracks_criterion(stop_index: int): + def stop_near_tracks_criterion(stop_index: int) -> bool: return ( projected[stop_index]["projected_point"] is not None and distance( @@ -788,14 +802,14 @@ def stop_near_tracks_criterion(stop_index: int): return projected, stop_near_tracks_criterion - def project_stops_on_line(self): + def project_stops_on_line(self) -> dict: projected, stop_near_tracks_criterion = self.get_stop_projections() projected_stops_data = { "first_stop_on_rails_index": None, "last_stop_on_rails_index": None, "stops_on_longest_line": [], # list [{'route_stop': RouteStop, - # 'coords': (lon, lat), + # 'coords': LonLat, # 'positions_on_rails': [] } } first_index = 0 @@ -848,7 +862,7 @@ def project_stops_on_line(self): projected_stops_data["stops_on_longest_line"].append(stop_data) return 
projected_stops_data - def calculate_distances(self): + def calculate_distances(self) -> None: dist = 0 vertex = 0 for i, stop in enumerate(self.stops): @@ -870,7 +884,7 @@ def calculate_distances(self): dist += round(direct) stop.distance = dist - def process_tags(self, master): + def process_tags(self, master: OsmElementT) -> None: relation = self.element master_tags = {} if not master else master["tags"] if "ref" not in relation["tags"] and "ref" not in master_tags: @@ -918,12 +932,12 @@ def process_tags(self, master): relation, ) - def process_stop_members(self): - stations = set() # temporary for recording stations + def process_stop_members(self) -> list[OsmElementT]: + stations: set[StopArea] = set() # temporary for recording stations seen_stops = False seen_platforms = False repeat_pos = None - stop_position_elements = [] + stop_position_elements: list[OsmElementT] = [] for m in self.element["members"]: if "inactive" in m["role"]: continue @@ -1072,7 +1086,9 @@ def process_stop_members(self): ) return stop_position_elements - def process_tracks(self, stop_position_elements: list[dict]) -> None: + def process_tracks( + self, stop_position_elements: list[OsmElementT] + ) -> None: tracks, line_nodes = self.build_longest_line() for stop_el in stop_position_elements: @@ -1130,7 +1146,7 @@ def apply_projected_stops_data(self, projected_stops_data: dict) -> None: if stop_coords := stop_data["coords"]: route_stop.stop = stop_coords - def get_extended_tracks(self): + def get_extended_tracks(self) -> RailT: """Amend tracks with points of leading/trailing self.stops that were not projected onto the longest tracks line. Return a new array. @@ -1153,7 +1169,7 @@ def get_extended_tracks(self): ) return tracks - def get_truncated_tracks(self, tracks): + def get_truncated_tracks(self, tracks: RailT) -> RailT: """Truncate leading/trailing segments of `tracks` param that are beyond the first and last stop locations. Return a new array. 
@@ -1194,12 +1210,12 @@ def are_tracks_complete(self) -> bool: and self.last_stop_on_rails_index == len(self) - 1 ) - def get_tracks_geometry(self): + def get_tracks_geometry(self) -> RailT: tracks = self.get_extended_tracks() tracks = self.get_truncated_tracks(tracks) return tracks - def check_stops_order_by_angle(self) -> tuple[list, list]: + def check_stops_order_by_angle(self) -> tuple[list[str], list[str]]: disorder_warnings = [] disorder_errors = [] for i, route_stop in enumerate( @@ -1222,7 +1238,9 @@ def check_stops_order_by_angle(self) -> tuple[list, list]: disorder_warnings.append(msg) return disorder_warnings, disorder_errors - def check_stops_order_on_tracks_direct(self, stop_sequence) -> str | None: + def check_stops_order_on_tracks_direct( + self, stop_sequence: Iterator[dict] + ) -> str | None: """Checks stops order on tracks, following stop_sequence in direct order only. :param stop_sequence: list of dict{'route_stop', 'positions_on_rails', @@ -1253,7 +1271,9 @@ def check_stops_order_on_tracks_direct(self, stop_sequence) -> str | None: ) max_position_on_rails = positions_on_rails[suitable_occurrence] - def check_stops_order_on_tracks(self, projected_stops_data) -> str | None: + def check_stops_order_on_tracks( + self, projected_stops_data: dict + ) -> str | None: """Checks stops order on tracks, trying direct and reversed order of stops in the stop_sequence. 
:param projected_stops_data: info about RouteStops that belong to the @@ -1280,7 +1300,9 @@ def check_stops_order_on_tracks(self, projected_stops_data) -> str | None: return error_message - def check_stops_order(self, projected_stops_data): + def check_stops_order( + self, projected_stops_data: dict + ) -> tuple[list[str], list[str]]: ( angle_disorder_warnings, angle_disorder_errors, @@ -1294,7 +1316,9 @@ def check_stops_order(self, projected_stops_data): disorder_errors.append(disorder_on_tracks_error) return disorder_warnings, disorder_errors - def check_and_recover_stops_order(self, projected_stops_data: dict): + def check_and_recover_stops_order( + self, projected_stops_data: dict + ) -> None: """ :param projected_stops_data: may change if we need to reverse tracks """ @@ -1319,7 +1343,7 @@ def check_and_recover_stops_order(self, projected_stops_data: dict): for msg in disorder_errors: self.city.error(msg, self.element) - def try_resort_stops(self): + def try_resort_stops(self) -> bool: """Precondition: self.city.recovery_data is not None. Return success of station order recovering.""" self_stops = {} # station name => RouteStop @@ -1388,7 +1412,7 @@ def try_resort_stops(self): ] return True - def get_end_transfers(self) -> tuple[str, str]: + def get_end_transfers(self) -> tuple[IdT, IdT]: """Using transfer ids because a train can arrive at different stations within a transfer. 
But disregard transfer that may give an impression of a circular route (for example, @@ -1406,7 +1430,7 @@ def get_end_transfers(self) -> tuple[str, str]: ) ) - def get_transfers_sequence(self) -> list[str]: + def get_transfers_sequence(self) -> list[IdT]: """Return a list of stoparea or transfer (if not None) ids.""" transfer_seq = [ stop.stoparea.transfer or stop.stoparea.id for stop in self @@ -1418,16 +1442,16 @@ def get_transfers_sequence(self) -> list[str]: transfer_seq[0], transfer_seq[-1] = self.get_end_transfers() return transfer_seq - def __len__(self): + def __len__(self) -> int: return len(self.stops) - def __getitem__(self, i): + def __getitem__(self, i) -> RouteStop: return self.stops[i] - def __iter__(self): + def __iter__(self) -> Iterator[RouteStop]: return iter(self.stops) - def __repr__(self): + def __repr__(self) -> str: return ( "Route(id={}, mode={}, ref={}, name={}, network={}, interval={}, " "circular={}, num_stops={}, line_length={} m, from={}, to={}" @@ -1447,11 +1471,11 @@ def __repr__(self): class RouteMaster: - def __init__(self, city: City, master: dict = None) -> None: + def __init__(self, city: City, master: OsmElementT = None) -> None: self.city = city self.routes = [] - self.best = None - self.id = el_id(master) + self.best: Route = None + self.id: IdT = el_id(master) self.has_master = master is not None self.interval_from_master = False if master: @@ -1871,16 +1895,16 @@ def stops_match(stop1: RouteStop, stop2: RouteStop) -> bool: stops_that_dont_match, ) - def __len__(self): + def __len__(self) -> int: return len(self.routes) - def __getitem__(self, i): + def __getitem__(self, i) -> Route: return self.routes[i] - def __iter__(self): + def __iter__(self) -> Iterator[Route]: return iter(self.routes) - def __repr__(self): + def __repr__(self) -> str: return ( f"RouteMaster(id={self.id}, mode={self.mode}, ref={self.ref}, " f"name={self.name}, network={self.network}, " @@ -1891,11 +1915,11 @@ def __repr__(self): class City: 
route_class = Route - def __init__(self, city_data, overground=False): + def __init__(self, city_data: dict, overground: bool = False) -> None: self.validate_called = False - self.errors = [] - self.warnings = [] - self.notices = [] + self.errors: list[str] = [] + self.warnings: list[str] = [] + self.notices: list[str] = [] self.id = None self.try_fill_int_attribute(city_data, "id") self.name = city_data["name"] @@ -1940,16 +1964,14 @@ def __init__(self, city_data, overground=False): else: self.bbox = None - self.elements = {} # Dict el_id → el - self.stations = defaultdict(list) # Dict el_id → list of StopAreas - self.routes = {} # Dict route_master_ref → RouteMaster - self.masters = {} # Dict el_id of route → route_master - self.stop_areas = defaultdict( - list - ) # El_id → list of stop_area elements it belongs to - self.transfers: TransfersT = [] # List of sets of stop areas - self.station_ids = set() # Set of stations' uid - self.stops_and_platforms = set() # Set of stops and platforms el_id + self.elements: dict[IdT, OsmElementT] = {} + self.stations: dict[IdT, list[StopArea]] = defaultdict(list) + self.routes: dict[str, RouteMaster] = {} # keys are route_master refs + self.masters: dict[IdT, OsmElementT] = {} # Route id → master element + self.stop_areas: [IdT, list[OsmElementT]] = defaultdict(list) + self.transfers: list[set[StopArea]] = [] + self.station_ids: set[IdT] = set() + self.stops_and_platforms: set[IdT] = set() self.recovery_data = None def try_fill_int_attribute( @@ -1980,7 +2002,7 @@ def try_fill_int_attribute( setattr(self, attr, attr_int) @staticmethod - def log_message(message, el): + def log_message(message: str, el: OsmElementT) -> str: if el: tags = el.get("tags", {}) message += ' ({} {}, "{}")'.format( @@ -1990,24 +2012,24 @@ def log_message(message, el): ) return message - def notice(self, message, el=None): + def notice(self, message: str, el: OsmElementT | None = None) -> None: """This type of message may point to a potential 
problem.""" msg = City.log_message(message, el) self.notices.append(msg) - def warn(self, message, el=None): + def warn(self, message: str, el: OsmElementT | None = None) -> None: """A warning is definitely a problem but is doesn't prevent from building a routing file and doesn't invalidate the city. """ msg = City.log_message(message, el) self.warnings.append(msg) - def error(self, message, el=None): + def error(self, message: str, el: OsmElementT | None = None) -> None: """Error is a critical problem that invalidates the city.""" msg = City.log_message(message, el) self.errors.append(msg) - def contains(self, el): + def contains(self, el: OsmElementT) -> bool: center = el_center(el) if center: return ( @@ -2016,7 +2038,7 @@ def contains(self, el): ) return False - def add(self, el): + def add(self, el: OsmElementT) -> None: if el["type"] == "relation" and "members" not in el: return @@ -2052,8 +2074,8 @@ def add(self, el): else: stop_areas.append(el) - def make_transfer(self, stoparea_group: dict) -> None: - transfer = set() + def make_transfer(self, stoparea_group: OsmElementT) -> None: + transfer: set[StopArea] = set() for m in stoparea_group["members"]: k = el_id(m) el = self.elements.get(k) @@ -2195,7 +2217,7 @@ def extract_routes(self) -> None: if len(inner_transfer) > 1 ] - def __iter__(self): + def __iter__(self) -> Iterator[RouteMaster]: return iter(self.routes.values()) def stopareas(self) -> Iterator[StopArea]: @@ -2207,7 +2229,7 @@ def stopareas(self) -> Iterator[StopArea]: yielded_stopareas.add(stoparea) @property - def is_good(self): + def is_good(self) -> bool: if not (self.errors or self.validate_called): raise RuntimeError( "You mustn't refer to City.is_good property before calling " @@ -2215,7 +2237,7 @@ def is_good(self): ) return len(self.errors) == 0 - def get_validation_result(self): + def get_validation_result(self) -> dict: result = { "name": self.name, "country": self.country, @@ -2260,7 +2282,7 @@ def get_validation_result(self): 
result["notices"] = self.notices return result - def count_unused_entrances(self): + def count_unused_entrances(self) -> None: global used_entrances stop_areas = set() for el in self.elements.values(): @@ -2299,7 +2321,7 @@ def count_unused_entrances(self): f"relations: {format_elid_list(not_in_sa)}" ) - def validate_lines(self): + def validate_lines(self) -> None: self.found_light_lines = len( [x for x in self.routes.values() if x.mode != "subway"] ) @@ -2317,7 +2339,7 @@ def validate_lines(self): ) ) - def validate_overground_lines(self): + def validate_overground_lines(self) -> None: self.found_tram_lines = len( [x for x in self.routes.values() if x.mode == "tram"] ) @@ -2344,7 +2366,7 @@ def validate_overground_lines(self): ), ) - def validate(self): + def validate(self) -> None: networks = Counter() self.found_stations = 0 unused_stations = set(self.station_ids) @@ -2421,7 +2443,7 @@ def calculate_distances(self) -> None: def find_transfers( - elements: list[dict], cities: Collection[City] + elements: list[OsmElementT], cities: Collection[City] ) -> TransfersT: """As for now, two Cities may contain the same stoparea, but those StopArea instances would have different python id. 
So we don't store @@ -2457,7 +2479,7 @@ def find_transfers( return transfers -def get_unused_subway_entrances_geojson(elements: list[dict]) -> dict: +def get_unused_subway_entrances_geojson(elements: list[OsmElementT]) -> dict: global used_entrances features = [] for el in elements: diff --git a/validation_to_html.py b/validation_to_html.py index f772a4f5..0f9ec3b8 100755 --- a/validation_to_html.py +++ b/validation_to_html.py @@ -7,7 +7,7 @@ import os import re from collections import defaultdict -from typing import Any, Optional +from typing import Any from process_subways import DEFAULT_SPREADSHEET_ID from v2h_templates import ( @@ -22,8 +22,7 @@ class CityData: - def __init__(self, city: Optional[str] = None) -> None: - self.city = city is not None + def __init__(self, city: dict | None = None) -> None: self.data = { "good_cities": 0, "total_cities": 1 if city else 0, @@ -93,7 +92,7 @@ def format(self, s: str) -> str: return s -def tmpl(s: str, data: Optional[CityData] = None, **kwargs) -> str: +def tmpl(s: str, data: CityData | None = None, **kwargs) -> str: if data: s = data.format(s) if kwargs: From 60821b60d67727e4887a9341e89d16f760fc1c9b Mon Sep 17 00:00:00 2001 From: Alexey Zakharenkov Date: Tue, 5 Mar 2024 16:43:20 +0300 Subject: [PATCH 09/13] Refactor project structure --- .github/workflows/python-app.yml | 4 +- README.md | 38 +- process_subways.py | 577 ---- scripts/process_subways.py | 276 ++ scripts/process_subways.sh | 16 +- subway_structure.py | 2505 ----------------- subways/__init__.py | 92 + subways/consts.py | 26 + css_colours.py => subways/css_colours.py | 0 subways/geom_utils.py | 175 ++ subways/osm_element.py | 19 + subways/overpass.py | 60 + .../processors}/__init__.py | 6 +- {processors => subways/processors}/_common.py | 10 +- {processors => subways/processors}/gtfs.py | 13 +- {processors => subways/processors}/mapsme.py | 25 +- requirements.txt => subways/requirements.txt | 0 subways/structure/__init__.py | 17 + subways/structure/city.py 
| 626 ++++ subways/structure/route.py | 903 ++++++ subways/structure/route_master.py | 464 +++ subways/structure/route_stop.py | 122 + subways/structure/station.py | 62 + subways/structure/stop_area.py | 191 ++ subway_io.py => subways/subway_io.py | 14 +- {tests => subways/tests}/README.md | 0 {tests => subways/tests}/__init__.py | 0 .../assets/cities_info_with_bad_values.csv | 0 .../tests}/assets/route_masters.osm | 0 .../tests}/assets/tiny_world.osm | 0 .../tests}/assets/tiny_world_gtfs/agency.txt | 0 .../assets/tiny_world_gtfs/calendar.txt | 0 .../assets/tiny_world_gtfs/frequencies.txt | 0 .../tests}/assets/tiny_world_gtfs/routes.txt | 0 .../tests}/assets/tiny_world_gtfs/shapes.txt | 0 .../assets/tiny_world_gtfs/stop_times.txt | 0 .../tests}/assets/tiny_world_gtfs/stops.txt | 0 .../assets/tiny_world_gtfs/transfers.txt | 0 .../tests}/assets/tiny_world_gtfs/trips.txt | 0 .../tests}/assets/twin_routes.osm | 0 .../assets/twin_routes_with_divergence.osm | 0 .../tests}/sample_data_for_build_tracks.py | 0 .../sample_data_for_center_calculation.py | 0 .../tests}/sample_data_for_error_messages.py | 99 +- .../tests}/sample_data_for_outputs.py | 0 .../tests}/sample_data_for_twin_routes.py | 0 {tests => subways/tests}/test_build_tracks.py | 4 +- .../tests}/test_center_calculation.py | 6 +- .../tests}/test_error_messages.py | 6 +- .../tests}/test_find_transfers.py | 4 +- .../tests}/test_gtfs_processor.py | 12 +- .../tests}/test_mapsme_processor.py | 6 +- {tests => subways/tests}/test_overpass.py | 6 +- .../tests}/test_prepare_cities.py | 2 +- {tests => subways/tests}/test_projection.py | 22 +- {tests => subways/tests}/test_route_master.py | 6 +- {tests => subways/tests}/test_station.py | 2 +- {tests => subways/tests}/test_storage.py | 6 +- {tests => subways/tests}/util.py | 6 +- subways/types.py | 14 + subways/validation.py | 253 ++ {checkers => tools/checkers}/common.py | 0 .../checkers}/compare_city_caches.py | 0 .../checkers}/compare_json_outputs.py | 0 
.../legacy/mapsme_json_to_cities.py | 10 +- .../make_poly/make_all_metro_poly.py | 2 +- tools/make_poly/tests/__init__.py | 0 .../tests}/assets/cities_info_1city.csv | 0 .../tests}/assets/cities_info_2cities.csv | 0 .../tests}/test_make_all_metro_poly.py | 7 +- .../stop_areas}/make_stop_areas.py | 0 .../stop_areas}/make_tram_areas.py | 0 .../stop_areas}/requirements.txt | 0 {stop_areas => tools/stop_areas}/serve.py | 0 .../stop_areas}/templates/index.html | 0 .../v2h/v2h_templates.py | 0 .../v2h/validation_to_html.py | 2 +- 77 files changed, 3535 insertions(+), 3181 deletions(-) delete mode 100755 process_subways.py create mode 100755 scripts/process_subways.py delete mode 100644 subway_structure.py create mode 100644 subways/__init__.py create mode 100644 subways/consts.py rename css_colours.py => subways/css_colours.py (100%) create mode 100644 subways/geom_utils.py create mode 100644 subways/osm_element.py create mode 100644 subways/overpass.py rename {processors => subways/processors}/__init__.py (56%) rename {processors => subways/processors}/_common.py (95%) rename {processors => subways/processors}/gtfs.py (98%) rename {processors => subways/processors}/mapsme.py (97%) rename requirements.txt => subways/requirements.txt (100%) create mode 100644 subways/structure/__init__.py create mode 100644 subways/structure/city.py create mode 100644 subways/structure/route.py create mode 100644 subways/structure/route_master.py create mode 100644 subways/structure/route_stop.py create mode 100644 subways/structure/station.py create mode 100644 subways/structure/stop_area.py rename subway_io.py => subways/subway_io.py (96%) rename {tests => subways/tests}/README.md (100%) rename {tests => subways/tests}/__init__.py (100%) rename {tests => subways/tests}/assets/cities_info_with_bad_values.csv (100%) rename {tests => subways/tests}/assets/route_masters.osm (100%) rename {tests => subways/tests}/assets/tiny_world.osm (100%) rename {tests => 
subways/tests}/assets/tiny_world_gtfs/agency.txt (100%) rename {tests => subways/tests}/assets/tiny_world_gtfs/calendar.txt (100%) rename {tests => subways/tests}/assets/tiny_world_gtfs/frequencies.txt (100%) rename {tests => subways/tests}/assets/tiny_world_gtfs/routes.txt (100%) rename {tests => subways/tests}/assets/tiny_world_gtfs/shapes.txt (100%) rename {tests => subways/tests}/assets/tiny_world_gtfs/stop_times.txt (100%) rename {tests => subways/tests}/assets/tiny_world_gtfs/stops.txt (100%) rename {tests => subways/tests}/assets/tiny_world_gtfs/transfers.txt (100%) rename {tests => subways/tests}/assets/tiny_world_gtfs/trips.txt (100%) rename {tests => subways/tests}/assets/twin_routes.osm (100%) rename {tests => subways/tests}/assets/twin_routes_with_divergence.osm (100%) rename {tests => subways/tests}/sample_data_for_build_tracks.py (100%) rename {tests => subways/tests}/sample_data_for_center_calculation.py (100%) rename {tests => subways/tests}/sample_data_for_error_messages.py (78%) rename {tests => subways/tests}/sample_data_for_outputs.py (100%) rename {tests => subways/tests}/sample_data_for_twin_routes.py (100%) rename {tests => subways/tests}/test_build_tracks.py (96%) rename {tests => subways/tests}/test_center_calculation.py (91%) rename {tests => subways/tests}/test_error_messages.py (86%) rename {tests => subways/tests}/test_find_transfers.py (88%) rename {tests => subways/tests}/test_gtfs_processor.py (95%) rename {tests => subways/tests}/test_mapsme_processor.py (89%) rename {tests => subways/tests}/test_overpass.py (97%) rename {tests => subways/tests}/test_prepare_cities.py (96%) rename {tests => subways/tests}/test_projection.py (86%) rename {tests => subways/tests}/test_route_master.py (96%) rename {tests => subways/tests}/test_station.py (96%) rename {tests => subways/tests}/test_storage.py (86%) rename {tests => subways/tests}/util.py (98%) create mode 100644 subways/types.py create mode 100644 subways/validation.py rename {checkers 
=> tools/checkers}/common.py (100%) rename {checkers => tools/checkers}/compare_city_caches.py (100%) rename {checkers => tools/checkers}/compare_json_outputs.py (100%) rename mapsme_json_to_cities.py => tools/legacy/mapsme_json_to_cities.py (89%) rename make_all_metro_poly.py => tools/make_poly/make_all_metro_poly.py (95%) create mode 100644 tools/make_poly/tests/__init__.py rename {tests => tools/make_poly/tests}/assets/cities_info_1city.csv (100%) rename {tests => tools/make_poly/tests}/assets/cities_info_2cities.csv (100%) rename {tests => tools/make_poly/tests}/test_make_all_metro_poly.py (94%) rename {stop_areas => tools/stop_areas}/make_stop_areas.py (100%) rename {stop_areas => tools/stop_areas}/make_tram_areas.py (100%) rename {stop_areas => tools/stop_areas}/requirements.txt (100%) rename {stop_areas => tools/stop_areas}/serve.py (100%) rename {stop_areas => tools/stop_areas}/templates/index.html (100%) rename v2h_templates.py => tools/v2h/v2h_templates.py (100%) rename validation_to_html.py => tools/v2h/validation_to_html.py (99%) diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index b7352615..55ce3530 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -27,7 +27,7 @@ jobs: run: | python -m pip install --upgrade pip pip install flake8==6.0.0 black==23.1.0 shapely==2.0.1 - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + pip install -r subways/requirements.txt - name: Lint with flake8 run: | flake8 @@ -36,4 +36,4 @@ jobs: black --check --line-length 79 . - name: Test with unittest run: | - python -m unittest discover tests + python -m unittest discover tests \ No newline at end of file diff --git a/README.md b/README.md index b987e5f6..157e1ad2 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # Subway Preprocessor Here you see a list of scripts that can be used for preprocessing all the metro -systems in the world from OpenStreetMap. 
`subway_structure.py` produces +systems in the world from OpenStreetMap. `scripts/process_subways.py` produces a list of disjunct systems that can be used for routing and for displaying of metro maps. @@ -16,14 +16,14 @@ of metro maps. 2. If you don't specify `--xml` or `--source` option to the `process_subways.py` script it tries to fetch data over [Overpass API](https://wiki.openstreetmap.org/wiki/Overpass_API). **Not suitable for the whole planet or large countries.** -* Run `process_subways.py` with appropriate set of command line arguments +* Run `scripts/process_subways.py` with appropriate set of command line arguments to build metro structures and receive a validation log. -* Run `validation_to_html.py` on that log to create readable HTML tables. +* Run `tools/v2h/validation_to_html.py` on that log to create readable HTML tables. ## Validating of all metro networks -There is a `process_subways.sh` in the `scripts` directory that is suitable +There is a `scripts/process_subways.sh` script that is suitable for validation of all or many metro networks. It relies on a bunch of environment variables and takes advantage of previous validation runs for effective recurring validations. See @@ -51,17 +51,21 @@ a city's bbox has been extended. ## Validating of a single city A single city or a country with few metro networks can be validated much faster -if you allow the `process_subway.py` to fetch data from Overpass API. Here are the steps: +if you allow the `scripts/process_subways.py` to fetch data from Overpass API. Here are the steps: 1. Python3 interpreter required (3.11+) 2. Clone the repo - ``` + ```bash git clone https://github.com/alexey-zakharenkov/subways.git subways_validator cd subways_validator ``` -3. Execute +3. Install python dependencies + ```bash + pip install -r subways/requirements.txt + ``` +4. 
Execute ```bash - python3 ./process_subways.py -c "London" \ + python3 scripts/process_subways.py -c "London" \ -l validation.log -d London.yaml ``` here @@ -73,21 +77,21 @@ if you allow the `process_subway.py` to fetch data from Overpass API. Here are t `validation.log` would contain the list of errors and warnings. To convert it into pretty HTML format -4. do +5. do ```bash mkdir html - python3 ./validation_to_html.py validation.log html + python3 tools/v2h/validation_to_html.py validation.log html ``` ## Publishing validation reports to the Web Expose a directory with static contents via a web-server and put into it: -- HTML files from the directory specified in the 2nd parameter of `validation_to_html.py` +- HTML files from the directory specified in the 2nd parameter of `tools/v2h/validation_to_html.py` - To vitalize "Y" (YAML), "J" (GeoJSON) and "M" (Map) links beside each city name: - The contents of `render` directory from the repository - - `cities.txt` file generated with `--dump-city-list` parameter of `process_subways.py` - - YAML files created due to -d option of `process_subways.py` - - GeoJSON files created due to -j option of `process_subways.py` + - `cities.txt` file generated with `--dump-city-list` parameter of `scripts/process_subways.py` + - YAML files created due to -d option of `scripts/process_subways.py` + - GeoJSON files created due to -j option of `scripts/process_subways.py` ## Related external resources @@ -103,9 +107,9 @@ You can find more info about this validator instance in ## Adding Stop Areas To OSM -To quickly add `stop_area` relations for the entire city, use the `make_stop_areas.py` script -from the `stop_area` directory. Give it a bounding box or a `.json` file download from Overpass API. -It would produce an JOSM XML file that you should manually check in JOSM. After that +To quickly add `stop_area` relations for the entire city, use the `tools/stop_areas/make_stop_areas.py` script. 
+Give it a bounding box or a `.json` file download from Overpass API. +It would produce a JOSM XML file that you should manually check in JOSM. After that just upload it. ## Author and License diff --git a/process_subways.py b/process_subways.py deleted file mode 100755 index 3726f3ad..00000000 --- a/process_subways.py +++ /dev/null @@ -1,577 +0,0 @@ -#!/usr/bin/env python3 -import argparse -import csv -import inspect -import json -import logging -import os -import re -import sys -import time -import urllib.parse -import urllib.request -from functools import partial - -import processors -from subway_io import ( - dump_yaml, - load_xml, - make_geojson, - read_recovery_data, - write_recovery_data, -) -from subway_structure import ( - City, - CriticalValidationError, - find_transfers, - get_unused_subway_entrances_geojson, - LonLat, - MODES_OVERGROUND, - MODES_RAPID, - OsmElementT, -) - -DEFAULT_SPREADSHEET_ID = "1SEW1-NiNOnA2qDwievcxYV1FOaQl1mb1fdeyqAxHu3k" -DEFAULT_CITIES_INFO_URL = ( - "https://docs.google.com/spreadsheets/d/" - f"{DEFAULT_SPREADSHEET_ID}/export?format=csv" -) -BAD_MARK = "[bad]" - - -def compose_overpass_request( - overground: bool, bboxes: list[list[float]] -) -> str: - if not bboxes: - raise RuntimeError("No bboxes given for overpass request") - - query = "[out:json][timeout:1000];(" - modes = MODES_OVERGROUND if overground else MODES_RAPID - for bbox in bboxes: - bbox_part = f"({','.join(str(coord) for coord in bbox)})" - query += "(" - for mode in sorted(modes): - query += f'rel[route="{mode}"]{bbox_part};' - query += ");" - query += "rel(br)[type=route_master];" - if not overground: - query += f"node[railway=subway_entrance]{bbox_part};" - query += f"node[railway=train_station_entrance]{bbox_part};" - query += f"rel[public_transport=stop_area]{bbox_part};" - query += ( - "rel(br)[type=public_transport][public_transport=stop_area_group];" - ) - query += ");(._;>>;);out body center qt;" - logging.debug("Query: %s", query) - return query - - 
-def overpass_request( - overground: bool, overpass_api: str, bboxes: list[list[float]] -) -> list[OsmElementT]: - query = compose_overpass_request(overground, bboxes) - url = f"{overpass_api}?data={urllib.parse.quote(query)}" - response = urllib.request.urlopen(url, timeout=1000) - if (r_code := response.getcode()) != 200: - raise Exception(f"Failed to query Overpass API: HTTP {r_code}") - return json.load(response)["elements"] - - -def multi_overpass( - overground: bool, overpass_api: str, bboxes: list[list[float]] -) -> list[OsmElementT]: - SLICE_SIZE = 10 - INTERREQUEST_WAIT = 5 # in seconds - result = [] - for i in range(0, len(bboxes), SLICE_SIZE): - if i > 0: - time.sleep(INTERREQUEST_WAIT) - bboxes_i = bboxes[i : i + SLICE_SIZE] # noqa E203 - result.extend(overpass_request(overground, overpass_api, bboxes_i)) - return result - - -def slugify(name: str) -> str: - return re.sub(r"[^a-z0-9_-]+", "", name.lower().replace(" ", "_")) - - -def get_way_center( - element: OsmElementT, node_centers: dict[int, LonLat] -) -> LonLat | None: - """ - :param element: dict describing OSM element - :param node_centers: osm_id => (lat, lon) - :return: tuple with center coordinates, or None - """ - - # If elements have been queried via overpass-api with - # 'out center;' clause then ways already have 'center' attribute - if "center" in element: - return element["center"]["lon"], element["center"]["lat"] - - if "nodes" not in element: - return None - - center = [0, 0] - count = 0 - way_nodes = element["nodes"] - way_nodes_len = len(element["nodes"]) - for i, nd in enumerate(way_nodes): - if nd not in node_centers: - continue - # Don't count the first node of a closed way twice - if ( - i == way_nodes_len - 1 - and way_nodes_len > 1 - and way_nodes[0] == way_nodes[-1] - ): - break - center[0] += node_centers[nd][0] - center[1] += node_centers[nd][1] - count += 1 - if count == 0: - return None - element["center"] = {"lat": center[1] / count, "lon": center[0] / count} - return 
element["center"]["lon"], element["center"]["lat"] - - -def get_relation_center( - element: OsmElementT, - node_centers: dict[int, LonLat], - way_centers: dict[int, LonLat], - relation_centers: dict[int, LonLat], - ignore_unlocalized_child_relations: bool = False, -) -> LonLat | None: - """ - :param element: dict describing OSM element - :param node_centers: osm_id => LonLat - :param way_centers: osm_id => LonLat - :param relation_centers: osm_id => LonLat - :param ignore_unlocalized_child_relations: if a member that is a relation - has no center, skip it and calculate center based on member nodes, - ways and other, "localized" (with known centers), relations - :return: tuple with center coordinates, or None - """ - - # If elements have been queried via overpass-api with - # 'out center;' clause then some relations already have 'center' - # attribute. But this is not the case for relations composed only - # of other relations (e.g., route_master, stop_area_group or - # stop_area with only members that are multipolygons) - if "center" in element: - return element["center"]["lon"], element["center"]["lat"] - - center = [0, 0] - count = 0 - for m in element.get("members", list()): - m_id = m["ref"] - m_type = m["type"] - if m_type == "relation" and m_id not in relation_centers: - if ignore_unlocalized_child_relations: - continue - else: - # Cannot calculate fair center because the center - # of a child relation is not known yet - return None - member_container = ( - node_centers - if m_type == "node" - else way_centers - if m_type == "way" - else relation_centers - ) - if m_id in member_container: - center[0] += member_container[m_id][0] - center[1] += member_container[m_id][1] - count += 1 - if count == 0: - return None - element["center"] = {"lat": center[1] / count, "lon": center[0] / count} - return element["center"]["lon"], element["center"]["lat"] - - -def calculate_centers(elements: list[OsmElementT]) -> None: - """Adds 'center' key to each way/relation in 
elements, - except for empty ways or relations. - Relies on nodes-ways-relations order in the elements list. - """ - nodes: dict[int, LonLat] = {} # id => LonLat - ways: dict[int, LonLat] = {} # id => approx center LonLat - relations: dict[int, LonLat] = {} # id => approx center LonLat - - unlocalized_relations: list[OsmElementT] = [] # 'unlocalized' means - # the center of the relation has not been calculated yet - - for el in elements: - if el["type"] == "node": - nodes[el["id"]] = (el["lon"], el["lat"]) - elif el["type"] == "way": - if center := get_way_center(el, nodes): - ways[el["id"]] = center - elif el["type"] == "relation": - if center := get_relation_center(el, nodes, ways, relations): - relations[el["id"]] = center - else: - unlocalized_relations.append(el) - - def iterate_relation_centers_calculation( - ignore_unlocalized_child_relations: bool, - ) -> list[OsmElementT]: - unlocalized_relations_upd = [] - for rel in unlocalized_relations: - if center := get_relation_center( - rel, nodes, ways, relations, ignore_unlocalized_child_relations - ): - relations[rel["id"]] = center - else: - unlocalized_relations_upd.append(rel) - return unlocalized_relations_upd - - # Calculate centers for relations that have no one yet - while unlocalized_relations: - unlocalized_relations_upd = iterate_relation_centers_calculation(False) - progress = len(unlocalized_relations_upd) < len(unlocalized_relations) - if not progress: - unlocalized_relations_upd = iterate_relation_centers_calculation( - True - ) - progress = len(unlocalized_relations_upd) < len( - unlocalized_relations - ) - if not progress: - break - unlocalized_relations = unlocalized_relations_upd - - -def add_osm_elements_to_cities( - osm_elements: list[OsmElementT], cities: list[City] -) -> None: - for el in osm_elements: - for c in cities: - if c.contains(el): - c.add(el) - - -def validate_cities(cities: list[City]) -> list[City]: - """Validate cities. 
Return list of good cities.""" - good_cities = [] - for c in cities: - try: - c.extract_routes() - except CriticalValidationError as e: - logging.error( - "Critical validation error while processing %s: %s", - c.name, - e, - ) - c.error(str(e)) - except AssertionError as e: - logging.error( - "Validation logic error while processing %s: %s", - c.name, - e, - ) - c.error(f"Validation logic error: {e}") - else: - c.validate() - if c.is_good: - c.calculate_distances() - good_cities.append(c) - - return good_cities - - -def get_cities_info( - cities_info_url: str = DEFAULT_CITIES_INFO_URL, -) -> list[dict]: - response = urllib.request.urlopen(cities_info_url) - if ( - not cities_info_url.startswith("file://") - and (r_code := response.getcode()) != 200 - ): - raise Exception( - f"Failed to download cities spreadsheet: HTTP {r_code}" - ) - data = response.read().decode("utf-8") - reader = csv.DictReader( - data.splitlines(), - fieldnames=( - "id", - "name", - "country", - "continent", - "num_stations", - "num_lines", - "num_light_lines", - "num_interchanges", - "bbox", - "networks", - ), - ) - - cities_info = list() - names = set() - next(reader) # skipping the header - for city_info in reader: - if city_info["id"] and city_info["bbox"]: - cities_info.append(city_info) - name = city_info["name"].strip() - if name in names: - logging.warning( - "Duplicate city name in city list: %s", - city_info, - ) - names.add(name) - return cities_info - - -def prepare_cities( - cities_info_url: str = DEFAULT_CITIES_INFO_URL, overground: bool = False -) -> list[City]: - if overground: - raise NotImplementedError("Overground transit not implemented yet") - cities_info = get_cities_info(cities_info_url) - return list(map(partial(City, overground=overground), cities_info)) - - -def main() -> None: - parser = argparse.ArgumentParser() - parser.add_argument( - "--cities-info-url", - default=DEFAULT_CITIES_INFO_URL, - help=( - "URL of CSV file with reference information about rapid transit 
" - "networks. file:// protocol is also supported." - ), - ) - parser.add_argument( - "-i", - "--source", - help="File to write backup of OSM data, or to read data from", - ) - parser.add_argument( - "-x", "--xml", help="OSM extract with routes, to read data from" - ) - parser.add_argument( - "--overpass-api", - default="http://overpass-api.de/api/interpreter", - help="Overpass API URL", - ) - parser.add_argument( - "-q", - "--quiet", - action="store_true", - help="Show only warnings and errors", - ) - parser.add_argument( - "-c", "--city", help="Validate only a single city or a country" - ) - parser.add_argument( - "-t", - "--overground", - action="store_true", - help="Process overground transport instead of subways", - ) - parser.add_argument( - "-e", - "--entrances", - type=argparse.FileType("w", encoding="utf-8"), - help="Export unused subway entrances as GeoJSON here", - ) - parser.add_argument( - "-l", - "--log", - type=argparse.FileType("w", encoding="utf-8"), - help="Validation JSON file name", - ) - parser.add_argument( - "--dump-city-list", - type=argparse.FileType("w", encoding="utf-8"), - help=( - "Dump sorted list of all city names, possibly with " - f"{BAD_MARK} mark" - ), - ) - - for processor_name, processor in inspect.getmembers( - processors, inspect.ismodule - ): - if not processor_name.startswith("_"): - parser.add_argument( - f"--output-{processor_name}", - help=( - "Processed metro systems output filename " - f"in {processor_name.upper()} format" - ), - ) - - parser.add_argument("--cache", help="Cache file name for processed data") - parser.add_argument( - "-r", "--recovery-path", help="Cache file name for error recovery" - ) - parser.add_argument( - "-d", "--dump", help="Make a YAML file for a city data" - ) - parser.add_argument( - "-j", "--geojson", help="Make a GeoJSON file for a city data" - ) - parser.add_argument( - "--crude", - action="store_true", - help="Do not use OSM railway geometry for GeoJSON", - ) - options = 
parser.parse_args() - - if options.quiet: - log_level = logging.WARNING - else: - log_level = logging.INFO - logging.basicConfig( - level=log_level, - datefmt="%H:%M:%S", - format="%(asctime)s %(levelname)-7s %(message)s", - ) - - cities = prepare_cities(options.cities_info_url, options.overground) - if options.city: - cities = [ - c - for c in cities - if c.name == options.city or c.country == options.city - ] - if not cities: - logging.error("No cities to process") - sys.exit(2) - - # Augment cities with recovery data - recovery_data = None - if options.recovery_path: - recovery_data = read_recovery_data(options.recovery_path) - for city in cities: - city.recovery_data = recovery_data.get(city.name, None) - - logging.info("Read %s metro networks", len(cities)) - - # Reading cached json, loading XML or querying Overpass API - if options.source and os.path.exists(options.source): - logging.info("Reading %s", options.source) - with open(options.source, "r") as f: - osm = json.load(f) - if "elements" in osm: - osm = osm["elements"] - calculate_centers(osm) - elif options.xml: - logging.info("Reading %s", options.xml) - osm = load_xml(options.xml) - calculate_centers(osm) - if options.source: - with open(options.source, "w", encoding="utf-8") as f: - json.dump(osm, f) - else: - if len(cities) > 10: - logging.error( - "Would not download that many cities from Overpass API, " - "choose a smaller set" - ) - sys.exit(3) - bboxes = [c.bbox for c in cities] - logging.info("Downloading data from Overpass API") - osm = multi_overpass(options.overground, options.overpass_api, bboxes) - calculate_centers(osm) - if options.source: - with open(options.source, "w", encoding="utf-8") as f: - json.dump(osm, f) - logging.info("Downloaded %s elements", len(osm)) - - logging.info("Sorting elements by city") - add_osm_elements_to_cities(osm, cities) - - logging.info("Building routes for each city") - good_cities = validate_cities(cities) - - logging.info("Finding transfer stations") - 
transfers = find_transfers(osm, good_cities) - - good_city_names = set(c.name for c in good_cities) - logging.info( - "%s good cities: %s", - len(good_city_names), - ", ".join(sorted(good_city_names)), - ) - bad_city_names = set(c.name for c in cities) - good_city_names - logging.info( - "%s bad cities: %s", - len(bad_city_names), - ", ".join(sorted(bad_city_names)), - ) - - if options.dump_city_list: - lines = sorted( - f"{city.name}, {city.country}" - f"{' ' + BAD_MARK if city.name in bad_city_names else ''}\n" - for city in cities - ) - options.dump_city_list.writelines(lines) - - if options.recovery_path: - write_recovery_data(options.recovery_path, recovery_data, cities) - - if options.entrances: - json.dump(get_unused_subway_entrances_geojson(osm), options.entrances) - - if options.dump: - if os.path.isdir(options.dump): - for c in cities: - with open( - os.path.join(options.dump, slugify(c.name) + ".yaml"), - "w", - encoding="utf-8", - ) as f: - dump_yaml(c, f) - elif len(cities) == 1: - with open(options.dump, "w", encoding="utf-8") as f: - dump_yaml(cities[0], f) - else: - logging.error("Cannot dump %s cities at once", len(cities)) - - if options.geojson: - if os.path.isdir(options.geojson): - for c in cities: - with open( - os.path.join( - options.geojson, slugify(c.name) + ".geojson" - ), - "w", - encoding="utf-8", - ) as f: - json.dump(make_geojson(c, not options.crude), f) - elif len(cities) == 1: - with open(options.geojson, "w", encoding="utf-8") as f: - json.dump(make_geojson(cities[0], not options.crude), f) - else: - logging.error( - "Cannot make a geojson of %s cities at once", len(cities) - ) - - if options.log: - res = [] - for c in cities: - v = c.get_validation_result() - v["slug"] = slugify(c.name) - res.append(v) - json.dump(res, options.log, indent=2, ensure_ascii=False) - - for processor_name, processor in inspect.getmembers( - processors, inspect.ismodule - ): - option_name = f"output_{processor_name}" - - if not getattr(options, 
option_name, None): - continue - - filename = getattr(options, option_name) - processor.process(cities, transfers, filename, options.cache) - - -if __name__ == "__main__": - main() diff --git a/scripts/process_subways.py b/scripts/process_subways.py new file mode 100755 index 00000000..65d16007 --- /dev/null +++ b/scripts/process_subways.py @@ -0,0 +1,276 @@ +import argparse +import inspect +import json +import logging +import os +import re +import sys + +from subways import processors +from subways.overpass import multi_overpass +from subways.subway_io import ( + dump_yaml, + load_xml, + make_geojson, + read_recovery_data, + write_recovery_data, +) +from subways.structure.city import ( + find_transfers, + get_unused_subway_entrances_geojson, +) +from subways.validation import ( + add_osm_elements_to_cities, + BAD_MARK, + calculate_centers, + DEFAULT_CITIES_INFO_URL, + prepare_cities, + validate_cities, +) + + +def slugify(name: str) -> str: + return re.sub(r"[^a-z0-9_-]+", "", name.lower().replace(" ", "_")) + + +def main() -> None: + parser = argparse.ArgumentParser() + parser.add_argument( + "--cities-info-url", + default=DEFAULT_CITIES_INFO_URL, + help=( + "URL of CSV file with reference information about rapid transit " + "networks. file:// protocol is also supported." 
+ ), + ) + parser.add_argument( + "-i", + "--source", + help="File to write backup of OSM data, or to read data from", + ) + parser.add_argument( + "-x", "--xml", help="OSM extract with routes, to read data from" + ) + parser.add_argument( + "--overpass-api", + default="http://overpass-api.de/api/interpreter", + help="Overpass API URL", + ) + parser.add_argument( + "-q", + "--quiet", + action="store_true", + help="Show only warnings and errors", + ) + parser.add_argument( + "-c", "--city", help="Validate only a single city or a country" + ) + parser.add_argument( + "-t", + "--overground", + action="store_true", + help="Process overground transport instead of subways", + ) + parser.add_argument( + "-e", + "--entrances", + type=argparse.FileType("w", encoding="utf-8"), + help="Export unused subway entrances as GeoJSON here", + ) + parser.add_argument( + "-l", + "--log", + type=argparse.FileType("w", encoding="utf-8"), + help="Validation JSON file name", + ) + parser.add_argument( + "--dump-city-list", + type=argparse.FileType("w", encoding="utf-8"), + help=( + "Dump sorted list of all city names, possibly with " + f"{BAD_MARK} mark" + ), + ) + + for processor_name, processor in inspect.getmembers( + processors, inspect.ismodule + ): + if not processor_name.startswith("_"): + parser.add_argument( + f"--output-{processor_name}", + help=( + "Processed metro systems output filename " + f"in {processor_name.upper()} format" + ), + ) + + parser.add_argument("--cache", help="Cache file name for processed data") + parser.add_argument( + "-r", "--recovery-path", help="Cache file name for error recovery" + ) + parser.add_argument( + "-d", "--dump", help="Make a YAML file for a city data" + ) + parser.add_argument( + "-j", "--geojson", help="Make a GeoJSON file for a city data" + ) + parser.add_argument( + "--crude", + action="store_true", + help="Do not use OSM railway geometry for GeoJSON", + ) + options = parser.parse_args() + + if options.quiet: + log_level = 
logging.WARNING + else: + log_level = logging.INFO + logging.basicConfig( + level=log_level, + datefmt="%H:%M:%S", + format="%(asctime)s %(levelname)-7s %(message)s", + ) + + cities = prepare_cities(options.cities_info_url, options.overground) + if options.city: + cities = [ + c + for c in cities + if c.name == options.city or c.country == options.city + ] + if not cities: + logging.error("No cities to process") + sys.exit(2) + + # Augment cities with recovery data + recovery_data = None + if options.recovery_path: + recovery_data = read_recovery_data(options.recovery_path) + for city in cities: + city.recovery_data = recovery_data.get(city.name, None) + + logging.info("Read %s metro networks", len(cities)) + + # Reading cached json, loading XML or querying Overpass API + if options.source and os.path.exists(options.source): + logging.info("Reading %s", options.source) + with open(options.source, "r") as f: + osm = json.load(f) + if "elements" in osm: + osm = osm["elements"] + calculate_centers(osm) + elif options.xml: + logging.info("Reading %s", options.xml) + osm = load_xml(options.xml) + calculate_centers(osm) + if options.source: + with open(options.source, "w", encoding="utf-8") as f: + json.dump(osm, f) + else: + if len(cities) > 10: + logging.error( + "Would not download that many cities from Overpass API, " + "choose a smaller set" + ) + sys.exit(3) + bboxes = [c.bbox for c in cities] + logging.info("Downloading data from Overpass API") + osm = multi_overpass(options.overground, options.overpass_api, bboxes) + calculate_centers(osm) + if options.source: + with open(options.source, "w", encoding="utf-8") as f: + json.dump(osm, f) + logging.info("Downloaded %s elements", len(osm)) + + logging.info("Sorting elements by city") + add_osm_elements_to_cities(osm, cities) + + logging.info("Building routes for each city") + good_cities = validate_cities(cities) + + logging.info("Finding transfer stations") + transfers = find_transfers(osm, good_cities) + + 
good_city_names = set(c.name for c in good_cities) + logging.info( + "%s good cities: %s", + len(good_city_names), + ", ".join(sorted(good_city_names)), + ) + bad_city_names = set(c.name for c in cities) - good_city_names + logging.info( + "%s bad cities: %s", + len(bad_city_names), + ", ".join(sorted(bad_city_names)), + ) + + if options.dump_city_list: + lines = sorted( + f"{city.name}, {city.country}" + f"{' ' + BAD_MARK if city.name in bad_city_names else ''}\n" + for city in cities + ) + options.dump_city_list.writelines(lines) + + if options.recovery_path: + write_recovery_data(options.recovery_path, recovery_data, cities) + + if options.entrances: + json.dump(get_unused_subway_entrances_geojson(osm), options.entrances) + + if options.dump: + if os.path.isdir(options.dump): + for c in cities: + with open( + os.path.join(options.dump, slugify(c.name) + ".yaml"), + "w", + encoding="utf-8", + ) as f: + dump_yaml(c, f) + elif len(cities) == 1: + with open(options.dump, "w", encoding="utf-8") as f: + dump_yaml(cities[0], f) + else: + logging.error("Cannot dump %s cities at once", len(cities)) + + if options.geojson: + if os.path.isdir(options.geojson): + for c in cities: + with open( + os.path.join( + options.geojson, slugify(c.name) + ".geojson" + ), + "w", + encoding="utf-8", + ) as f: + json.dump(make_geojson(c, not options.crude), f) + elif len(cities) == 1: + with open(options.geojson, "w", encoding="utf-8") as f: + json.dump(make_geojson(cities[0], not options.crude), f) + else: + logging.error( + "Cannot make a geojson of %s cities at once", len(cities) + ) + + if options.log: + res = [] + for c in cities: + v = c.get_validation_result() + v["slug"] = slugify(c.name) + res.append(v) + json.dump(res, options.log, indent=2, ensure_ascii=False) + + for processor_name, processor in inspect.getmembers( + processors, inspect.ismodule + ): + option_name = f"output_{processor_name}" + + if not getattr(options, option_name, None): + continue + + filename = 
getattr(options, option_name) + processor.process(cities, transfers, filename, options.cache) + + +if __name__ == "__main__": + main() diff --git a/scripts/process_subways.sh b/scripts/process_subways.sh index 345dd2de..62a45e75 100755 --- a/scripts/process_subways.sh +++ b/scripts/process_subways.sh @@ -94,7 +94,7 @@ function check_poly() { if [ -n "$("$PYTHON" -c "import shapely" 2>&1)" ]; then "$PYTHON" -m pip install shapely==2.0.1 fi - "$PYTHON" "$SUBWAYS_PATH"/make_all_metro_poly.py \ + "$PYTHON" "$SUBWAYS_REPO_PATH"/tools/make_poly/make_all_metro_poly.py \ ${CITIES_INFO_URL:+--cities-info-url "$CITIES_INFO_URL"} > "$POLY" fi fi @@ -107,13 +107,15 @@ PYTHON=${PYTHON:-python3} # This will fail if there is no python "$PYTHON" --version > /dev/null -SUBWAYS_PATH="$(dirname "$0")/.." -if [ ! -f "$SUBWAYS_PATH/process_subways.py" ]; then +# "readlink -f" echoes canonicalized absolute path to a file/directory +SUBWAYS_REPO_PATH="$(readlink -f $(dirname "$0")/..)" + +if [ ! -f "$SUBWAYS_REPO_PATH/scripts/process_subways.py" ]; then echo "Please clone the subways repo to $SUBWAYS_PATH" exit 2 fi -TMPDIR="${TMPDIR:-$SUBWAYS_PATH}" +TMPDIR="${TMPDIR:-$SUBWAYS_REPO_PATH}" mkdir -p "$TMPDIR" # Downloading the latest version of the subways script @@ -242,7 +244,7 @@ if [ -n "${DUMP-}" ]; then fi VALIDATION="$TMPDIR/validation.json" -"$PYTHON" "$SUBWAYS_PATH/process_subways.py" ${QUIET:+-q} \ +"$PYTHON" "$SUBWAYS_REPO_PATH/scripts/process_subways.py" ${QUIET:+-q} \ -x "$FILTERED_DATA" -l "$VALIDATION" \ ${CITIES_INFO_URL:+--cities-info-url "$CITIES_INFO_URL"} \ ${MAPSME:+--output-mapsme "$MAPSME"} \ @@ -262,13 +264,13 @@ fi # Preparing HTML files if [ -z "${HTML_DIR-}" ]; then - HTML_DIR="$SUBWAYS_PATH/html" + HTML_DIR="$SUBWAYS_REPO_PATH/html" REMOVE_HTML=1 fi mkdir -p $HTML_DIR rm -f "$HTML_DIR"/*.html -"$PYTHON" "$SUBWAYS_PATH/validation_to_html.py" \ +"$PYTHON" "$SUBWAYS_REPO_PATH/tools/v2h/validation_to_html.py" \ ${CITIES_INFO_URL:+--cities-info-url 
"$CITIES_INFO_URL"} \ "$VALIDATION" "$HTML_DIR" diff --git a/subway_structure.py b/subway_structure.py deleted file mode 100644 index 94c6f479..00000000 --- a/subway_structure.py +++ /dev/null @@ -1,2505 +0,0 @@ -from __future__ import annotations - -import math -import re -from collections import Counter, defaultdict -from collections.abc import Callable, Collection, Iterator -from itertools import chain, islice -from typing import TypeAlias, TypeVar - -from css_colours import normalize_colour - -MAX_DISTANCE_TO_ENTRANCES = 300 # in meters -MAX_DISTANCE_STOP_TO_LINE = 50 # in meters -ALLOWED_STATIONS_MISMATCH = 0.02 # part of total station count -ALLOWED_TRANSFERS_MISMATCH = 0.07 # part of total interchanges count -ALLOWED_ANGLE_BETWEEN_STOPS = 45 # in degrees -DISALLOWED_ANGLE_BETWEEN_STOPS = 20 # in degrees -SUGGEST_TRANSFER_MIN_DISTANCE = 100 # in meters - -# If an object was moved not too far compared to previous script run, -# it is likely the same object -DISPLACEMENT_TOLERANCE = 300 # in meters - -MODES_RAPID = {"subway", "light_rail", "monorail", "train"} -MODES_OVERGROUND = {"tram", "bus", "trolleybus", "aerialway", "ferry"} -DEFAULT_MODES_RAPID = {"subway", "light_rail"} -DEFAULT_MODES_OVERGROUND = {"tram"} # TODO: bus and trolleybus? 
-ALL_MODES = MODES_RAPID | MODES_OVERGROUND -RAILWAY_TYPES = { - "rail", - "light_rail", - "subway", - "narrow_gauge", - "funicular", - "monorail", - "tram", -} -CONSTRUCTION_KEYS = ( - "construction", - "proposed", - "construction:railway", - "proposed:railway", -) - -used_entrances = set() - - -START_END_TIMES_RE = re.compile(r".*?(\d{2}):(\d{2})-(\d{2}):(\d{2}).*") - -OsmElementT: TypeAlias = dict -IdT: TypeAlias = str # Type of feature ids -TransferT: TypeAlias = set[IdT] # A transfer is a set of StopArea IDs -TransfersT: TypeAlias = list[TransferT] -LonLat: TypeAlias = tuple[float, float] -RailT: TypeAlias = list[LonLat] -T = TypeVar("T") - - -def get_start_end_times( - opening_hours: str, -) -> tuple[tuple[int, int], tuple[int, int]] | tuple[None, None]: - """Very simplified method to parse OSM opening_hours tag. - We simply take the first HH:MM-HH:MM substring which is the most probable - opening hours interval for the most of the weekdays. - """ - start_time, end_time = None, None - m = START_END_TIMES_RE.match(opening_hours) - if m: - ints = tuple(map(int, m.groups())) - start_time = (ints[0], ints[1]) - end_time = (ints[2], ints[3]) - return start_time, end_time - - -def osm_interval_to_seconds(interval_str: str) -> int | None: - """Convert to int an OSM value for 'interval'/'headway' tag - which may be in these formats: - HH:MM:SS, - HH:MM, - MM, - M - (https://wiki.openstreetmap.org/wiki/Key:interval#Format) - """ - hours, minutes, seconds = 0, 0, 0 - semicolon_count = interval_str.count(":") - try: - if semicolon_count == 0: - minutes = int(interval_str) - elif semicolon_count == 1: - hours, minutes = map(int, interval_str.split(":")) - elif semicolon_count == 2: - hours, minutes, seconds = map(int, interval_str.split(":")) - else: - return None - except ValueError: - return None - return seconds + 60 * minutes + 60 * 60 * hours - - -class CriticalValidationError(Exception): - """Is thrown if an error occurs - that prevents further validation of a 
city.""" - - -def el_id(el: OsmElementT) -> IdT | None: - if not el: - return None - if "type" not in el: - raise Exception("What is this element? {}".format(el)) - return el["type"][0] + str(el.get("id", el.get("ref", ""))) - - -def el_center(el: OsmElementT) -> LonLat | None: - if not el: - return None - if "lat" in el: - return el["lon"], el["lat"] - elif "center" in el: - return el["center"]["lon"], el["center"]["lat"] - return None - - -def distance(p1: LonLat, p2: LonLat) -> float: - if p1 is None or p2 is None: - raise Exception( - "One of arguments to distance({}, {}) is None".format(p1, p2) - ) - dx = math.radians(p1[0] - p2[0]) * math.cos( - 0.5 * math.radians(p1[1] + p2[1]) - ) - dy = math.radians(p1[1] - p2[1]) - return 6378137 * math.sqrt(dx * dx + dy * dy) - - -def is_near(p1: LonLat, p2: LonLat) -> bool: - return ( - p1[0] - 1e-8 <= p2[0] <= p1[0] + 1e-8 - and p1[1] - 1e-8 <= p2[1] <= p1[1] + 1e-8 - ) - - -def project_on_segment(p: LonLat, p1: LonLat, p2: LonLat) -> float | None: - """Given three points, return u - the position of projection of - point p onto segment p1p2 regarding point p1 and (p2-p1) direction vector - """ - dp = (p2[0] - p1[0], p2[1] - p1[1]) - d2 = dp[0] * dp[0] + dp[1] * dp[1] - if d2 < 1e-14: - return None - u = ((p[0] - p1[0]) * dp[0] + (p[1] - p1[1]) * dp[1]) / d2 - if not 0 <= u <= 1: - return None - return u - - -def project_on_line(p: LonLat, line: RailT) -> dict: - result = { - # In the first approximation, position on rails is the index of the - # closest vertex of line to the point p. Fractional value means that - # the projected point lies on a segment between two vertices. - # More than one value can occur if a route follows the same tracks - # more than once. 
- "positions_on_line": None, - "projected_point": None, # (lon, lat) - } - - if len(line) < 2: - return result - d_min = MAX_DISTANCE_STOP_TO_LINE * 5 - closest_to_vertex = False - # First, check vertices in the line - for i, vertex in enumerate(line): - d = distance(p, vertex) - if d < d_min: - result["positions_on_line"] = [i] - result["projected_point"] = vertex - d_min = d - closest_to_vertex = True - elif vertex == result["projected_point"]: - # Repeated occurrence of the track vertex in line, like Oslo Line 5 - result["positions_on_line"].append(i) - # And then calculate distances to each segment - for seg in range(len(line) - 1): - # Check bbox for speed - if not ( - ( - min(line[seg][0], line[seg + 1][0]) - MAX_DISTANCE_STOP_TO_LINE - <= p[0] - <= max(line[seg][0], line[seg + 1][0]) - + MAX_DISTANCE_STOP_TO_LINE - ) - and ( - min(line[seg][1], line[seg + 1][1]) - MAX_DISTANCE_STOP_TO_LINE - <= p[1] - <= max(line[seg][1], line[seg + 1][1]) - + MAX_DISTANCE_STOP_TO_LINE - ) - ): - continue - u = project_on_segment(p, line[seg], line[seg + 1]) - if u: - projected_point = ( - line[seg][0] + u * (line[seg + 1][0] - line[seg][0]), - line[seg][1] + u * (line[seg + 1][1] - line[seg][1]), - ) - d = distance(p, projected_point) - if d < d_min: - result["positions_on_line"] = [seg + u] - result["projected_point"] = projected_point - d_min = d - closest_to_vertex = False - elif projected_point == result["projected_point"]: - # Repeated occurrence of the track segment in line, - # like Oslo Line 5 - if not closest_to_vertex: - result["positions_on_line"].append(seg + u) - return result - - -def find_segment( - p: LonLat, line: RailT, start_vertex: int = 0 -) -> tuple[int, float] | tuple[None, None]: - """Returns index of a segment and a position inside it.""" - EPS = 1e-9 - for seg in range(start_vertex, len(line) - 1): - if is_near(p, line[seg]): - return seg, 0.0 - if line[seg][0] == line[seg + 1][0]: - if not (p[0] - EPS <= line[seg][0] <= p[0] + EPS): - continue - 
px = None - else: - px = (p[0] - line[seg][0]) / (line[seg + 1][0] - line[seg][0]) - if px is None or (0 <= px <= 1): - if line[seg][1] == line[seg + 1][1]: - if not (p[1] - EPS <= line[seg][1] <= p[1] + EPS): - continue - py = None - else: - py = (p[1] - line[seg][1]) / (line[seg + 1][1] - line[seg][1]) - if py is None or (0 <= py <= 1): - if py is None or px is None or (px - EPS <= py <= px + EPS): - return seg, px or py - return None, None - - -def distance_on_line( - p1: LonLat, p2: LonLat, line: RailT, start_vertex: int = 0 -) -> tuple[float, int] | None: - """Calculates distance via line between projections - of points p1 and p2. Returns a TUPLE of (d, vertex): - d is the distance and vertex is the number of the second - vertex, to continue calculations for the next point.""" - line_len = len(line) - seg1, pos1 = find_segment(p1, line, start_vertex) - if seg1 is None: - # logging.warn('p1 %s is not projected, st=%s', p1, start_vertex) - return None - seg2, pos2 = find_segment(p2, line, seg1) - if seg2 is None: - if line[0] == line[-1]: - line = line + line[1:] - seg2, pos2 = find_segment(p2, line, seg1) - if seg2 is None: - # logging.warn('p2 %s is not projected, st=%s', p2, start_vertex) - return None - if seg1 == seg2: - return distance(line[seg1], line[seg1 + 1]) * abs(pos2 - pos1), seg1 - if seg2 < seg1: - # Should not happen - raise Exception("Pos1 %s is after pos2 %s", seg1, seg2) - d = 0 - if pos1 < 1: - d += distance(line[seg1], line[seg1 + 1]) * (1 - pos1) - for i in range(seg1 + 1, seg2): - d += distance(line[i], line[i + 1]) - if pos2 > 0: - d += distance(line[seg2], line[seg2 + 1]) * pos2 - return d, seg2 % line_len - - -def angle_between(p1: LonLat, c: LonLat, p2: LonLat) -> float: - a = round( - abs( - math.degrees( - math.atan2(p1[1] - c[1], p1[0] - c[0]) - - math.atan2(p2[1] - c[1], p2[0] - c[0]) - ) - ) - ) - return a if a <= 180 else 360 - a - - -def format_elid_list(ids: Collection[IdT]) -> str: - msg = ", ".join(sorted(ids)[:20]) - if 
len(ids) > 20: - msg += ", ..." - return msg - - -class Station: - @staticmethod - def get_modes(el: OsmElementT) -> set[str]: - modes = {m for m in ALL_MODES if el["tags"].get(m) == "yes"} - if mode := el["tags"].get("station"): - modes.add(mode) - return modes - - @staticmethod - def is_station(el: OsmElementT, modes: set[str]) -> bool: - # public_transport=station is too ambiguous and unspecific to use, - # so we expect for it to be backed by railway=station. - if ( - "tram" in modes - and el.get("tags", {}).get("railway") == "tram_stop" - ): - return True - if el.get("tags", {}).get("railway") not in ("station", "halt"): - return False - for k in CONSTRUCTION_KEYS: - if k in el["tags"]: - return False - # Not checking for station=train, obviously - if "train" not in modes and Station.get_modes(el).isdisjoint(modes): - return False - return True - - def __init__(self, el: OsmElementT, city: City) -> None: - """Call this with a railway=station node.""" - if not Station.is_station(el, city.modes): - raise Exception( - "Station object should be instantiated from a station node. 
" - "Got: {}".format(el) - ) - - self.id: IdT = el_id(el) - self.element: OsmElementT = el - self.modes = Station.get_modes(el) - self.name = el["tags"].get("name", "?") - self.int_name = el["tags"].get( - "int_name", el["tags"].get("name:en", None) - ) - try: - self.colour = normalize_colour(el["tags"].get("colour", None)) - except ValueError as e: - self.colour = None - city.warn(str(e), el) - self.center = el_center(el) - if self.center is None: - raise Exception("Could not find center of {}".format(el)) - - def __repr__(self) -> str: - return "Station(id={}, modes={}, name={}, center={})".format( - self.id, ",".join(self.modes), self.name, self.center - ) - - -class StopArea: - @staticmethod - def is_stop(el: OsmElementT) -> bool: - if "tags" not in el: - return False - if el["tags"].get("railway") == "stop": - return True - if el["tags"].get("public_transport") == "stop_position": - return True - return False - - @staticmethod - def is_platform(el: OsmElementT) -> bool: - if "tags" not in el: - return False - if el["tags"].get("railway") in ("platform", "platform_edge"): - return True - if el["tags"].get("public_transport") == "platform": - return True - return False - - @staticmethod - def is_track(el: OsmElementT) -> bool: - if el["type"] != "way" or "tags" not in el: - return False - return el["tags"].get("railway") in RAILWAY_TYPES - - def __init__( - self, - station: Station, - city: City, - stop_area: OsmElementT | None = None, - ) -> None: - """Call this with a Station object.""" - - self.element: OsmElementT = stop_area or station.element - self.id: IdT = el_id(self.element) - self.station: Station = station - self.stops = set() # set of el_ids of stop_positions - self.platforms = set() # set of el_ids of platforms - self.exits = set() # el_id of subway_entrance/train_station_entrance - # for leaving the platform - self.entrances = set() # el_id of subway/train_station entrance - # for entering the platform - self.center = None # lon, lat of the 
station centre point - self.centers = {} # el_id -> (lon, lat) for all elements - self.transfer = None # el_id of a transfer relation - - self.modes = station.modes - self.name = station.name - self.int_name = station.int_name - self.colour = station.colour - - if stop_area: - self.name = stop_area["tags"].get("name", self.name) - self.int_name = stop_area["tags"].get( - "int_name", stop_area["tags"].get("name:en", self.int_name) - ) - try: - self.colour = ( - normalize_colour(stop_area["tags"].get("colour")) - or self.colour - ) - except ValueError as e: - city.warn(str(e), stop_area) - - self._process_members(station, city, stop_area) - else: - self._add_nearby_entrances(station, city) - - if self.exits and not self.entrances: - city.warn( - "Only exits for a station, no entrances", - stop_area or station.element, - ) - if self.entrances and not self.exits: - city.warn("No exits for a station", stop_area or station.element) - - for el in self.get_elements(): - self.centers[el] = el_center(city.elements[el]) - - """Calculate the center point of the station. This algorithm - cannot rely on a station node, since many stop_areas can share one. 
- Basically it averages center points of all platforms - and stop positions.""" - if len(self.stops) + len(self.platforms) == 0: - self.center = station.center - else: - self.center = [0, 0] - for sp in chain(self.stops, self.platforms): - spc = self.centers[sp] - for i in range(2): - self.center[i] += spc[i] - for i in range(2): - self.center[i] /= len(self.stops) + len(self.platforms) - - def _process_members( - self, station: Station, city: City, stop_area: OsmElementT - ) -> None: - # If we have a stop area, add all elements from it - tracks_detected = False - for m in stop_area["members"]: - k = el_id(m) - m_el = city.elements.get(k) - if not m_el or "tags" not in m_el: - continue - if Station.is_station(m_el, city.modes): - if k != station.id: - city.error("Stop area has multiple stations", stop_area) - elif StopArea.is_stop(m_el): - self.stops.add(k) - elif StopArea.is_platform(m_el): - self.platforms.add(k) - elif (entrance_type := m_el["tags"].get("railway")) in ( - "subway_entrance", - "train_station_entrance", - ): - if m_el["type"] != "node": - city.warn(f"{entrance_type} is not a node", m_el) - if ( - m_el["tags"].get("entrance") != "exit" - and m["role"] != "exit_only" - ): - self.entrances.add(k) - if ( - m_el["tags"].get("entrance") != "entrance" - and m["role"] != "entry_only" - ): - self.exits.add(k) - elif StopArea.is_track(m_el): - tracks_detected = True - - if tracks_detected: - city.warn("Tracks in a stop_area relation", stop_area) - - def _add_nearby_entrances(self, station: Station, city: City) -> None: - center = station.center - for entrance_el in ( - el - for el in city.elements.values() - if "tags" in el - and (entrance_type := el["tags"].get("railway")) - in ("subway_entrance", "train_station_entrance") - ): - entrance_id = el_id(entrance_el) - if entrance_id in city.stop_areas: - continue # This entrance belongs to some stop_area - c_center = el_center(entrance_el) - if ( - c_center - and distance(center, c_center) <= 
MAX_DISTANCE_TO_ENTRANCES - ): - if entrance_el["type"] != "node": - city.warn(f"{entrance_type} is not a node", entrance_el) - etag = entrance_el["tags"].get("entrance") - if etag != "exit": - self.entrances.add(entrance_id) - if etag != "entrance": - self.exits.add(entrance_id) - - def get_elements(self) -> set[IdT]: - result = {self.id, self.station.id} - result.update(self.entrances) - result.update(self.exits) - result.update(self.stops) - result.update(self.platforms) - return result - - def __repr__(self) -> str: - return ( - f"StopArea(id={self.id}, name={self.name}, station={self.station}," - f" transfer={self.transfer}, center={self.center})" - ) - - -class RouteStop: - def __init__(self, stoparea: StopArea) -> None: - self.stoparea: StopArea = stoparea - self.stop: LonLat = None # Stop position, possibly projected - self.distance = 0 # In meters from the start of the route - self.platform_entry = None # Platform el_id - self.platform_exit = None # Platform el_id - self.can_enter = False - self.can_exit = False - self.seen_stop = False - self.seen_platform_entry = False - self.seen_platform_exit = False - self.seen_station = False - - @property - def seen_platform(self) -> bool: - return self.seen_platform_entry or self.seen_platform_exit - - @staticmethod - def get_actual_role( - el: OsmElementT, role: str, modes: set[str] - ) -> str | None: - if StopArea.is_stop(el): - return "stop" - elif StopArea.is_platform(el): - return "platform" - elif Station.is_station(el, modes): - if "platform" in role: - return "platform" - else: - return "stop" - return None - - def add(self, member: dict, relation: OsmElementT, city: City) -> None: - el = city.elements[el_id(member)] - role = member["role"] - - if StopArea.is_stop(el): - if "platform" in role: - city.warn("Stop position in a platform role in a route", el) - if el["type"] != "node": - city.error("Stop position is not a node", el) - self.stop = el_center(el) - if "entry_only" not in role: - self.can_exit = 
True - if "exit_only" not in role: - self.can_enter = True - - elif Station.is_station(el, city.modes): - if el["type"] != "node": - city.notice("Station in route is not a node", el) - - if not self.seen_stop and not self.seen_platform: - self.stop = el_center(el) - self.can_enter = True - self.can_exit = True - - elif StopArea.is_platform(el): - if "stop" in role: - city.warn("Platform in a stop role in a route", el) - if "exit_only" not in role: - self.platform_entry = el_id(el) - self.can_enter = True - if "entry_only" not in role: - self.platform_exit = el_id(el) - self.can_exit = True - if not self.seen_stop: - self.stop = el_center(el) - - multiple_check = False - actual_role = RouteStop.get_actual_role(el, role, city.modes) - if actual_role == "platform": - if role == "platform_entry_only": - multiple_check = self.seen_platform_entry - self.seen_platform_entry = True - elif role == "platform_exit_only": - multiple_check = self.seen_platform_exit - self.seen_platform_exit = True - else: - if role != "platform" and "stop" not in role: - city.warn( - f'Platform "{el["tags"].get("name", "")}" ' - f'({el_id(el)}) with invalid role "{role}" in route', - relation, - ) - multiple_check = self.seen_platform - self.seen_platform_entry = True - self.seen_platform_exit = True - elif actual_role == "stop": - multiple_check = self.seen_stop - self.seen_stop = True - if multiple_check: - log_function = city.error if actual_role == "stop" else city.notice - log_function( - f'Multiple {actual_role}s for a station "' - f'{el["tags"].get("name", "")} ' - f"({el_id(el)}) in a route relation", - relation, - ) - - def __repr__(self) -> str: - return ( - "RouteStop(stop={}, pl_entry={}, pl_exit={}, stoparea={})".format( - self.stop, - self.platform_entry, - self.platform_exit, - self.stoparea, - ) - ) - - -class Route: - """The longest route for a city with a unique ref.""" - - @staticmethod - def is_route(el: OsmElementT, modes: set[str]) -> bool: - if ( - el["type"] != 
"relation" - or el.get("tags", {}).get("type") != "route" - ): - return False - if "members" not in el: - return False - if el["tags"].get("route") not in modes: - return False - for k in CONSTRUCTION_KEYS: - if k in el["tags"]: - return False - if "ref" not in el["tags"] and "name" not in el["tags"]: - return False - return True - - @staticmethod - def get_network(relation: OsmElementT) -> str | None: - for k in ("network:metro", "network", "operator"): - if k in relation["tags"]: - return relation["tags"][k] - return None - - @staticmethod - def get_interval(tags: dict) -> int | None: - v = None - for k in ("interval", "headway"): - if k in tags: - v = tags[k] - break - else: - for kk in tags: - if kk.startswith(k + ":"): - v = tags[kk] - break - if not v: - return None - return osm_interval_to_seconds(v) - - def stopareas(self) -> Iterator[StopArea]: - yielded_stopareas = set() - for route_stop in self: - stoparea = route_stop.stoparea - if stoparea not in yielded_stopareas: - yield stoparea - yielded_stopareas.add(stoparea) - - def __init__( - self, - relation: OsmElementT, - city: City, - master: OsmElementT | None = None, - ) -> None: - assert Route.is_route( - relation, city.modes - ), f"The relation does not seem to be a route: {relation}" - self.city = city - self.element: OsmElementT = relation - self.id: IdT = el_id(relation) - - self.ref = None - self.name = None - self.mode = None - self.colour = None - self.infill = None - self.network = None - self.interval = None - self.start_time = None - self.end_time = None - self.is_circular = False - self.stops: list[RouteStop] = [] - # Would be a list of (lon, lat) for the longest stretch. Can be empty. 
- self.tracks = None - # Index of the first stop that is located on/near the self.tracks - self.first_stop_on_rails_index = None - # Index of the last stop that is located on/near the self.tracks - self.last_stop_on_rails_index = None - - self.process_tags(master) - stop_position_elements = self.process_stop_members() - self.process_tracks(stop_position_elements) - - def build_longest_line(self) -> tuple[list[IdT], set[IdT]]: - line_nodes: set[IdT] = set() - last_track: list[IdT] = [] - track: list[IdT] = [] - warned_about_holes = False - for m in self.element["members"]: - el = self.city.elements.get(el_id(m), None) - if not el or not StopArea.is_track(el): - continue - if "nodes" not in el or len(el["nodes"]) < 2: - self.city.error("Cannot find nodes in a railway", el) - continue - nodes: list[IdT] = ["n{}".format(n) for n in el["nodes"]] - if m["role"] == "backward": - nodes.reverse() - line_nodes.update(nodes) - if not track: - is_first = True - track.extend(nodes) - else: - new_segment = list(nodes) # copying - if new_segment[0] == track[-1]: - track.extend(new_segment[1:]) - elif new_segment[-1] == track[-1]: - track.extend(reversed(new_segment[:-1])) - elif is_first and track[0] in ( - new_segment[0], - new_segment[-1], - ): - # We can reverse the track and try again - track.reverse() - if new_segment[0] == track[-1]: - track.extend(new_segment[1:]) - else: - track.extend(reversed(new_segment[:-1])) - else: - # Store the track if it is long and clean it - if not warned_about_holes: - self.city.warn( - "Hole in route rails near node {}".format( - track[-1] - ), - self.element, - ) - warned_about_holes = True - if len(track) > len(last_track): - last_track = track - track = [] - is_first = False - if len(track) > len(last_track): - last_track = track - # Remove duplicate points - last_track = [ - last_track[i] - for i in range(0, len(last_track)) - if i == 0 or last_track[i - 1] != last_track[i] - ] - return last_track, line_nodes - - def 
get_stop_projections(self) -> tuple[list[dict], Callable[[int], bool]]: - projected = [project_on_line(x.stop, self.tracks) for x in self.stops] - - def stop_near_tracks_criterion(stop_index: int) -> bool: - return ( - projected[stop_index]["projected_point"] is not None - and distance( - self.stops[stop_index].stop, - projected[stop_index]["projected_point"], - ) - <= MAX_DISTANCE_STOP_TO_LINE - ) - - return projected, stop_near_tracks_criterion - - def project_stops_on_line(self) -> dict: - projected, stop_near_tracks_criterion = self.get_stop_projections() - - projected_stops_data = { - "first_stop_on_rails_index": None, - "last_stop_on_rails_index": None, - "stops_on_longest_line": [], # list [{'route_stop': RouteStop, - # 'coords': LonLat, - # 'positions_on_rails': [] } - } - first_index = 0 - while first_index < len(self.stops) and not stop_near_tracks_criterion( - first_index - ): - first_index += 1 - projected_stops_data["first_stop_on_rails_index"] = first_index - - last_index = len(self.stops) - 1 - while last_index > projected_stops_data[ - "first_stop_on_rails_index" - ] and not stop_near_tracks_criterion(last_index): - last_index -= 1 - projected_stops_data["last_stop_on_rails_index"] = last_index - - for i, route_stop in enumerate(self.stops): - if not first_index <= i <= last_index: - continue - - if projected[i]["projected_point"] is None: - self.city.error( - 'Stop "{}" {} is nowhere near the tracks'.format( - route_stop.stoparea.name, route_stop.stop - ), - self.element, - ) - else: - stop_data = { - "route_stop": route_stop, - "coords": None, - "positions_on_rails": None, - } - projected_point = projected[i]["projected_point"] - # We've got two separate stations with a good stretch of - # railway tracks between them. Put these on tracks. 
- d = round(distance(route_stop.stop, projected_point)) - if d > MAX_DISTANCE_STOP_TO_LINE: - self.city.notice( - 'Stop "{}" {} is {} meters from the tracks'.format( - route_stop.stoparea.name, route_stop.stop, d - ), - self.element, - ) - else: - stop_data["coords"] = projected_point - stop_data["positions_on_rails"] = projected[i][ - "positions_on_line" - ] - projected_stops_data["stops_on_longest_line"].append(stop_data) - return projected_stops_data - - def calculate_distances(self) -> None: - dist = 0 - vertex = 0 - for i, stop in enumerate(self.stops): - if i > 0: - direct = distance(stop.stop, self.stops[i - 1].stop) - d_line = None - if ( - self.first_stop_on_rails_index - <= i - <= self.last_stop_on_rails_index - ): - d_line = distance_on_line( - self.stops[i - 1].stop, stop.stop, self.tracks, vertex - ) - if d_line and direct - 10 <= d_line[0] <= direct * 2: - vertex = d_line[1] - dist += round(d_line[0]) - else: - dist += round(direct) - stop.distance = dist - - def process_tags(self, master: OsmElementT) -> None: - relation = self.element - master_tags = {} if not master else master["tags"] - if "ref" not in relation["tags"] and "ref" not in master_tags: - self.city.notice("Missing ref on a route", relation) - self.ref = relation["tags"].get( - "ref", master_tags.get("ref", relation["tags"].get("name", None)) - ) - self.name = relation["tags"].get("name", None) - self.mode = relation["tags"]["route"] - if ( - "colour" not in relation["tags"] - and "colour" not in master_tags - and self.mode != "tram" - ): - self.city.notice("Missing colour on a route", relation) - try: - self.colour = normalize_colour( - relation["tags"].get("colour", master_tags.get("colour", None)) - ) - except ValueError as e: - self.colour = None - self.city.warn(str(e), relation) - try: - self.infill = normalize_colour( - relation["tags"].get( - "colour:infill", master_tags.get("colour:infill", None) - ) - ) - except ValueError as e: - self.infill = None - self.city.warn(str(e), 
relation) - self.network = Route.get_network(relation) - self.interval = Route.get_interval( - relation["tags"] - ) or Route.get_interval(master_tags) - self.start_time, self.end_time = get_start_end_times( - relation["tags"].get( - "opening_hours", master_tags.get("opening_hours", "") - ) - ) - if relation["tags"].get("public_transport:version") == "1": - self.city.warn( - "Public transport version is 1, which means the route " - "is an unsorted pile of objects", - relation, - ) - - def process_stop_members(self) -> list[OsmElementT]: - stations: set[StopArea] = set() # temporary for recording stations - seen_stops = False - seen_platforms = False - repeat_pos = None - stop_position_elements: list[OsmElementT] = [] - for m in self.element["members"]: - if "inactive" in m["role"]: - continue - k = el_id(m) - if k in self.city.stations: - st_list = self.city.stations[k] - st = st_list[0] - if len(st_list) > 1: - self.city.error( - f"Ambiguous station {st.name} in route. Please " - "use stop_position or split interchange stations", - self.element, - ) - el = self.city.elements[k] - actual_role = RouteStop.get_actual_role( - el, m["role"], self.city.modes - ) - if actual_role: - if m["role"] and actual_role not in m["role"]: - self.city.warn( - "Wrong role '{}' for {} {}".format( - m["role"], actual_role, k - ), - self.element, - ) - if repeat_pos is None: - if not self.stops or st not in stations: - stop = RouteStop(st) - self.stops.append(stop) - stations.add(st) - elif self.stops[-1].stoparea.id == st.id: - stop = self.stops[-1] - else: - # We've got a repeat - if ( - (seen_stops and seen_platforms) - or ( - actual_role == "stop" - and not seen_platforms - ) - or ( - actual_role == "platform" - and not seen_stops - ) - ): - # Circular route! 
- stop = RouteStop(st) - self.stops.append(stop) - stations.add(st) - else: - repeat_pos = 0 - if repeat_pos is not None: - if repeat_pos >= len(self.stops): - continue - # Check that the type matches - if (actual_role == "stop" and seen_stops) or ( - actual_role == "platform" and seen_platforms - ): - self.city.error( - 'Found an out-of-place {}: "{}" ({})'.format( - actual_role, el["tags"].get("name", ""), k - ), - self.element, - ) - continue - # Find the matching stop starting with index repeat_pos - while ( - repeat_pos < len(self.stops) - and self.stops[repeat_pos].stoparea.id != st.id - ): - repeat_pos += 1 - if repeat_pos >= len(self.stops): - self.city.error( - "Incorrect order of {}s at {}".format( - actual_role, k - ), - self.element, - ) - continue - stop = self.stops[repeat_pos] - - stop.add(m, self.element, self.city) - if repeat_pos is None: - seen_stops |= stop.seen_stop or stop.seen_station - seen_platforms |= stop.seen_platform - - if StopArea.is_stop(el): - stop_position_elements.append(el) - - continue - - if k not in self.city.elements: - if "stop" in m["role"] or "platform" in m["role"]: - raise CriticalValidationError( - f"{m['role']} {m['type']} {m['ref']} for route " - f"relation {self.element['id']} is not in the dataset" - ) - continue - el = self.city.elements[k] - if "tags" not in el: - self.city.error( - f"Untagged object {k} in a route", self.element - ) - continue - - is_under_construction = False - for ck in CONSTRUCTION_KEYS: - if ck in el["tags"]: - self.city.warn( - f"Under construction {m['role'] or 'feature'} {k} " - "in route. Consider setting 'inactive' role or " - "removing construction attributes", - self.element, - ) - is_under_construction = True - break - if is_under_construction: - continue - - if Station.is_station(el, self.city.modes): - # A station may be not included in this route due to previous - # 'stop area has multiple stations' error. No other error - # message is needed. 
- pass - elif el["tags"].get("railway") in ("station", "halt"): - self.city.error( - "Missing station={} on a {}".format(self.mode, m["role"]), - el, - ) - else: - actual_role = RouteStop.get_actual_role( - el, m["role"], self.city.modes - ) - if actual_role: - self.city.error( - f"{actual_role} {m['type']} {m['ref']} is not " - "connected to a station in route", - self.element, - ) - elif not StopArea.is_track(el): - self.city.warn( - "Unknown member type for {} {} in route".format( - m["type"], m["ref"] - ), - self.element, - ) - return stop_position_elements - - def process_tracks( - self, stop_position_elements: list[OsmElementT] - ) -> None: - tracks, line_nodes = self.build_longest_line() - - for stop_el in stop_position_elements: - stop_id = el_id(stop_el) - if stop_id not in line_nodes: - self.city.warn( - 'Stop position "{}" ({}) is not on tracks'.format( - stop_el["tags"].get("name", ""), stop_id - ), - self.element, - ) - - # self.tracks would be a list of (lon, lat) for the longest stretch. - # Can be empty. 
- self.tracks = [el_center(self.city.elements.get(k)) for k in tracks] - if ( - None in self.tracks - ): # usually, extending BBOX for the city is needed - self.tracks = [] - for n in filter(lambda x: x not in self.city.elements, tracks): - self.city.warn( - f"The dataset is missing the railway tracks node {n}", - self.element, - ) - break - - if len(self.stops) > 1: - self.is_circular = ( - self.stops[0].stoparea == self.stops[-1].stoparea - ) - if ( - self.is_circular - and self.tracks - and self.tracks[0] != self.tracks[-1] - ): - self.city.warn( - "Non-closed rail sequence in a circular route", - self.element, - ) - - projected_stops_data = self.project_stops_on_line() - self.check_and_recover_stops_order(projected_stops_data) - self.apply_projected_stops_data(projected_stops_data) - - def apply_projected_stops_data(self, projected_stops_data: dict) -> None: - """Store better stop coordinates and indexes of first/last stops - that lie on a continuous track line, to the instance attributes. - """ - for attr in ("first_stop_on_rails_index", "last_stop_on_rails_index"): - setattr(self, attr, projected_stops_data[attr]) - - for stop_data in projected_stops_data["stops_on_longest_line"]: - route_stop = stop_data["route_stop"] - route_stop.positions_on_rails = stop_data["positions_on_rails"] - if stop_coords := stop_data["coords"]: - route_stop.stop = stop_coords - - def get_extended_tracks(self) -> RailT: - """Amend tracks with points of leading/trailing self.stops - that were not projected onto the longest tracks line. - Return a new array. 
- """ - if self.first_stop_on_rails_index >= len(self.stops): - tracks = [route_stop.stop for route_stop in self.stops] - else: - tracks = ( - [ - route_stop.stop - for i, route_stop in enumerate(self.stops) - if i < self.first_stop_on_rails_index - ] - + self.tracks - + [ - route_stop.stop - for i, route_stop in enumerate(self.stops) - if i > self.last_stop_on_rails_index - ] - ) - return tracks - - def get_truncated_tracks(self, tracks: RailT) -> RailT: - """Truncate leading/trailing segments of `tracks` param - that are beyond the first and last stop locations. - Return a new array. - """ - if self.is_circular: - return tracks.copy() - - first_stop_location = find_segment(self.stops[0].stop, tracks, 0) - last_stop_location = find_segment(self.stops[-1].stop, tracks, 0) - - if last_stop_location != (None, None): - seg2, u2 = last_stop_location - if u2 == 0.0: - # Make seg2 the segment the last_stop_location is - # at the middle or end of - seg2 -= 1 - # u2 = 1.0 - if seg2 + 2 < len(tracks): - tracks = tracks[0 : seg2 + 2] # noqa E203 - tracks[-1] = self.stops[-1].stop - - if first_stop_location != (None, None): - seg1, u1 = first_stop_location - if u1 == 1.0: - # Make seg1 the segment the first_stop_location is - # at the beginning or middle of - seg1 += 1 - # u1 = 0.0 - if seg1 > 0: - tracks = tracks[seg1:] - tracks[0] = self.stops[0].stop - - return tracks - - def are_tracks_complete(self) -> bool: - return ( - self.first_stop_on_rails_index == 0 - and self.last_stop_on_rails_index == len(self) - 1 - ) - - def get_tracks_geometry(self) -> RailT: - tracks = self.get_extended_tracks() - tracks = self.get_truncated_tracks(tracks) - return tracks - - def check_stops_order_by_angle(self) -> tuple[list[str], list[str]]: - disorder_warnings = [] - disorder_errors = [] - for i, route_stop in enumerate( - islice(self.stops, 1, len(self.stops) - 1), start=1 - ): - angle = angle_between( - self.stops[i - 1].stop, - route_stop.stop, - self.stops[i + 1].stop, - ) - if angle 
< ALLOWED_ANGLE_BETWEEN_STOPS: - msg = ( - "Angle between stops around " - f'"{route_stop.stoparea.name}" {route_stop.stop} ' - f"is too narrow, {angle} degrees" - ) - if angle < DISALLOWED_ANGLE_BETWEEN_STOPS: - disorder_errors.append(msg) - else: - disorder_warnings.append(msg) - return disorder_warnings, disorder_errors - - def check_stops_order_on_tracks_direct( - self, stop_sequence: Iterator[dict] - ) -> str | None: - """Checks stops order on tracks, following stop_sequence - in direct order only. - :param stop_sequence: list of dict{'route_stop', 'positions_on_rails', - 'coords'} for RouteStops that belong to the longest contiguous - sequence of tracks in a route. - :return: error message on the first order violation or None. - """ - allowed_order_violations = 1 if self.is_circular else 0 - max_position_on_rails = -1 - for stop_data in stop_sequence: - positions_on_rails = stop_data["positions_on_rails"] - suitable_occurrence = 0 - while ( - suitable_occurrence < len(positions_on_rails) - and positions_on_rails[suitable_occurrence] - < max_position_on_rails - ): - suitable_occurrence += 1 - if suitable_occurrence == len(positions_on_rails): - if allowed_order_violations > 0: - suitable_occurrence -= 1 - allowed_order_violations -= 1 - else: - route_stop = stop_data["route_stop"] - return ( - "Stops on tracks are unordered near " - f'"{route_stop.stoparea.name}" {route_stop.stop}' - ) - max_position_on_rails = positions_on_rails[suitable_occurrence] - - def check_stops_order_on_tracks( - self, projected_stops_data: dict - ) -> str | None: - """Checks stops order on tracks, trying direct and reversed - order of stops in the stop_sequence. - :param projected_stops_data: info about RouteStops that belong to the - longest contiguous sequence of tracks in a route. May be changed - if tracks reversing is performed. - :return: error message on the first order violation or None. 
- """ - error_message = self.check_stops_order_on_tracks_direct( - projected_stops_data["stops_on_longest_line"] - ) - if error_message: - error_message_reversed = self.check_stops_order_on_tracks_direct( - reversed(projected_stops_data["stops_on_longest_line"]) - ) - if error_message_reversed is None: - error_message = None - self.city.warn( - "Tracks seem to go in the opposite direction to stops", - self.element, - ) - self.tracks.reverse() - new_projected_stops_data = self.project_stops_on_line() - projected_stops_data.update(new_projected_stops_data) - - return error_message - - def check_stops_order( - self, projected_stops_data: dict - ) -> tuple[list[str], list[str]]: - ( - angle_disorder_warnings, - angle_disorder_errors, - ) = self.check_stops_order_by_angle() - disorder_on_tracks_error = self.check_stops_order_on_tracks( - projected_stops_data - ) - disorder_warnings = angle_disorder_warnings - disorder_errors = angle_disorder_errors - if disorder_on_tracks_error: - disorder_errors.append(disorder_on_tracks_error) - return disorder_warnings, disorder_errors - - def check_and_recover_stops_order( - self, projected_stops_data: dict - ) -> None: - """ - :param projected_stops_data: may change if we need to reverse tracks - """ - disorder_warnings, disorder_errors = self.check_stops_order( - projected_stops_data - ) - if disorder_warnings or disorder_errors: - resort_success = False - if self.city.recovery_data: - resort_success = self.try_resort_stops() - if resort_success: - for msg in disorder_warnings: - self.city.notice(msg, self.element) - for msg in disorder_errors: - self.city.warn( - "Fixed with recovery data: " + msg, self.element - ) - - if not resort_success: - for msg in disorder_warnings: - self.city.notice(msg, self.element) - for msg in disorder_errors: - self.city.error(msg, self.element) - - def try_resort_stops(self) -> bool: - """Precondition: self.city.recovery_data is not None. 
- Return success of station order recovering.""" - self_stops = {} # station name => RouteStop - for stop in self.stops: - station = stop.stoparea.station - stop_name = station.name - if stop_name == "?" and station.int_name: - stop_name = station.int_name - # We won't programmatically recover routes with repeating stations: - # such cases are rare and deserves manual verification - if stop_name in self_stops: - return False - self_stops[stop_name] = stop - - route_id = (self.colour, self.ref) - if route_id not in self.city.recovery_data: - return False - - stop_names = list(self_stops.keys()) - suitable_itineraries = [] - for itinerary in self.city.recovery_data[route_id]: - itinerary_stop_names = [ - stop["name"] for stop in itinerary["stations"] - ] - if not ( - len(stop_names) == len(itinerary_stop_names) - and sorted(stop_names) == sorted(itinerary_stop_names) - ): - continue - big_station_displacement = False - for it_stop in itinerary["stations"]: - name = it_stop["name"] - it_stop_center = it_stop["center"] - self_stop_center = self_stops[name].stoparea.station.center - if ( - distance(it_stop_center, self_stop_center) - > DISPLACEMENT_TOLERANCE - ): - big_station_displacement = True - break - if not big_station_displacement: - suitable_itineraries.append(itinerary) - - if len(suitable_itineraries) == 0: - return False - elif len(suitable_itineraries) == 1: - matching_itinerary = suitable_itineraries[0] - else: - from_tag = self.element["tags"].get("from") - to_tag = self.element["tags"].get("to") - if not from_tag and not to_tag: - return False - matching_itineraries = [ - itin - for itin in suitable_itineraries - if from_tag - and itin["from"] == from_tag - or to_tag - and itin["to"] == to_tag - ] - if len(matching_itineraries) != 1: - return False - matching_itinerary = matching_itineraries[0] - self.stops = [ - self_stops[stop["name"]] for stop in matching_itinerary["stations"] - ] - return True - - def get_end_transfers(self) -> tuple[IdT, IdT]: - 
"""Using transfer ids because a train can arrive at different - stations within a transfer. But disregard transfer that may give - an impression of a circular route (for example, - Simonis / Elisabeth station and route 2 in Brussels). - """ - return ( - (self[0].stoparea.id, self[-1].stoparea.id) - if ( - self[0].stoparea.transfer is not None - and self[0].stoparea.transfer == self[-1].stoparea.transfer - ) - else ( - self[0].stoparea.transfer or self[0].stoparea.id, - self[-1].stoparea.transfer or self[-1].stoparea.id, - ) - ) - - def get_transfers_sequence(self) -> list[IdT]: - """Return a list of stoparea or transfer (if not None) ids.""" - transfer_seq = [ - stop.stoparea.transfer or stop.stoparea.id for stop in self - ] - if ( - self[0].stoparea.transfer is not None - and self[0].stoparea.transfer == self[-1].stoparea.transfer - ): - transfer_seq[0], transfer_seq[-1] = self.get_end_transfers() - return transfer_seq - - def __len__(self) -> int: - return len(self.stops) - - def __getitem__(self, i) -> RouteStop: - return self.stops[i] - - def __iter__(self) -> Iterator[RouteStop]: - return iter(self.stops) - - def __repr__(self) -> str: - return ( - "Route(id={}, mode={}, ref={}, name={}, network={}, interval={}, " - "circular={}, num_stops={}, line_length={} m, from={}, to={}" - ).format( - self.id, - self.mode, - self.ref, - self.name, - self.network, - self.interval, - self.is_circular, - len(self.stops), - self.stops[-1].distance, - self.stops[0], - self.stops[-1], - ) - - -class RouteMaster: - def __init__(self, city: City, master: OsmElementT = None) -> None: - self.city = city - self.routes = [] - self.best: Route = None - self.id: IdT = el_id(master) - self.has_master = master is not None - self.interval_from_master = False - if master: - self.ref = master["tags"].get( - "ref", master["tags"].get("name", None) - ) - try: - self.colour = normalize_colour( - master["tags"].get("colour", None) - ) - except ValueError: - self.colour = None - try: - 
self.infill = normalize_colour( - master["tags"].get("colour:infill", None) - ) - except ValueError: - self.infill = None - self.network = Route.get_network(master) - self.mode = master["tags"].get( - "route_master", None - ) # This tag is required, but okay - self.name = master["tags"].get("name", None) - self.interval = Route.get_interval(master["tags"]) - self.interval_from_master = self.interval is not None - else: - self.ref = None - self.colour = None - self.infill = None - self.network = None - self.mode = None - self.name = None - self.interval = None - - def stopareas(self) -> Iterator[StopArea]: - yielded_stopareas = set() - for route in self: - for stoparea in route.stopareas(): - if stoparea not in yielded_stopareas: - yield stoparea - yielded_stopareas.add(stoparea) - - def add(self, route: Route) -> None: - if not self.network: - self.network = route.network - elif route.network and route.network != self.network: - self.city.error( - 'Route has different network ("{}") from master "{}"'.format( - route.network, self.network - ), - route.element, - ) - - if not self.colour: - self.colour = route.colour - elif route.colour and route.colour != self.colour: - self.city.notice( - 'Route "{}" has different colour from master "{}"'.format( - route.colour, self.colour - ), - route.element, - ) - - if not self.infill: - self.infill = route.infill - elif route.infill and route.infill != self.infill: - self.city.notice( - ( - f'Route "{route.infill}" has different infill colour ' - f'from master "{self.infill}"' - ), - route.element, - ) - - if not self.ref: - self.ref = route.ref - elif route.ref != self.ref: - self.city.notice( - 'Route "{}" has different ref from master "{}"'.format( - route.ref, self.ref - ), - route.element, - ) - - if not self.name: - self.name = route.name - - if not self.mode: - self.mode = route.mode - elif route.mode != self.mode: - self.city.error( - "Incompatible PT mode: master has {} and route has {}".format( - self.mode, 
route.mode - ), - route.element, - ) - return - - if not self.interval_from_master and route.interval: - if not self.interval: - self.interval = route.interval - else: - self.interval = min(self.interval, route.interval) - - # Choose minimal id for determinancy - if not self.has_master and (not self.id or self.id > route.id): - self.id = route.id - - self.routes.append(route) - if ( - not self.best - or len(route.stops) > len(self.best.stops) - or ( - # Choose route with minimal id for determinancy - len(route.stops) == len(self.best.stops) - and route.element["id"] < self.best.element["id"] - ) - ): - self.best = route - - def get_meaningful_routes(self) -> list[Route]: - return [route for route in self if len(route) >= 2] - - def find_twin_routes(self) -> dict[Route, Route]: - """Two non-circular routes are twins if they have the same end - stations and opposite directions, and the number of stations is - the same or almost the same. We'll then find stops that are present - in one direction and is missing in another direction - to warn. - """ - - twin_routes = {} # route => "twin" route - - for route in self.get_meaningful_routes(): - if route.is_circular: - continue # Difficult to calculate. TODO(?) in the future - if route in twin_routes: - continue - - route_transfer_ids = set(route.get_transfers_sequence()) - ends = route.get_end_transfers() - ends_reversed = ends[::-1] - - twin_candidates = [ - r - for r in self - if not r.is_circular - and r not in twin_routes - and r.get_end_transfers() == ends_reversed - # If absolute or relative difference in station count is large, - # possibly it's an express version of a route - skip it. 
- and ( - abs(len(r) - len(route)) <= 2 - or abs(len(r) - len(route)) / max(len(r), len(route)) - <= 0.2 - ) - ] - - if not twin_candidates: - continue - - twin_route = min( - twin_candidates, - key=lambda r: len( - route_transfer_ids ^ set(r.get_transfers_sequence()) - ), - ) - twin_routes[route] = twin_route - twin_routes[twin_route] = route - - return twin_routes - - def check_return_routes(self) -> None: - """Check if a route has return direction, and if twin routes - miss stations. - """ - meaningful_routes = self.get_meaningful_routes() - - if len(meaningful_routes) == 0: - self.city.error( - f"An empty route master {self.id}. " - "Please set construction:route if it is under construction" - ) - elif len(meaningful_routes) == 1: - log_function = ( - self.city.error - if not self.best.is_circular - else self.city.notice - ) - log_function( - "Only one route in route_master. " - "Please check if it needs a return route", - self.best.element, - ) - else: - self.check_return_circular_routes() - self.check_return_noncircular_routes() - - def check_return_noncircular_routes(self) -> None: - routes = [ - route - for route in self.get_meaningful_routes() - if not route.is_circular - ] - all_ends = {route.get_end_transfers(): route for route in routes} - for route in routes: - ends = route.get_end_transfers() - if ends[::-1] not in all_ends: - self.city.notice( - "Route does not have a return direction", route.element - ) - - twin_routes = self.find_twin_routes() - for route1, route2 in twin_routes.items(): - if route1.id > route2.id: - continue # to process a pair of routes only once - # and to ensure the order of routes in the pair - self.alert_twin_routes_differ(route1, route2) - - def check_return_circular_routes(self) -> None: - routes = { - route - for route in self.get_meaningful_routes() - if route.is_circular - } - routes_having_backward = set() - - for route in routes: - if route in routes_having_backward: - continue - transfer_sequence1 = [ - 
stop.stoparea.transfer or stop.stoparea.id for stop in route - ] - transfer_sequence1.pop() - for potential_backward_route in routes - {route}: - transfer_sequence2 = [ - stop.stoparea.transfer or stop.stoparea.id - for stop in potential_backward_route - ][ - -2::-1 - ] # truncate repeated first stop and reverse - common_subsequence = self.find_common_circular_subsequence( - transfer_sequence1, transfer_sequence2 - ) - if len(common_subsequence) >= 0.8 * min( - len(transfer_sequence1), len(transfer_sequence2) - ): - routes_having_backward.add(route) - routes_having_backward.add(potential_backward_route) - break - - for route in routes - routes_having_backward: - self.city.notice( - "Route does not have a return direction", route.element - ) - - @staticmethod - def find_common_circular_subsequence( - seq1: list[T], seq2: list[T] - ) -> list[T]: - """seq1 and seq2 are supposed to be stops of some circular routes. - Prerequisites to rely on the result: - - elements of each sequence are not repeated - - the order of stations is not violated. - Under these conditions we don't need LCS algorithm. Linear scan is - sufficient. 
- """ - i1, i2 = -1, -1 - for i1, x in enumerate(seq1): - try: - i2 = seq2.index(x) - except ValueError: - continue - else: - # x is found both in seq1 and seq2 - break - - if i2 == -1: - return [] - - # Shift cyclically so that the common element takes the first position - # both in seq1 and seq2 - seq1 = seq1[i1:] + seq1[:i1] - seq2 = seq2[i2:] + seq2[:i2] - - common_subsequence = [] - i2 = 0 - for x in seq1: - try: - i2 = seq2.index(x, i2) - except ValueError: - continue - common_subsequence.append(x) - i2 += 1 - if i2 >= len(seq2): - break - return common_subsequence - - def alert_twin_routes_differ(self, route1: Route, route2: Route) -> None: - """Arguments are that route1.id < route2.id""" - ( - stops_missing_from_route1, - stops_missing_from_route2, - stops_that_dont_match, - ) = self.calculate_twin_routes_diff(route1, route2) - - for st in stops_missing_from_route1: - if ( - not route1.are_tracks_complete() - or ( - projected_point := project_on_line( - st.stoparea.center, route1.tracks - )["projected_point"] - ) - is not None - and distance(st.stoparea.center, projected_point) - <= MAX_DISTANCE_STOP_TO_LINE - ): - self.city.notice( - f"Stop {st.stoparea.station.name} {st.stop} is included " - f"in the {route2.id} but not included in {route1.id}", - route1.element, - ) - - for st in stops_missing_from_route2: - if ( - not route2.are_tracks_complete() - or ( - projected_point := project_on_line( - st.stoparea.center, route2.tracks - )["projected_point"] - ) - is not None - and distance(st.stoparea.center, projected_point) - <= MAX_DISTANCE_STOP_TO_LINE - ): - self.city.notice( - f"Stop {st.stoparea.station.name} {st.stop} is included " - f"in the {route1.id} but not included in {route2.id}", - route2.element, - ) - - for st1, st2 in stops_that_dont_match: - if ( - st1.stoparea.station == st2.stoparea.station - or distance(st1.stop, st2.stop) < SUGGEST_TRANSFER_MIN_DISTANCE - ): - self.city.notice( - "Should there be one stoparea or a transfer between " - 
f"{st1.stoparea.station.name} {st1.stop} and " - f"{st2.stoparea.station.name} {st2.stop}?", - route1.element, - ) - - @staticmethod - def calculate_twin_routes_diff(route1: Route, route2: Route) -> tuple: - """Wagner–Fischer algorithm for stops diff in two twin routes.""" - - stops1 = route1.stops - stops2 = route2.stops[::-1] - - def stops_match(stop1: RouteStop, stop2: RouteStop) -> bool: - return ( - stop1.stoparea == stop2.stoparea - or stop1.stoparea.transfer is not None - and stop1.stoparea.transfer == stop2.stoparea.transfer - ) - - d = [[0] * (len(stops2) + 1) for _ in range(len(stops1) + 1)] - d[0] = list(range(len(stops2) + 1)) - for i in range(len(stops1) + 1): - d[i][0] = i - - for i in range(1, len(stops1) + 1): - for j in range(1, len(stops2) + 1): - d[i][j] = ( - d[i - 1][j - 1] - if stops_match(stops1[i - 1], stops2[j - 1]) - else min((d[i - 1][j], d[i][j - 1], d[i - 1][j - 1])) + 1 - ) - - stops_missing_from_route1: list[RouteStop] = [] - stops_missing_from_route2: list[RouteStop] = [] - stops_that_dont_match: list[tuple[RouteStop, RouteStop]] = [] - - i = len(stops1) - j = len(stops2) - while not (i == 0 and j == 0): - action = None - if i > 0 and j > 0: - match = stops_match(stops1[i - 1], stops2[j - 1]) - if match and d[i - 1][j - 1] == d[i][j]: - action = "no" - elif not match and d[i - 1][j - 1] + 1 == d[i][j]: - action = "change" - if not action and i > 0 and d[i - 1][j] + 1 == d[i][j]: - action = "add_2" - if not action and j > 0 and d[i][j - 1] + 1 == d[i][j]: - action = "add_1" - - match action: - case "add_1": - stops_missing_from_route1.append(stops2[j - 1]) - j -= 1 - case "add_2": - stops_missing_from_route2.append(stops1[i - 1]) - i -= 1 - case _: - if action == "change": - stops_that_dont_match.append( - (stops1[i - 1], stops2[j - 1]) - ) - i -= 1 - j -= 1 - return ( - stops_missing_from_route1, - stops_missing_from_route2, - stops_that_dont_match, - ) - - def __len__(self) -> int: - return len(self.routes) - - def __getitem__(self, 
i) -> Route: - return self.routes[i] - - def __iter__(self) -> Iterator[Route]: - return iter(self.routes) - - def __repr__(self) -> str: - return ( - f"RouteMaster(id={self.id}, mode={self.mode}, ref={self.ref}, " - f"name={self.name}, network={self.network}, " - f"num_variants={len(self.routes)}" - ) - - -class City: - route_class = Route - - def __init__(self, city_data: dict, overground: bool = False) -> None: - self.validate_called = False - self.errors: list[str] = [] - self.warnings: list[str] = [] - self.notices: list[str] = [] - self.id = None - self.try_fill_int_attribute(city_data, "id") - self.name = city_data["name"] - self.country = city_data["country"] - self.continent = city_data["continent"] - self.overground = overground - if not overground: - self.try_fill_int_attribute(city_data, "num_stations") - self.try_fill_int_attribute(city_data, "num_lines", "0") - self.try_fill_int_attribute(city_data, "num_light_lines", "0") - self.try_fill_int_attribute(city_data, "num_interchanges", "0") - else: - self.try_fill_int_attribute(city_data, "num_tram_lines", "0") - self.try_fill_int_attribute(city_data, "num_trolleybus_lines", "0") - self.try_fill_int_attribute(city_data, "num_bus_lines", "0") - self.try_fill_int_attribute(city_data, "num_other_lines", "0") - - # Acquiring list of networks and modes - networks = ( - None - if not city_data["networks"] - else city_data["networks"].split(":") - ) - if not networks or len(networks[-1]) == 0: - self.networks = [] - else: - self.networks = set( - filter(None, [x.strip() for x in networks[-1].split(";")]) - ) - if not networks or len(networks) < 2 or len(networks[0]) == 0: - if self.overground: - self.modes = DEFAULT_MODES_OVERGROUND - else: - self.modes = DEFAULT_MODES_RAPID - else: - self.modes = {x.strip() for x in networks[0].split(",")} - - # Reversing bbox so it is (xmin, ymin, xmax, ymax) - bbox = city_data["bbox"].split(",") - if len(bbox) == 4: - self.bbox = [float(bbox[i]) for i in (1, 0, 3, 2)] - 
else: - self.bbox = None - - self.elements: dict[IdT, OsmElementT] = {} - self.stations: dict[IdT, list[StopArea]] = defaultdict(list) - self.routes: dict[str, RouteMaster] = {} # keys are route_master refs - self.masters: dict[IdT, OsmElementT] = {} # Route id → master element - self.stop_areas: [IdT, list[OsmElementT]] = defaultdict(list) - self.transfers: list[set[StopArea]] = [] - self.station_ids: set[IdT] = set() - self.stops_and_platforms: set[IdT] = set() - self.recovery_data = None - - def try_fill_int_attribute( - self, city_data: dict, attr: str, default: str | None = None - ) -> None: - """Try to convert string value to int. Conversion is considered - to fail if one of the following is true: - * attr is not empty and data type casting fails; - * attr is empty and no default value is given. - In such cases the city is marked as bad by adding an error - to the city validation log. - """ - attr_value = city_data[attr] - if not attr_value and default is not None: - attr_value = default - - try: - attr_int = int(attr_value) - except ValueError: - print_value = ( - f"{city_data[attr]}" if city_data[attr] else "" - ) - self.error( - f"Configuration error: wrong value for {attr}: {print_value}" - ) - setattr(self, attr, 0) - else: - setattr(self, attr, attr_int) - - @staticmethod - def log_message(message: str, el: OsmElementT) -> str: - if el: - tags = el.get("tags", {}) - message += ' ({} {}, "{}")'.format( - el["type"], - el.get("id", el.get("ref")), - tags.get("name", tags.get("ref", "")), - ) - return message - - def notice(self, message: str, el: OsmElementT | None = None) -> None: - """This type of message may point to a potential problem.""" - msg = City.log_message(message, el) - self.notices.append(msg) - - def warn(self, message: str, el: OsmElementT | None = None) -> None: - """A warning is definitely a problem but is doesn't prevent - from building a routing file and doesn't invalidate the city. 
- """ - msg = City.log_message(message, el) - self.warnings.append(msg) - - def error(self, message: str, el: OsmElementT | None = None) -> None: - """Error is a critical problem that invalidates the city.""" - msg = City.log_message(message, el) - self.errors.append(msg) - - def contains(self, el: OsmElementT) -> bool: - center = el_center(el) - if center: - return ( - self.bbox[0] <= center[1] <= self.bbox[2] - and self.bbox[1] <= center[0] <= self.bbox[3] - ) - return False - - def add(self, el: OsmElementT) -> None: - if el["type"] == "relation" and "members" not in el: - return - - self.elements[el_id(el)] = el - if not (el["type"] == "relation" and "tags" in el): - return - - relation_type = el["tags"].get("type") - if relation_type == "route_master": - for m in el["members"]: - if m["type"] != "relation": - continue - - if el_id(m) in self.masters: - self.error("Route in two route_masters", m) - self.masters[el_id(m)] = el - - elif el["tags"].get("public_transport") == "stop_area": - if relation_type != "public_transport": - self.warn( - "stop_area relation with " - f"type={relation_type}, needed type=public_transport", - el, - ) - return - - warned_about_duplicates = False - for m in el["members"]: - stop_areas = self.stop_areas[el_id(m)] - if el in stop_areas and not warned_about_duplicates: - self.warn("Duplicate element in a stop area", el) - warned_about_duplicates = True - else: - stop_areas.append(el) - - def make_transfer(self, stoparea_group: OsmElementT) -> None: - transfer: set[StopArea] = set() - for m in stoparea_group["members"]: - k = el_id(m) - el = self.elements.get(k) - if not el: - # A stoparea_group member may validly not belong to the city - # while the stoparea_group does - near the city bbox boundary - continue - if "tags" not in el: - self.warn( - "An untagged object {} in a stop_area_group".format(k), - stoparea_group, - ) - continue - if ( - el["type"] != "relation" - or el["tags"].get("type") != "public_transport" - or 
el["tags"].get("public_transport") != "stop_area" - ): - continue - if k in self.stations: - stoparea = self.stations[k][0] - transfer.add(stoparea) - if stoparea.transfer: - # TODO: properly process such cases. - # Counterexample 1: Paris, - # Châtelet subway station <-> - # "Châtelet - Les Halles" railway station <-> - # Les Halles subway station - # Counterexample 2: Saint-Petersburg, transfers - # Витебский вокзал <-> - # Пушкинская <-> - # Звенигородская - self.warn( - "Stop area {} belongs to multiple interchanges".format( - k - ) - ) - stoparea.transfer = el_id(stoparea_group) - if len(transfer) > 1: - self.transfers.append(transfer) - - def extract_routes(self) -> None: - # Extract stations - processed_stop_areas = set() - for el in self.elements.values(): - if Station.is_station(el, self.modes): - # See PR https://github.com/mapsme/subways/pull/98 - if ( - el["type"] == "relation" - and el["tags"].get("type") != "multipolygon" - ): - rel_type = el["tags"].get("type") - self.warn( - "A railway station cannot be a relation of type " - f"{rel_type}", - el, - ) - continue - st = Station(el, self) - self.station_ids.add(st.id) - if st.id in self.stop_areas: - stations = [] - for sa in self.stop_areas[st.id]: - stations.append(StopArea(st, self, sa)) - else: - stations = [StopArea(st, self)] - - for station in stations: - if station.id not in processed_stop_areas: - processed_stop_areas.add(station.id) - for st_el in station.get_elements(): - self.stations[st_el].append(station) - - # Check that stops and platforms belong to - # a single stop_area - for sp in chain(station.stops, station.platforms): - if sp in self.stops_and_platforms: - self.notice( - f"A stop or a platform {sp} belongs to " - "multiple stop areas, might be correct" - ) - else: - self.stops_and_platforms.add(sp) - - # Extract routes - for el in self.elements.values(): - if Route.is_route(el, self.modes): - if el["tags"].get("access") in ("no", "private"): - continue - route_id = el_id(el) - 
master = self.masters.get(route_id, None) - if self.networks: - network = Route.get_network(el) - if master: - master_network = Route.get_network(master) - else: - master_network = None - if ( - network not in self.networks - and master_network not in self.networks - ): - continue - - route = self.route_class(el, self, master) - if not route.stops: - self.warn("Route has no stops", el) - continue - elif len(route.stops) == 1: - self.warn("Route has only one stop", el) - continue - - k = el_id(master) if master else route.ref - if k not in self.routes: - self.routes[k] = RouteMaster(self, master) - self.routes[k].add(route) - - # Sometimes adding a route to a newly initialized RouteMaster - # can fail - if len(self.routes[k]) == 0: - del self.routes[k] - - # And while we're iterating over relations, find interchanges - if ( - el["type"] == "relation" - and el.get("tags", {}).get("public_transport", None) - == "stop_area_group" - ): - self.make_transfer(el) - - # Filter transfers, leaving only stations that belong to routes - own_stopareas = set(self.stopareas()) - - self.transfers = [ - inner_transfer - for inner_transfer in ( - own_stopareas.intersection(transfer) - for transfer in self.transfers - ) - if len(inner_transfer) > 1 - ] - - def __iter__(self) -> Iterator[RouteMaster]: - return iter(self.routes.values()) - - def stopareas(self) -> Iterator[StopArea]: - yielded_stopareas = set() - for route_master in self: - for stoparea in route_master.stopareas(): - if stoparea not in yielded_stopareas: - yield stoparea - yielded_stopareas.add(stoparea) - - @property - def is_good(self) -> bool: - if not (self.errors or self.validate_called): - raise RuntimeError( - "You mustn't refer to City.is_good property before calling " - "the City.validate() method unless an error already occurred." 
- ) - return len(self.errors) == 0 - - def get_validation_result(self) -> dict: - result = { - "name": self.name, - "country": self.country, - "continent": self.continent, - "stations_found": getattr(self, "found_stations", 0), - "transfers_found": getattr(self, "found_interchanges", 0), - "unused_entrances": getattr(self, "unused_entrances", 0), - "networks": getattr(self, "found_networks", 0), - } - if not self.overground: - result.update( - { - "subwayl_expected": getattr(self, "num_lines", 0), - "lightrl_expected": getattr(self, "num_light_lines", 0), - "subwayl_found": getattr(self, "found_lines", 0), - "lightrl_found": getattr(self, "found_light_lines", 0), - "stations_expected": getattr(self, "num_stations", 0), - "transfers_expected": getattr(self, "num_interchanges", 0), - } - ) - else: - result.update( - { - "stations_expected": 0, - "transfers_expected": 0, - "busl_expected": getattr(self, "num_bus_lines", 0), - "trolleybusl_expected": getattr( - self, "num_trolleybus_lines", 0 - ), - "traml_expected": getattr(self, "num_tram_lines", 0), - "otherl_expected": getattr(self, "num_other_lines", 0), - "busl_found": getattr(self, "found_bus_lines", 0), - "trolleybusl_found": getattr( - self, "found_trolleybus_lines", 0 - ), - "traml_found": getattr(self, "found_tram_lines", 0), - "otherl_found": getattr(self, "found_other_lines", 0), - } - ) - result["warnings"] = self.warnings - result["errors"] = self.errors - result["notices"] = self.notices - return result - - def count_unused_entrances(self) -> None: - global used_entrances - stop_areas = set() - for el in self.elements.values(): - if ( - el["type"] == "relation" - and "tags" in el - and el["tags"].get("public_transport") == "stop_area" - and "members" in el - ): - stop_areas.update([el_id(m) for m in el["members"]]) - unused = [] - not_in_sa = [] - for el in self.elements.values(): - if ( - el["type"] == "node" - and "tags" in el - and el["tags"].get("railway") == "subway_entrance" - ): - i = el_id(el) - 
if i in self.stations: - used_entrances.add(i) - if i not in stop_areas: - not_in_sa.append(i) - if i not in self.stations: - unused.append(i) - self.unused_entrances = len(unused) - self.entrances_not_in_stop_areas = len(not_in_sa) - if unused: - self.notice( - f"{len(unused)} subway entrances are not connected to a " - f"station: {format_elid_list(unused)}" - ) - if not_in_sa: - self.notice( - f"{len(not_in_sa)} subway entrances are not in stop_area " - f"relations: {format_elid_list(not_in_sa)}" - ) - - def validate_lines(self) -> None: - self.found_light_lines = len( - [x for x in self.routes.values() if x.mode != "subway"] - ) - self.found_lines = len(self.routes) - self.found_light_lines - if self.found_lines != self.num_lines: - self.error( - "Found {} subway lines, expected {}".format( - self.found_lines, self.num_lines - ) - ) - if self.found_light_lines != self.num_light_lines: - self.error( - "Found {} light rail lines, expected {}".format( - self.found_light_lines, self.num_light_lines - ) - ) - - def validate_overground_lines(self) -> None: - self.found_tram_lines = len( - [x for x in self.routes.values() if x.mode == "tram"] - ) - self.found_bus_lines = len( - [x for x in self.routes.values() if x.mode == "bus"] - ) - self.found_trolleybus_lines = len( - [x for x in self.routes.values() if x.mode == "trolleybus"] - ) - self.found_other_lines = len( - [ - x - for x in self.routes.values() - if x.mode not in ("bus", "trolleybus", "tram") - ] - ) - if self.found_tram_lines != self.num_tram_lines: - log_function = ( - self.error if self.found_tram_lines == 0 else self.notice - ) - log_function( - "Found {} tram lines, expected {}".format( - self.found_tram_lines, self.num_tram_lines - ), - ) - - def validate(self) -> None: - networks = Counter() - self.found_stations = 0 - unused_stations = set(self.station_ids) - for rmaster in self.routes.values(): - networks[str(rmaster.network)] += 1 - if not self.overground: - rmaster.check_return_routes() - 
route_stations = set() - for sa in rmaster.stopareas(): - route_stations.add(sa.transfer or sa.id) - unused_stations.discard(sa.station.id) - self.found_stations += len(route_stations) - if unused_stations: - self.unused_stations = len(unused_stations) - self.notice( - "{} unused stations: {}".format( - self.unused_stations, format_elid_list(unused_stations) - ) - ) - self.count_unused_entrances() - self.found_interchanges = len(self.transfers) - - if self.overground: - self.validate_overground_lines() - else: - self.validate_lines() - - if self.found_stations != self.num_stations: - msg = "Found {} stations in routes, expected {}".format( - self.found_stations, self.num_stations - ) - log_function = ( - self.error - if self.num_stations > 0 - and not ( - 0 - <= (self.num_stations - self.found_stations) - / self.num_stations - <= ALLOWED_STATIONS_MISMATCH - ) - else self.warn - ) - log_function(msg) - - if self.found_interchanges != self.num_interchanges: - msg = "Found {} interchanges, expected {}".format( - self.found_interchanges, self.num_interchanges - ) - log_function = ( - self.error - if self.num_interchanges != 0 - and not ( - (self.num_interchanges - self.found_interchanges) - / self.num_interchanges - <= ALLOWED_TRANSFERS_MISMATCH - ) - else self.warn - ) - log_function(msg) - - self.found_networks = len(networks) - if len(networks) > max(1, len(self.networks)): - n_str = "; ".join( - ["{} ({})".format(k, v) for k, v in networks.items()] - ) - self.notice("More than one network: {}".format(n_str)) - - self.validate_called = True - - def calculate_distances(self) -> None: - for route_master in self: - for route in route_master: - route.calculate_distances() - - -def find_transfers( - elements: list[OsmElementT], cities: Collection[City] -) -> TransfersT: - """As for now, two Cities may contain the same stoparea, but those - StopArea instances would have different python id. So we don't store - references to StopAreas, but only their ids. 
This is important at - inter-city interchanges. - """ - stop_area_groups = [ - el - for el in elements - if el["type"] == "relation" - and "members" in el - and el.get("tags", {}).get("public_transport") == "stop_area_group" - ] - - stopareas_in_cities_ids = set( - stoparea.id - for city in cities - if city.is_good - for stoparea in city.stopareas() - ) - - transfers = [] - for stop_area_group in stop_area_groups: - transfer: TransferT = set( - member_id - for member_id in ( - el_id(member) for member in stop_area_group["members"] - ) - if member_id in stopareas_in_cities_ids - ) - if len(transfer) > 1: - transfers.append(transfer) - return transfers - - -def get_unused_subway_entrances_geojson(elements: list[OsmElementT]) -> dict: - global used_entrances - features = [] - for el in elements: - if ( - el["type"] == "node" - and "tags" in el - and el["tags"].get("railway") == "subway_entrance" - ): - if el_id(el) not in used_entrances: - geometry = {"type": "Point", "coordinates": el_center(el)} - properties = { - k: v - for k, v in el["tags"].items() - if k not in ("railway", "entrance") - } - features.append( - { - "type": "Feature", - "geometry": geometry, - "properties": properties, - } - ) - return {"type": "FeatureCollection", "features": features} diff --git a/subways/__init__.py b/subways/__init__.py new file mode 100644 index 00000000..c734b54a --- /dev/null +++ b/subways/__init__.py @@ -0,0 +1,92 @@ +from .consts import ( + ALL_MODES, + CONSTRUCTION_KEYS, + DEFAULT_MODES_RAPID, + DEFAULT_MODES_OVERGROUND, + DISPLACEMENT_TOLERANCE, + MAX_DISTANCE_STOP_TO_LINE, + MODES_OVERGROUND, + MODES_RAPID, + RAILWAY_TYPES, +) +from .css_colours import normalize_colour +from .geom_utils import ( + angle_between, + distance, + distance_on_line, + find_segment, + is_near, + project_on_line, +) +from .osm_element import el_center, el_id +from .overpass import multi_overpass, overpass_request +from .subway_io import ( + dump_yaml, + load_xml, + make_geojson, + 
read_recovery_data, + write_recovery_data, +) +from .types import ( + CriticalValidationError, + IdT, + LonLat, + OsmElementT, + RailT, + TransferT, + TransfersT, +) +from .validation import ( + add_osm_elements_to_cities, + BAD_MARK, + calculate_centers, + DEFAULT_CITIES_INFO_URL, + DEFAULT_SPREADSHEET_ID, + get_cities_info, + prepare_cities, + validate_cities, +) + + +__all__ = [ + "ALL_MODES", + "CONSTRUCTION_KEYS", + "DEFAULT_MODES_RAPID", + "DEFAULT_MODES_OVERGROUND", + "DISPLACEMENT_TOLERANCE", + "MAX_DISTANCE_STOP_TO_LINE", + "MODES_OVERGROUND", + "MODES_RAPID", + "RAILWAY_TYPES", + "angle_between", + "distance", + "distance_on_line", + "find_segment", + "is_near", + "project_on_line", + "normalize_colour", + "el_center", + "el_id", + "overpass_request", + "multi_overpass", + "dump_yaml", + "load_xml", + "make_geojson", + "read_recovery_data", + "write_recovery_data", + "CriticalValidationError", + "IdT", + "LonLat", + "OsmElementT", + "RailT", + "TransferT", + "TransfersT", + "add_osm_elements_to_cities", + "BAD_MARK", + "calculate_centers", + "DEFAULT_CITIES_INFO_URL", + "DEFAULT_SPREADSHEET_ID", + "get_cities_info", + "prepare_cities", + "validate_cities", +] diff --git a/subways/consts.py b/subways/consts.py new file mode 100644 index 00000000..4d75426b --- /dev/null +++ b/subways/consts.py @@ -0,0 +1,26 @@ +MAX_DISTANCE_STOP_TO_LINE = 50 # in meters + +# If an object was moved not too far compared to previous validator run, +# it is likely the same object +DISPLACEMENT_TOLERANCE = 300 # in meters + +MODES_RAPID = {"subway", "light_rail", "monorail", "train"} +MODES_OVERGROUND = {"tram", "bus", "trolleybus", "aerialway", "ferry"} +DEFAULT_MODES_RAPID = {"subway", "light_rail"} +DEFAULT_MODES_OVERGROUND = {"tram"} # TODO: bus and trolleybus? 
+ALL_MODES = MODES_RAPID | MODES_OVERGROUND +RAILWAY_TYPES = { + "rail", + "light_rail", + "subway", + "narrow_gauge", + "funicular", + "monorail", + "tram", +} +CONSTRUCTION_KEYS = ( + "construction", + "proposed", + "construction:railway", + "proposed:railway", +) diff --git a/css_colours.py b/subways/css_colours.py similarity index 100% rename from css_colours.py rename to subways/css_colours.py diff --git a/subways/geom_utils.py b/subways/geom_utils.py new file mode 100644 index 00000000..30d1a2d7 --- /dev/null +++ b/subways/geom_utils.py @@ -0,0 +1,175 @@ +import math + +from subways.consts import MAX_DISTANCE_STOP_TO_LINE +from subways.types import LonLat, RailT + + +def distance(p1: LonLat, p2: LonLat) -> float: + if p1 is None or p2 is None: + raise Exception( + "One of arguments to distance({}, {}) is None".format(p1, p2) + ) + dx = math.radians(p1[0] - p2[0]) * math.cos( + 0.5 * math.radians(p1[1] + p2[1]) + ) + dy = math.radians(p1[1] - p2[1]) + return 6378137 * math.sqrt(dx * dx + dy * dy) + + +def is_near(p1: LonLat, p2: LonLat) -> bool: + return ( + p1[0] - 1e-8 <= p2[0] <= p1[0] + 1e-8 + and p1[1] - 1e-8 <= p2[1] <= p1[1] + 1e-8 + ) + + +def project_on_segment(p: LonLat, p1: LonLat, p2: LonLat) -> float | None: + """Given three points, return u - the position of projection of + point p onto segment p1p2 regarding point p1 and (p2-p1) direction vector + """ + dp = (p2[0] - p1[0], p2[1] - p1[1]) + d2 = dp[0] * dp[0] + dp[1] * dp[1] + if d2 < 1e-14: + return None + u = ((p[0] - p1[0]) * dp[0] + (p[1] - p1[1]) * dp[1]) / d2 + if not 0 <= u <= 1: + return None + return u + + +def project_on_line(p: LonLat, line: RailT) -> dict: + result = { + # In the first approximation, position on rails is the index of the + # closest vertex of line to the point p. Fractional value means that + # the projected point lies on a segment between two vertices. + # More than one value can occur if a route follows the same tracks + # more than once. 
+ "positions_on_line": None, + "projected_point": None, # (lon, lat) + } + + if len(line) < 2: + return result + d_min = MAX_DISTANCE_STOP_TO_LINE * 5 + closest_to_vertex = False + # First, check vertices in the line + for i, vertex in enumerate(line): + d = distance(p, vertex) + if d < d_min: + result["positions_on_line"] = [i] + result["projected_point"] = vertex + d_min = d + closest_to_vertex = True + elif vertex == result["projected_point"]: + # Repeated occurrence of the track vertex in line, like Oslo Line 5 + result["positions_on_line"].append(i) + # And then calculate distances to each segment + for seg in range(len(line) - 1): + # Check bbox for speed + if not ( + ( + min(line[seg][0], line[seg + 1][0]) - MAX_DISTANCE_STOP_TO_LINE + <= p[0] + <= max(line[seg][0], line[seg + 1][0]) + + MAX_DISTANCE_STOP_TO_LINE + ) + and ( + min(line[seg][1], line[seg + 1][1]) - MAX_DISTANCE_STOP_TO_LINE + <= p[1] + <= max(line[seg][1], line[seg + 1][1]) + + MAX_DISTANCE_STOP_TO_LINE + ) + ): + continue + u = project_on_segment(p, line[seg], line[seg + 1]) + if u: + projected_point = ( + line[seg][0] + u * (line[seg + 1][0] - line[seg][0]), + line[seg][1] + u * (line[seg + 1][1] - line[seg][1]), + ) + d = distance(p, projected_point) + if d < d_min: + result["positions_on_line"] = [seg + u] + result["projected_point"] = projected_point + d_min = d + closest_to_vertex = False + elif projected_point == result["projected_point"]: + # Repeated occurrence of the track segment in line, + # like Oslo Line 5 + if not closest_to_vertex: + result["positions_on_line"].append(seg + u) + return result + + +def find_segment( + p: LonLat, line: RailT, start_vertex: int = 0 +) -> tuple[int, float] | tuple[None, None]: + """Returns index of a segment and a position inside it.""" + EPS = 1e-9 + for seg in range(start_vertex, len(line) - 1): + if is_near(p, line[seg]): + return seg, 0.0 + if line[seg][0] == line[seg + 1][0]: + if not (p[0] - EPS <= line[seg][0] <= p[0] + EPS): + continue + 
px = None + else: + px = (p[0] - line[seg][0]) / (line[seg + 1][0] - line[seg][0]) + if px is None or (0 <= px <= 1): + if line[seg][1] == line[seg + 1][1]: + if not (p[1] - EPS <= line[seg][1] <= p[1] + EPS): + continue + py = None + else: + py = (p[1] - line[seg][1]) / (line[seg + 1][1] - line[seg][1]) + if py is None or (0 <= py <= 1): + if py is None or px is None or (px - EPS <= py <= px + EPS): + return seg, px or py + return None, None + + +def distance_on_line( + p1: LonLat, p2: LonLat, line: RailT, start_vertex: int = 0 +) -> tuple[float, int] | None: + """Calculates distance via line between projections + of points p1 and p2. Returns a TUPLE of (d, vertex): + d is the distance and vertex is the number of the second + vertex, to continue calculations for the next point.""" + line_len = len(line) + seg1, pos1 = find_segment(p1, line, start_vertex) + if seg1 is None: + # logging.warn('p1 %s is not projected, st=%s', p1, start_vertex) + return None + seg2, pos2 = find_segment(p2, line, seg1) + if seg2 is None: + if line[0] == line[-1]: + line = line + line[1:] + seg2, pos2 = find_segment(p2, line, seg1) + if seg2 is None: + # logging.warn('p2 %s is not projected, st=%s', p2, start_vertex) + return None + if seg1 == seg2: + return distance(line[seg1], line[seg1 + 1]) * abs(pos2 - pos1), seg1 + if seg2 < seg1: + # Should not happen + raise Exception("Pos1 %s is after pos2 %s", seg1, seg2) + d = 0 + if pos1 < 1: + d += distance(line[seg1], line[seg1 + 1]) * (1 - pos1) + for i in range(seg1 + 1, seg2): + d += distance(line[i], line[i + 1]) + if pos2 > 0: + d += distance(line[seg2], line[seg2 + 1]) * pos2 + return d, seg2 % line_len + + +def angle_between(p1: LonLat, c: LonLat, p2: LonLat) -> float: + a = round( + abs( + math.degrees( + math.atan2(p1[1] - c[1], p1[0] - c[0]) + - math.atan2(p2[1] - c[1], p2[0] - c[0]) + ) + ) + ) + return a if a <= 180 else 360 - a diff --git a/subways/osm_element.py b/subways/osm_element.py new file mode 100644 index 
00000000..5ea8bc4b --- /dev/null +++ b/subways/osm_element.py @@ -0,0 +1,19 @@ +from subways.types import IdT, LonLat, OsmElementT + + +def el_id(el: OsmElementT) -> IdT | None: + if not el: + return None + if "type" not in el: + raise Exception("What is this element? {}".format(el)) + return el["type"][0] + str(el.get("id", el.get("ref", ""))) + + +def el_center(el: OsmElementT) -> LonLat | None: + if not el: + return None + if "lat" in el: + return el["lon"], el["lat"] + elif "center" in el: + return el["center"]["lon"], el["center"]["lat"] + return None diff --git a/subways/overpass.py b/subways/overpass.py new file mode 100644 index 00000000..88c128e6 --- /dev/null +++ b/subways/overpass.py @@ -0,0 +1,60 @@ +import json +import logging +import time +import urllib.parse +import urllib.request + +from subways.consts import MODES_OVERGROUND, MODES_RAPID +from subways.types import OsmElementT + + +def compose_overpass_request( + overground: bool, bboxes: list[list[float]] +) -> str: + if not bboxes: + raise RuntimeError("No bboxes given for overpass request") + + query = "[out:json][timeout:1000];(" + modes = MODES_OVERGROUND if overground else MODES_RAPID + for bbox in bboxes: + bbox_part = f"({','.join(str(coord) for coord in bbox)})" + query += "(" + for mode in sorted(modes): + query += f'rel[route="{mode}"]{bbox_part};' + query += ");" + query += "rel(br)[type=route_master];" + if not overground: + query += f"node[railway=subway_entrance]{bbox_part};" + query += f"node[railway=train_station_entrance]{bbox_part};" + query += f"rel[public_transport=stop_area]{bbox_part};" + query += ( + "rel(br)[type=public_transport][public_transport=stop_area_group];" + ) + query += ");(._;>>;);out body center qt;" + logging.debug("Query: %s", query) + return query + + +def overpass_request( + overground: bool, overpass_api: str, bboxes: list[list[float]] +) -> list[OsmElementT]: + query = compose_overpass_request(overground, bboxes) + url = 
f"{overpass_api}?data={urllib.parse.quote(query)}" + response = urllib.request.urlopen(url, timeout=1000) + if (r_code := response.getcode()) != 200: + raise Exception(f"Failed to query Overpass API: HTTP {r_code}") + return json.load(response)["elements"] + + +def multi_overpass( + overground: bool, overpass_api: str, bboxes: list[list[float]] +) -> list[OsmElementT]: + SLICE_SIZE = 10 + INTERREQUEST_WAIT = 5 # in seconds + result = [] + for i in range(0, len(bboxes), SLICE_SIZE): + if i > 0: + time.sleep(INTERREQUEST_WAIT) + bboxes_i = bboxes[i : i + SLICE_SIZE] # noqa E203 + result.extend(overpass_request(overground, overpass_api, bboxes_i)) + return result diff --git a/processors/__init__.py b/subways/processors/__init__.py similarity index 56% rename from processors/__init__.py rename to subways/processors/__init__.py index 4f5ed844..89ae0169 100644 --- a/processors/__init__.py +++ b/subways/processors/__init__.py @@ -1,4 +1,8 @@ # Import only those processors (modules) you want to use. # Ignore F401 "module imported but unused" violation since these modules # are addressed via introspection. -from . import mapsme, gtfs # noqa F401 +from . 
import gtfs, mapsme # noqa F401 +from ._common import transit_to_dict + + +__all__ = ["gtfs", "mapsme", "transit_to_dict"] diff --git a/processors/_common.py b/subways/processors/_common.py similarity index 95% rename from processors/_common.py rename to subways/processors/_common.py index d60ff070..1d58da45 100644 --- a/processors/_common.py +++ b/subways/processors/_common.py @@ -1,4 +1,12 @@ -from subway_structure import City, el_center, TransfersT +from __future__ import annotations + +import typing + +from subways.osm_element import el_center +from subways.types import TransfersT + +if typing.TYPE_CHECKING: + from subways.structure.city import City DEFAULT_INTERVAL = 2.5 * 60 # seconds KMPH_TO_MPS = 1 / 3.6 # km/h to m/s conversion multiplier diff --git a/processors/gtfs.py b/subways/processors/gtfs.py similarity index 98% rename from processors/gtfs.py rename to subways/processors/gtfs.py index 463443ec..3722815f 100644 --- a/processors/gtfs.py +++ b/subways/processors/gtfs.py @@ -1,4 +1,7 @@ +from __future__ import annotations + import csv +import typing from functools import partial from io import BytesIO, StringIO from itertools import permutations @@ -12,11 +15,11 @@ TRANSFER_PENALTY, transit_to_dict, ) -from subway_structure import ( - City, - distance, - TransfersT, -) +from subways.types import TransfersT +from subways.geom_utils import distance + +if typing.TYPE_CHECKING: + from subways.structure.city import City DEFAULT_TRIP_START_TIME = (5, 0) # 05:00 diff --git a/processors/mapsme.py b/subways/processors/mapsme.py similarity index 97% rename from processors/mapsme.py rename to subways/processors/mapsme.py index e87ffe0e..e176832b 100755 --- a/processors/mapsme.py +++ b/subways/processors/mapsme.py @@ -1,22 +1,18 @@ +from __future__ import annotations + import json import logging import os +import typing from collections import defaultdict from collections.abc import Callable from typing import Any, TypeAlias -from subway_structure import ( - City, 
- DISPLACEMENT_TOLERANCE, - distance, - el_center, - IdT, - LonLat, - OsmElementT, - Station, - StopArea, - TransfersT, -) +from subways.consts import DISPLACEMENT_TOLERANCE +from subways.geom_utils import distance +from subways.osm_element import el_center +from subways.structure.station import Station +from subways.types import IdT, LonLat, OsmElementT, TransfersT from ._common import ( DEFAULT_INTERVAL, format_colour, @@ -25,6 +21,11 @@ TRANSFER_PENALTY, ) +if typing.TYPE_CHECKING: + from subways.structure.city import City + from subways.structure.stop_area import StopArea + + OSM_TYPES = {"n": (0, "node"), "w": (2, "way"), "r": (3, "relation")} ENTRANCE_PENALTY = 60 # seconds SPEED_TO_ENTRANCE = 5 * KMPH_TO_MPS # m/s diff --git a/requirements.txt b/subways/requirements.txt similarity index 100% rename from requirements.txt rename to subways/requirements.txt diff --git a/subways/structure/__init__.py b/subways/structure/__init__.py new file mode 100644 index 00000000..6ef67d13 --- /dev/null +++ b/subways/structure/__init__.py @@ -0,0 +1,17 @@ +from .city import City, get_unused_subway_entrances_geojson +from .route import Route +from .route_master import RouteMaster +from .route_stop import RouteStop +from .station import Station +from .stop_area import StopArea + + +__all__ = [ + "City", + "get_unused_subway_entrances_geojson", + "Route", + "RouteMaster", + "RouteStop", + "Station", + "StopArea", +] diff --git a/subways/structure/city.py b/subways/structure/city.py new file mode 100644 index 00000000..441c08b1 --- /dev/null +++ b/subways/structure/city.py @@ -0,0 +1,626 @@ +from __future__ import annotations + +from collections import Counter, defaultdict +from collections.abc import Collection, Iterator +from itertools import chain + +from subways.consts import ( + DEFAULT_MODES_OVERGROUND, + DEFAULT_MODES_RAPID, +) +from subways.osm_element import el_center, el_id +from subways.structure.route import Route +from subways.structure.route_master import 
RouteMaster +from subways.structure.station import Station +from subways.structure.stop_area import StopArea +from subways.types import ( + IdT, + OsmElementT, + TransfersT, + TransferT, +) + +ALLOWED_STATIONS_MISMATCH = 0.02 # part of total station count +ALLOWED_TRANSFERS_MISMATCH = 0.07 # part of total interchanges count + +used_entrances = set() + + +def format_elid_list(ids: Collection[IdT]) -> str: + msg = ", ".join(sorted(ids)[:20]) + if len(ids) > 20: + msg += ", ..." + return msg + + +class City: + route_class = Route + + def __init__(self, city_data: dict, overground: bool = False) -> None: + self.validate_called = False + self.errors: list[str] = [] + self.warnings: list[str] = [] + self.notices: list[str] = [] + self.id = None + self.try_fill_int_attribute(city_data, "id") + self.name = city_data["name"] + self.country = city_data["country"] + self.continent = city_data["continent"] + self.overground = overground + if not overground: + self.try_fill_int_attribute(city_data, "num_stations") + self.try_fill_int_attribute(city_data, "num_lines", "0") + self.try_fill_int_attribute(city_data, "num_light_lines", "0") + self.try_fill_int_attribute(city_data, "num_interchanges", "0") + else: + self.try_fill_int_attribute(city_data, "num_tram_lines", "0") + self.try_fill_int_attribute(city_data, "num_trolleybus_lines", "0") + self.try_fill_int_attribute(city_data, "num_bus_lines", "0") + self.try_fill_int_attribute(city_data, "num_other_lines", "0") + + # Acquiring list of networks and modes + networks = ( + None + if not city_data["networks"] + else city_data["networks"].split(":") + ) + if not networks or len(networks[-1]) == 0: + self.networks = [] + else: + self.networks = set( + filter(None, [x.strip() for x in networks[-1].split(";")]) + ) + if not networks or len(networks) < 2 or len(networks[0]) == 0: + if self.overground: + self.modes = DEFAULT_MODES_OVERGROUND + else: + self.modes = DEFAULT_MODES_RAPID + else: + self.modes = {x.strip() for x in 
networks[0].split(",")} + + # Reversing bbox so it is (xmin, ymin, xmax, ymax) + bbox = city_data["bbox"].split(",") + if len(bbox) == 4: + self.bbox = [float(bbox[i]) for i in (1, 0, 3, 2)] + else: + self.bbox = None + + self.elements: dict[IdT, OsmElementT] = {} + self.stations: dict[IdT, list[StopArea]] = defaultdict(list) + self.routes: dict[str, RouteMaster] = {} # keys are route_master refs + self.masters: dict[IdT, OsmElementT] = {} # Route id → master element + self.stop_areas: [IdT, list[OsmElementT]] = defaultdict(list) + self.transfers: list[set[StopArea]] = [] + self.station_ids: set[IdT] = set() + self.stops_and_platforms: set[IdT] = set() + self.recovery_data = None + + def try_fill_int_attribute( + self, city_data: dict, attr: str, default: str | None = None + ) -> None: + """Try to convert string value to int. Conversion is considered + to fail if one of the following is true: + * attr is not empty and data type casting fails; + * attr is empty and no default value is given. + In such cases the city is marked as bad by adding an error + to the city validation log. 
+ """ + attr_value = city_data[attr] + if not attr_value and default is not None: + attr_value = default + + try: + attr_int = int(attr_value) + except ValueError: + print_value = ( + f"{city_data[attr]}" if city_data[attr] else "" + ) + self.error( + f"Configuration error: wrong value for {attr}: {print_value}" + ) + setattr(self, attr, 0) + else: + setattr(self, attr, attr_int) + + @staticmethod + def log_message(message: str, el: OsmElementT) -> str: + if el: + tags = el.get("tags", {}) + message += ' ({} {}, "{}")'.format( + el["type"], + el.get("id", el.get("ref")), + tags.get("name", tags.get("ref", "")), + ) + return message + + def notice(self, message: str, el: OsmElementT | None = None) -> None: + """This type of message may point to a potential problem.""" + msg = City.log_message(message, el) + self.notices.append(msg) + + def warn(self, message: str, el: OsmElementT | None = None) -> None: + """A warning is definitely a problem but is doesn't prevent + from building a routing file and doesn't invalidate the city. 
+ """ + msg = City.log_message(message, el) + self.warnings.append(msg) + + def error(self, message: str, el: OsmElementT | None = None) -> None: + """Error is a critical problem that invalidates the city.""" + msg = City.log_message(message, el) + self.errors.append(msg) + + def contains(self, el: OsmElementT) -> bool: + center = el_center(el) + if center: + return ( + self.bbox[0] <= center[1] <= self.bbox[2] + and self.bbox[1] <= center[0] <= self.bbox[3] + ) + return False + + def add(self, el: OsmElementT) -> None: + if el["type"] == "relation" and "members" not in el: + return + + self.elements[el_id(el)] = el + if not (el["type"] == "relation" and "tags" in el): + return + + relation_type = el["tags"].get("type") + if relation_type == "route_master": + for m in el["members"]: + if m["type"] != "relation": + continue + + if el_id(m) in self.masters: + self.error("Route in two route_masters", m) + self.masters[el_id(m)] = el + + elif el["tags"].get("public_transport") == "stop_area": + if relation_type != "public_transport": + self.warn( + "stop_area relation with " + f"type={relation_type}, needed type=public_transport", + el, + ) + return + + warned_about_duplicates = False + for m in el["members"]: + stop_areas = self.stop_areas[el_id(m)] + if el in stop_areas and not warned_about_duplicates: + self.warn("Duplicate element in a stop area", el) + warned_about_duplicates = True + else: + stop_areas.append(el) + + def make_transfer(self, stoparea_group: OsmElementT) -> None: + transfer: set[StopArea] = set() + for m in stoparea_group["members"]: + k = el_id(m) + el = self.elements.get(k) + if not el: + # A stoparea_group member may validly not belong to the city + # while the stoparea_group does - near the city bbox boundary + continue + if "tags" not in el: + self.warn( + "An untagged object {} in a stop_area_group".format(k), + stoparea_group, + ) + continue + if ( + el["type"] != "relation" + or el["tags"].get("type") != "public_transport" + or 
el["tags"].get("public_transport") != "stop_area" + ): + continue + if k in self.stations: + stoparea = self.stations[k][0] + transfer.add(stoparea) + if stoparea.transfer: + # TODO: properly process such cases. + # Counterexample 1: Paris, + # Châtelet subway station <-> + # "Châtelet - Les Halles" railway station <-> + # Les Halles subway station + # Counterexample 2: Saint-Petersburg, transfers + # Витебский вокзал <-> + # Пушкинская <-> + # Звенигородская + self.warn( + "Stop area {} belongs to multiple interchanges".format( + k + ) + ) + stoparea.transfer = el_id(stoparea_group) + if len(transfer) > 1: + self.transfers.append(transfer) + + def extract_routes(self) -> None: + # Extract stations + processed_stop_areas = set() + for el in self.elements.values(): + if Station.is_station(el, self.modes): + # See PR https://github.com/mapsme/subways/pull/98 + if ( + el["type"] == "relation" + and el["tags"].get("type") != "multipolygon" + ): + rel_type = el["tags"].get("type") + self.warn( + "A railway station cannot be a relation of type " + f"{rel_type}", + el, + ) + continue + st = Station(el, self) + self.station_ids.add(st.id) + if st.id in self.stop_areas: + stations = [] + for sa in self.stop_areas[st.id]: + stations.append(StopArea(st, self, sa)) + else: + stations = [StopArea(st, self)] + + for station in stations: + if station.id not in processed_stop_areas: + processed_stop_areas.add(station.id) + for st_el in station.get_elements(): + self.stations[st_el].append(station) + + # Check that stops and platforms belong to + # a single stop_area + for sp in chain(station.stops, station.platforms): + if sp in self.stops_and_platforms: + self.notice( + f"A stop or a platform {sp} belongs to " + "multiple stop areas, might be correct" + ) + else: + self.stops_and_platforms.add(sp) + + # Extract routes + for el in self.elements.values(): + if Route.is_route(el, self.modes): + if el["tags"].get("access") in ("no", "private"): + continue + route_id = el_id(el) + 
master = self.masters.get(route_id, None) + if self.networks: + network = Route.get_network(el) + if master: + master_network = Route.get_network(master) + else: + master_network = None + if ( + network not in self.networks + and master_network not in self.networks + ): + continue + + route = self.route_class(el, self, master) + if not route.stops: + self.warn("Route has no stops", el) + continue + elif len(route.stops) == 1: + self.warn("Route has only one stop", el) + continue + + k = el_id(master) if master else route.ref + if k not in self.routes: + self.routes[k] = RouteMaster(self, master) + self.routes[k].add(route) + + # Sometimes adding a route to a newly initialized RouteMaster + # can fail + if len(self.routes[k]) == 0: + del self.routes[k] + + # And while we're iterating over relations, find interchanges + if ( + el["type"] == "relation" + and el.get("tags", {}).get("public_transport", None) + == "stop_area_group" + ): + self.make_transfer(el) + + # Filter transfers, leaving only stations that belong to routes + own_stopareas = set(self.stopareas()) + + self.transfers = [ + inner_transfer + for inner_transfer in ( + own_stopareas.intersection(transfer) + for transfer in self.transfers + ) + if len(inner_transfer) > 1 + ] + + def __iter__(self) -> Iterator[RouteMaster]: + return iter(self.routes.values()) + + def stopareas(self) -> Iterator[StopArea]: + yielded_stopareas = set() + for route_master in self: + for stoparea in route_master.stopareas(): + if stoparea not in yielded_stopareas: + yield stoparea + yielded_stopareas.add(stoparea) + + @property + def is_good(self) -> bool: + if not (self.errors or self.validate_called): + raise RuntimeError( + "You mustn't refer to City.is_good property before calling " + "the City.validate() method unless an error already occurred." 
+ ) + return len(self.errors) == 0 + + def get_validation_result(self) -> dict: + result = { + "name": self.name, + "country": self.country, + "continent": self.continent, + "stations_found": getattr(self, "found_stations", 0), + "transfers_found": getattr(self, "found_interchanges", 0), + "unused_entrances": getattr(self, "unused_entrances", 0), + "networks": getattr(self, "found_networks", 0), + } + if not self.overground: + result.update( + { + "subwayl_expected": getattr(self, "num_lines", 0), + "lightrl_expected": getattr(self, "num_light_lines", 0), + "subwayl_found": getattr(self, "found_lines", 0), + "lightrl_found": getattr(self, "found_light_lines", 0), + "stations_expected": getattr(self, "num_stations", 0), + "transfers_expected": getattr(self, "num_interchanges", 0), + } + ) + else: + result.update( + { + "stations_expected": 0, + "transfers_expected": 0, + "busl_expected": getattr(self, "num_bus_lines", 0), + "trolleybusl_expected": getattr( + self, "num_trolleybus_lines", 0 + ), + "traml_expected": getattr(self, "num_tram_lines", 0), + "otherl_expected": getattr(self, "num_other_lines", 0), + "busl_found": getattr(self, "found_bus_lines", 0), + "trolleybusl_found": getattr( + self, "found_trolleybus_lines", 0 + ), + "traml_found": getattr(self, "found_tram_lines", 0), + "otherl_found": getattr(self, "found_other_lines", 0), + } + ) + result["warnings"] = self.warnings + result["errors"] = self.errors + result["notices"] = self.notices + return result + + def count_unused_entrances(self) -> None: + global used_entrances + stop_areas = set() + for el in self.elements.values(): + if ( + el["type"] == "relation" + and "tags" in el + and el["tags"].get("public_transport") == "stop_area" + and "members" in el + ): + stop_areas.update([el_id(m) for m in el["members"]]) + unused = [] + not_in_sa = [] + for el in self.elements.values(): + if ( + el["type"] == "node" + and "tags" in el + and el["tags"].get("railway") == "subway_entrance" + ): + i = el_id(el) + 
if i in self.stations: + used_entrances.add(i) + if i not in stop_areas: + not_in_sa.append(i) + if i not in self.stations: + unused.append(i) + self.unused_entrances = len(unused) + self.entrances_not_in_stop_areas = len(not_in_sa) + if unused: + self.notice( + f"{len(unused)} subway entrances are not connected to a " + f"station: {format_elid_list(unused)}" + ) + if not_in_sa: + self.notice( + f"{len(not_in_sa)} subway entrances are not in stop_area " + f"relations: {format_elid_list(not_in_sa)}" + ) + + def validate_lines(self) -> None: + self.found_light_lines = len( + [x for x in self.routes.values() if x.mode != "subway"] + ) + self.found_lines = len(self.routes) - self.found_light_lines + if self.found_lines != self.num_lines: + self.error( + "Found {} subway lines, expected {}".format( + self.found_lines, self.num_lines + ) + ) + if self.found_light_lines != self.num_light_lines: + self.error( + "Found {} light rail lines, expected {}".format( + self.found_light_lines, self.num_light_lines + ) + ) + + def validate_overground_lines(self) -> None: + self.found_tram_lines = len( + [x for x in self.routes.values() if x.mode == "tram"] + ) + self.found_bus_lines = len( + [x for x in self.routes.values() if x.mode == "bus"] + ) + self.found_trolleybus_lines = len( + [x for x in self.routes.values() if x.mode == "trolleybus"] + ) + self.found_other_lines = len( + [ + x + for x in self.routes.values() + if x.mode not in ("bus", "trolleybus", "tram") + ] + ) + if self.found_tram_lines != self.num_tram_lines: + log_function = ( + self.error if self.found_tram_lines == 0 else self.notice + ) + log_function( + "Found {} tram lines, expected {}".format( + self.found_tram_lines, self.num_tram_lines + ), + ) + + def validate(self) -> None: + networks = Counter() + self.found_stations = 0 + unused_stations = set(self.station_ids) + for rmaster in self.routes.values(): + networks[str(rmaster.network)] += 1 + if not self.overground: + rmaster.check_return_routes() + 
route_stations = set() + for sa in rmaster.stopareas(): + route_stations.add(sa.transfer or sa.id) + unused_stations.discard(sa.station.id) + self.found_stations += len(route_stations) + if unused_stations: + self.unused_stations = len(unused_stations) + self.notice( + "{} unused stations: {}".format( + self.unused_stations, format_elid_list(unused_stations) + ) + ) + self.count_unused_entrances() + self.found_interchanges = len(self.transfers) + + if self.overground: + self.validate_overground_lines() + else: + self.validate_lines() + + if self.found_stations != self.num_stations: + msg = "Found {} stations in routes, expected {}".format( + self.found_stations, self.num_stations + ) + log_function = ( + self.error + if self.num_stations > 0 + and not ( + 0 + <= (self.num_stations - self.found_stations) + / self.num_stations + <= ALLOWED_STATIONS_MISMATCH + ) + else self.warn + ) + log_function(msg) + + if self.found_interchanges != self.num_interchanges: + msg = "Found {} interchanges, expected {}".format( + self.found_interchanges, self.num_interchanges + ) + log_function = ( + self.error + if self.num_interchanges != 0 + and not ( + (self.num_interchanges - self.found_interchanges) + / self.num_interchanges + <= ALLOWED_TRANSFERS_MISMATCH + ) + else self.warn + ) + log_function(msg) + + self.found_networks = len(networks) + if len(networks) > max(1, len(self.networks)): + n_str = "; ".join( + ["{} ({})".format(k, v) for k, v in networks.items()] + ) + self.notice("More than one network: {}".format(n_str)) + + self.validate_called = True + + def calculate_distances(self) -> None: + for route_master in self: + for route in route_master: + route.calculate_distances() + + +def find_transfers( + elements: list[OsmElementT], cities: Collection[City] +) -> TransfersT: + """As for now, two Cities may contain the same stoparea, but those + StopArea instances would have different python id. So we don't store + references to StopAreas, but only their ids. 
This is important at + inter-city interchanges. + """ + stop_area_groups = [ + el + for el in elements + if el["type"] == "relation" + and "members" in el + and el.get("tags", {}).get("public_transport") == "stop_area_group" + ] + + stopareas_in_cities_ids = set( + stoparea.id + for city in cities + if city.is_good + for stoparea in city.stopareas() + ) + + transfers = [] + for stop_area_group in stop_area_groups: + transfer: TransferT = set( + member_id + for member_id in ( + el_id(member) for member in stop_area_group["members"] + ) + if member_id in stopareas_in_cities_ids + ) + if len(transfer) > 1: + transfers.append(transfer) + return transfers + + +def get_unused_subway_entrances_geojson(elements: list[OsmElementT]) -> dict: + global used_entrances + features = [] + for el in elements: + if ( + el["type"] == "node" + and "tags" in el + and el["tags"].get("railway") == "subway_entrance" + ): + if el_id(el) not in used_entrances: + geometry = {"type": "Point", "coordinates": el_center(el)} + properties = { + k: v + for k, v in el["tags"].items() + if k not in ("railway", "entrance") + } + features.append( + { + "type": "Feature", + "geometry": geometry, + "properties": properties, + } + ) + return {"type": "FeatureCollection", "features": features} diff --git a/subways/structure/route.py b/subways/structure/route.py new file mode 100644 index 00000000..926733ed --- /dev/null +++ b/subways/structure/route.py @@ -0,0 +1,903 @@ +from __future__ import annotations + +import re +import typing +from collections.abc import Callable, Iterator +from itertools import islice + +from subways.consts import ( + CONSTRUCTION_KEYS, + DISPLACEMENT_TOLERANCE, + MAX_DISTANCE_STOP_TO_LINE, +) +from subways.css_colours import normalize_colour +from subways.geom_utils import ( + angle_between, + distance, + distance_on_line, + find_segment, + project_on_line, +) +from subways.osm_element import el_id, el_center +from subways.structure.route_stop import RouteStop +from 
subways.structure.station import Station +from subways.structure.stop_area import StopArea +from subways.types import CriticalValidationError, IdT, OsmElementT, RailT + +if typing.TYPE_CHECKING: + from subways.structure.city import City + +START_END_TIMES_RE = re.compile(r".*?(\d{2}):(\d{2})-(\d{2}):(\d{2}).*") + +ALLOWED_ANGLE_BETWEEN_STOPS = 45 # in degrees +DISALLOWED_ANGLE_BETWEEN_STOPS = 20 # in degrees + + +def get_start_end_times( + opening_hours: str, +) -> tuple[tuple[int, int], tuple[int, int]] | tuple[None, None]: + """Very simplified method to parse OSM opening_hours tag. + We simply take the first HH:MM-HH:MM substring which is the most probable + opening hours interval for the most of the weekdays. + """ + start_time, end_time = None, None + m = START_END_TIMES_RE.match(opening_hours) + if m: + ints = tuple(map(int, m.groups())) + start_time = (ints[0], ints[1]) + end_time = (ints[2], ints[3]) + return start_time, end_time + + +def osm_interval_to_seconds(interval_str: str) -> int | None: + """Convert to int an OSM value for 'interval'/'headway' tag + which may be in these formats: + HH:MM:SS, + HH:MM, + MM, + M + (https://wiki.openstreetmap.org/wiki/Key:interval#Format) + """ + hours, minutes, seconds = 0, 0, 0 + semicolon_count = interval_str.count(":") + try: + if semicolon_count == 0: + minutes = int(interval_str) + elif semicolon_count == 1: + hours, minutes = map(int, interval_str.split(":")) + elif semicolon_count == 2: + hours, minutes, seconds = map(int, interval_str.split(":")) + else: + return None + except ValueError: + return None + return seconds + 60 * minutes + 60 * 60 * hours + + +class Route: + """The longest route for a city with a unique ref.""" + + @staticmethod + def is_route(el: OsmElementT, modes: set[str]) -> bool: + if ( + el["type"] != "relation" + or el.get("tags", {}).get("type") != "route" + ): + return False + if "members" not in el: + return False + if el["tags"].get("route") not in modes: + return False + for k in 
CONSTRUCTION_KEYS: + if k in el["tags"]: + return False + if "ref" not in el["tags"] and "name" not in el["tags"]: + return False + return True + + @staticmethod + def get_network(relation: OsmElementT) -> str | None: + for k in ("network:metro", "network", "operator"): + if k in relation["tags"]: + return relation["tags"][k] + return None + + @staticmethod + def get_interval(tags: dict) -> int | None: + v = None + for k in ("interval", "headway"): + if k in tags: + v = tags[k] + break + else: + for kk in tags: + if kk.startswith(k + ":"): + v = tags[kk] + break + if not v: + return None + return osm_interval_to_seconds(v) + + def stopareas(self) -> Iterator[StopArea]: + yielded_stopareas = set() + for route_stop in self: + stoparea = route_stop.stoparea + if stoparea not in yielded_stopareas: + yield stoparea + yielded_stopareas.add(stoparea) + + def __init__( + self, + relation: OsmElementT, + city: City, + master: OsmElementT | None = None, + ) -> None: + assert Route.is_route( + relation, city.modes + ), f"The relation does not seem to be a route: {relation}" + self.city = city + self.element: OsmElementT = relation + self.id: IdT = el_id(relation) + + self.ref = None + self.name = None + self.mode = None + self.colour = None + self.infill = None + self.network = None + self.interval = None + self.start_time = None + self.end_time = None + self.is_circular = False + self.stops: list[RouteStop] = [] + # Would be a list of (lon, lat) for the longest stretch. Can be empty. 
+ self.tracks = None + # Index of the first stop that is located on/near the self.tracks + self.first_stop_on_rails_index = None + # Index of the last stop that is located on/near the self.tracks + self.last_stop_on_rails_index = None + + self.process_tags(master) + stop_position_elements = self.process_stop_members() + self.process_tracks(stop_position_elements) + + def build_longest_line(self) -> tuple[list[IdT], set[IdT]]: + line_nodes: set[IdT] = set() + last_track: list[IdT] = [] + track: list[IdT] = [] + warned_about_holes = False + for m in self.element["members"]: + el = self.city.elements.get(el_id(m), None) + if not el or not StopArea.is_track(el): + continue + if "nodes" not in el or len(el["nodes"]) < 2: + self.city.error("Cannot find nodes in a railway", el) + continue + nodes: list[IdT] = ["n{}".format(n) for n in el["nodes"]] + if m["role"] == "backward": + nodes.reverse() + line_nodes.update(nodes) + if not track: + is_first = True + track.extend(nodes) + else: + new_segment = list(nodes) # copying + if new_segment[0] == track[-1]: + track.extend(new_segment[1:]) + elif new_segment[-1] == track[-1]: + track.extend(reversed(new_segment[:-1])) + elif is_first and track[0] in ( + new_segment[0], + new_segment[-1], + ): + # We can reverse the track and try again + track.reverse() + if new_segment[0] == track[-1]: + track.extend(new_segment[1:]) + else: + track.extend(reversed(new_segment[:-1])) + else: + # Store the track if it is long and clean it + if not warned_about_holes: + self.city.warn( + "Hole in route rails near node {}".format( + track[-1] + ), + self.element, + ) + warned_about_holes = True + if len(track) > len(last_track): + last_track = track + track = [] + is_first = False + if len(track) > len(last_track): + last_track = track + # Remove duplicate points + last_track = [ + last_track[i] + for i in range(0, len(last_track)) + if i == 0 or last_track[i - 1] != last_track[i] + ] + return last_track, line_nodes + + def 
get_stop_projections(self) -> tuple[list[dict], Callable[[int], bool]]: + projected = [project_on_line(x.stop, self.tracks) for x in self.stops] + + def stop_near_tracks_criterion(stop_index: int) -> bool: + return ( + projected[stop_index]["projected_point"] is not None + and distance( + self.stops[stop_index].stop, + projected[stop_index]["projected_point"], + ) + <= MAX_DISTANCE_STOP_TO_LINE + ) + + return projected, stop_near_tracks_criterion + + def project_stops_on_line(self) -> dict: + projected, stop_near_tracks_criterion = self.get_stop_projections() + + projected_stops_data = { + "first_stop_on_rails_index": None, + "last_stop_on_rails_index": None, + "stops_on_longest_line": [], # list [{'route_stop': RouteStop, + # 'coords': LonLat, + # 'positions_on_rails': [] } + } + first_index = 0 + while first_index < len(self.stops) and not stop_near_tracks_criterion( + first_index + ): + first_index += 1 + projected_stops_data["first_stop_on_rails_index"] = first_index + + last_index = len(self.stops) - 1 + while last_index > projected_stops_data[ + "first_stop_on_rails_index" + ] and not stop_near_tracks_criterion(last_index): + last_index -= 1 + projected_stops_data["last_stop_on_rails_index"] = last_index + + for i, route_stop in enumerate(self.stops): + if not first_index <= i <= last_index: + continue + + if projected[i]["projected_point"] is None: + self.city.error( + 'Stop "{}" {} is nowhere near the tracks'.format( + route_stop.stoparea.name, route_stop.stop + ), + self.element, + ) + else: + stop_data = { + "route_stop": route_stop, + "coords": None, + "positions_on_rails": None, + } + projected_point = projected[i]["projected_point"] + # We've got two separate stations with a good stretch of + # railway tracks between them. Put these on tracks. 
+ d = round(distance(route_stop.stop, projected_point)) + if d > MAX_DISTANCE_STOP_TO_LINE: + self.city.notice( + 'Stop "{}" {} is {} meters from the tracks'.format( + route_stop.stoparea.name, route_stop.stop, d + ), + self.element, + ) + else: + stop_data["coords"] = projected_point + stop_data["positions_on_rails"] = projected[i][ + "positions_on_line" + ] + projected_stops_data["stops_on_longest_line"].append(stop_data) + return projected_stops_data + + def calculate_distances(self) -> None: + dist = 0 + vertex = 0 + for i, stop in enumerate(self.stops): + if i > 0: + direct = distance(stop.stop, self.stops[i - 1].stop) + d_line = None + if ( + self.first_stop_on_rails_index + <= i + <= self.last_stop_on_rails_index + ): + d_line = distance_on_line( + self.stops[i - 1].stop, stop.stop, self.tracks, vertex + ) + if d_line and direct - 10 <= d_line[0] <= direct * 2: + vertex = d_line[1] + dist += round(d_line[0]) + else: + dist += round(direct) + stop.distance = dist + + def process_tags(self, master: OsmElementT) -> None: + relation = self.element + master_tags = {} if not master else master["tags"] + if "ref" not in relation["tags"] and "ref" not in master_tags: + self.city.notice("Missing ref on a route", relation) + self.ref = relation["tags"].get( + "ref", master_tags.get("ref", relation["tags"].get("name", None)) + ) + self.name = relation["tags"].get("name", None) + self.mode = relation["tags"]["route"] + if ( + "colour" not in relation["tags"] + and "colour" not in master_tags + and self.mode != "tram" + ): + self.city.notice("Missing colour on a route", relation) + try: + self.colour = normalize_colour( + relation["tags"].get("colour", master_tags.get("colour", None)) + ) + except ValueError as e: + self.colour = None + self.city.warn(str(e), relation) + try: + self.infill = normalize_colour( + relation["tags"].get( + "colour:infill", master_tags.get("colour:infill", None) + ) + ) + except ValueError as e: + self.infill = None + self.city.warn(str(e), 
relation) + self.network = Route.get_network(relation) + self.interval = Route.get_interval( + relation["tags"] + ) or Route.get_interval(master_tags) + self.start_time, self.end_time = get_start_end_times( + relation["tags"].get( + "opening_hours", master_tags.get("opening_hours", "") + ) + ) + if relation["tags"].get("public_transport:version") == "1": + self.city.warn( + "Public transport version is 1, which means the route " + "is an unsorted pile of objects", + relation, + ) + + def process_stop_members(self) -> list[OsmElementT]: + stations: set[StopArea] = set() # temporary for recording stations + seen_stops = False + seen_platforms = False + repeat_pos = None + stop_position_elements: list[OsmElementT] = [] + for m in self.element["members"]: + if "inactive" in m["role"]: + continue + k = el_id(m) + if k in self.city.stations: + st_list = self.city.stations[k] + st = st_list[0] + if len(st_list) > 1: + self.city.error( + f"Ambiguous station {st.name} in route. Please " + "use stop_position or split interchange stations", + self.element, + ) + el = self.city.elements[k] + actual_role = RouteStop.get_actual_role( + el, m["role"], self.city.modes + ) + if actual_role: + if m["role"] and actual_role not in m["role"]: + self.city.warn( + "Wrong role '{}' for {} {}".format( + m["role"], actual_role, k + ), + self.element, + ) + if repeat_pos is None: + if not self.stops or st not in stations: + stop = RouteStop(st) + self.stops.append(stop) + stations.add(st) + elif self.stops[-1].stoparea.id == st.id: + stop = self.stops[-1] + else: + # We've got a repeat + if ( + (seen_stops and seen_platforms) + or ( + actual_role == "stop" + and not seen_platforms + ) + or ( + actual_role == "platform" + and not seen_stops + ) + ): + # Circular route! 
+ stop = RouteStop(st) + self.stops.append(stop) + stations.add(st) + else: + repeat_pos = 0 + if repeat_pos is not None: + if repeat_pos >= len(self.stops): + continue + # Check that the type matches + if (actual_role == "stop" and seen_stops) or ( + actual_role == "platform" and seen_platforms + ): + self.city.error( + 'Found an out-of-place {}: "{}" ({})'.format( + actual_role, el["tags"].get("name", ""), k + ), + self.element, + ) + continue + # Find the matching stop starting with index repeat_pos + while ( + repeat_pos < len(self.stops) + and self.stops[repeat_pos].stoparea.id != st.id + ): + repeat_pos += 1 + if repeat_pos >= len(self.stops): + self.city.error( + "Incorrect order of {}s at {}".format( + actual_role, k + ), + self.element, + ) + continue + stop = self.stops[repeat_pos] + + stop.add(m, self.element, self.city) + if repeat_pos is None: + seen_stops |= stop.seen_stop or stop.seen_station + seen_platforms |= stop.seen_platform + + if StopArea.is_stop(el): + stop_position_elements.append(el) + + continue + + if k not in self.city.elements: + if "stop" in m["role"] or "platform" in m["role"]: + raise CriticalValidationError( + f"{m['role']} {m['type']} {m['ref']} for route " + f"relation {self.element['id']} is not in the dataset" + ) + continue + el = self.city.elements[k] + if "tags" not in el: + self.city.error( + f"Untagged object {k} in a route", self.element + ) + continue + + is_under_construction = False + for ck in CONSTRUCTION_KEYS: + if ck in el["tags"]: + self.city.warn( + f"Under construction {m['role'] or 'feature'} {k} " + "in route. Consider setting 'inactive' role or " + "removing construction attributes", + self.element, + ) + is_under_construction = True + break + if is_under_construction: + continue + + if Station.is_station(el, self.city.modes): + # A station may be not included in this route due to previous + # 'stop area has multiple stations' error. No other error + # message is needed. 
+ pass + elif el["tags"].get("railway") in ("station", "halt"): + self.city.error( + "Missing station={} on a {}".format(self.mode, m["role"]), + el, + ) + else: + actual_role = RouteStop.get_actual_role( + el, m["role"], self.city.modes + ) + if actual_role: + self.city.error( + f"{actual_role} {m['type']} {m['ref']} is not " + "connected to a station in route", + self.element, + ) + elif not StopArea.is_track(el): + self.city.warn( + "Unknown member type for {} {} in route".format( + m["type"], m["ref"] + ), + self.element, + ) + return stop_position_elements + + def process_tracks( + self, stop_position_elements: list[OsmElementT] + ) -> None: + tracks, line_nodes = self.build_longest_line() + + for stop_el in stop_position_elements: + stop_id = el_id(stop_el) + if stop_id not in line_nodes: + self.city.warn( + 'Stop position "{}" ({}) is not on tracks'.format( + stop_el["tags"].get("name", ""), stop_id + ), + self.element, + ) + + # self.tracks would be a list of (lon, lat) for the longest stretch. + # Can be empty. 
+ self.tracks = [el_center(self.city.elements.get(k)) for k in tracks] + if ( + None in self.tracks + ): # usually, extending BBOX for the city is needed + self.tracks = [] + for n in filter(lambda x: x not in self.city.elements, tracks): + self.city.warn( + f"The dataset is missing the railway tracks node {n}", + self.element, + ) + break + + if len(self.stops) > 1: + self.is_circular = ( + self.stops[0].stoparea == self.stops[-1].stoparea + ) + if ( + self.is_circular + and self.tracks + and self.tracks[0] != self.tracks[-1] + ): + self.city.warn( + "Non-closed rail sequence in a circular route", + self.element, + ) + + projected_stops_data = self.project_stops_on_line() + self.check_and_recover_stops_order(projected_stops_data) + self.apply_projected_stops_data(projected_stops_data) + + def apply_projected_stops_data(self, projected_stops_data: dict) -> None: + """Store better stop coordinates and indexes of first/last stops + that lie on a continuous track line, to the instance attributes. + """ + for attr in ("first_stop_on_rails_index", "last_stop_on_rails_index"): + setattr(self, attr, projected_stops_data[attr]) + + for stop_data in projected_stops_data["stops_on_longest_line"]: + route_stop = stop_data["route_stop"] + route_stop.positions_on_rails = stop_data["positions_on_rails"] + if stop_coords := stop_data["coords"]: + route_stop.stop = stop_coords + + def get_extended_tracks(self) -> RailT: + """Amend tracks with points of leading/trailing self.stops + that were not projected onto the longest tracks line. + Return a new array. 
+ """ + if self.first_stop_on_rails_index >= len(self.stops): + tracks = [route_stop.stop for route_stop in self.stops] + else: + tracks = ( + [ + route_stop.stop + for i, route_stop in enumerate(self.stops) + if i < self.first_stop_on_rails_index + ] + + self.tracks + + [ + route_stop.stop + for i, route_stop in enumerate(self.stops) + if i > self.last_stop_on_rails_index + ] + ) + return tracks + + def get_truncated_tracks(self, tracks: RailT) -> RailT: + """Truncate leading/trailing segments of `tracks` param + that are beyond the first and last stop locations. + Return a new array. + """ + if self.is_circular: + return tracks.copy() + + first_stop_location = find_segment(self.stops[0].stop, tracks, 0) + last_stop_location = find_segment(self.stops[-1].stop, tracks, 0) + + if last_stop_location != (None, None): + seg2, u2 = last_stop_location + if u2 == 0.0: + # Make seg2 the segment the last_stop_location is + # at the middle or end of + seg2 -= 1 + # u2 = 1.0 + if seg2 + 2 < len(tracks): + tracks = tracks[0 : seg2 + 2] # noqa E203 + tracks[-1] = self.stops[-1].stop + + if first_stop_location != (None, None): + seg1, u1 = first_stop_location + if u1 == 1.0: + # Make seg1 the segment the first_stop_location is + # at the beginning or middle of + seg1 += 1 + # u1 = 0.0 + if seg1 > 0: + tracks = tracks[seg1:] + tracks[0] = self.stops[0].stop + + return tracks + + def are_tracks_complete(self) -> bool: + return ( + self.first_stop_on_rails_index == 0 + and self.last_stop_on_rails_index == len(self) - 1 + ) + + def get_tracks_geometry(self) -> RailT: + tracks = self.get_extended_tracks() + tracks = self.get_truncated_tracks(tracks) + return tracks + + def check_stops_order_by_angle(self) -> tuple[list[str], list[str]]: + disorder_warnings = [] + disorder_errors = [] + for i, route_stop in enumerate( + islice(self.stops, 1, len(self.stops) - 1), start=1 + ): + angle = angle_between( + self.stops[i - 1].stop, + route_stop.stop, + self.stops[i + 1].stop, + ) + if angle 
< ALLOWED_ANGLE_BETWEEN_STOPS: + msg = ( + "Angle between stops around " + f'"{route_stop.stoparea.name}" {route_stop.stop} ' + f"is too narrow, {angle} degrees" + ) + if angle < DISALLOWED_ANGLE_BETWEEN_STOPS: + disorder_errors.append(msg) + else: + disorder_warnings.append(msg) + return disorder_warnings, disorder_errors + + def check_stops_order_on_tracks_direct( + self, stop_sequence: Iterator[dict] + ) -> str | None: + """Checks stops order on tracks, following stop_sequence + in direct order only. + :param stop_sequence: list of dict{'route_stop', 'positions_on_rails', + 'coords'} for RouteStops that belong to the longest contiguous + sequence of tracks in a route. + :return: error message on the first order violation or None. + """ + allowed_order_violations = 1 if self.is_circular else 0 + max_position_on_rails = -1 + for stop_data in stop_sequence: + positions_on_rails = stop_data["positions_on_rails"] + suitable_occurrence = 0 + while ( + suitable_occurrence < len(positions_on_rails) + and positions_on_rails[suitable_occurrence] + < max_position_on_rails + ): + suitable_occurrence += 1 + if suitable_occurrence == len(positions_on_rails): + if allowed_order_violations > 0: + suitable_occurrence -= 1 + allowed_order_violations -= 1 + else: + route_stop = stop_data["route_stop"] + return ( + "Stops on tracks are unordered near " + f'"{route_stop.stoparea.name}" {route_stop.stop}' + ) + max_position_on_rails = positions_on_rails[suitable_occurrence] + + def check_stops_order_on_tracks( + self, projected_stops_data: dict + ) -> str | None: + """Checks stops order on tracks, trying direct and reversed + order of stops in the stop_sequence. + :param projected_stops_data: info about RouteStops that belong to the + longest contiguous sequence of tracks in a route. May be changed + if tracks reversing is performed. + :return: error message on the first order violation or None. 
+ """ + error_message = self.check_stops_order_on_tracks_direct( + projected_stops_data["stops_on_longest_line"] + ) + if error_message: + error_message_reversed = self.check_stops_order_on_tracks_direct( + reversed(projected_stops_data["stops_on_longest_line"]) + ) + if error_message_reversed is None: + error_message = None + self.city.warn( + "Tracks seem to go in the opposite direction to stops", + self.element, + ) + self.tracks.reverse() + new_projected_stops_data = self.project_stops_on_line() + projected_stops_data.update(new_projected_stops_data) + + return error_message + + def check_stops_order( + self, projected_stops_data: dict + ) -> tuple[list[str], list[str]]: + ( + angle_disorder_warnings, + angle_disorder_errors, + ) = self.check_stops_order_by_angle() + disorder_on_tracks_error = self.check_stops_order_on_tracks( + projected_stops_data + ) + disorder_warnings = angle_disorder_warnings + disorder_errors = angle_disorder_errors + if disorder_on_tracks_error: + disorder_errors.append(disorder_on_tracks_error) + return disorder_warnings, disorder_errors + + def check_and_recover_stops_order( + self, projected_stops_data: dict + ) -> None: + """ + :param projected_stops_data: may change if we need to reverse tracks + """ + disorder_warnings, disorder_errors = self.check_stops_order( + projected_stops_data + ) + if disorder_warnings or disorder_errors: + resort_success = False + if self.city.recovery_data: + resort_success = self.try_resort_stops() + if resort_success: + for msg in disorder_warnings: + self.city.notice(msg, self.element) + for msg in disorder_errors: + self.city.warn( + "Fixed with recovery data: " + msg, self.element + ) + + if not resort_success: + for msg in disorder_warnings: + self.city.notice(msg, self.element) + for msg in disorder_errors: + self.city.error(msg, self.element) + + def try_resort_stops(self) -> bool: + """Precondition: self.city.recovery_data is not None. 
+ Return success of station order recovering.""" + self_stops = {} # station name => RouteStop + for stop in self.stops: + station = stop.stoparea.station + stop_name = station.name + if stop_name == "?" and station.int_name: + stop_name = station.int_name + # We won't programmatically recover routes with repeating stations: + # such cases are rare and deserves manual verification + if stop_name in self_stops: + return False + self_stops[stop_name] = stop + + route_id = (self.colour, self.ref) + if route_id not in self.city.recovery_data: + return False + + stop_names = list(self_stops.keys()) + suitable_itineraries = [] + for itinerary in self.city.recovery_data[route_id]: + itinerary_stop_names = [ + stop["name"] for stop in itinerary["stations"] + ] + if not ( + len(stop_names) == len(itinerary_stop_names) + and sorted(stop_names) == sorted(itinerary_stop_names) + ): + continue + big_station_displacement = False + for it_stop in itinerary["stations"]: + name = it_stop["name"] + it_stop_center = it_stop["center"] + self_stop_center = self_stops[name].stoparea.station.center + if ( + distance(it_stop_center, self_stop_center) + > DISPLACEMENT_TOLERANCE + ): + big_station_displacement = True + break + if not big_station_displacement: + suitable_itineraries.append(itinerary) + + if len(suitable_itineraries) == 0: + return False + elif len(suitable_itineraries) == 1: + matching_itinerary = suitable_itineraries[0] + else: + from_tag = self.element["tags"].get("from") + to_tag = self.element["tags"].get("to") + if not from_tag and not to_tag: + return False + matching_itineraries = [ + itin + for itin in suitable_itineraries + if from_tag + and itin["from"] == from_tag + or to_tag + and itin["to"] == to_tag + ] + if len(matching_itineraries) != 1: + return False + matching_itinerary = matching_itineraries[0] + self.stops = [ + self_stops[stop["name"]] for stop in matching_itinerary["stations"] + ] + return True + + def get_end_transfers(self) -> tuple[IdT, IdT]: + 
"""Using transfer ids because a train can arrive at different + stations within a transfer. But disregard transfer that may give + an impression of a circular route (for example, + Simonis / Elisabeth station and route 2 in Brussels). + """ + return ( + (self[0].stoparea.id, self[-1].stoparea.id) + if ( + self[0].stoparea.transfer is not None + and self[0].stoparea.transfer == self[-1].stoparea.transfer + ) + else ( + self[0].stoparea.transfer or self[0].stoparea.id, + self[-1].stoparea.transfer or self[-1].stoparea.id, + ) + ) + + def get_transfers_sequence(self) -> list[IdT]: + """Return a list of stoparea or transfer (if not None) ids.""" + transfer_seq = [ + stop.stoparea.transfer or stop.stoparea.id for stop in self + ] + if ( + self[0].stoparea.transfer is not None + and self[0].stoparea.transfer == self[-1].stoparea.transfer + ): + transfer_seq[0], transfer_seq[-1] = self.get_end_transfers() + return transfer_seq + + def __len__(self) -> int: + return len(self.stops) + + def __getitem__(self, i) -> RouteStop: + return self.stops[i] + + def __iter__(self) -> Iterator[RouteStop]: + return iter(self.stops) + + def __repr__(self) -> str: + return ( + "Route(id={}, mode={}, ref={}, name={}, network={}, interval={}, " + "circular={}, num_stops={}, line_length={} m, from={}, to={}" + ).format( + self.id, + self.mode, + self.ref, + self.name, + self.network, + self.interval, + self.is_circular, + len(self.stops), + self.stops[-1].distance, + self.stops[0], + self.stops[-1], + ) diff --git a/subways/structure/route_master.py b/subways/structure/route_master.py new file mode 100644 index 00000000..36ab1484 --- /dev/null +++ b/subways/structure/route_master.py @@ -0,0 +1,464 @@ +from __future__ import annotations + +import typing +from collections.abc import Iterator +from typing import TypeVar + +from subways.consts import MAX_DISTANCE_STOP_TO_LINE +from subways.css_colours import normalize_colour +from subways.geom_utils import distance, project_on_line +from 
subways.osm_element import el_id +from subways.structure.route import Route +from subways.structure.stop_area import StopArea +from subways.types import IdT, OsmElementT + +if typing.TYPE_CHECKING: + from subways.structure.city import City + from subways.structure.route_stop import RouteStop + + +SUGGEST_TRANSFER_MIN_DISTANCE = 100 # in meters + +T = TypeVar("T") + + +class RouteMaster: + def __init__(self, city: City, master: OsmElementT = None) -> None: + self.city = city + self.routes = [] + self.best: Route = None + self.id: IdT = el_id(master) + self.has_master = master is not None + self.interval_from_master = False + if master: + self.ref = master["tags"].get( + "ref", master["tags"].get("name", None) + ) + try: + self.colour = normalize_colour( + master["tags"].get("colour", None) + ) + except ValueError: + self.colour = None + try: + self.infill = normalize_colour( + master["tags"].get("colour:infill", None) + ) + except ValueError: + self.infill = None + self.network = Route.get_network(master) + self.mode = master["tags"].get( + "route_master", None + ) # This tag is required, but okay + self.name = master["tags"].get("name", None) + self.interval = Route.get_interval(master["tags"]) + self.interval_from_master = self.interval is not None + else: + self.ref = None + self.colour = None + self.infill = None + self.network = None + self.mode = None + self.name = None + self.interval = None + + def stopareas(self) -> Iterator[StopArea]: + yielded_stopareas = set() + for route in self: + for stoparea in route.stopareas(): + if stoparea not in yielded_stopareas: + yield stoparea + yielded_stopareas.add(stoparea) + + def add(self, route: Route) -> None: + if not self.network: + self.network = route.network + elif route.network and route.network != self.network: + self.city.error( + 'Route has different network ("{}") from master "{}"'.format( + route.network, self.network + ), + route.element, + ) + + if not self.colour: + self.colour = route.colour + elif 
route.colour and route.colour != self.colour: + self.city.notice( + 'Route "{}" has different colour from master "{}"'.format( + route.colour, self.colour + ), + route.element, + ) + + if not self.infill: + self.infill = route.infill + elif route.infill and route.infill != self.infill: + self.city.notice( + ( + f'Route "{route.infill}" has different infill colour ' + f'from master "{self.infill}"' + ), + route.element, + ) + + if not self.ref: + self.ref = route.ref + elif route.ref != self.ref: + self.city.notice( + 'Route "{}" has different ref from master "{}"'.format( + route.ref, self.ref + ), + route.element, + ) + + if not self.name: + self.name = route.name + + if not self.mode: + self.mode = route.mode + elif route.mode != self.mode: + self.city.error( + "Incompatible PT mode: master has {} and route has {}".format( + self.mode, route.mode + ), + route.element, + ) + return + + if not self.interval_from_master and route.interval: + if not self.interval: + self.interval = route.interval + else: + self.interval = min(self.interval, route.interval) + + # Choose minimal id for determinancy + if not self.has_master and (not self.id or self.id > route.id): + self.id = route.id + + self.routes.append(route) + if ( + not self.best + or len(route.stops) > len(self.best.stops) + or ( + # Choose route with minimal id for determinancy + len(route.stops) == len(self.best.stops) + and route.element["id"] < self.best.element["id"] + ) + ): + self.best = route + + def get_meaningful_routes(self) -> list[Route]: + return [route for route in self if len(route) >= 2] + + def find_twin_routes(self) -> dict[Route, Route]: + """Two non-circular routes are twins if they have the same end + stations and opposite directions, and the number of stations is + the same or almost the same. We'll then find stops that are present + in one direction and is missing in another direction - to warn. 
+ """ + + twin_routes = {} # route => "twin" route + + for route in self.get_meaningful_routes(): + if route.is_circular: + continue # Difficult to calculate. TODO(?) in the future + if route in twin_routes: + continue + + route_transfer_ids = set(route.get_transfers_sequence()) + ends = route.get_end_transfers() + ends_reversed = ends[::-1] + + twin_candidates = [ + r + for r in self + if not r.is_circular + and r not in twin_routes + and r.get_end_transfers() == ends_reversed + # If absolute or relative difference in station count is large, + # possibly it's an express version of a route - skip it. + and ( + abs(len(r) - len(route)) <= 2 + or abs(len(r) - len(route)) / max(len(r), len(route)) + <= 0.2 + ) + ] + + if not twin_candidates: + continue + + twin_route = min( + twin_candidates, + key=lambda r: len( + route_transfer_ids ^ set(r.get_transfers_sequence()) + ), + ) + twin_routes[route] = twin_route + twin_routes[twin_route] = route + + return twin_routes + + def check_return_routes(self) -> None: + """Check if a route has return direction, and if twin routes + miss stations. + """ + meaningful_routes = self.get_meaningful_routes() + + if len(meaningful_routes) == 0: + self.city.error( + f"An empty route master {self.id}. " + "Please set construction:route if it is under construction" + ) + elif len(meaningful_routes) == 1: + log_function = ( + self.city.error + if not self.best.is_circular + else self.city.notice + ) + log_function( + "Only one route in route_master. 
" + "Please check if it needs a return route", + self.best.element, + ) + else: + self.check_return_circular_routes() + self.check_return_noncircular_routes() + + def check_return_noncircular_routes(self) -> None: + routes = [ + route + for route in self.get_meaningful_routes() + if not route.is_circular + ] + all_ends = {route.get_end_transfers(): route for route in routes} + for route in routes: + ends = route.get_end_transfers() + if ends[::-1] not in all_ends: + self.city.notice( + "Route does not have a return direction", route.element + ) + + twin_routes = self.find_twin_routes() + for route1, route2 in twin_routes.items(): + if route1.id > route2.id: + continue # to process a pair of routes only once + # and to ensure the order of routes in the pair + self.alert_twin_routes_differ(route1, route2) + + def check_return_circular_routes(self) -> None: + routes = { + route + for route in self.get_meaningful_routes() + if route.is_circular + } + routes_having_backward = set() + + for route in routes: + if route in routes_having_backward: + continue + transfer_sequence1 = [ + stop.stoparea.transfer or stop.stoparea.id for stop in route + ] + transfer_sequence1.pop() + for potential_backward_route in routes - {route}: + transfer_sequence2 = [ + stop.stoparea.transfer or stop.stoparea.id + for stop in potential_backward_route + ][ + -2::-1 + ] # truncate repeated first stop and reverse + common_subsequence = self.find_common_circular_subsequence( + transfer_sequence1, transfer_sequence2 + ) + if len(common_subsequence) >= 0.8 * min( + len(transfer_sequence1), len(transfer_sequence2) + ): + routes_having_backward.add(route) + routes_having_backward.add(potential_backward_route) + break + + for route in routes - routes_having_backward: + self.city.notice( + "Route does not have a return direction", route.element + ) + + @staticmethod + def find_common_circular_subsequence( + seq1: list[T], seq2: list[T] + ) -> list[T]: + """seq1 and seq2 are supposed to be stops of 
some circular routes. + Prerequisites to rely on the result: + - elements of each sequence are not repeated + - the order of stations is not violated. + Under these conditions we don't need LCS algorithm. Linear scan is + sufficient. + """ + i1, i2 = -1, -1 + for i1, x in enumerate(seq1): + try: + i2 = seq2.index(x) + except ValueError: + continue + else: + # x is found both in seq1 and seq2 + break + + if i2 == -1: + return [] + + # Shift cyclically so that the common element takes the first position + # both in seq1 and seq2 + seq1 = seq1[i1:] + seq1[:i1] + seq2 = seq2[i2:] + seq2[:i2] + + common_subsequence = [] + i2 = 0 + for x in seq1: + try: + i2 = seq2.index(x, i2) + except ValueError: + continue + common_subsequence.append(x) + i2 += 1 + if i2 >= len(seq2): + break + return common_subsequence + + def alert_twin_routes_differ(self, route1: Route, route2: Route) -> None: + """Arguments are that route1.id < route2.id""" + ( + stops_missing_from_route1, + stops_missing_from_route2, + stops_that_dont_match, + ) = self.calculate_twin_routes_diff(route1, route2) + + for st in stops_missing_from_route1: + if ( + not route1.are_tracks_complete() + or ( + projected_point := project_on_line( + st.stoparea.center, route1.tracks + )["projected_point"] + ) + is not None + and distance(st.stoparea.center, projected_point) + <= MAX_DISTANCE_STOP_TO_LINE + ): + self.city.notice( + f"Stop {st.stoparea.station.name} {st.stop} is included " + f"in the {route2.id} but not included in {route1.id}", + route1.element, + ) + + for st in stops_missing_from_route2: + if ( + not route2.are_tracks_complete() + or ( + projected_point := project_on_line( + st.stoparea.center, route2.tracks + )["projected_point"] + ) + is not None + and distance(st.stoparea.center, projected_point) + <= MAX_DISTANCE_STOP_TO_LINE + ): + self.city.notice( + f"Stop {st.stoparea.station.name} {st.stop} is included " + f"in the {route1.id} but not included in {route2.id}", + route2.element, + ) + + for st1, 
st2 in stops_that_dont_match: + if ( + st1.stoparea.station == st2.stoparea.station + or distance(st1.stop, st2.stop) < SUGGEST_TRANSFER_MIN_DISTANCE + ): + self.city.notice( + "Should there be one stoparea or a transfer between " + f"{st1.stoparea.station.name} {st1.stop} and " + f"{st2.stoparea.station.name} {st2.stop}?", + route1.element, + ) + + @staticmethod + def calculate_twin_routes_diff(route1: Route, route2: Route) -> tuple: + """Wagner–Fischer algorithm for stops diff in two twin routes.""" + + stops1 = route1.stops + stops2 = route2.stops[::-1] + + def stops_match(stop1: RouteStop, stop2: RouteStop) -> bool: + return ( + stop1.stoparea == stop2.stoparea + or stop1.stoparea.transfer is not None + and stop1.stoparea.transfer == stop2.stoparea.transfer + ) + + d = [[0] * (len(stops2) + 1) for _ in range(len(stops1) + 1)] + d[0] = list(range(len(stops2) + 1)) + for i in range(len(stops1) + 1): + d[i][0] = i + + for i in range(1, len(stops1) + 1): + for j in range(1, len(stops2) + 1): + d[i][j] = ( + d[i - 1][j - 1] + if stops_match(stops1[i - 1], stops2[j - 1]) + else min((d[i - 1][j], d[i][j - 1], d[i - 1][j - 1])) + 1 + ) + + stops_missing_from_route1: list[RouteStop] = [] + stops_missing_from_route2: list[RouteStop] = [] + stops_that_dont_match: list[tuple[RouteStop, RouteStop]] = [] + + i = len(stops1) + j = len(stops2) + while not (i == 0 and j == 0): + action = None + if i > 0 and j > 0: + match = stops_match(stops1[i - 1], stops2[j - 1]) + if match and d[i - 1][j - 1] == d[i][j]: + action = "no" + elif not match and d[i - 1][j - 1] + 1 == d[i][j]: + action = "change" + if not action and i > 0 and d[i - 1][j] + 1 == d[i][j]: + action = "add_2" + if not action and j > 0 and d[i][j - 1] + 1 == d[i][j]: + action = "add_1" + + match action: + case "add_1": + stops_missing_from_route1.append(stops2[j - 1]) + j -= 1 + case "add_2": + stops_missing_from_route2.append(stops1[i - 1]) + i -= 1 + case _: + if action == "change": + stops_that_dont_match.append( + 
(stops1[i - 1], stops2[j - 1]) + ) + i -= 1 + j -= 1 + return ( + stops_missing_from_route1, + stops_missing_from_route2, + stops_that_dont_match, + ) + + def __len__(self) -> int: + return len(self.routes) + + def __getitem__(self, i) -> Route: + return self.routes[i] + + def __iter__(self) -> Iterator[Route]: + return iter(self.routes) + + def __repr__(self) -> str: + return ( + f"RouteMaster(id={self.id}, mode={self.mode}, ref={self.ref}, " + f"name={self.name}, network={self.network}, " + f"num_variants={len(self.routes)}" + ) diff --git a/subways/structure/route_stop.py b/subways/structure/route_stop.py new file mode 100644 index 00000000..c67d597a --- /dev/null +++ b/subways/structure/route_stop.py @@ -0,0 +1,122 @@ +from __future__ import annotations + +import typing + +from subways.osm_element import el_center, el_id +from subways.structure.station import Station +from subways.structure.stop_area import StopArea +from subways.types import LonLat, OsmElementT + +if typing.TYPE_CHECKING: + from subways.structure.city import City + + +class RouteStop: + def __init__(self, stoparea: StopArea) -> None: + self.stoparea: StopArea = stoparea + self.stop: LonLat = None # Stop position, possibly projected + self.distance = 0 # In meters from the start of the route + self.platform_entry = None # Platform el_id + self.platform_exit = None # Platform el_id + self.can_enter = False + self.can_exit = False + self.seen_stop = False + self.seen_platform_entry = False + self.seen_platform_exit = False + self.seen_station = False + + @property + def seen_platform(self) -> bool: + return self.seen_platform_entry or self.seen_platform_exit + + @staticmethod + def get_actual_role( + el: OsmElementT, role: str, modes: set[str] + ) -> str | None: + if StopArea.is_stop(el): + return "stop" + elif StopArea.is_platform(el): + return "platform" + elif Station.is_station(el, modes): + if "platform" in role: + return "platform" + else: + return "stop" + return None + + def add(self, 
member: dict, relation: OsmElementT, city: City) -> None: + el = city.elements[el_id(member)] + role = member["role"] + + if StopArea.is_stop(el): + if "platform" in role: + city.warn("Stop position in a platform role in a route", el) + if el["type"] != "node": + city.error("Stop position is not a node", el) + self.stop = el_center(el) + if "entry_only" not in role: + self.can_exit = True + if "exit_only" not in role: + self.can_enter = True + + elif Station.is_station(el, city.modes): + if el["type"] != "node": + city.notice("Station in route is not a node", el) + + if not self.seen_stop and not self.seen_platform: + self.stop = el_center(el) + self.can_enter = True + self.can_exit = True + + elif StopArea.is_platform(el): + if "stop" in role: + city.warn("Platform in a stop role in a route", el) + if "exit_only" not in role: + self.platform_entry = el_id(el) + self.can_enter = True + if "entry_only" not in role: + self.platform_exit = el_id(el) + self.can_exit = True + if not self.seen_stop: + self.stop = el_center(el) + + multiple_check = False + actual_role = RouteStop.get_actual_role(el, role, city.modes) + if actual_role == "platform": + if role == "platform_entry_only": + multiple_check = self.seen_platform_entry + self.seen_platform_entry = True + elif role == "platform_exit_only": + multiple_check = self.seen_platform_exit + self.seen_platform_exit = True + else: + if role != "platform" and "stop" not in role: + city.warn( + f'Platform "{el["tags"].get("name", "")}" ' + f'({el_id(el)}) with invalid role "{role}" in route', + relation, + ) + multiple_check = self.seen_platform + self.seen_platform_entry = True + self.seen_platform_exit = True + elif actual_role == "stop": + multiple_check = self.seen_stop + self.seen_stop = True + if multiple_check: + log_function = city.error if actual_role == "stop" else city.notice + log_function( + f'Multiple {actual_role}s for a station "' + f'{el["tags"].get("name", "")} ' + f"({el_id(el)}) in a route relation", + 
relation, + ) + + def __repr__(self) -> str: + return ( + "RouteStop(stop={}, pl_entry={}, pl_exit={}, stoparea={})".format( + self.stop, + self.platform_entry, + self.platform_exit, + self.stoparea, + ) + ) diff --git a/subways/structure/station.py b/subways/structure/station.py new file mode 100644 index 00000000..f1cd2faa --- /dev/null +++ b/subways/structure/station.py @@ -0,0 +1,62 @@ +from __future__ import annotations + +import typing + +from subways.consts import ALL_MODES, CONSTRUCTION_KEYS +from subways.css_colours import normalize_colour +from subways.osm_element import el_center, el_id +from subways.types import IdT, OsmElementT + +if typing.TYPE_CHECKING: + from subways.structure.city import City + + +class Station: + def __init__(self, el: OsmElementT, city: City) -> None: + """Call this with a railway=station OSM feature.""" + self.id: IdT = el_id(el) + self.element: OsmElementT = el + self.modes = Station.get_modes(el) + self.name = el["tags"].get("name", "?") + self.int_name = el["tags"].get( + "int_name", el["tags"].get("name:en", None) + ) + try: + self.colour = normalize_colour(el["tags"].get("colour", None)) + except ValueError as e: + self.colour = None + city.warn(str(e), el) + self.center = el_center(el) + if self.center is None: + raise Exception("Could not find center of {}".format(el)) + + @staticmethod + def get_modes(el: OsmElementT) -> set[str]: + modes = {m for m in ALL_MODES if el["tags"].get(m) == "yes"} + if mode := el["tags"].get("station"): + modes.add(mode) + return modes + + @staticmethod + def is_station(el: OsmElementT, modes: set[str]) -> bool: + # public_transport=station is too ambiguous and unspecific to use, + # so we expect for it to be backed by railway=station. 
+ if ( + "tram" in modes + and el.get("tags", {}).get("railway") == "tram_stop" + ): + return True + if el.get("tags", {}).get("railway") not in ("station", "halt"): + return False + for k in CONSTRUCTION_KEYS: + if k in el["tags"]: + return False + # Not checking for station=train, obviously + if "train" not in modes and Station.get_modes(el).isdisjoint(modes): + return False + return True + + def __repr__(self) -> str: + return "Station(id={}, modes={}, name={}, center={})".format( + self.id, ",".join(self.modes), self.name, self.center + ) diff --git a/subways/structure/stop_area.py b/subways/structure/stop_area.py new file mode 100644 index 00000000..913b2c7b --- /dev/null +++ b/subways/structure/stop_area.py @@ -0,0 +1,191 @@ +from __future__ import annotations + +import typing +from itertools import chain + +from subways.consts import RAILWAY_TYPES +from subways.css_colours import normalize_colour +from subways.geom_utils import distance +from subways.osm_element import el_id, el_center +from subways.structure.station import Station +from subways.types import IdT, OsmElementT + +if typing.TYPE_CHECKING: + from subways.structure.city import City + +MAX_DISTANCE_TO_ENTRANCES = 300 # in meters + + +class StopArea: + @staticmethod + def is_stop(el: OsmElementT) -> bool: + if "tags" not in el: + return False + if el["tags"].get("railway") == "stop": + return True + if el["tags"].get("public_transport") == "stop_position": + return True + return False + + @staticmethod + def is_platform(el: OsmElementT) -> bool: + if "tags" not in el: + return False + if el["tags"].get("railway") in ("platform", "platform_edge"): + return True + if el["tags"].get("public_transport") == "platform": + return True + return False + + @staticmethod + def is_track(el: OsmElementT) -> bool: + if el["type"] != "way" or "tags" not in el: + return False + return el["tags"].get("railway") in RAILWAY_TYPES + + def __init__( + self, + station: Station, + city: City, + stop_area: OsmElementT | 
None = None, + ) -> None: + """Call this with a Station object.""" + + self.element: OsmElementT = stop_area or station.element + self.id: IdT = el_id(self.element) + self.station: Station = station + self.stops = set() # set of el_ids of stop_positions + self.platforms = set() # set of el_ids of platforms + self.exits = set() # el_id of subway_entrance/train_station_entrance + # for leaving the platform + self.entrances = set() # el_id of subway/train_station entrance + # for entering the platform + self.center = None # lon, lat of the station centre point + self.centers = {} # el_id -> (lon, lat) for all elements + self.transfer = None # el_id of a transfer relation + + self.modes = station.modes + self.name = station.name + self.int_name = station.int_name + self.colour = station.colour + + if stop_area: + self.name = stop_area["tags"].get("name", self.name) + self.int_name = stop_area["tags"].get( + "int_name", stop_area["tags"].get("name:en", self.int_name) + ) + try: + self.colour = ( + normalize_colour(stop_area["tags"].get("colour")) + or self.colour + ) + except ValueError as e: + city.warn(str(e), stop_area) + + self._process_members(station, city, stop_area) + else: + self._add_nearby_entrances(station, city) + + if self.exits and not self.entrances: + city.warn( + "Only exits for a station, no entrances", + stop_area or station.element, + ) + if self.entrances and not self.exits: + city.warn("No exits for a station", stop_area or station.element) + + for el in self.get_elements(): + self.centers[el] = el_center(city.elements[el]) + + """Calculate the center point of the station. This algorithm + cannot rely on a station node, since many stop_areas can share one. 
+ Basically it averages center points of all platforms + and stop positions.""" + if len(self.stops) + len(self.platforms) == 0: + self.center = station.center + else: + self.center = [0, 0] + for sp in chain(self.stops, self.platforms): + spc = self.centers[sp] + for i in range(2): + self.center[i] += spc[i] + for i in range(2): + self.center[i] /= len(self.stops) + len(self.platforms) + + def _process_members( + self, station: Station, city: City, stop_area: OsmElementT + ) -> None: + # If we have a stop area, add all elements from it + tracks_detected = False + for m in stop_area["members"]: + k = el_id(m) + m_el = city.elements.get(k) + if not m_el or "tags" not in m_el: + continue + if Station.is_station(m_el, city.modes): + if k != station.id: + city.error("Stop area has multiple stations", stop_area) + elif StopArea.is_stop(m_el): + self.stops.add(k) + elif StopArea.is_platform(m_el): + self.platforms.add(k) + elif (entrance_type := m_el["tags"].get("railway")) in ( + "subway_entrance", + "train_station_entrance", + ): + if m_el["type"] != "node": + city.warn(f"{entrance_type} is not a node", m_el) + if ( + m_el["tags"].get("entrance") != "exit" + and m["role"] != "exit_only" + ): + self.entrances.add(k) + if ( + m_el["tags"].get("entrance") != "entrance" + and m["role"] != "entry_only" + ): + self.exits.add(k) + elif StopArea.is_track(m_el): + tracks_detected = True + + if tracks_detected: + city.warn("Tracks in a stop_area relation", stop_area) + + def _add_nearby_entrances(self, station: Station, city: City) -> None: + center = station.center + for entrance_el in ( + el + for el in city.elements.values() + if "tags" in el + and (entrance_type := el["tags"].get("railway")) + in ("subway_entrance", "train_station_entrance") + ): + entrance_id = el_id(entrance_el) + if entrance_id in city.stop_areas: + continue # This entrance belongs to some stop_area + c_center = el_center(entrance_el) + if ( + c_center + and distance(center, c_center) <= 
MAX_DISTANCE_TO_ENTRANCES + ): + if entrance_el["type"] != "node": + city.warn(f"{entrance_type} is not a node", entrance_el) + etag = entrance_el["tags"].get("entrance") + if etag != "exit": + self.entrances.add(entrance_id) + if etag != "entrance": + self.exits.add(entrance_id) + + def get_elements(self) -> set[IdT]: + result = {self.id, self.station.id} + result.update(self.entrances) + result.update(self.exits) + result.update(self.stops) + result.update(self.platforms) + return result + + def __repr__(self) -> str: + return ( + f"StopArea(id={self.id}, name={self.name}, station={self.station}," + f" transfer={self.transfer}, center={self.center})" + ) diff --git a/subway_io.py b/subways/subway_io.py similarity index 96% rename from subway_io.py rename to subways/subway_io.py index 8ef5f6ff..3980b4fc 100644 --- a/subway_io.py +++ b/subways/subway_io.py @@ -1,12 +1,20 @@ +from __future__ import annotations + import json import logging +import typing from collections import OrderedDict +from io import BufferedIOBase from typing import Any, TextIO -from subway_structure import City, OsmElementT, StopArea +from subways.types import OsmElementT + +if typing.TYPE_CHECKING: + from subways.structure.city import City + from subways.structure.stop_area import StopArea -def load_xml(f: TextIO | str) -> list[OsmElementT]: +def load_xml(f: BufferedIOBase | str) -> list[OsmElementT]: try: from lxml import etree except ImportError: @@ -257,7 +265,7 @@ def write_recovery_data( def make_city_recovery_data( city: City, ) -> dict[tuple[str | None, str | None], list[dict]]: - routes: dict[tuple(str | None, str | None), list[dict]] = {} + routes: dict[tuple[str | None, str | None], list[dict]] = {} for route in city: # Recovery is based primarily on route/station names/refs. # If route's ref/colour changes, the route won't be used. 
diff --git a/tests/README.md b/subways/tests/README.md similarity index 100% rename from tests/README.md rename to subways/tests/README.md diff --git a/tests/__init__.py b/subways/tests/__init__.py similarity index 100% rename from tests/__init__.py rename to subways/tests/__init__.py diff --git a/tests/assets/cities_info_with_bad_values.csv b/subways/tests/assets/cities_info_with_bad_values.csv similarity index 100% rename from tests/assets/cities_info_with_bad_values.csv rename to subways/tests/assets/cities_info_with_bad_values.csv diff --git a/tests/assets/route_masters.osm b/subways/tests/assets/route_masters.osm similarity index 100% rename from tests/assets/route_masters.osm rename to subways/tests/assets/route_masters.osm diff --git a/tests/assets/tiny_world.osm b/subways/tests/assets/tiny_world.osm similarity index 100% rename from tests/assets/tiny_world.osm rename to subways/tests/assets/tiny_world.osm diff --git a/tests/assets/tiny_world_gtfs/agency.txt b/subways/tests/assets/tiny_world_gtfs/agency.txt similarity index 100% rename from tests/assets/tiny_world_gtfs/agency.txt rename to subways/tests/assets/tiny_world_gtfs/agency.txt diff --git a/tests/assets/tiny_world_gtfs/calendar.txt b/subways/tests/assets/tiny_world_gtfs/calendar.txt similarity index 100% rename from tests/assets/tiny_world_gtfs/calendar.txt rename to subways/tests/assets/tiny_world_gtfs/calendar.txt diff --git a/tests/assets/tiny_world_gtfs/frequencies.txt b/subways/tests/assets/tiny_world_gtfs/frequencies.txt similarity index 100% rename from tests/assets/tiny_world_gtfs/frequencies.txt rename to subways/tests/assets/tiny_world_gtfs/frequencies.txt diff --git a/tests/assets/tiny_world_gtfs/routes.txt b/subways/tests/assets/tiny_world_gtfs/routes.txt similarity index 100% rename from tests/assets/tiny_world_gtfs/routes.txt rename to subways/tests/assets/tiny_world_gtfs/routes.txt diff --git a/tests/assets/tiny_world_gtfs/shapes.txt b/subways/tests/assets/tiny_world_gtfs/shapes.txt 
similarity index 100% rename from tests/assets/tiny_world_gtfs/shapes.txt rename to subways/tests/assets/tiny_world_gtfs/shapes.txt diff --git a/tests/assets/tiny_world_gtfs/stop_times.txt b/subways/tests/assets/tiny_world_gtfs/stop_times.txt similarity index 100% rename from tests/assets/tiny_world_gtfs/stop_times.txt rename to subways/tests/assets/tiny_world_gtfs/stop_times.txt diff --git a/tests/assets/tiny_world_gtfs/stops.txt b/subways/tests/assets/tiny_world_gtfs/stops.txt similarity index 100% rename from tests/assets/tiny_world_gtfs/stops.txt rename to subways/tests/assets/tiny_world_gtfs/stops.txt diff --git a/tests/assets/tiny_world_gtfs/transfers.txt b/subways/tests/assets/tiny_world_gtfs/transfers.txt similarity index 100% rename from tests/assets/tiny_world_gtfs/transfers.txt rename to subways/tests/assets/tiny_world_gtfs/transfers.txt diff --git a/tests/assets/tiny_world_gtfs/trips.txt b/subways/tests/assets/tiny_world_gtfs/trips.txt similarity index 100% rename from tests/assets/tiny_world_gtfs/trips.txt rename to subways/tests/assets/tiny_world_gtfs/trips.txt diff --git a/tests/assets/twin_routes.osm b/subways/tests/assets/twin_routes.osm similarity index 100% rename from tests/assets/twin_routes.osm rename to subways/tests/assets/twin_routes.osm diff --git a/tests/assets/twin_routes_with_divergence.osm b/subways/tests/assets/twin_routes_with_divergence.osm similarity index 100% rename from tests/assets/twin_routes_with_divergence.osm rename to subways/tests/assets/twin_routes_with_divergence.osm diff --git a/tests/sample_data_for_build_tracks.py b/subways/tests/sample_data_for_build_tracks.py similarity index 100% rename from tests/sample_data_for_build_tracks.py rename to subways/tests/sample_data_for_build_tracks.py diff --git a/tests/sample_data_for_center_calculation.py b/subways/tests/sample_data_for_center_calculation.py similarity index 100% rename from tests/sample_data_for_center_calculation.py rename to 
subways/tests/sample_data_for_center_calculation.py diff --git a/tests/sample_data_for_error_messages.py b/subways/tests/sample_data_for_error_messages.py similarity index 78% rename from tests/sample_data_for_error_messages.py rename to subways/tests/sample_data_for_error_messages.py index 0f5a434a..907a077f 100644 --- a/tests/sample_data_for_error_messages.py +++ b/subways/tests/sample_data_for_error_messages.py @@ -42,15 +42,108 @@ "cities_info": [ { "num_stations": 2, - "num_lines": 1, - "num_light_lines": 0, - "num_interchanges": 0, }, ], "errors": [], "warnings": [], "notices": [], }, + { + "name": "Station colour tag present/absent, correct/incorrect, on bear station / with stop_area", # noqa E501 + "xml": """ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +""", + "cities_info": [ + { + "num_stations": 6, + }, + ], + "errors": [], + "warnings": [ + 'Unknown colour code: incorrect (node 4, "Station 4")', + 'Unknown colour code: incorrect (node 6, "Station 6")', + ], + "notices": [], + }, { "name": "Bad station order", "xml": """ diff --git a/tests/sample_data_for_outputs.py b/subways/tests/sample_data_for_outputs.py similarity index 100% rename from tests/sample_data_for_outputs.py rename to subways/tests/sample_data_for_outputs.py diff --git a/tests/sample_data_for_twin_routes.py b/subways/tests/sample_data_for_twin_routes.py similarity index 100% rename from tests/sample_data_for_twin_routes.py rename to subways/tests/sample_data_for_twin_routes.py diff --git a/tests/test_build_tracks.py b/subways/tests/test_build_tracks.py similarity index 96% rename from tests/test_build_tracks.py rename to subways/tests/test_build_tracks.py index b694bbef..2bd4108b 100644 --- a/tests/test_build_tracks.py +++ b/subways/tests/test_build_tracks.py @@ -1,5 +1,5 @@ -from tests.sample_data_for_build_tracks import metro_samples -from tests.util import 
JsonLikeComparisonMixin, TestCase +from subways.tests.sample_data_for_build_tracks import metro_samples +from subways.tests.util import JsonLikeComparisonMixin, TestCase class TestOneRouteTracks(JsonLikeComparisonMixin, TestCase): diff --git a/tests/test_center_calculation.py b/subways/tests/test_center_calculation.py similarity index 91% rename from tests/test_center_calculation.py rename to subways/tests/test_center_calculation.py index 0e423605..5c83627b 100644 --- a/tests/test_center_calculation.py +++ b/subways/tests/test_center_calculation.py @@ -1,9 +1,9 @@ import io from unittest import TestCase -from process_subways import calculate_centers -from subway_io import load_xml -from tests.sample_data_for_center_calculation import metro_samples +from subways.validation import calculate_centers +from subways.subway_io import load_xml +from subways.tests.sample_data_for_center_calculation import metro_samples class TestCenterCalculation(TestCase): diff --git a/tests/test_error_messages.py b/subways/tests/test_error_messages.py similarity index 86% rename from tests/test_error_messages.py rename to subways/tests/test_error_messages.py index c8330015..d879c85e 100644 --- a/tests/test_error_messages.py +++ b/subways/tests/test_error_messages.py @@ -1,12 +1,12 @@ import itertools -from tests.sample_data_for_error_messages import ( +from subways.tests.sample_data_for_error_messages import ( metro_samples as metro_samples_error, ) -from tests.sample_data_for_twin_routes import ( +from subways.tests.sample_data_for_twin_routes import ( metro_samples as metro_samples_route_masters, ) -from tests.util import TestCase +from subways.tests.util import TestCase class TestValidationMessages(TestCase): diff --git a/tests/test_find_transfers.py b/subways/tests/test_find_transfers.py similarity index 88% rename from tests/test_find_transfers.py rename to subways/tests/test_find_transfers.py index bb46dc36..294304f7 100644 --- a/tests/test_find_transfers.py +++ 
b/subways/tests/test_find_transfers.py @@ -1,7 +1,7 @@ from copy import deepcopy -from tests.sample_data_for_outputs import metro_samples -from tests.util import TestCase, JsonLikeComparisonMixin +from subways.tests.sample_data_for_outputs import metro_samples +from subways.tests.util import TestCase, JsonLikeComparisonMixin class TestTransfers(JsonLikeComparisonMixin, TestCase): diff --git a/tests/test_gtfs_processor.py b/subways/tests/test_gtfs_processor.py similarity index 95% rename from tests/test_gtfs_processor.py rename to subways/tests/test_gtfs_processor.py index 1ff7414e..d5a4dcff 100644 --- a/tests/test_gtfs_processor.py +++ b/subways/tests/test_gtfs_processor.py @@ -2,10 +2,14 @@ from functools import partial from pathlib import Path -from processors._common import transit_to_dict -from processors.gtfs import dict_to_row, GTFS_COLUMNS, transit_data_to_gtfs -from tests.sample_data_for_outputs import metro_samples -from tests.util import TestCase +from subways.processors._common import transit_to_dict +from subways.processors.gtfs import ( + dict_to_row, + GTFS_COLUMNS, + transit_data_to_gtfs, +) +from subways.tests.sample_data_for_outputs import metro_samples +from subways.tests.util import TestCase class TestGTFS(TestCase): diff --git a/tests/test_mapsme_processor.py b/subways/tests/test_mapsme_processor.py similarity index 89% rename from tests/test_mapsme_processor.py rename to subways/tests/test_mapsme_processor.py index 64eb9cbd..c77fc6a4 100644 --- a/tests/test_mapsme_processor.py +++ b/subways/tests/test_mapsme_processor.py @@ -1,8 +1,8 @@ from operator import itemgetter -from processors.mapsme import transit_data_to_mapsme -from tests.sample_data_for_outputs import metro_samples -from tests.util import JsonLikeComparisonMixin, TestCase +from subways.processors.mapsme import transit_data_to_mapsme +from subways.tests.sample_data_for_outputs import metro_samples +from subways.tests.util import JsonLikeComparisonMixin, TestCase class 
TestMapsme(JsonLikeComparisonMixin, TestCase): diff --git a/tests/test_overpass.py b/subways/tests/test_overpass.py similarity index 97% rename from tests/test_overpass.py rename to subways/tests/test_overpass.py index 2b0afa3d..beb03ef8 100644 --- a/tests/test_overpass.py +++ b/subways/tests/test_overpass.py @@ -1,6 +1,6 @@ from unittest import TestCase, mock -from process_subways import compose_overpass_request, overpass_request +from subways.overpass import compose_overpass_request, overpass_request class TestOverpassQuery(TestCase): @@ -150,11 +150,11 @@ def test__overpass_request(self) -> None: "%28._%3B%3E%3E%3B%29%3Bout%20body%20center%20qt%3B" ) - with mock.patch("process_subways.json.load") as load_mock: + with mock.patch("subways.overpass.json.load") as load_mock: load_mock.return_value = {"elements": []} with mock.patch( - "process_subways.urllib.request.urlopen" + "subways.overpass.urllib.request.urlopen" ) as urlopen_mock: urlopen_mock.return_value.getcode.return_value = 200 diff --git a/tests/test_prepare_cities.py b/subways/tests/test_prepare_cities.py similarity index 96% rename from tests/test_prepare_cities.py rename to subways/tests/test_prepare_cities.py index 63ddce68..09679c75 100644 --- a/tests/test_prepare_cities.py +++ b/subways/tests/test_prepare_cities.py @@ -2,7 +2,7 @@ from pathlib import Path from unittest import TestCase -from process_subways import prepare_cities +from subways.validation import prepare_cities class TestPrepareCities(TestCase): diff --git a/tests/test_projection.py b/subways/tests/test_projection.py similarity index 86% rename from tests/test_projection.py rename to subways/tests/test_projection.py index b0091aa3..770232c6 100644 --- a/tests/test_projection.py +++ b/subways/tests/test_projection.py @@ -2,17 +2,23 @@ import itertools import unittest -from subway_structure import project_on_segment +from subways.geom_utils import project_on_segment +from subways.types import LonLat class 
TestProjection(unittest.TestCase): - """Test subway_structure.project_on_segment function""" + """Test subways.geom_utils.project_on_segment function""" PRECISION = 10 # decimal places in assertAlmostEqual SHIFT = 1e-6 # Small distance between projected point and segment endpoint - def _test_projection_in_bulk(self, points, segments, answers): + def _test_projection_in_bulk( + self, + points: list[LonLat], + segments: list[tuple[LonLat, LonLat]], + answers: list[float | None], + ) -> None: """Test 'project_on_segment' function for array of points and array of parallel segments projections on which are equal. """ @@ -39,7 +45,7 @@ def _test_projection_in_bulk(self, points, segments, answers): f"{segment}: {u} returned, {answer} expected", ) - def test_projection_on_horizontal_segments(self): + def test_projection_on_horizontal_segments(self) -> None: points = [ (-2, 0), (-1 - self.SHIFT, 0), @@ -74,7 +80,7 @@ def test_projection_on_horizontal_segments(self): self._test_projection_in_bulk(points, horizontal_segments, answers) - def test_projection_on_vertical_segments(self): + def test_projection_on_vertical_segments(self) -> None: points = [ (0, -2), (0, -1 - self.SHIFT), @@ -109,7 +115,7 @@ def test_projection_on_vertical_segments(self): self._test_projection_in_bulk(points, vertical_segments, answers) - def test_projection_on_inclined_segment(self): + def test_projection_on_inclined_segment(self) -> None: points = [ (-2, -2), (-1, -1), @@ -128,7 +134,7 @@ def test_projection_on_inclined_segment(self): self._test_projection_in_bulk(points, segments, answers) - def test_projection_with_different_collections(self): + def test_projection_with_different_collections(self) -> None: """The tested function should accept points as any consecutive container with index operator. 
""" @@ -148,7 +154,7 @@ def test_projection_with_different_collections(self): s2 = s2_type(segment_end2) project_on_segment(p, s1, s2) - def test_projection_on_degenerate_segment(self): + def test_projection_on_degenerate_segment(self) -> None: coords = [-1, 0, 1] points = [(x, y) for x, y in itertools.product(coords, coords)] segments = [ diff --git a/tests/test_route_master.py b/subways/tests/test_route_master.py similarity index 96% rename from tests/test_route_master.py rename to subways/tests/test_route_master.py index 22d2f8bd..77ddf213 100644 --- a/tests/test_route_master.py +++ b/subways/tests/test_route_master.py @@ -1,6 +1,6 @@ -from subway_structure import RouteMaster -from tests.sample_data_for_twin_routes import metro_samples -from tests.util import TestCase +from subways.structure.route_master import RouteMaster +from subways.tests.sample_data_for_twin_routes import metro_samples +from subways.tests.util import TestCase class TestRouteMaster(TestCase): diff --git a/tests/test_station.py b/subways/tests/test_station.py similarity index 96% rename from tests/test_station.py rename to subways/tests/test_station.py index 2081aaa5..65f4b876 100644 --- a/tests/test_station.py +++ b/subways/tests/test_station.py @@ -1,6 +1,6 @@ from unittest import TestCase -from subway_structure import Station +from subways.structure.station import Station class TestStation(TestCase): diff --git a/tests/test_storage.py b/subways/tests/test_storage.py similarity index 86% rename from tests/test_storage.py rename to subways/tests/test_storage.py index 042f4284..692bddd1 100644 --- a/tests/test_storage.py +++ b/subways/tests/test_storage.py @@ -1,9 +1,9 @@ import json from operator import itemgetter -from processors._common import transit_to_dict -from tests.sample_data_for_outputs import metro_samples -from tests.util import JsonLikeComparisonMixin, TestCase +from subways.processors._common import transit_to_dict +from subways.tests.sample_data_for_outputs import 
metro_samples +from subways.tests.util import JsonLikeComparisonMixin, TestCase class TestStorage(JsonLikeComparisonMixin, TestCase): diff --git a/tests/util.py b/subways/tests/util.py similarity index 98% rename from tests/util.py rename to subways/tests/util.py index bfc3fd89..73c142c2 100644 --- a/tests/util.py +++ b/subways/tests/util.py @@ -4,13 +4,13 @@ from typing import Any, TypeAlias, Self from unittest import TestCase as unittestTestCase -from process_subways import ( +from subways.structure.city import City, find_transfers +from subways.subway_io import load_xml +from subways.validation import ( add_osm_elements_to_cities, validate_cities, calculate_centers, ) -from subway_io import load_xml -from subway_structure import City, find_transfers TestCaseMixin: TypeAlias = Self | unittestTestCase diff --git a/subways/types.py b/subways/types.py new file mode 100644 index 00000000..cb1189ae --- /dev/null +++ b/subways/types.py @@ -0,0 +1,14 @@ +from typing import TypeAlias + + +OsmElementT: TypeAlias = dict +IdT: TypeAlias = str # Type of feature ids +TransferT: TypeAlias = set[IdT] # A transfer is a set of StopArea IDs +TransfersT: TypeAlias = list[TransferT] +LonLat: TypeAlias = tuple[float, float] +RailT: TypeAlias = list[LonLat] + + +class CriticalValidationError(Exception): + """Is thrown if an error occurs + that prevents further validation of a city.""" diff --git a/subways/validation.py b/subways/validation.py new file mode 100644 index 00000000..67df60e9 --- /dev/null +++ b/subways/validation.py @@ -0,0 +1,253 @@ +import csv +import logging +import urllib.request +from functools import partial + +from subways.structure.city import City +from subways.types import CriticalValidationError, LonLat, OsmElementT + +DEFAULT_SPREADSHEET_ID = "1SEW1-NiNOnA2qDwievcxYV1FOaQl1mb1fdeyqAxHu3k" +DEFAULT_CITIES_INFO_URL = ( + "https://docs.google.com/spreadsheets/d/" + f"{DEFAULT_SPREADSHEET_ID}/export?format=csv" +) +BAD_MARK = "[bad]" + + +def get_way_center( + 
element: OsmElementT, node_centers: dict[int, LonLat] +) -> LonLat | None: + """ + :param element: dict describing OSM element + :param node_centers: osm_id => LonLat + :return: tuple with center coordinates, or None + """ + + # If elements have been queried via overpass-api with + # 'out center;' clause then ways already have 'center' attribute + if "center" in element: + return element["center"]["lon"], element["center"]["lat"] + + if "nodes" not in element: + return None + + center = [0, 0] + count = 0 + way_nodes = element["nodes"] + way_nodes_len = len(element["nodes"]) + for i, nd in enumerate(way_nodes): + if nd not in node_centers: + continue + # Don't count the first node of a closed way twice + if ( + i == way_nodes_len - 1 + and way_nodes_len > 1 + and way_nodes[0] == way_nodes[-1] + ): + break + center[0] += node_centers[nd][0] + center[1] += node_centers[nd][1] + count += 1 + if count == 0: + return None + element["center"] = {"lat": center[1] / count, "lon": center[0] / count} + return element["center"]["lon"], element["center"]["lat"] + + +def get_relation_center( + element: OsmElementT, + node_centers: dict[int, LonLat], + way_centers: dict[int, LonLat], + relation_centers: dict[int, LonLat], + ignore_unlocalized_child_relations: bool = False, +) -> LonLat | None: + """ + :param element: dict describing OSM element + :param node_centers: osm_id => LonLat + :param way_centers: osm_id => LonLat + :param relation_centers: osm_id => LonLat + :param ignore_unlocalized_child_relations: if a member that is a relation + has no center, skip it and calculate center based on member nodes, + ways and other, "localized" (with known centers), relations + :return: tuple with center coordinates, or None + """ + + # If elements have been queried via overpass-api with + # 'out center;' clause then some relations already have 'center' + # attribute. 
But this is not the case for relations composed only + # of other relations (e.g., route_master, stop_area_group or + # stop_area with only members that are multipolygons) + if "center" in element: + return element["center"]["lon"], element["center"]["lat"] + + center = [0, 0] + count = 0 + for m in element.get("members", list()): + m_id = m["ref"] + m_type = m["type"] + if m_type == "relation" and m_id not in relation_centers: + if ignore_unlocalized_child_relations: + continue + else: + # Cannot calculate fair center because the center + # of a child relation is not known yet + return None + member_container = ( + node_centers + if m_type == "node" + else way_centers + if m_type == "way" + else relation_centers + ) + if m_id in member_container: + center[0] += member_container[m_id][0] + center[1] += member_container[m_id][1] + count += 1 + if count == 0: + return None + element["center"] = {"lat": center[1] / count, "lon": center[0] / count} + return element["center"]["lon"], element["center"]["lat"] + + +def calculate_centers(elements: list[OsmElementT]) -> None: + """Adds 'center' key to each way/relation in elements, + except for empty ways or relations. + Relies on nodes-ways-relations order in the elements list. 
+ """ + nodes: dict[int, LonLat] = {} # id => LonLat + ways: dict[int, LonLat] = {} # id => approx center LonLat + relations: dict[int, LonLat] = {} # id => approx center LonLat + + unlocalized_relations: list[OsmElementT] = [] # 'unlocalized' means + # the center of the relation has not been calculated yet + + for el in elements: + if el["type"] == "node": + nodes[el["id"]] = (el["lon"], el["lat"]) + elif el["type"] == "way": + if center := get_way_center(el, nodes): + ways[el["id"]] = center + elif el["type"] == "relation": + if center := get_relation_center(el, nodes, ways, relations): + relations[el["id"]] = center + else: + unlocalized_relations.append(el) + + def iterate_relation_centers_calculation( + ignore_unlocalized_child_relations: bool, + ) -> list[OsmElementT]: + unlocalized_relations_upd = [] + for rel in unlocalized_relations: + if center := get_relation_center( + rel, nodes, ways, relations, ignore_unlocalized_child_relations + ): + relations[rel["id"]] = center + else: + unlocalized_relations_upd.append(rel) + return unlocalized_relations_upd + + # Calculate centers for relations that have no one yet + while unlocalized_relations: + unlocalized_relations_upd = iterate_relation_centers_calculation(False) + progress = len(unlocalized_relations_upd) < len(unlocalized_relations) + if not progress: + unlocalized_relations_upd = iterate_relation_centers_calculation( + True + ) + progress = len(unlocalized_relations_upd) < len( + unlocalized_relations + ) + if not progress: + break + unlocalized_relations = unlocalized_relations_upd + + +def add_osm_elements_to_cities( + osm_elements: list[OsmElementT], cities: list[City] +) -> None: + for el in osm_elements: + for c in cities: + if c.contains(el): + c.add(el) + + +def validate_cities(cities: list[City]) -> list[City]: + """Validate cities. 
Return list of good cities.""" + good_cities = [] + for c in cities: + try: + c.extract_routes() + except CriticalValidationError as e: + logging.error( + "Critical validation error while processing %s: %s", + c.name, + e, + ) + c.error(str(e)) + except AssertionError as e: + logging.error( + "Validation logic error while processing %s: %s", + c.name, + e, + ) + c.error(f"Validation logic error: {e}") + else: + c.validate() + if c.is_good: + c.calculate_distances() + good_cities.append(c) + + return good_cities + + +def get_cities_info( + cities_info_url: str = DEFAULT_CITIES_INFO_URL, +) -> list[dict]: + response = urllib.request.urlopen(cities_info_url) + if ( + not cities_info_url.startswith("file://") + and (r_code := response.getcode()) != 200 + ): + raise Exception( + f"Failed to download cities spreadsheet: HTTP {r_code}" + ) + data = response.read().decode("utf-8") + reader = csv.DictReader( + data.splitlines(), + fieldnames=( + "id", + "name", + "country", + "continent", + "num_stations", + "num_lines", + "num_light_lines", + "num_interchanges", + "bbox", + "networks", + ), + ) + + cities_info = list() + names = set() + next(reader) # skipping the header + for city_info in reader: + if city_info["id"] and city_info["bbox"]: + cities_info.append(city_info) + name = city_info["name"].strip() + if name in names: + logging.warning( + "Duplicate city name in city list: %s", + city_info, + ) + names.add(name) + return cities_info + + +def prepare_cities( + cities_info_url: str = DEFAULT_CITIES_INFO_URL, overground: bool = False +) -> list[City]: + if overground: + raise NotImplementedError("Overground transit not implemented yet") + cities_info = get_cities_info(cities_info_url) + return list(map(partial(City, overground=overground), cities_info)) diff --git a/checkers/common.py b/tools/checkers/common.py similarity index 100% rename from checkers/common.py rename to tools/checkers/common.py diff --git a/checkers/compare_city_caches.py 
b/tools/checkers/compare_city_caches.py similarity index 100% rename from checkers/compare_city_caches.py rename to tools/checkers/compare_city_caches.py diff --git a/checkers/compare_json_outputs.py b/tools/checkers/compare_json_outputs.py similarity index 100% rename from checkers/compare_json_outputs.py rename to tools/checkers/compare_json_outputs.py diff --git a/mapsme_json_to_cities.py b/tools/legacy/mapsme_json_to_cities.py similarity index 89% rename from mapsme_json_to_cities.py rename to tools/legacy/mapsme_json_to_cities.py index 736b74b7..65cbf5f3 100644 --- a/mapsme_json_to_cities.py +++ b/tools/legacy/mapsme_json_to_cities.py @@ -2,14 +2,18 @@ Generate sorted list of all cities, with [bad] mark for bad cities. !!! Deprecated for use in validation cycle. -Use "process_subways.py --dump-city-list " instead. +Use "scripts/process_subways.py --dump-city-list " instead. """ import argparse import json -from process_subways import BAD_MARK, DEFAULT_CITIES_INFO_URL, get_cities_info +from subways.validation import ( + BAD_MARK, + DEFAULT_CITIES_INFO_URL, + get_cities_info, +) if __name__ == "__main__": @@ -19,7 +23,7 @@ used by subway render to generate the list of network at frontend. 
It uses two sources: a mapsme.json validator output with good networks, and a google spreadsheet with networks for the - process_subways.download_cities() function.""" + subways.validation.get_cities_info() function.""" ), formatter_class=argparse.RawTextHelpFormatter, ) diff --git a/make_all_metro_poly.py b/tools/make_poly/make_all_metro_poly.py similarity index 95% rename from make_all_metro_poly.py rename to tools/make_poly/make_all_metro_poly.py index e8450a24..88f9b8aa 100644 --- a/make_all_metro_poly.py +++ b/tools/make_poly/make_all_metro_poly.py @@ -3,7 +3,7 @@ from shapely import unary_union from shapely.geometry import MultiPolygon, Polygon -from process_subways import DEFAULT_CITIES_INFO_URL, get_cities_info +from subways.validation import DEFAULT_CITIES_INFO_URL, get_cities_info def make_disjoint_metro_polygons(cities_info_url: str) -> None: diff --git a/tools/make_poly/tests/__init__.py b/tools/make_poly/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/assets/cities_info_1city.csv b/tools/make_poly/tests/assets/cities_info_1city.csv similarity index 100% rename from tests/assets/cities_info_1city.csv rename to tools/make_poly/tests/assets/cities_info_1city.csv diff --git a/tests/assets/cities_info_2cities.csv b/tools/make_poly/tests/assets/cities_info_2cities.csv similarity index 100% rename from tests/assets/cities_info_2cities.csv rename to tools/make_poly/tests/assets/cities_info_2cities.csv diff --git a/tests/test_make_all_metro_poly.py b/tools/make_poly/tests/test_make_all_metro_poly.py similarity index 94% rename from tests/test_make_all_metro_poly.py rename to tools/make_poly/tests/test_make_all_metro_poly.py index dac8dae5..d6df8311 100644 --- a/tests/test_make_all_metro_poly.py +++ b/tools/make_poly/tests/test_make_all_metro_poly.py @@ -1,6 +1,6 @@ import contextlib import io -import os +from pathlib import Path from unittest import TestCase from make_all_metro_poly import make_disjoint_metro_polygons @@ -63,9 
+63,8 @@ class TestMakeAllMetroPoly(TestCase): def test_make_disjoint_metro_polygons(self) -> None: for case in cases: with self.subTest(msg=case["csv_file"]): - file_url = ( - f"file://{os.getcwd()}/tests/assets/{case['csv_file']}" - ) + assets_dir = Path(__file__).resolve().parent / "assets" + file_url = f"file://{assets_dir}/{case['csv_file']}" stream = io.StringIO() with contextlib.redirect_stdout(stream): make_disjoint_metro_polygons(file_url) diff --git a/stop_areas/make_stop_areas.py b/tools/stop_areas/make_stop_areas.py similarity index 100% rename from stop_areas/make_stop_areas.py rename to tools/stop_areas/make_stop_areas.py diff --git a/stop_areas/make_tram_areas.py b/tools/stop_areas/make_tram_areas.py similarity index 100% rename from stop_areas/make_tram_areas.py rename to tools/stop_areas/make_tram_areas.py diff --git a/stop_areas/requirements.txt b/tools/stop_areas/requirements.txt similarity index 100% rename from stop_areas/requirements.txt rename to tools/stop_areas/requirements.txt diff --git a/stop_areas/serve.py b/tools/stop_areas/serve.py similarity index 100% rename from stop_areas/serve.py rename to tools/stop_areas/serve.py diff --git a/stop_areas/templates/index.html b/tools/stop_areas/templates/index.html similarity index 100% rename from stop_areas/templates/index.html rename to tools/stop_areas/templates/index.html diff --git a/v2h_templates.py b/tools/v2h/v2h_templates.py similarity index 100% rename from v2h_templates.py rename to tools/v2h/v2h_templates.py diff --git a/validation_to_html.py b/tools/v2h/validation_to_html.py similarity index 99% rename from validation_to_html.py rename to tools/v2h/validation_to_html.py index 0f9ec3b8..42158f89 100755 --- a/validation_to_html.py +++ b/tools/v2h/validation_to_html.py @@ -9,7 +9,7 @@ from collections import defaultdict from typing import Any -from process_subways import DEFAULT_SPREADSHEET_ID +from subways.validation import DEFAULT_SPREADSHEET_ID from v2h_templates import ( 
COUNTRY_CITY, COUNTRY_FOOTER, From 179dcb6a6f3683bd4c1b7bb676a156b4b5509de9 Mon Sep 17 00:00:00 2001 From: Alexey Zakharenkov Date: Wed, 20 Mar 2024 20:24:19 +0300 Subject: [PATCH 10/13] Use virtual environments --- .github/workflows/python-app.yml | 18 ++++++++--- README.md | 6 ++-- scripts/process_subways.sh | 53 +++++++++++++++++++++++--------- scripts/requirements.txt | 1 + subways/tests/README.md | 15 ++++----- tools/make_poly/requirements.txt | 1 + 6 files changed, 63 insertions(+), 31 deletions(-) create mode 100644 scripts/requirements.txt create mode 100644 tools/make_poly/requirements.txt diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index 55ce3530..37b8af81 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -23,17 +23,25 @@ jobs: uses: actions/setup-python@v3 with: python-version: "3.11" - - name: Install dependencies + - name: Install dependencies for linters run: | python -m pip install --upgrade pip - pip install flake8==6.0.0 black==23.1.0 shapely==2.0.1 - pip install -r subways/requirements.txt + pip install flake8==6.0.0 black==23.1.0 - name: Lint with flake8 run: | flake8 - name: Check with black run: | black --check --line-length 79 . - - name: Test with unittest + - name: Test subways with unittest + run: | + export PYTHONPATH=$(pwd) + pip freeze | xargs pip uninstall -y + pip install -r subways/requirements.txt + python -m unittest discover subways + - name: Test tools with unittest run: | - python -m unittest discover tests \ No newline at end of file + export PYTHONPATH=$(pwd) + pip freeze | xargs pip uninstall -y + pip install -r tools/make_poly/requirements.txt + python -m unittest discover tools/make_poly diff --git a/README.md b/README.md index 157e1ad2..516c6365 100644 --- a/README.md +++ b/README.md @@ -59,9 +59,11 @@ if you allow the `scripts/process_subway.py` to fetch data from Overpass API. 
He git clone https://github.com/alexey-zakharenkov/subways.git subways_validator cd subways_validator ``` -3. Install python dependencies +3. Configure python environment, e.g. ```bash - pip install -r subways/requirements.txt + python3 -m venv scripts/.venv + source scripts/.venv/bin/activate + pip install -r scripts/requirements.txt ``` 4. Execute ```bash diff --git a/scripts/process_subways.sh b/scripts/process_subways.sh index 62a45e75..837463a6 100755 --- a/scripts/process_subways.sh +++ b/scripts/process_subways.sh @@ -63,6 +63,22 @@ EOF fi +function activate_venv_at_path() { + path=$1 + + if [ ! -d "$path/".venv ]; then + "${PYTHON:-python3.11}" -m venv "$path"/.venv + fi + + source "$path"/.venv/bin/activate + + if [ -f "$path"/requirements.txt ]; then + pip install --upgrade pip + pip install -r "$path"/requirements.txt + fi +} + + function check_osmctools() { OSMCTOOLS="${OSMCTOOLS:-$HOME/osmctools}" if [ ! -f "$OSMCTOOLS/osmupdate" ]; then @@ -91,39 +107,39 @@ function check_poly() { if [ -z "${POLY-}" -o ! -f "${POLY-}" ]; then POLY=${POLY:-$(mktemp "$TMPDIR/all-metro.XXXXXXXX.poly")} - if [ -n "$("$PYTHON" -c "import shapely" 2>&1)" ]; then - "$PYTHON" -m pip install shapely==2.0.1 - fi - "$PYTHON" "$SUBWAYS_REPO_PATH"/tools/make_poly/make_all_metro_poly.py \ + activate_venv_at_path "$SUBWAYS_REPO_PATH/tools/make_poly" + python "$SUBWAYS_REPO_PATH"/tools/make_poly/make_all_metro_poly.py \ ${CITIES_INFO_URL:+--cities-info-url "$CITIES_INFO_URL"} > "$POLY" + deactivate fi fi POLY_CHECKED=1 fi } - -PYTHON=${PYTHON:-python3} -# This will fail if there is no python -"$PYTHON" --version > /dev/null - # "readlink -f" echoes canonicalized absolute path to a file/directory SUBWAYS_REPO_PATH="$(readlink -f $(dirname "$0")/..)" if [ !
-f "$SUBWAYS_REPO_PATH/scripts/process_subways.py" ]; then - echo "Please clone the subways repo to $SUBWAYS_PATH" + echo "Please clone the subways repo to $SUBWAYS_REPO_PATH" exit 2 fi -TMPDIR="${TMPDIR:-$SUBWAYS_REPO_PATH}" -mkdir -p "$TMPDIR" +# Contains 'subways' dir and is required by the main validator python script +# as well as by some tools +export PYTHONPATH="$SUBWAYS_REPO_PATH" # Downloading the latest version of the subways script if [ -n "${GIT_PULL-}" ]; then ( - cd "$SUBWAYS_PATH" + pushd "$SUBWAYS_REPO_PATH" git pull origin master + popd ) fi + +TMPDIR="${TMPDIR:-"$SUBWAYS_REPO_PATH"}" +mkdir -p "$TMPDIR" + if [ -z "${FILTERED_DATA-}" ]; then FILTERED_DATA="$TMPDIR/subways.osm" NEED_TO_REMOVE_FILTERED_DATA=1 @@ -244,7 +260,9 @@ if [ -n "${DUMP-}" ]; then fi VALIDATION="$TMPDIR/validation.json" -"$PYTHON" "$SUBWAYS_REPO_PATH/scripts/process_subways.py" ${QUIET:+-q} \ + +activate_venv_at_path "$SUBWAYS_REPO_PATH/scripts" +python "$SUBWAYS_REPO_PATH/scripts/process_subways.py" ${QUIET:+-q} \ -x "$FILTERED_DATA" -l "$VALIDATION" \ ${CITIES_INFO_URL:+--cities-info-url "$CITIES_INFO_URL"} \ ${MAPSME:+--output-mapsme "$MAPSME"} \ @@ -256,6 +274,8 @@ VALIDATION="$TMPDIR/validation.json" ${ELEMENTS_CACHE:+-i "$ELEMENTS_CACHE"} \ ${CITY_CACHE:+--cache "$CITY_CACHE"} \ ${RECOVERY_PATH:+-r "$RECOVERY_PATH"} +deactivate + if [ -n "${NEED_TO_REMOVE_FILTERED_DATA-}" ]; then rm "$FILTERED_DATA" @@ -270,9 +290,12 @@ fi mkdir -p $HTML_DIR rm -f "$HTML_DIR"/*.html -"$PYTHON" "$SUBWAYS_REPO_PATH/tools/v2h/validation_to_html.py" \ + +activate_venv_at_path "$SUBWAYS_REPO_PATH/tools/v2h" +python "$SUBWAYS_REPO_PATH/tools/v2h/validation_to_html.py" \ ${CITIES_INFO_URL:+--cities-info-url "$CITIES_INFO_URL"} \ "$VALIDATION" "$HTML_DIR" +deactivate # Uploading files to the server diff --git a/scripts/requirements.txt b/scripts/requirements.txt new file mode 100644 index 00000000..1f71eee1 --- /dev/null +++ b/scripts/requirements.txt @@ -0,0 +1 @@ +-r 
../subways/requirements.txt diff --git a/subways/tests/README.md b/subways/tests/README.md index d6da4668..44238098 100644 --- a/subways/tests/README.md +++ b/subways/tests/README.md @@ -1,13 +1,10 @@ -To perform tests manually, run this command from the top directory +To perform tests, run this command from the top directory of the repository: ```bash -python -m unittest discover tests +export PYTHONPATH=$(pwd) +[ -d "subways/tests/.venv" ] || python3 -m venv subways/tests/.venv +source subways/tests/.venv/bin/activate +pip install -r subways/requirements.txt +python -m unittest discover subways ``` - -or simply - -```bash -python -m unittest -``` - diff --git a/tools/make_poly/requirements.txt b/tools/make_poly/requirements.txt new file mode 100644 index 00000000..67ee66cf --- /dev/null +++ b/tools/make_poly/requirements.txt @@ -0,0 +1 @@ +shapely==2.0.1 From 6a4c2a255f24862a7cdf4519973076e0e289b7f4 Mon Sep 17 00:00:00 2001 From: Alexey Zakharenkov Date: Thu, 21 Mar 2024 13:12:05 +0300 Subject: [PATCH 11/13] Remove obsolete scripts --- README.md | 2 +- scripts/build_city.sh | 16 ------- scripts/build_trams.sh | 16 ------- scripts/process_trams.sh | 94 ---------------------------------------- 4 files changed, 1 insertion(+), 127 deletions(-) delete mode 100755 scripts/build_city.sh delete mode 100755 scripts/build_trams.sh delete mode 100755 scripts/process_trams.sh diff --git a/README.md b/README.md index 516c6365..44e4742d 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # Subway Preprocessor Here you see a list of scripts that can be used for preprocessing all the metro -systems in the world from OpenStreetMap. `scripts/subway_structure.py` produces +systems in the world from OpenStreetMap. `subways` package produces a list of disjunct systems that can be used for routing and for displaying of metro maps. 
diff --git a/scripts/build_city.sh b/scripts/build_city.sh deleted file mode 100755 index 6b8d8af0..00000000 --- a/scripts/build_city.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/bash -set -e -u -[ $# -lt 1 ] && echo "Usage: $0 [ []]" && exit 1 - -export OSMCTOOLS="${OSMCTOOLS:-$HOME/osm/planet}" -export DUMP=html -export JSON=html -if [ -n "${2-}" ]; then - export CITY="$2" -fi -if [ -n "${3-}" ]; then - export BBOX="$3" -elif [ -n "${CITY-}" ]; then - export BBOX="$(python3 -c 'import subway_structure; c = [x for x in subway_structure.download_cities() if x.name == "'"$CITY"'"]; print("{1},{0},{3},{2}".format(*c[0].bbox))')" || true -fi -"$(dirname "$0")/process_subways.sh" "$1" diff --git a/scripts/build_trams.sh b/scripts/build_trams.sh deleted file mode 100755 index 6b62d245..00000000 --- a/scripts/build_trams.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/bash -set -e -u -[ $# -lt 1 ] && echo "Usage: $0 [ []]" && exit 1 - -export OSMCTOOLS="${OSMCTOOLS:-$HOME/osm/planet}" -export DUMP=html -export JSON=html -if [ -n "${2-}" ]; then - export CITY="$2" -fi -if [ -n "${3-}" ]; then - export BBOX="$3" -elif [ -n "${CITY-}" ]; then - export BBOX="$(python3 -c 'import subway_structure; c = [x for x in subway_structure.download_cities(True) if x.name == "'"$CITY"'"]; print("{1},{0},{3},{2}".format(*c[0].bbox))')" || true -fi -"$(dirname "$0")/process_trams.sh" "$1" diff --git a/scripts/process_trams.sh b/scripts/process_trams.sh deleted file mode 100755 index 84e007a0..00000000 --- a/scripts/process_trams.sh +++ /dev/null @@ -1,94 +0,0 @@ -#!/bin/bash -set -e -u - -if [ $# -lt 1 -a -z "${PLANET-}" ]; then - echo "This script updates a planet or an extract, processes tram networks in it" - echo "and produses a set of HTML files with validation results." 
- echo - echo "Usage: $0 " - echo - echo "Variable reference:" - echo "- PLANET: path for the source o5m file (the entire planet or an extract)" - echo "- CITY: name of a city to process" - echo "- BBOX: bounding box of an extract; x1,y1,x2,y2" - echo "- DUMP: file name to dump city data" - echo "- MAPSME: file name for maps.me json output" - echo "- OSMCTOOLS: path to osmconvert and osmupdate binaries" - echo "- PYTHON: python 3 executable" - echo "- GIT_PULL: set to 1 to update the scripts" - echo "- TMPDIR: path to temporary files" - echo "- HTML_DIR: target path for generated HTML files" - echo "- SERVER: server name and path to upload HTML files (e.g. ilya@osmz.ru:/var/www/)" - echo "- SERVER_KEY: rsa key to supply for uploading the files" - echo "- REMOVE_HTML: set to 1 to remove HTML_DIR after uploading" - exit 1 -fi - -[ -n "${WHAT-}" ] && echo WHAT - -PLANET="${PLANET:-${1-}}" -[ ! -f "$PLANET" ] && echo "Cannot find planet file $PLANET" && exit 2 -OSMCTOOLS="${OSMCTOOLS:-$HOME/osmctools}" -if [ ! -f "$OSMCTOOLS/osmupdate" ]; then - if which osmupdate > /dev/null; then - OSMCTOOLS="$(dirname "$(which osmupdate)")" - else - echo "Please compile osmctools to $OSMCTOOLS" - exit 1 - fi -fi -PYTHON=${PYTHON:-python3} -# This will fail if there is no python -"$PYTHON" --version > /dev/null -SUBWAYS_PATH="$(dirname "$0")/.." -[ ! 
-f "$SUBWAYS_PATH/process_subways.py" ] && echo "Please clone the subways repo to $SUBWAYS_PATH" && exit 2 -TMPDIR="${TMPDIR:-$SUBWAYS_PATH}" - -# Downloading the latest version of the subways script - - -if [ -n "${GIT_PULL-}" ]; then ( - cd "$SUBWAYS_PATH" - git pull origin master -) fi - - -# Updating the planet file - -PLANET_ABS="$(cd "$(dirname "$PLANET")"; pwd)/$(basename "$PLANET")" -( - cd "$OSMCTOOLS" # osmupdate requires osmconvert in a current directory - ./osmupdate --drop-author --out-o5m "$PLANET_ABS" ${BBOX+"-b=$BBOX"} "$PLANET_ABS.new.o5m" && mv "$PLANET_ABS.new.o5m" "$PLANET_ABS" || true -) - -# Filtering it - -FILTERED_DATA="$TMPDIR/subways.osm" -QRELATIONS="route=tram route_master=tram public_transport=stop_area =stop_area_group" -QNODES="railway=tram_stop railway=subway_entrance tram=yes" -"$OSMCTOOLS/osmfilter" "$PLANET" --keep= --keep-relations="$QRELATIONS" --keep-nodes="$QNODES" --drop-author "-o=$FILTERED_DATA" - -# Running the validation - -VALIDATION="$TMPDIR/validation.json" -"$PYTHON" "$SUBWAYS_PATH/process_subways.py" -t -q -x "$FILTERED_DATA" -l "$VALIDATION" ${MAPSME+-o "$MAPSME"} ${CITY+-c "$CITY"} ${DUMP+-d "$DUMP"} ${JSON+-j "$JSON"} -rm "$FILTERED_DATA" - -# Preparing HTML files - -if [ -z "${HTML_DIR-}" ]; then - HTML_DIR="$SUBWAYS_PATH/html" - REMOVE_HTML=1 -fi - -mkdir -p $HTML_DIR -rm -f "$HTML_DIR"/*.html -"$PYTHON" "$SUBWAYS_PATH/validation_to_html.py" "$VALIDATION" "$HTML_DIR" -rm "$VALIDATION" - -# Uploading files to the server - -if [ -n "${SERVER-}" ]; then - scp -q ${SERVER_KEY+-i "$SERVER_KEY"} "$HTML_DIR"/* "$SERVER" - [ -n "${REMOVE_HTML-}" ] && rm -r "$HTML_DIR" -fi From aff6a9f129c09cc046176358c40250fc61765e43 Mon Sep 17 00:00:00 2001 From: Alexey Zakharenkov Date: Thu, 4 Apr 2024 12:48:29 +0300 Subject: [PATCH 12/13] Process route duration (average vehicle speed) --- subways/osm_element.py | 7 + subways/processors/_common.py | 2 + subways/processors/gtfs.py | 14 ++ subways/processors/mapsme.py | 4 +- 
subways/structure/city.py | 26 ++-- subways/structure/route.py | 137 ++++++++++------- subways/structure/route_master.py | 33 ++-- subways/tests/assets/tiny_world.osm | 9 +- .../tests/assets/tiny_world_gtfs/trips.txt | 14 +- subways/tests/sample_data_for_outputs.py | 6 + subways/tests/test_route.py | 141 ++++++++++++++++++ 11 files changed, 303 insertions(+), 90 deletions(-) create mode 100644 subways/tests/test_route.py diff --git a/subways/osm_element.py b/subways/osm_element.py index 5ea8bc4b..19861da3 100644 --- a/subways/osm_element.py +++ b/subways/osm_element.py @@ -17,3 +17,10 @@ def el_center(el: OsmElementT) -> LonLat | None: elif "center" in el: return el["center"]["lon"], el["center"]["lat"] return None + + +def get_network(relation: OsmElementT) -> str | None: + for k in ("network:metro", "network", "operator"): + if k in relation["tags"]: + return relation["tags"][k] + return None diff --git a/subways/processors/_common.py b/subways/processors/_common.py index 1d58da45..55658940 100644 --- a/subways/processors/_common.py +++ b/subways/processors/_common.py @@ -10,6 +10,7 @@ DEFAULT_INTERVAL = 2.5 * 60 # seconds KMPH_TO_MPS = 1 / 3.6 # km/h to m/s conversion multiplier +DEFAULT_AVE_VEHICLE_SPEED = 40 * KMPH_TO_MPS # m/s SPEED_ON_TRANSFER = 3.5 * KMPH_TO_MPS # m/s TRANSFER_PENALTY = 30 # seconds @@ -52,6 +53,7 @@ def transit_to_dict(cities: list[City], transfers: TransfersT) -> dict: "start_time": route.start_time, "end_time": route.end_time, "interval": route.interval, + "duration": route.duration, "stops": [ { "stoparea_id": route_stop.stoparea.id, diff --git a/subways/processors/gtfs.py b/subways/processors/gtfs.py index 3722815f..df70cc72 100644 --- a/subways/processors/gtfs.py +++ b/subways/processors/gtfs.py @@ -9,8 +9,10 @@ from zipfile import ZipFile from ._common import ( + DEFAULT_AVE_VEHICLE_SPEED, DEFAULT_INTERVAL, format_colour, + KMPH_TO_MPS, SPEED_ON_TRANSFER, TRANSFER_PENALTY, transit_to_dict, @@ -63,6 +65,7 @@ "trip_route_type", 
"route_pattern_id", "bikes_allowed", + "average_speed", # extension field (km/h) ], "stops": [ "stop_id", @@ -242,11 +245,22 @@ def transit_data_to_gtfs(data: dict) -> dict: for itinerary in route_master["itineraries"]: shape_id = itinerary["id"][1:] # truncate leading 'r' + average_speed = round( + ( + DEFAULT_AVE_VEHICLE_SPEED + if not itinerary["duration"] + else itinerary["stops"][-1]["distance"] + / itinerary["duration"] + ) + / KMPH_TO_MPS, + 1, + ) # km/h trip = { "trip_id": itinerary["id"], "route_id": route_master["id"], "service_id": "always", "shape_id": shape_id, + "average_speed": average_speed, } gtfs_data["trips"].append(trip) diff --git a/subways/processors/mapsme.py b/subways/processors/mapsme.py index e176832b..32f5b695 100755 --- a/subways/processors/mapsme.py +++ b/subways/processors/mapsme.py @@ -14,6 +14,7 @@ from subways.structure.station import Station from subways.types import IdT, LonLat, OsmElementT, TransfersT from ._common import ( + DEFAULT_AVE_VEHICLE_SPEED, DEFAULT_INTERVAL, format_colour, KMPH_TO_MPS, @@ -29,7 +30,6 @@ OSM_TYPES = {"n": (0, "node"), "w": (2, "way"), "r": (3, "relation")} ENTRANCE_PENALTY = 60 # seconds SPEED_TO_ENTRANCE = 5 * KMPH_TO_MPS # m/s -SPEED_ON_LINE = 40 * KMPH_TO_MPS # m/s # (stoparea1_uid, stoparea2_uid) -> seconds; stoparea1_uid < stoparea2_uid TransferTimesT: TypeAlias = dict[tuple[int, int], int] @@ -258,7 +258,7 @@ def find_exits_for_platform( itin.append( [ uid(stop.stoparea.id), - round(stop.distance / SPEED_ON_LINE), + round(stop.distance / DEFAULT_AVE_VEHICLE_SPEED), ] ) # Make exits from platform nodes, diff --git a/subways/structure/city.py b/subways/structure/city.py index 441c08b1..480a0fd6 100644 --- a/subways/structure/city.py +++ b/subways/structure/city.py @@ -8,7 +8,7 @@ DEFAULT_MODES_OVERGROUND, DEFAULT_MODES_RAPID, ) -from subways.osm_element import el_center, el_id +from subways.osm_element import el_center, el_id, get_network from subways.structure.route import Route from 
subways.structure.route_master import RouteMaster from subways.structure.station import Station @@ -287,11 +287,11 @@ def extract_routes(self) -> None: if el["tags"].get("access") in ("no", "private"): continue route_id = el_id(el) - master = self.masters.get(route_id, None) + master_element = self.masters.get(route_id, None) if self.networks: - network = Route.get_network(el) - if master: - master_network = Route.get_network(master) + network = get_network(el) + if master_element: + master_network = get_network(master_element) else: master_network = None if ( @@ -300,7 +300,7 @@ def extract_routes(self) -> None: ): continue - route = self.route_class(el, self, master) + route = self.route_class(el, self, master_element) if not route.stops: self.warn("Route has no stops", el) continue @@ -308,15 +308,11 @@ def extract_routes(self) -> None: self.warn("Route has only one stop", el) continue - k = el_id(master) if master else route.ref - if k not in self.routes: - self.routes[k] = RouteMaster(self, master) - self.routes[k].add(route) - - # Sometimes adding a route to a newly initialized RouteMaster - # can fail - if len(self.routes[k]) == 0: - del self.routes[k] + master_id = el_id(master_element) or route.ref + route_master = self.routes.setdefault( + master_id, RouteMaster(self, master_element) + ) + route_master.add(route) # And while we're iterating over relations, find interchanges if ( diff --git a/subways/structure/route.py b/subways/structure/route.py index 926733ed..f2ff3c3c 100644 --- a/subways/structure/route.py +++ b/subways/structure/route.py @@ -2,7 +2,7 @@ import re import typing -from collections.abc import Callable, Iterator +from collections.abc import Callable, Collection, Iterator from itertools import islice from subways.consts import ( @@ -18,7 +18,7 @@ find_segment, project_on_line, ) -from subways.osm_element import el_id, el_center +from subways.osm_element import el_id, el_center, get_network from subways.structure.route_stop import RouteStop 
from subways.structure.station import Station from subways.structure.stop_area import StopArea @@ -33,24 +33,29 @@ DISALLOWED_ANGLE_BETWEEN_STOPS = 20 # in degrees -def get_start_end_times( +def parse_time_range( opening_hours: str, -) -> tuple[tuple[int, int], tuple[int, int]] | tuple[None, None]: +) -> tuple[tuple[int, int], tuple[int, int]] | None: """Very simplified method to parse OSM opening_hours tag. We simply take the first HH:MM-HH:MM substring which is the most probable opening hours interval for the most of the weekdays. """ - start_time, end_time = None, None + if opening_hours == "24/7": + return (0, 0), (24, 0) + m = START_END_TIMES_RE.match(opening_hours) - if m: - ints = tuple(map(int, m.groups())) - start_time = (ints[0], ints[1]) - end_time = (ints[2], ints[3]) + if not m: + return None + ints = tuple(map(int, m.groups())) + if ints[1] > 59 or ints[3] > 59: + return None + start_time = (ints[0], ints[1]) + end_time = (ints[2], ints[3]) return start_time, end_time def osm_interval_to_seconds(interval_str: str) -> int | None: - """Convert to int an OSM value for 'interval'/'headway' tag + """Convert to int an OSM value for 'interval'/'headway'/'duration' tag which may be in these formats: HH:MM:SS, HH:MM, @@ -71,7 +76,54 @@ def osm_interval_to_seconds(interval_str: str) -> int | None: return None except ValueError: return None - return seconds + 60 * minutes + 60 * 60 * hours + + if seconds < 0 or minutes < 0 or hours < 0: + return None + if semicolon_count > 0 and (seconds >= 60 or minutes >= 60): + return None + + interval = seconds + 60 * minutes + 60 * 60 * hours + if interval == 0: + return None + return interval + + +def get_interval_in_seconds_from_tags( + tags: dict, keys: str | Collection[str] +) -> int | None: + """Extract time interval value from tags for keys among "keys". + E.g., "interval" and "headway" means the same in OSM. 
+ Examples: + interval=5 => 300 + headway:peak=00:01:30 => 90 + """ + if isinstance(keys, str): + keys = (keys,) + + value = None + for key in keys: + if key in tags: + value = tags[key] + break + if value is None: + for key in keys: + if value: + break + for tag_name in tags: + if tag_name.startswith(key + ":"): + value = tags[tag_name] + break + if not value: + return None + return osm_interval_to_seconds(value) + + +def get_route_interval(tags: dict) -> int | None: + return get_interval_in_seconds_from_tags(tags, ("interval", "headway")) + + +def get_route_duration(tags: dict) -> int | None: + return get_interval_in_seconds_from_tags(tags, "duration") class Route: @@ -95,29 +147,6 @@ def is_route(el: OsmElementT, modes: set[str]) -> bool: return False return True - @staticmethod - def get_network(relation: OsmElementT) -> str | None: - for k in ("network:metro", "network", "operator"): - if k in relation["tags"]: - return relation["tags"][k] - return None - - @staticmethod - def get_interval(tags: dict) -> int | None: - v = None - for k in ("interval", "headway"): - if k in tags: - v = tags[k] - break - else: - for kk in tags: - if kk.startswith(k + ":"): - v = tags[kk] - break - if not v: - return None - return osm_interval_to_seconds(v) - def stopareas(self) -> Iterator[StopArea]: yielded_stopareas = set() for route_stop in self: @@ -146,6 +175,7 @@ def __init__( self.infill = None self.network = None self.interval = None + self.duration = None self.start_time = None self.end_time = None self.is_circular = False @@ -319,46 +349,51 @@ def calculate_distances(self) -> None: def process_tags(self, master: OsmElementT) -> None: relation = self.element + tags = relation["tags"] master_tags = {} if not master else master["tags"] - if "ref" not in relation["tags"] and "ref" not in master_tags: + if "ref" not in tags and "ref" not in master_tags: self.city.notice("Missing ref on a route", relation) - self.ref = relation["tags"].get( - "ref", master_tags.get("ref", 
relation["tags"].get("name", None)) + self.ref = tags.get( + "ref", master_tags.get("ref", tags.get("name", None)) ) - self.name = relation["tags"].get("name", None) - self.mode = relation["tags"]["route"] + self.name = tags.get("name", None) + self.mode = tags["route"] if ( - "colour" not in relation["tags"] + "colour" not in tags and "colour" not in master_tags and self.mode != "tram" ): self.city.notice("Missing colour on a route", relation) try: self.colour = normalize_colour( - relation["tags"].get("colour", master_tags.get("colour", None)) + tags.get("colour", master_tags.get("colour", None)) ) except ValueError as e: self.colour = None self.city.warn(str(e), relation) try: self.infill = normalize_colour( - relation["tags"].get( + tags.get( "colour:infill", master_tags.get("colour:infill", None) ) ) except ValueError as e: self.infill = None self.city.warn(str(e), relation) - self.network = Route.get_network(relation) - self.interval = Route.get_interval( - relation["tags"] - ) or Route.get_interval(master_tags) - self.start_time, self.end_time = get_start_end_times( - relation["tags"].get( - "opening_hours", master_tags.get("opening_hours", "") - ) + self.network = get_network(relation) + self.interval = get_route_interval(tags) or get_route_interval( + master_tags ) - if relation["tags"].get("public_transport:version") == "1": + self.duration = get_route_duration(tags) or get_route_duration( + master_tags + ) + parsed_time_range = parse_time_range( + tags.get("opening_hours", master_tags.get("opening_hours", "")) + ) + if parsed_time_range: + self.start_time, self.end_time = parsed_time_range + + if tags.get("public_transport:version") == "1": self.city.warn( "Public transport version is 1, which means the route " "is an unsorted pile of objects", diff --git a/subways/structure/route_master.py b/subways/structure/route_master.py index 36ab1484..891ae203 100644 --- a/subways/structure/route_master.py +++ b/subways/structure/route_master.py @@ -7,8 +7,8 @@ 
from subways.consts import MAX_DISTANCE_STOP_TO_LINE from subways.css_colours import normalize_colour from subways.geom_utils import distance, project_on_line -from subways.osm_element import el_id -from subways.structure.route import Route +from subways.osm_element import el_id, get_network +from subways.structure.route import get_route_duration, get_route_interval from subways.structure.stop_area import StopArea from subways.types import IdT, OsmElementT @@ -26,7 +26,7 @@ class RouteMaster: def __init__(self, city: City, master: OsmElementT = None) -> None: self.city = city self.routes = [] - self.best: Route = None + self.best: Route = None # noqa: F821 self.id: IdT = el_id(master) self.has_master = master is not None self.interval_from_master = False @@ -46,13 +46,14 @@ def __init__(self, city: City, master: OsmElementT = None) -> None: ) except ValueError: self.infill = None - self.network = Route.get_network(master) + self.network = get_network(master) self.mode = master["tags"].get( "route_master", None ) # This tag is required, but okay self.name = master["tags"].get("name", None) - self.interval = Route.get_interval(master["tags"]) + self.interval = get_route_interval(master["tags"]) self.interval_from_master = self.interval is not None + self.duration = get_route_duration(master["tags"]) else: self.ref = None self.colour = None @@ -61,6 +62,7 @@ def __init__(self, city: City, master: OsmElementT = None) -> None: self.mode = None self.name = None self.interval = None + self.duration = None def stopareas(self) -> Iterator[StopArea]: yielded_stopareas = set() @@ -70,7 +72,7 @@ def stopareas(self) -> Iterator[StopArea]: yield stoparea yielded_stopareas.add(stoparea) - def add(self, route: Route) -> None: + def add(self, route: Route) -> None: # noqa: F821 if not self.network: self.network = route.network elif route.network and route.network != self.network: @@ -148,10 +150,10 @@ def add(self, route: Route) -> None: ): self.best = route - def 
get_meaningful_routes(self) -> list[Route]: + def get_meaningful_routes(self) -> list[Route]: # noqa: F821 return [route for route in self if len(route) >= 2] - def find_twin_routes(self) -> dict[Route, Route]: + def find_twin_routes(self) -> dict[Route, Route]: # noqa: F821 """Two non-circular routes are twins if they have the same end stations and opposite directions, and the number of stations is the same or almost the same. We'll then find stops that are present @@ -325,7 +327,11 @@ def find_common_circular_subsequence( break return common_subsequence - def alert_twin_routes_differ(self, route1: Route, route2: Route) -> None: + def alert_twin_routes_differ( + self, + route1: Route, # noqa: F821 + route2: Route, # noqa: F821 + ) -> None: """Arguments are that route1.id < route2.id""" ( stops_missing_from_route1, @@ -382,7 +388,10 @@ def alert_twin_routes_differ(self, route1: Route, route2: Route) -> None: ) @staticmethod - def calculate_twin_routes_diff(route1: Route, route2: Route) -> tuple: + def calculate_twin_routes_diff( + route1: Route, # noqa: F821 + route2: Route, # noqa: F821 + ) -> tuple: """Wagner–Fischer algorithm for stops diff in two twin routes.""" stops1 = route1.stops @@ -450,10 +459,10 @@ def stops_match(stop1: RouteStop, stop2: RouteStop) -> bool: def __len__(self) -> int: return len(self.routes) - def __getitem__(self, i) -> Route: + def __getitem__(self, i) -> Route: # noqa: F821 return self.routes[i] - def __iter__(self) -> Iterator[Route]: + def __iter__(self) -> Iterator[Route]: # noqa: F821 return iter(self.routes) def __repr__(self) -> str: diff --git a/subways/tests/assets/tiny_world.osm b/subways/tests/assets/tiny_world.osm index 276fb804..4cd0631e 100644 --- a/subways/tests/assets/tiny_world.osm +++ b/subways/tests/assets/tiny_world.osm @@ -187,9 +187,10 @@ + + - @@ -198,6 +199,7 @@ + @@ -208,6 +210,7 @@ + @@ -217,18 +220,18 @@ + - + - diff --git a/subways/tests/assets/tiny_world_gtfs/trips.txt 
b/subways/tests/assets/tiny_world_gtfs/trips.txt index 41da841a..80615596 100644 --- a/subways/tests/assets/tiny_world_gtfs/trips.txt +++ b/subways/tests/assets/tiny_world_gtfs/trips.txt @@ -1,7 +1,7 @@ -route_id,service_id,trip_id,trip_headsign,trip_short_name,direction_id,block_id,shape_id,wheelchair_accessible,trip_route_type,route_pattern_id,bikes_allowed -r15,always,r7,,,,,7,,,, -r15,always,r8,,,,,8,,,, -r14,always,r12,,,,,12,,,, -r14,always,r13,,,,,13,,,, -r11,always,r9,,,,,9,,,, -r11,always,r10,,,,,10,,,, +route_id,service_id,trip_id,trip_headsign,trip_short_name,direction_id,block_id,shape_id,wheelchair_accessible,trip_route_type,route_pattern_id,bikes_allowed,average_speed +r15,always,r7,,,,,7,,,,,40.0 +r15,always,r8,,,,,8,,,,,40.0 +r14,always,r12,,,,,12,,,,,9.4 +r14,always,r13,,,,,13,,,,,11.8 +r11,always,r9,,,,,9,,,,,6.5 +r11,always,r10,,,,,10,,,,,6.5 diff --git a/subways/tests/sample_data_for_outputs.py b/subways/tests/sample_data_for_outputs.py index b50ddbe2..fd2cf434 100644 --- a/subways/tests/sample_data_for_outputs.py +++ b/subways/tests/sample_data_for_outputs.py @@ -163,6 +163,7 @@ "start_time": null, "end_time": null, "interval": null, + "duration": null, "stops": [ { "stoparea_id": "n1", @@ -197,6 +198,7 @@ "start_time": null, "end_time": null, "interval": null, + "duration": null, "stops": [ { "stoparea_id": "r3", @@ -237,6 +239,7 @@ "start_time": null, "end_time": null, "interval": null, + "duration": 600, "stops": [ { "stoparea_id": "n4", @@ -267,6 +270,7 @@ "start_time": null, "end_time": null, "interval": null, + "duration": 480, "stops": [ { "stoparea_id": "n6", @@ -313,6 +317,7 @@ "start_time": null, "end_time": null, "interval": null, + "duration": 300, "stops": [ { "stoparea_id": "r4", @@ -339,6 +344,7 @@ "start_time": null, "end_time": null, "interval": null, + "duration": 300, "stops": [ { "stoparea_id": "r16", diff --git a/subways/tests/test_route.py b/subways/tests/test_route.py new file mode 100644 index 00000000..ec82e41f --- 
/dev/null +++ b/subways/tests/test_route.py @@ -0,0 +1,141 @@ +from unittest import TestCase + +from subways.structure.route import ( + get_interval_in_seconds_from_tags, + osm_interval_to_seconds, + parse_time_range, +) + + +class TestTimeIntervalsParsing(TestCase): + def test__osm_interval_to_seconds__invalid_value(self) -> None: + intervals = ( + ["", "abc", "x30", "30x", "3x0"] + + ["5:", ":5", "01:05:", ":01:05", "01:01:00:", ":01:01:00"] + + ["01x:05", "01:x5", "x5:01:00", "01:0x:00", "01:01:x"] + + ["-5", "01:-05", "-01:05", "-01:00:00", "01:-01:00", "01:01:-01"] + + ["0", "00:00", "00:00:00"] + + ["00:60", "01:00:60", "01:60:00"] + + ["01:60:61", "01:61:60", "01:61:61"] + ) + for interval in intervals: + with self.subTest(msg=f"value='{interval}'"): + self.assertIsNone(osm_interval_to_seconds(interval)) + + def test__osm_interval_to_seconds__valid_value(self) -> None: + intervals = { + "5": 300, + "65": 3900, + "10:55": 39300, + "02:02:02": 7322, + "2:2:2": 7322, + "00:59": 3540, + "01:00": 3600, + "00:00:50": 50, + "00:10:00": 600, + "01:00:00": 3600, + } + + for interval_str, interval_sec in intervals.items(): + with self.subTest(msg=f"value='{interval_str}'"): + self.assertEqual( + interval_sec, osm_interval_to_seconds(interval_str) + ) + + def test__parse_time_range__invalid_values(self) -> None: + ranges = ( + ["", "a", "ab:cd-ab:cd", "1", "1-2", "01-02"] + + ["24/8", "24/7/365"] + + ["1:00-02:00", "01:0-02:00", "01:00-2:00", "01:00-02:0"] + + ["1x:00-02:00", "01:0x-02:00", "01:00-1x:00", "01:00-02:ab"] + + ["-1:00-02:00", "01:-1-02:00", "01:00--2:00", "01:00-02:-1"] + + ["01;00-02:00", "01:00-02;00", "01:00=02:00"] + + ["01:00-#02:00", "01:00 - 02:00"] + + ["01:60-02:05", "01:00-01:61"] + ) + for r in ranges: + with self.subTest(msg=f"value='{r}'"): + self.assertIsNone(parse_time_range(r)) + + def test__parse_time_range__valid_values(self) -> None: + ranges = ( + ["24/7"] + + ["00:00-00:00", "00:01-00:02"] + + ["01:00-02:00", "02:01-01:02"] + + 
["02:00-26:59", "12:01-13:59"] + + ["Mo-Fr 06:00-21:30", "06:00-21:30 (weekdays)"] + + ["Mo-Fr 06:00-21:00; Sa-Su 07:00-20:00"] + ) + answers = [ + ((0, 0), (24, 0)), + ((0, 0), (0, 0)), + ((0, 1), (0, 2)), + ((1, 0), (2, 0)), + ((2, 1), (1, 2)), + ((2, 0), (26, 59)), + ((12, 1), (13, 59)), + ((6, 0), (21, 30)), + ((6, 0), (21, 30)), + ((6, 0), (21, 0)), + ] + + for r, answer in zip(ranges, answers): + with self.subTest(msg=f"value='{r}'"): + self.assertTupleEqual(answer, parse_time_range(r)) + + +class TestRouteIntervals(TestCase): + def test__get_interval_in_seconds_from_tags__one_key(self) -> None: + cases = [ + {"tags": {}, "answer": None}, + {"tags": {"a": "1"}, "answer": None}, + {"tags": {"duration": "1"}, "answer": 60}, + {"tags": {"durationxxx"}, "answer": None}, + {"tags": {"xxxduration"}, "answer": None}, + # prefixes not considered + {"tags": {"ru:duration"}, "answer": None}, + # suffixes considered + {"tags": {"duration:peak": "1"}, "answer": 60}, + # bare tag has precedence over suffixed version + {"tags": {"duration:peak": "1", "duration": "2"}, "answer": 120}, + # first suffixed version apply + {"tags": {"duration:y": "1", "duration:x": "2"}, "answer": 60}, + # other tags present + {"tags": {"a": "x", "duration": "1", "b": "y"}, "answer": 60}, + ] + + for case in cases: + with self.subTest(msg=f"{case['tags']}"): + self.assertEqual( + case["answer"], + get_interval_in_seconds_from_tags( + case["tags"], "duration" + ), + ) + + def test__get_interval_in_seconds_from_tags__several_keys(self) -> None: + keys = ("interval", "headway") + cases = [ + {"tags": {}, "answer": None}, + # prefixes not considered + {"tags": {"ru:interval"}, "answer": None}, + {"tags": {"interval": "1"}, "answer": 60}, + {"tags": {"headway": "1"}, "answer": 60}, + {"tags": {"interval": "1", "headway": "2"}, "answer": 60}, + # interval has precedence due to its position in 'keys' + {"tags": {"headway": "2", "interval": "1"}, "answer": 60}, + # non-suffixed keys has precedence + 
{"tags": {"interval:peak": "1", "headway": "2"}, "answer": 120}, + # among suffixed versions, first key in 'keys' is used first + { + "tags": {"headway:peak": "2", "interval:peak": "1"}, + "answer": 60, + }, + ] + + for case in cases: + with self.subTest(msg=f"{case['tags']}"): + self.assertEqual( + case["answer"], + get_interval_in_seconds_from_tags(case["tags"], keys), + ) From a0a6fb043f177d29e734ec388f51fdffe18af4ce Mon Sep 17 00:00:00 2001 From: Alexey Zakharenkov Date: Fri, 21 Jun 2024 13:29:51 +0300 Subject: [PATCH 13/13] Upgrade shapely to 2.0.4 and fixate indirect requirements --- tools/make_poly/requirements.txt | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/make_poly/requirements.txt b/tools/make_poly/requirements.txt index 67ee66cf..03bc2d90 100644 --- a/tools/make_poly/requirements.txt +++ b/tools/make_poly/requirements.txt @@ -1 +1,4 @@ -shapely==2.0.1 +shapely==2.0.4 + +# Fixate versions of indirect requirements +NumPy==2.0.0