diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index b7352615..37b8af81 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -23,17 +23,25 @@ jobs: uses: actions/setup-python@v3 with: python-version: "3.11" - - name: Install dependencies + - name: Install dependencies for linters run: | python -m pip install --upgrade pip - pip install flake8==6.0.0 black==23.1.0 shapely==2.0.1 - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + pip install flake8==6.0.0 black==23.1.0 - name: Lint with flake8 run: | flake8 - name: Check with black run: | black --check --line-length 79 . - - name: Test with unittest + - name: Test subways with unittest run: | - python -m unittest discover tests + export PYTHONPATH=$(pwd) + pip freeze | xargs pip uninstall -y + pip install -r subways/requirements.txt + python -m unittest discover subways + - name: Test tools with unittest + run: | + export PYTHONPATH=$(pwd) + pip freeze | xargs pip uninstall -y + pip install -r tools/make_poly/requirements.txt + python -m unittest discover tools/make_poly diff --git a/.gitignore b/.gitignore index f2fb32fb..129911ab 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ tmp_html/ html/ .idea .DS_Store +.venv *.log *.json *.geojson diff --git a/README.md b/README.md index 428b46dd..29fd12e5 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # Subway Preprocessor Here you see a list of scripts that can be used for preprocessing all the metro -systems in the world from OpenStreetMap. `subway_structure.py` produces +systems in the world from OpenStreetMap. `subways` package produces a list of disjunct systems that can be used for routing and for displaying of metro maps. @@ -16,14 +16,14 @@ of metro maps. 2. If you don't specify `--xml` or `--source` option to the `process_subways.py` script it tries to fetch data over [Overpass API](https://wiki.openstreetmap.org/wiki/Overpass_API). 
**Not suitable for the whole planet or large countries.** -* Run `process_subways.py` with appropriate set of command line arguments +* Run `scripts/process_subways.py` with appropriate set of command line arguments to build metro structures and receive a validation log. -* Run `validation_to_html.py` on that log to create readable HTML tables. +* Run `tools/v2h/validation_to_html.py` on that log to create readable HTML tables. ## Validating of all metro networks -There is a `process_subways.sh` in the `scripts` directory that is suitable +There is a `scripts/process_subways.sh` script that is suitable for validation of all or many metro networks. It relies on a bunch of environment variables and takes advantage of previous validation runs for effective recurring validations. See @@ -51,17 +51,23 @@ a city's bbox has been extended. ## Validating of a single city A single city or a country with few metro networks can be validated much faster -if you allow the `process_subway.py` to fetch data from Overpass API. Here are the steps: +if you allow the `scripts/process_subways.py` to fetch data from Overpass API. Here are the steps: 1. Python3 interpreter required (3.11+) 2. Clone the repo - ``` + ```bash git clone https://github.com/alexey-zakharenkov/subways.git subways_validator cd subways_validator ``` -3. Execute +3. Configure python environment, e.g. + ```bash + python3 -m venv scripts/.venv + source scripts/.venv/bin/activate + pip install -r scripts/requirements.txt + ``` +4. Execute ```bash - python3 ./process_subways.py -c "London" \ + python3 scripts/process_subways.py -c "London" \ -l validation.log -d London.yaml ``` here @@ -73,28 +79,39 @@ if you allow the `process_subway.py` to fetch data from Overpass API. Here are t `validation.log` would contain the list of errors and warnings. To convert it into pretty HTML format -4. do +5. 
do ```bash mkdir html - python3 ./validation_to_html.py validation.log html + python3 tools/v2h/validation_to_html.py validation.log html ``` +## Publishing validation reports to the Web + +Expose a directory with static contents via a web-server and put into it: +- HTML files from the directory specified in the 2nd parameter of `tools/v2h/validation_to_html.py` +- To enable the "Y" (YAML), "J" (GeoJSON) and "M" (Map) links beside each city name: + - The contents of `render` directory from the repository + - `cities.txt` file generated with `--dump-city-list` parameter of `scripts/process_subways.py` + - YAML files created due to -d option of `scripts/process_subways.py` + - GeoJSON files created due to -j option of `scripts/process_subways.py` + + ## Related external resources Summary information about all metro networks that are monitored is gathered in the [Google Spreadsheet](https://docs.google.com/spreadsheets/d/1SEW1-NiNOnA2qDwievcxYV1FOaQl1mb1fdeyqAxHu3k). Regular updates of validation results are available at [Organic Maps](https://cdn.organicmaps.app/subway/) and -[mail.ru](https://maps.mail.ru/osm/tools/subways/latest/) servers. +[this website](https://maps.vk.com/osm/tools/subways/latest/). You can find more info about this validator instance in [OSM Wiki](https://wiki.openstreetmap.org/wiki/Quality_assurance#subway-preprocessor). ## Adding Stop Areas To OSM -To quickly add `stop_area` relations for the entire city, use the `make_stop_areas.py` script -from the `stop_area` directory. Give it a bounding box or a `.json` file download from Overpass API. -It would produce an JOSM XML file that you should manually check in JOSM. After that +To quickly add `stop_area` relations for the entire city, use the `tools/stop_areas/make_stop_areas.py` script. +Give it a bounding box or a `.json` file downloaded from Overpass API. +It would produce a JOSM XML file that you should manually check in JOSM. After that just upload it. 
## Author and License diff --git a/process_subways.py b/process_subways.py deleted file mode 100755 index 6f7e846f..00000000 --- a/process_subways.py +++ /dev/null @@ -1,549 +0,0 @@ -#!/usr/bin/env python3 -import argparse -import csv -import inspect -import json -import logging -import os -import re -import sys -import time -import urllib.parse -import urllib.request -from functools import partial - -import processors -from subway_io import ( - dump_yaml, - load_xml, - make_geojson, - read_recovery_data, - write_recovery_data, -) -from subway_structure import ( - City, - CriticalValidationError, - find_transfers, - get_unused_entrances_geojson, - MODES_OVERGROUND, - MODES_RAPID, -) - -DEFAULT_SPREADSHEET_ID = "1SEW1-NiNOnA2qDwievcxYV1FOaQl1mb1fdeyqAxHu3k" -DEFAULT_CITIES_INFO_URL = ( - "https://docs.google.com/spreadsheets/d/" - f"{DEFAULT_SPREADSHEET_ID}/export?format=csv" -) - -Point = tuple[float, float] - - -def overpass_request( - overground: bool, overpass_api: str, bboxes: list[list[float]] -) -> list[dict]: - query = "[out:json][timeout:1000];(" - modes = MODES_OVERGROUND if overground else MODES_RAPID - for bbox in bboxes: - bbox_part = "({})".format(",".join(str(coord) for coord in bbox)) - query += "(" - for mode in modes: - query += 'rel[route="{}"]{};'.format(mode, bbox_part) - query += ");" - query += "rel(br)[type=route_master];" - if not overground: - query += "node[railway=subway_entrance]{};".format(bbox_part) - query += "rel[public_transport=stop_area]{};".format(bbox_part) - query += ( - "rel(br)[type=public_transport][public_transport=stop_area_group];" - ) - query += ");(._;>>;);out body center qt;" - logging.debug("Query: %s", query) - url = "{}?data={}".format(overpass_api, urllib.parse.quote(query)) - response = urllib.request.urlopen(url, timeout=1000) - if (r_code := response.getcode()) != 200: - raise Exception(f"Failed to query Overpass API: HTTP {r_code}") - return json.load(response)["elements"] - - -def multi_overpass( - overground: 
bool, overpass_api: str, bboxes: list[list[float]] -) -> list[dict]: - SLICE_SIZE = 10 - INTERREQUEST_WAIT = 5 # in seconds - result = [] - for i in range(0, len(bboxes) + SLICE_SIZE - 1, SLICE_SIZE): - if i > 0: - time.sleep(INTERREQUEST_WAIT) - bboxes_i = bboxes[i : i + SLICE_SIZE] # noqa E203 - result.extend(overpass_request(overground, overpass_api, bboxes_i)) - return result - - -def slugify(name: str) -> str: - return re.sub(r"[^a-z0-9_-]+", "", name.lower().replace(" ", "_")) - - -def get_way_center( - element: dict, node_centers: dict[int, Point] -) -> Point | None: - """ - :param element: dict describing OSM element - :param node_centers: osm_id => (lat, lon) - :return: tuple with center coordinates, or None - """ - - # If elements have been queried via overpass-api with - # 'out center;' clause then ways already have 'center' attribute - if "center" in element: - return element["center"]["lat"], element["center"]["lon"] - - if "nodes" not in element: - return None - - center = [0, 0] - count = 0 - way_nodes = element["nodes"] - way_nodes_len = len(element["nodes"]) - for i, nd in enumerate(way_nodes): - if nd not in node_centers: - continue - # Don't count the first node of a closed way twice - if ( - i == way_nodes_len - 1 - and way_nodes_len > 1 - and way_nodes[0] == way_nodes[-1] - ): - break - center[0] += node_centers[nd][0] - center[1] += node_centers[nd][1] - count += 1 - if count == 0: - return None - element["center"] = {"lat": center[0] / count, "lon": center[1] / count} - return element["center"]["lat"], element["center"]["lon"] - - -def get_relation_center( - element: dict, - node_centers: dict[int, Point], - way_centers: dict[int, Point], - relation_centers: dict[int, Point], - ignore_unlocalized_child_relations: bool = False, -) -> Point | None: - """ - :param element: dict describing OSM element - :param node_centers: osm_id => (lat, lon) - :param way_centers: osm_id => (lat, lon) - :param relation_centers: osm_id => (lat, lon) - :param 
ignore_unlocalized_child_relations: if a member that is a relation - has no center, skip it and calculate center based on member nodes, - ways and other, "localized" (with known centers), relations - :return: tuple with center coordinates, or None - """ - - # If elements have been queried via overpass-api with - # 'out center;' clause then some relations already have 'center' - # attribute. But this is not the case for relations composed only - # of other relations (e.g., route_master, stop_area_group or - # stop_area with only members that are multipolygons) - if "center" in element: - return element["center"]["lat"], element["center"]["lon"] - - center = [0, 0] - count = 0 - for m in element.get("members", list()): - m_id = m["ref"] - m_type = m["type"] - if m_type == "relation" and m_id not in relation_centers: - if ignore_unlocalized_child_relations: - continue - else: - # Cannot calculate fair center because the center - # of a child relation is not known yet - return None - member_container = ( - node_centers - if m_type == "node" - else way_centers - if m_type == "way" - else relation_centers - ) - if m_id in member_container: - center[0] += member_container[m_id][0] - center[1] += member_container[m_id][1] - count += 1 - if count == 0: - return None - element["center"] = {"lat": center[0] / count, "lon": center[1] / count} - return element["center"]["lat"], element["center"]["lon"] - - -def calculate_centers(elements: list[dict]) -> None: - """Adds 'center' key to each way/relation in elements, - except for empty ways or relations. - Relies on nodes-ways-relations order in the elements list. 
- """ - nodes: dict[int, Point] = {} # id => (lat, lon) - ways: dict[int, Point] = {} # id => (lat, lon) - relations: dict[int, Point] = {} # id => (lat, lon) - - unlocalized_relations = [] # 'unlocalized' means the center of the - # relation has not been calculated yet - - for el in elements: - if el["type"] == "node": - nodes[el["id"]] = (el["lat"], el["lon"]) - elif el["type"] == "way": - if center := get_way_center(el, nodes): - ways[el["id"]] = center - elif el["type"] == "relation": - if center := get_relation_center(el, nodes, ways, relations): - relations[el["id"]] = center - else: - unlocalized_relations.append(el) - - def iterate_relation_centers_calculation( - ignore_unlocalized_child_relations: bool, - ) -> list[dict]: - unlocalized_relations_upd = [] - for rel in unlocalized_relations: - if center := get_relation_center( - rel, nodes, ways, relations, ignore_unlocalized_child_relations - ): - relations[rel["id"]] = center - else: - unlocalized_relations_upd.append(rel) - return unlocalized_relations_upd - - # Calculate centers for relations that have no one yet - while unlocalized_relations: - unlocalized_relations_upd = iterate_relation_centers_calculation(False) - progress = len(unlocalized_relations_upd) < len(unlocalized_relations) - if not progress: - unlocalized_relations_upd = iterate_relation_centers_calculation( - True - ) - progress = len(unlocalized_relations_upd) < len( - unlocalized_relations - ) - if not progress: - break - unlocalized_relations = unlocalized_relations_upd - - -def add_osm_elements_to_cities( - osm_elements: list[dict], cities: list[City] -) -> None: - for el in osm_elements: - for c in cities: - if c.contains(el): - c.add(el) - - -def validate_cities(cities: list[City]) -> list[City]: - """Validate cities. 
Return list of good cities.""" - good_cities = [] - for c in cities: - try: - c.extract_routes() - except CriticalValidationError as e: - logging.error( - "Critical validation error while processing %s: %s", - c.name, - e, - ) - c.error(str(e)) - except AssertionError as e: - logging.error( - "Validation logic error while processing %s: %s", - c.name, - e, - ) - c.error(f"Validation logic error: {e}") - else: - c.validate() - if c.is_good: - c.calculate_distances() - good_cities.append(c) - - return good_cities - - -def get_cities_info( - cities_info_url: str = DEFAULT_CITIES_INFO_URL, -) -> list[dict]: - response = urllib.request.urlopen(cities_info_url) - if ( - not cities_info_url.startswith("file://") - and (r_code := response.getcode()) != 200 - ): - raise Exception( - f"Failed to download cities spreadsheet: HTTP {r_code}" - ) - data = response.read().decode("utf-8") - reader = csv.DictReader( - data.splitlines(), - fieldnames=( - "id", - "name", - "country", - "continent", - "num_stations", - "num_lines", - "num_light_lines", - "num_interchanges", - "bbox", - "networks", - ), - ) - - cities_info = list() - names = set() - next(reader) # skipping the header - for city_info in reader: - if city_info["id"] and city_info["bbox"]: - cities_info.append(city_info) - name = city_info["name"].strip() - if name in names: - logging.warning( - "Duplicate city name in city list: %s", - city_info, - ) - names.add(name) - return cities_info - - -def prepare_cities( - cities_info_url: str = DEFAULT_CITIES_INFO_URL, overground: bool = False -) -> list[City]: - if overground: - raise NotImplementedError("Overground transit not implemented yet") - cities_info = get_cities_info(cities_info_url) - return list(map(partial(City, overground=overground), cities_info)) - - -def main() -> None: - parser = argparse.ArgumentParser() - parser.add_argument( - "--cities-info-url", - default=DEFAULT_CITIES_INFO_URL, - help=( - "URL of CSV file with reference information about rapid transit 
" - "networks. file:// protocol is also supported." - ), - ) - parser.add_argument( - "-i", - "--source", - help="File to write backup of OSM data, or to read data from", - ) - parser.add_argument( - "-x", "--xml", help="OSM extract with routes, to read data from" - ) - parser.add_argument( - "--overpass-api", - default="http://overpass-api.de/api/interpreter", - help="Overpass API URL", - ) - parser.add_argument( - "-q", - "--quiet", - action="store_true", - help="Show only warnings and errors", - ) - parser.add_argument( - "-c", "--city", help="Validate only a single city or a country" - ) - parser.add_argument( - "-t", - "--overground", - action="store_true", - help="Process overground transport instead of subways", - ) - parser.add_argument( - "-e", - "--entrances", - type=argparse.FileType("w", encoding="utf-8"), - help="Export unused subway entrances as GeoJSON here", - ) - parser.add_argument( - "-l", - "--log", - type=argparse.FileType("w", encoding="utf-8"), - help="Validation JSON file name", - ) - - for processor_name, processor in inspect.getmembers( - processors, inspect.ismodule - ): - if not processor_name.startswith("_"): - parser.add_argument( - f"--output-{processor_name}", - help=( - "Processed metro systems output filename " - f"in {processor_name.upper()} format" - ), - ) - - parser.add_argument("--cache", help="Cache file name for processed data") - parser.add_argument( - "-r", "--recovery-path", help="Cache file name for error recovery" - ) - parser.add_argument( - "-d", "--dump", help="Make a YAML file for a city data" - ) - parser.add_argument( - "-j", "--geojson", help="Make a GeoJSON file for a city data" - ) - parser.add_argument( - "--crude", - action="store_true", - help="Do not use OSM railway geometry for GeoJSON", - ) - options = parser.parse_args() - - if options.quiet: - log_level = logging.WARNING - else: - log_level = logging.INFO - logging.basicConfig( - level=log_level, - datefmt="%H:%M:%S", - format="%(asctime)s 
%(levelname)-7s %(message)s", - ) - - cities = prepare_cities(options.cities_info_url, options.overground) - if options.city: - cities = [ - c - for c in cities - if c.name == options.city or c.country == options.city - ] - if not cities: - logging.error("No cities to process") - sys.exit(2) - - # Augment cities with recovery data - recovery_data = None - if options.recovery_path: - recovery_data = read_recovery_data(options.recovery_path) - for city in cities: - city.recovery_data = recovery_data.get(city.name, None) - - logging.info("Read %s metro networks", len(cities)) - - # Reading cached json, loading XML or querying Overpass API - if options.source and os.path.exists(options.source): - logging.info("Reading %s", options.source) - with open(options.source, "r") as f: - osm = json.load(f) - if "elements" in osm: - osm = osm["elements"] - calculate_centers(osm) - elif options.xml: - logging.info("Reading %s", options.xml) - osm = load_xml(options.xml) - calculate_centers(osm) - if options.source: - with open(options.source, "w", encoding="utf-8") as f: - json.dump(osm, f) - else: - if len(cities) > 10: - logging.error( - "Would not download that many cities from Overpass API, " - "choose a smaller set" - ) - sys.exit(3) - bboxes = [c.bbox for c in cities] - logging.info("Downloading data from Overpass API") - osm = multi_overpass(options.overground, options.overpass_api, bboxes) - calculate_centers(osm) - if options.source: - with open(options.source, "w", encoding="utf-8") as f: - json.dump(osm, f) - logging.info("Downloaded %s elements", len(osm)) - - logging.info("Sorting elements by city") - add_osm_elements_to_cities(osm, cities) - - logging.info("Building routes for each city") - good_cities = validate_cities(cities) - - logging.info("Finding transfer stations") - transfers = find_transfers(osm, cities) - - good_city_names = set(c.name for c in good_cities) - logging.info( - "%s good cities: %s", - len(good_city_names), - ", 
".join(sorted(good_city_names)), - ) - bad_city_names = set(c.name for c in cities) - good_city_names - logging.info( - "%s bad cities: %s", - len(bad_city_names), - ", ".join(sorted(bad_city_names)), - ) - - if options.recovery_path: - write_recovery_data(options.recovery_path, recovery_data, cities) - - if options.entrances: - json.dump(get_unused_entrances_geojson(osm), options.entrances) - - if options.dump: - if os.path.isdir(options.dump): - for c in cities: - with open( - os.path.join(options.dump, slugify(c.name) + ".yaml"), - "w", - encoding="utf-8", - ) as f: - dump_yaml(c, f) - elif len(cities) == 1: - with open(options.dump, "w", encoding="utf-8") as f: - dump_yaml(cities[0], f) - else: - logging.error("Cannot dump %s cities at once", len(cities)) - - if options.geojson: - if os.path.isdir(options.geojson): - for c in cities: - with open( - os.path.join( - options.geojson, slugify(c.name) + ".geojson" - ), - "w", - encoding="utf-8", - ) as f: - json.dump(make_geojson(c, not options.crude), f) - elif len(cities) == 1: - with open(options.geojson, "w", encoding="utf-8") as f: - json.dump(make_geojson(cities[0], not options.crude), f) - else: - logging.error( - "Cannot make a geojson of %s cities at once", len(cities) - ) - - if options.log: - res = [] - for c in cities: - v = c.get_validation_result() - v["slug"] = slugify(c.name) - res.append(v) - json.dump(res, options.log, indent=2, ensure_ascii=False) - - for processor_name, processor in inspect.getmembers( - processors, inspect.ismodule - ): - option_name = f"output_{processor_name}" - - if not getattr(options, option_name, None): - continue - - filename = getattr(options, option_name) - processor.process(cities, transfers, filename, options.cache) - - -if __name__ == "__main__": - main() diff --git a/scripts/build_city.sh b/scripts/build_city.sh deleted file mode 100755 index 6b8d8af0..00000000 --- a/scripts/build_city.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/bash -set -e -u -[ $# -lt 1 ] && echo 
"Usage: $0 [ []]" && exit 1 - -export OSMCTOOLS="${OSMCTOOLS:-$HOME/osm/planet}" -export DUMP=html -export JSON=html -if [ -n "${2-}" ]; then - export CITY="$2" -fi -if [ -n "${3-}" ]; then - export BBOX="$3" -elif [ -n "${CITY-}" ]; then - export BBOX="$(python3 -c 'import subway_structure; c = [x for x in subway_structure.download_cities() if x.name == "'"$CITY"'"]; print("{1},{0},{3},{2}".format(*c[0].bbox))')" || true -fi -"$(dirname "$0")/process_subways.sh" "$1" diff --git a/scripts/build_trams.sh b/scripts/build_trams.sh deleted file mode 100755 index 6b62d245..00000000 --- a/scripts/build_trams.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/bash -set -e -u -[ $# -lt 1 ] && echo "Usage: $0 [ []]" && exit 1 - -export OSMCTOOLS="${OSMCTOOLS:-$HOME/osm/planet}" -export DUMP=html -export JSON=html -if [ -n "${2-}" ]; then - export CITY="$2" -fi -if [ -n "${3-}" ]; then - export BBOX="$3" -elif [ -n "${CITY-}" ]; then - export BBOX="$(python3 -c 'import subway_structure; c = [x for x in subway_structure.download_cities(True) if x.name == "'"$CITY"'"]; print("{1},{0},{3},{2}".format(*c[0].bbox))')" || true -fi -"$(dirname "$0")/process_trams.sh" "$1" diff --git a/scripts/download_all_subways.sh b/scripts/download_all_subways.sh deleted file mode 100755 index 2797520c..00000000 --- a/scripts/download_all_subways.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -# Still times out, do not use unless you want to be blocked for some hours on Overpass API -TIMEOUT=2000 -QUERY='[out:json][timeout:'$TIMEOUT'];(rel["route"="subway"];rel["route"="light_rail"];rel["public_transport"="stop_area"];rel["public_transport"="stop_area_group"];node["station"="subway"];node["station"="light_rail"];node["railway"="subway_entrance"];);(._;>;);out body center qt;' -http http://overpass-api.de/api/interpreter "data==$QUERY" --timeout $TIMEOUT > subways-$(date +%y%m%d).json -http https://overpass-api.de/api/status | grep available diff --git a/scripts/filter_all_subways.sh 
b/scripts/filter_all_subways.sh deleted file mode 100755 index 5627f10b..00000000 --- a/scripts/filter_all_subways.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -[ $# -lt 1 ] && echo 'Usage: $0 [] []' && exit 1 -OSMFILTER=${2-./osmfilter} -QRELATIONS="route=subway =light_rail =monorail route_master=subway =light_rail =monorail public_transport=stop_area =stop_area_group" -QNODES="station=subway =light_rail =monorail railway=subway_entrance subway=yes light_rail=yes monorail=yes" -"$OSMFILTER" "$1" --keep= --keep-relations="$QRELATIONS" --keep-nodes="$QNODES" --drop-author -o="${3:-subways-$(date +%y%m%d).osm}" diff --git a/scripts/process_subways.py b/scripts/process_subways.py new file mode 100755 index 00000000..65d16007 --- /dev/null +++ b/scripts/process_subways.py @@ -0,0 +1,276 @@ +import argparse +import inspect +import json +import logging +import os +import re +import sys + +from subways import processors +from subways.overpass import multi_overpass +from subways.subway_io import ( + dump_yaml, + load_xml, + make_geojson, + read_recovery_data, + write_recovery_data, +) +from subways.structure.city import ( + find_transfers, + get_unused_subway_entrances_geojson, +) +from subways.validation import ( + add_osm_elements_to_cities, + BAD_MARK, + calculate_centers, + DEFAULT_CITIES_INFO_URL, + prepare_cities, + validate_cities, +) + + +def slugify(name: str) -> str: + return re.sub(r"[^a-z0-9_-]+", "", name.lower().replace(" ", "_")) + + +def main() -> None: + parser = argparse.ArgumentParser() + parser.add_argument( + "--cities-info-url", + default=DEFAULT_CITIES_INFO_URL, + help=( + "URL of CSV file with reference information about rapid transit " + "networks. file:// protocol is also supported." 
+ ), + ) + parser.add_argument( + "-i", + "--source", + help="File to write backup of OSM data, or to read data from", + ) + parser.add_argument( + "-x", "--xml", help="OSM extract with routes, to read data from" + ) + parser.add_argument( + "--overpass-api", + default="http://overpass-api.de/api/interpreter", + help="Overpass API URL", + ) + parser.add_argument( + "-q", + "--quiet", + action="store_true", + help="Show only warnings and errors", + ) + parser.add_argument( + "-c", "--city", help="Validate only a single city or a country" + ) + parser.add_argument( + "-t", + "--overground", + action="store_true", + help="Process overground transport instead of subways", + ) + parser.add_argument( + "-e", + "--entrances", + type=argparse.FileType("w", encoding="utf-8"), + help="Export unused subway entrances as GeoJSON here", + ) + parser.add_argument( + "-l", + "--log", + type=argparse.FileType("w", encoding="utf-8"), + help="Validation JSON file name", + ) + parser.add_argument( + "--dump-city-list", + type=argparse.FileType("w", encoding="utf-8"), + help=( + "Dump sorted list of all city names, possibly with " + f"{BAD_MARK} mark" + ), + ) + + for processor_name, processor in inspect.getmembers( + processors, inspect.ismodule + ): + if not processor_name.startswith("_"): + parser.add_argument( + f"--output-{processor_name}", + help=( + "Processed metro systems output filename " + f"in {processor_name.upper()} format" + ), + ) + + parser.add_argument("--cache", help="Cache file name for processed data") + parser.add_argument( + "-r", "--recovery-path", help="Cache file name for error recovery" + ) + parser.add_argument( + "-d", "--dump", help="Make a YAML file for a city data" + ) + parser.add_argument( + "-j", "--geojson", help="Make a GeoJSON file for a city data" + ) + parser.add_argument( + "--crude", + action="store_true", + help="Do not use OSM railway geometry for GeoJSON", + ) + options = parser.parse_args() + + if options.quiet: + log_level = 
logging.WARNING + else: + log_level = logging.INFO + logging.basicConfig( + level=log_level, + datefmt="%H:%M:%S", + format="%(asctime)s %(levelname)-7s %(message)s", + ) + + cities = prepare_cities(options.cities_info_url, options.overground) + if options.city: + cities = [ + c + for c in cities + if c.name == options.city or c.country == options.city + ] + if not cities: + logging.error("No cities to process") + sys.exit(2) + + # Augment cities with recovery data + recovery_data = None + if options.recovery_path: + recovery_data = read_recovery_data(options.recovery_path) + for city in cities: + city.recovery_data = recovery_data.get(city.name, None) + + logging.info("Read %s metro networks", len(cities)) + + # Reading cached json, loading XML or querying Overpass API + if options.source and os.path.exists(options.source): + logging.info("Reading %s", options.source) + with open(options.source, "r") as f: + osm = json.load(f) + if "elements" in osm: + osm = osm["elements"] + calculate_centers(osm) + elif options.xml: + logging.info("Reading %s", options.xml) + osm = load_xml(options.xml) + calculate_centers(osm) + if options.source: + with open(options.source, "w", encoding="utf-8") as f: + json.dump(osm, f) + else: + if len(cities) > 10: + logging.error( + "Would not download that many cities from Overpass API, " + "choose a smaller set" + ) + sys.exit(3) + bboxes = [c.bbox for c in cities] + logging.info("Downloading data from Overpass API") + osm = multi_overpass(options.overground, options.overpass_api, bboxes) + calculate_centers(osm) + if options.source: + with open(options.source, "w", encoding="utf-8") as f: + json.dump(osm, f) + logging.info("Downloaded %s elements", len(osm)) + + logging.info("Sorting elements by city") + add_osm_elements_to_cities(osm, cities) + + logging.info("Building routes for each city") + good_cities = validate_cities(cities) + + logging.info("Finding transfer stations") + transfers = find_transfers(osm, good_cities) + + 
good_city_names = set(c.name for c in good_cities) + logging.info( + "%s good cities: %s", + len(good_city_names), + ", ".join(sorted(good_city_names)), + ) + bad_city_names = set(c.name for c in cities) - good_city_names + logging.info( + "%s bad cities: %s", + len(bad_city_names), + ", ".join(sorted(bad_city_names)), + ) + + if options.dump_city_list: + lines = sorted( + f"{city.name}, {city.country}" + f"{' ' + BAD_MARK if city.name in bad_city_names else ''}\n" + for city in cities + ) + options.dump_city_list.writelines(lines) + + if options.recovery_path: + write_recovery_data(options.recovery_path, recovery_data, cities) + + if options.entrances: + json.dump(get_unused_subway_entrances_geojson(osm), options.entrances) + + if options.dump: + if os.path.isdir(options.dump): + for c in cities: + with open( + os.path.join(options.dump, slugify(c.name) + ".yaml"), + "w", + encoding="utf-8", + ) as f: + dump_yaml(c, f) + elif len(cities) == 1: + with open(options.dump, "w", encoding="utf-8") as f: + dump_yaml(cities[0], f) + else: + logging.error("Cannot dump %s cities at once", len(cities)) + + if options.geojson: + if os.path.isdir(options.geojson): + for c in cities: + with open( + os.path.join( + options.geojson, slugify(c.name) + ".geojson" + ), + "w", + encoding="utf-8", + ) as f: + json.dump(make_geojson(c, not options.crude), f) + elif len(cities) == 1: + with open(options.geojson, "w", encoding="utf-8") as f: + json.dump(make_geojson(cities[0], not options.crude), f) + else: + logging.error( + "Cannot make a geojson of %s cities at once", len(cities) + ) + + if options.log: + res = [] + for c in cities: + v = c.get_validation_result() + v["slug"] = slugify(c.name) + res.append(v) + json.dump(res, options.log, indent=2, ensure_ascii=False) + + for processor_name, processor in inspect.getmembers( + processors, inspect.ismodule + ): + option_name = f"output_{processor_name}" + + if not getattr(options, option_name, None): + continue + + filename = 
getattr(options, option_name) + processor.process(cities, transfers, filename, options.cache) + + +if __name__ == "__main__": + main() diff --git a/scripts/process_subways.sh b/scripts/process_subways.sh index 26eb89df..0d54cea8 100755 --- a/scripts/process_subways.sh +++ b/scripts/process_subways.sh @@ -53,6 +53,7 @@ Environment variable reference: - GIT_PULL: set to 1 to update the scripts - TMPDIR: path to temporary files - HTML_DIR: target path for generated HTML files + - DUMP_CITY_LIST: file name to save sorted list of cities, with [bad] mark for bad cities - SERVER: server name and path to upload HTML files (e.g. ilya@osmz.ru:/var/www/) - SERVER_KEY: rsa key to supply for uploading the files - REMOVE_HTML: set to 1 to remove \$HTML_DIR after uploading @@ -62,6 +63,22 @@ EOF fi +function activate_venv_at_path() { + path=$1 + + if [ ! -d "$path/".venv ]; then + "${PYTHON:-python3.11}" -m venv "$path"/.venv + fi + + source "$path"/.venv/bin/activate + + if [ -f "$path"/requirements.txt ]; then + pip install --upgrade pip + pip install -r "$path"/requirements.txt + fi +} + + function check_osmctools() { OSMCTOOLS="${OSMCTOOLS:-$HOME/osmctools}" if [ ! -f "$OSMCTOOLS/osmupdate" ]; then @@ -90,37 +107,39 @@ function check_poly() { if [ -z "${POLY-}" -o ! 
-f "${POLY-}" ]; then POLY=${POLY:-$(mktemp "$TMPDIR/all-metro.XXXXXXXX.poly")} - if [ -n "$("$PYTHON" -c "import shapely" 2>&1)" ]; then - "$PYTHON" -m pip install shapely==2.0.1 - fi - "$PYTHON" "$SUBWAYS_PATH"/make_all_metro_poly.py \ + activate_venv_at_path "$SUBWAYS_REPO_PATH/tools/make_poly" + python "$SUBWAYS_REPO_PATH"/tools/make_poly/make_all_metro_poly.py \ ${CITIES_INFO_URL:+--cities-info-url "$CITIES_INFO_URL"} > "$POLY" + deactivate fi fi POLY_CHECKED=1 fi } +# "readlink -f" echoes canonicalized absolute path to a file/directory +SUBWAYS_REPO_PATH="$(readlink -f $(dirname "$0")/..)" -PYTHON=${PYTHON:-python3} -# This will fail if there is no python -"$PYTHON" --version > /dev/null - -SUBWAYS_PATH="$(dirname "$0")/.." -if [ ! -f "$SUBWAYS_PATH/process_subways.py" ]; then - echo "Please clone the subways repo to $SUBWAYS_PATH" +if [ ! -f "$SUBWAYS_REPO_PATH/scripts/process_subways.py" ]; then + echo "Please clone the subways repo to $SUBWAYS_REPO_PATH" exit 2 fi -TMPDIR="${TMPDIR:-$SUBWAYS_PATH}" -mkdir -p "$TMPDIR" +# Contains 'subways' dir and is required by the main validator python script +# as well as by some tools +export PYTHONPATH="$SUBWAYS_REPO_PATH" # Downloading the latest version of the subways script if [ -n "${GIT_PULL-}" ]; then ( - cd "$SUBWAYS_PATH" + pushd "$SUBWAYS_REPO_PATH" git pull origin master + popd ) fi + +TMPDIR="${TMPDIR:-"$SUBWAYS_REPO_PATH"}" +mkdir -p "$TMPDIR" + if [ -z "${FILTERED_DATA-}" ]; then FILTERED_DATA="$TMPDIR/subways.osm" NEED_TO_REMOVE_FILTERED_DATA=1 @@ -217,7 +236,7 @@ if [ -n "${NEED_FILTER-}" ]; then check_osmctools mkdir -p $TMPDIR/osmfilter_temp/ QRELATIONS="route=subway =light_rail =monorail =train route_master=subway =light_rail =monorail =train public_transport=stop_area =stop_area_group" - QNODES="railway=station station=subway =light_rail =monorail railway=subway_entrance subway=yes light_rail=yes monorail=yes train=yes" + QNODES="railway=station =subway_entrance =train_station_entrance 
station=subway =light_rail =monorail subway=yes light_rail=yes monorail=yes train=yes" "$OSMCTOOLS/osmfilter" "$PLANET_METRO" \ --keep= \ --keep-relations="$QRELATIONS" \ @@ -244,15 +263,22 @@ if [ -n "${DUMP-}" ]; then fi VALIDATION="$TMPDIR/validation.json" -"$PYTHON" "$SUBWAYS_PATH/process_subways.py" ${QUIET:+-q} \ + +activate_venv_at_path "$SUBWAYS_REPO_PATH/scripts" +python "$SUBWAYS_REPO_PATH/scripts/process_subways.py" ${QUIET:+-q} \ -x "$FILTERED_DATA" -l "$VALIDATION" \ ${CITIES_INFO_URL:+--cities-info-url "$CITIES_INFO_URL"} \ ${MAPSME:+--output-mapsme "$MAPSME"} \ ${GTFS:+--output-gtfs "$GTFS"} \ - ${CITY:+-c "$CITY"} ${DUMP:+-d "$DUMP"} ${GEOJSON:+-j "$GEOJSON"} \ + ${CITY:+-c "$CITY"} \ + ${DUMP:+-d "$DUMP"} \ + ${GEOJSON:+-j "$GEOJSON"} \ + ${DUMP_CITY_LIST:+--dump-city-list "$DUMP_CITY_LIST"} \ ${ELEMENTS_CACHE:+-i "$ELEMENTS_CACHE"} \ ${CITY_CACHE:+--cache "$CITY_CACHE"} \ ${RECOVERY_PATH:+-r "$RECOVERY_PATH"} +deactivate + if [ -n "${NEED_TO_REMOVE_FILTERED_DATA-}" ]; then rm "$FILTERED_DATA" @@ -261,15 +287,18 @@ fi # Preparing HTML files if [ -z "${HTML_DIR-}" ]; then - HTML_DIR="$SUBWAYS_PATH/html" + HTML_DIR="$SUBWAYS_REPO_PATH/html" REMOVE_HTML=1 fi mkdir -p $HTML_DIR rm -f "$HTML_DIR"/*.html -"$PYTHON" "$SUBWAYS_PATH/validation_to_html.py" \ + +activate_venv_at_path "$SUBWAYS_REPO_PATH/tools/v2h" +python "$SUBWAYS_REPO_PATH/tools/v2h/validation_to_html.py" \ ${CITIES_INFO_URL:+--cities-info-url "$CITIES_INFO_URL"} \ "$VALIDATION" "$HTML_DIR" +deactivate # Uploading files to the server diff --git a/scripts/process_trams.sh b/scripts/process_trams.sh deleted file mode 100755 index 84e007a0..00000000 --- a/scripts/process_trams.sh +++ /dev/null @@ -1,94 +0,0 @@ -#!/bin/bash -set -e -u - -if [ $# -lt 1 -a -z "${PLANET-}" ]; then - echo "This script updates a planet or an extract, processes tram networks in it" - echo "and produses a set of HTML files with validation results." 
- echo - echo "Usage: $0 " - echo - echo "Variable reference:" - echo "- PLANET: path for the source o5m file (the entire planet or an extract)" - echo "- CITY: name of a city to process" - echo "- BBOX: bounding box of an extract; x1,y1,x2,y2" - echo "- DUMP: file name to dump city data" - echo "- MAPSME: file name for maps.me json output" - echo "- OSMCTOOLS: path to osmconvert and osmupdate binaries" - echo "- PYTHON: python 3 executable" - echo "- GIT_PULL: set to 1 to update the scripts" - echo "- TMPDIR: path to temporary files" - echo "- HTML_DIR: target path for generated HTML files" - echo "- SERVER: server name and path to upload HTML files (e.g. ilya@osmz.ru:/var/www/)" - echo "- SERVER_KEY: rsa key to supply for uploading the files" - echo "- REMOVE_HTML: set to 1 to remove HTML_DIR after uploading" - exit 1 -fi - -[ -n "${WHAT-}" ] && echo WHAT - -PLANET="${PLANET:-${1-}}" -[ ! -f "$PLANET" ] && echo "Cannot find planet file $PLANET" && exit 2 -OSMCTOOLS="${OSMCTOOLS:-$HOME/osmctools}" -if [ ! -f "$OSMCTOOLS/osmupdate" ]; then - if which osmupdate > /dev/null; then - OSMCTOOLS="$(dirname "$(which osmupdate)")" - else - echo "Please compile osmctools to $OSMCTOOLS" - exit 1 - fi -fi -PYTHON=${PYTHON:-python3} -# This will fail if there is no python -"$PYTHON" --version > /dev/null -SUBWAYS_PATH="$(dirname "$0")/.." -[ ! 
-f "$SUBWAYS_PATH/process_subways.py" ] && echo "Please clone the subways repo to $SUBWAYS_PATH" && exit 2 -TMPDIR="${TMPDIR:-$SUBWAYS_PATH}" - -# Downloading the latest version of the subways script - - -if [ -n "${GIT_PULL-}" ]; then ( - cd "$SUBWAYS_PATH" - git pull origin master -) fi - - -# Updating the planet file - -PLANET_ABS="$(cd "$(dirname "$PLANET")"; pwd)/$(basename "$PLANET")" -( - cd "$OSMCTOOLS" # osmupdate requires osmconvert in a current directory - ./osmupdate --drop-author --out-o5m "$PLANET_ABS" ${BBOX+"-b=$BBOX"} "$PLANET_ABS.new.o5m" && mv "$PLANET_ABS.new.o5m" "$PLANET_ABS" || true -) - -# Filtering it - -FILTERED_DATA="$TMPDIR/subways.osm" -QRELATIONS="route=tram route_master=tram public_transport=stop_area =stop_area_group" -QNODES="railway=tram_stop railway=subway_entrance tram=yes" -"$OSMCTOOLS/osmfilter" "$PLANET" --keep= --keep-relations="$QRELATIONS" --keep-nodes="$QNODES" --drop-author "-o=$FILTERED_DATA" - -# Running the validation - -VALIDATION="$TMPDIR/validation.json" -"$PYTHON" "$SUBWAYS_PATH/process_subways.py" -t -q -x "$FILTERED_DATA" -l "$VALIDATION" ${MAPSME+-o "$MAPSME"} ${CITY+-c "$CITY"} ${DUMP+-d "$DUMP"} ${JSON+-j "$JSON"} -rm "$FILTERED_DATA" - -# Preparing HTML files - -if [ -z "${HTML_DIR-}" ]; then - HTML_DIR="$SUBWAYS_PATH/html" - REMOVE_HTML=1 -fi - -mkdir -p $HTML_DIR -rm -f "$HTML_DIR"/*.html -"$PYTHON" "$SUBWAYS_PATH/validation_to_html.py" "$VALIDATION" "$HTML_DIR" -rm "$VALIDATION" - -# Uploading files to the server - -if [ -n "${SERVER-}" ]; then - scp -q ${SERVER_KEY+-i "$SERVER_KEY"} "$HTML_DIR"/* "$SERVER" - [ -n "${REMOVE_HTML-}" ] && rm -r "$HTML_DIR" -fi diff --git a/scripts/requirements.txt b/scripts/requirements.txt new file mode 100644 index 00000000..1f71eee1 --- /dev/null +++ b/scripts/requirements.txt @@ -0,0 +1 @@ +-r ../subways/requirements.txt diff --git a/subway_structure.py b/subway_structure.py deleted file mode 100644 index 823aea61..00000000 --- a/subway_structure.py +++ /dev/null @@ 
-1,2140 +0,0 @@ -import math -import re -from collections import Counter, defaultdict -from itertools import islice - -from css_colours import normalize_colour - - -MAX_DISTANCE_TO_ENTRANCES = 300 # in meters -MAX_DISTANCE_STOP_TO_LINE = 50 # in meters -ALLOWED_STATIONS_MISMATCH = 0.02 # part of total station count -ALLOWED_TRANSFERS_MISMATCH = 0.07 # part of total interchanges count -ALLOWED_ANGLE_BETWEEN_STOPS = 45 # in degrees -DISALLOWED_ANGLE_BETWEEN_STOPS = 20 # in degrees - -# If an object was moved not too far compared to previous script run, -# it is likely the same object -DISPLACEMENT_TOLERANCE = 300 # in meters - -MODES_RAPID = set(("subway", "light_rail", "monorail", "train")) -MODES_OVERGROUND = set(("tram", "bus", "trolleybus", "aerialway", "ferry")) -DEFAULT_MODES_RAPID = set(("subway", "light_rail")) -DEFAULT_MODES_OVERGROUND = set(("tram",)) # TODO: bus and trolleybus? -ALL_MODES = MODES_RAPID | MODES_OVERGROUND -RAILWAY_TYPES = set( - ( - "rail", - "light_rail", - "subway", - "narrow_gauge", - "funicular", - "monorail", - "tram", - ) -) -CONSTRUCTION_KEYS = ( - "construction", - "proposed", - "construction:railway", - "proposed:railway", -) - -used_entrances = set() - - -START_END_TIMES_RE = re.compile(r".*?(\d{2}):(\d{2})-(\d{2}):(\d{2}).*") - - -def get_start_end_times(opening_hours): - """Very simplified method to parse OSM opening_hours tag. - We simply take the first HH:MM-HH:MM substring which is the most probable - opening hours interval for the most of weekdays. 
- """ - start_time, end_time = None, None - m = START_END_TIMES_RE.match(opening_hours) - if m: - ints = tuple(map(int, m.groups())) - start_time = (ints[0], ints[1]) - end_time = (ints[2], ints[3]) - return start_time, end_time - - -def osm_interval_to_seconds(interval_str): - """Convert to int an OSM value for 'interval'/'headway' tag - which may be in these formats: - HH:MM:SS, - HH:MM, - MM, - M - (https://wiki.openstreetmap.org/wiki/Key:interval#Format) - """ - hours, minutes, seconds = 0, 0, 0 - semicolon_count = interval_str.count(":") - try: - if semicolon_count == 0: - minutes = int(interval_str) - elif semicolon_count == 1: - hours, minutes = map(int, interval_str.split(":")) - elif semicolon_count == 2: - hours, minutes, seconds = map(int, interval_str.split(":")) - else: - return None - except ValueError: - return None - return seconds + 60 * minutes + 60 * 60 * hours - - -class CriticalValidationError(Exception): - """Is thrown if an error occurs - that prevents further validation of a city.""" - - -def el_id(el): - if not el: - return None - if "type" not in el: - raise Exception("What is this element? 
{}".format(el)) - return el["type"][0] + str(el.get("id", el.get("ref", ""))) - - -def el_center(el): - if not el: - return None - if "lat" in el: - return (el["lon"], el["lat"]) - elif "center" in el: - return (el["center"]["lon"], el["center"]["lat"]) - return None - - -def distance(p1, p2): - if p1 is None or p2 is None: - raise Exception( - "One of arguments to distance({}, {}) is None".format(p1, p2) - ) - dx = math.radians(p1[0] - p2[0]) * math.cos( - 0.5 * math.radians(p1[1] + p2[1]) - ) - dy = math.radians(p1[1] - p2[1]) - return 6378137 * math.sqrt(dx * dx + dy * dy) - - -def is_near(p1, p2): - return ( - p1[0] - 1e-8 <= p2[0] <= p1[0] + 1e-8 - and p1[1] - 1e-8 <= p2[1] <= p1[1] + 1e-8 - ) - - -def project_on_segment(p, p1, p2): - """Given three points, return u - the position of projection of - point p onto segment p1p2 regarding point p1 and (p2-p1) direction vector - """ - dp = (p2[0] - p1[0], p2[1] - p1[1]) - d2 = dp[0] * dp[0] + dp[1] * dp[1] - if d2 < 1e-14: - return None - u = ((p[0] - p1[0]) * dp[0] + (p[1] - p1[1]) * dp[1]) / d2 - if not 0 <= u <= 1: - return None - return u - - -def project_on_line(p, line): - result = { - # In the first approximation, position on rails is the index of the - # closest vertex of line to the point p. Fractional value means that - # the projected point lies on a segment between two vertices. - # More than one value can occur if a route follows the same tracks - # more than once. 
- "positions_on_line": None, - "projected_point": None, # (lon, lat) - } - - if len(line) < 2: - return result - d_min = MAX_DISTANCE_STOP_TO_LINE * 5 - closest_to_vertex = False - # First, check vertices in the line - for i, vertex in enumerate(line): - d = distance(p, vertex) - if d < d_min: - result["positions_on_line"] = [i] - result["projected_point"] = vertex - d_min = d - closest_to_vertex = True - elif vertex == result["projected_point"]: - # Repeated occurrence of the track vertex in line, like Oslo Line 5 - result["positions_on_line"].append(i) - # And then calculate distances to each segment - for seg in range(len(line) - 1): - # Check bbox for speed - if not ( - ( - min(line[seg][0], line[seg + 1][0]) - MAX_DISTANCE_STOP_TO_LINE - <= p[0] - <= max(line[seg][0], line[seg + 1][0]) - + MAX_DISTANCE_STOP_TO_LINE - ) - and ( - min(line[seg][1], line[seg + 1][1]) - MAX_DISTANCE_STOP_TO_LINE - <= p[1] - <= max(line[seg][1], line[seg + 1][1]) - + MAX_DISTANCE_STOP_TO_LINE - ) - ): - continue - u = project_on_segment(p, line[seg], line[seg + 1]) - if u: - projected_point = ( - line[seg][0] + u * (line[seg + 1][0] - line[seg][0]), - line[seg][1] + u * (line[seg + 1][1] - line[seg][1]), - ) - d = distance(p, projected_point) - if d < d_min: - result["positions_on_line"] = [seg + u] - result["projected_point"] = projected_point - d_min = d - closest_to_vertex = False - elif projected_point == result["projected_point"]: - # Repeated occurrence of the track segment in line, - # like Oslo Line 5 - if not closest_to_vertex: - result["positions_on_line"].append(seg + u) - return result - - -def find_segment(p, line, start_vertex=0): - """Returns index of a segment and a position inside it.""" - EPS = 1e-9 - for seg in range(start_vertex, len(line) - 1): - if is_near(p, line[seg]): - return seg, 0.0 - if line[seg][0] == line[seg + 1][0]: - if not (p[0] - EPS <= line[seg][0] <= p[0] + EPS): - continue - px = None - else: - px = (p[0] - line[seg][0]) / (line[seg + 1][0] - 
line[seg][0]) - if px is None or (0 <= px <= 1): - if line[seg][1] == line[seg + 1][1]: - if not (p[1] - EPS <= line[seg][1] <= p[1] + EPS): - continue - py = None - else: - py = (p[1] - line[seg][1]) / (line[seg + 1][1] - line[seg][1]) - if py is None or (0 <= py <= 1): - if py is None or px is None or (px - EPS <= py <= px + EPS): - return seg, px or py - return None, None - - -def distance_on_line(p1, p2, line, start_vertex=0): - """Calculates distance via line between projections - of points p1 and p2. Returns a TUPLE of (d, vertex): - d is the distance and vertex is the number of the second - vertex, to continue calculations for the next point.""" - line_len = len(line) - seg1, pos1 = find_segment(p1, line, start_vertex) - if seg1 is None: - # logging.warn('p1 %s is not projected, st=%s', p1, start_vertex) - return None - seg2, pos2 = find_segment(p2, line, seg1) - if seg2 is None: - if line[0] == line[-1]: - line = line + line[1:] - seg2, pos2 = find_segment(p2, line, seg1) - if seg2 is None: - # logging.warn('p2 %s is not projected, st=%s', p2, start_vertex) - return None - if seg1 == seg2: - return distance(line[seg1], line[seg1 + 1]) * abs(pos2 - pos1), seg1 - if seg2 < seg1: - # Should not happen - raise Exception("Pos1 %s is after pos2 %s", seg1, seg2) - d = 0 - if pos1 < 1: - d += distance(line[seg1], line[seg1 + 1]) * (1 - pos1) - for i in range(seg1 + 1, seg2): - d += distance(line[i], line[i + 1]) - if pos2 > 0: - d += distance(line[seg2], line[seg2 + 1]) * pos2 - return d, seg2 % line_len - - -def angle_between(p1, c, p2): - a = round( - abs( - math.degrees( - math.atan2(p1[1] - c[1], p1[0] - c[0]) - - math.atan2(p2[1] - c[1], p2[0] - c[0]) - ) - ) - ) - return a if a <= 180 else 360 - a - - -def format_elid_list(ids): - msg = ", ".join(sorted(ids)[:20]) - if len(ids) > 20: - msg += ", ..." 
- return msg - - -class Station: - @staticmethod - def get_modes(el): - mode = el["tags"].get("station") - modes = [] if not mode else [mode] - for m in ALL_MODES: - if el["tags"].get(m) == "yes": - modes.append(m) - return set(modes) - - @staticmethod - def is_station(el, modes): - # public_transport=station is too ambiguous and unspecific to use, - # so we expect for it to be backed by railway=station. - if ( - "tram" in modes - and el.get("tags", {}).get("railway") == "tram_stop" - ): - return True - if el.get("tags", {}).get("railway") not in ("station", "halt"): - return False - for k in CONSTRUCTION_KEYS: - if k in el["tags"]: - return False - # Not checking for station=train, obviously - if "train" not in modes and Station.get_modes(el).isdisjoint(modes): - return False - return True - - def __init__(self, el, city): - """Call this with a railway=station node.""" - if not Station.is_station(el, city.modes): - raise Exception( - "Station object should be instantiated from a station node. 
" - "Got: {}".format(el) - ) - - self.id = el_id(el) - self.element = el - self.modes = Station.get_modes(el) - self.name = el["tags"].get("name", "?") - self.int_name = el["tags"].get( - "int_name", el["tags"].get("name:en", None) - ) - try: - self.colour = normalize_colour(el["tags"].get("colour", None)) - except ValueError as e: - self.colour = None - city.warn(str(e), el) - self.center = el_center(el) - if self.center is None: - raise Exception("Could not find center of {}".format(el)) - - def __repr__(self): - return "Station(id={}, modes={}, name={}, center={})".format( - self.id, ",".join(self.modes), self.name, self.center - ) - - -class StopArea: - @staticmethod - def is_stop(el): - if "tags" not in el: - return False - if el["tags"].get("railway") == "stop": - return True - if el["tags"].get("public_transport") == "stop_position": - return True - return False - - @staticmethod - def is_platform(el): - if "tags" not in el: - return False - if el["tags"].get("railway") in ("platform", "platform_edge"): - return True - if el["tags"].get("public_transport") == "platform": - return True - return False - - @staticmethod - def is_track(el): - if el["type"] != "way" or "tags" not in el: - return False - return el["tags"].get("railway") in RAILWAY_TYPES - - def __init__(self, station, city, stop_area=None): - """Call this with a Station object.""" - - self.element = stop_area or station.element - self.id = el_id(self.element) - self.station = station - self.stops = set() # set of el_ids of stop_positions - self.platforms = set() # set of el_ids of platforms - self.exits = set() # el_id of subway_entrance for leaving the platform - self.entrances = set() # el_id of subway_entrance for entering - # the platform - self.center = None # lon, lat of the station centre point - self.centers = {} # el_id -> (lon, lat) for all elements - self.transfer = None # el_id of a transfer relation - - self.modes = station.modes - self.name = station.name - self.int_name = 
station.int_name - self.colour = station.colour - - if stop_area: - self.name = stop_area["tags"].get("name", self.name) - self.int_name = stop_area["tags"].get( - "int_name", stop_area["tags"].get("name:en", self.int_name) - ) - try: - self.colour = ( - normalize_colour(stop_area["tags"].get("colour")) - or self.colour - ) - except ValueError as e: - city.warn(str(e), stop_area) - - # If we have a stop area, add all elements from it - warned_about_tracks = False - for m in stop_area["members"]: - k = el_id(m) - m_el = city.elements.get(k) - if m_el and "tags" in m_el: - if Station.is_station(m_el, city.modes): - if k != station.id: - city.error( - "Stop area has multiple stations", stop_area - ) - elif StopArea.is_stop(m_el): - self.stops.add(k) - elif StopArea.is_platform(m_el): - self.platforms.add(k) - elif m_el["tags"].get("railway") == "subway_entrance": - if m_el["type"] != "node": - city.warn("Subway entrance is not a node", m_el) - if ( - m_el["tags"].get("entrance") != "exit" - and m["role"] != "exit_only" - ): - self.entrances.add(k) - if ( - m_el["tags"].get("entrance") != "entrance" - and m["role"] != "entry_only" - ): - self.exits.add(k) - elif StopArea.is_track(m_el): - if not warned_about_tracks: - city.warn( - "Tracks in a stop_area relation", stop_area - ) - warned_about_tracks = True - else: - # Otherwise add nearby entrances - center = station.center - for c_el in city.elements.values(): - if c_el.get("tags", {}).get("railway") == "subway_entrance": - c_id = el_id(c_el) - if c_id not in city.stop_areas: - c_center = el_center(c_el) - if ( - c_center - and distance(center, c_center) - <= MAX_DISTANCE_TO_ENTRANCES - ): - if c_el["type"] != "node": - city.warn( - "Subway entrance is not a node", c_el - ) - etag = c_el["tags"].get("entrance") - if etag != "exit": - self.entrances.add(c_id) - if etag != "entrance": - self.exits.add(c_id) - - if self.exits and not self.entrances: - city.warn( - "Only exits for a station, no entrances", - stop_area or 
station.element, - ) - if self.entrances and not self.exits: - city.warn("No exits for a station", stop_area or station.element) - - for el in self.get_elements(): - self.centers[el] = el_center(city.elements[el]) - - """Calculate the center point of the station. This algorithm - cannot rely on a station node, since many stop_areas can share one. - Basically it averages center points of all platforms - and stop positions.""" - if len(self.stops) + len(self.platforms) == 0: - self.center = station.center - else: - self.center = [0, 0] - for sp in self.stops | self.platforms: - spc = self.centers[sp] - for i in range(2): - self.center[i] += spc[i] - for i in range(2): - self.center[i] /= len(self.stops) + len(self.platforms) - - def get_elements(self): - result = set([self.id, self.station.id]) - result.update(self.entrances) - result.update(self.exits) - result.update(self.stops) - result.update(self.platforms) - return result - - def __repr__(self): - return ( - f"StopArea(id={self.id}, name={self.name}, station={self.station}," - f" transfer={self.transfer}, center={self.center})" - ) - - -class RouteStop: - def __init__(self, stoparea): - self.stoparea = stoparea - self.stop = None # Stop position (lon, lat), possibly projected - self.distance = 0 # In meters from the start of the route - self.platform_entry = None # Platform el_id - self.platform_exit = None # Platform el_id - self.can_enter = False - self.can_exit = False - self.seen_stop = False - self.seen_platform_entry = False - self.seen_platform_exit = False - self.seen_station = False - - @property - def seen_platform(self): - return self.seen_platform_entry or self.seen_platform_exit - - @staticmethod - def get_actual_role(el, role, modes): - if StopArea.is_stop(el): - return "stop" - elif StopArea.is_platform(el): - return "platform" - elif Station.is_station(el, modes): - if "platform" in role: - return "platform" - else: - return "stop" - return None - - def add(self, member, relation, city): - el = 
city.elements[el_id(member)] - role = member["role"] - - if StopArea.is_stop(el): - if "platform" in role: - city.warn("Stop position in a platform role in a route", el) - if el["type"] != "node": - city.error("Stop position is not a node", el) - self.stop = el_center(el) - if "entry_only" not in role: - self.can_exit = True - if "exit_only" not in role: - self.can_enter = True - - elif Station.is_station(el, city.modes): - if el["type"] != "node": - city.notice("Station in route is not a node", el) - - if not self.seen_stop and not self.seen_platform: - self.stop = el_center(el) - self.can_enter = True - self.can_exit = True - - elif StopArea.is_platform(el): - if "stop" in role: - city.warn("Platform in a stop role in a route", el) - if "exit_only" not in role: - self.platform_entry = el_id(el) - self.can_enter = True - if "entry_only" not in role: - self.platform_exit = el_id(el) - self.can_exit = True - if not self.seen_stop: - self.stop = el_center(el) - - multiple_check = False - actual_role = RouteStop.get_actual_role(el, role, city.modes) - if actual_role == "platform": - if role == "platform_entry_only": - multiple_check = self.seen_platform_entry - self.seen_platform_entry = True - elif role == "platform_exit_only": - multiple_check = self.seen_platform_exit - self.seen_platform_exit = True - else: - if role != "platform" and "stop" not in role: - city.warn( - f'Platform "{el["tags"].get("name", "")}" ' - f'({el_id(el)}) with invalid role "{role}" in route', - relation, - ) - multiple_check = self.seen_platform - self.seen_platform_entry = True - self.seen_platform_exit = True - elif actual_role == "stop": - multiple_check = self.seen_stop - self.seen_stop = True - if multiple_check: - log_function = city.error if actual_role == "stop" else city.notice - log_function( - f'Multiple {actual_role}s for a station "' - f'{el["tags"].get("name", "")} ' - f"({el_id(el)}) in a route relation", - relation, - ) - - def __repr__(self): - return ( - 
"RouteStop(stop={}, pl_entry={}, pl_exit={}, stoparea={})".format( - self.stop, - self.platform_entry, - self.platform_exit, - self.stoparea, - ) - ) - - -class Route: - """The longest route for a city with a unique ref.""" - - @staticmethod - def is_route(el, modes): - if ( - el["type"] != "relation" - or el.get("tags", {}).get("type") != "route" - ): - return False - if "members" not in el: - return False - if el["tags"].get("route") not in modes: - return False - for k in CONSTRUCTION_KEYS: - if k in el["tags"]: - return False - if "ref" not in el["tags"] and "name" not in el["tags"]: - return False - return True - - @staticmethod - def get_network(relation): - for k in ("network:metro", "network", "operator"): - if k in relation["tags"]: - return relation["tags"][k] - return None - - @staticmethod - def get_interval(tags): - v = None - for k in ("interval", "headway"): - if k in tags: - v = tags[k] - break - else: - for kk in tags: - if kk.startswith(k + ":"): - v = tags[kk] - break - if not v: - return None - return osm_interval_to_seconds(v) - - def __init__(self, relation, city, master=None): - assert Route.is_route( - relation, city.modes - ), f"The relation does not seem to be a route: {relation}" - self.city = city - self.element = relation - self.id = el_id(relation) - - self.ref = None - self.name = None - self.mode = None - self.colour = None - self.infill = None - self.network = None - self.interval = None - self.start_time = None - self.end_time = None - self.is_circular = False - self.stops = [] # List of RouteStop - # Would be a list of (lon, lat) for the longest stretch. Can be empty. 
- self.tracks = None - # Index of the fist stop that is located on/near the self.tracks - self.first_stop_on_rails_index = None - # Index of the last stop that is located on/near the self.tracks - self.last_stop_on_rails_index = None - - self.process_tags(master) - stop_position_elements = self.process_stop_members() - self.process_tracks(stop_position_elements) - - def build_longest_line(self): - line_nodes = set() - last_track = [] - track = [] - warned_about_holes = False - for m in self.element["members"]: - el = self.city.elements.get(el_id(m), None) - if not el or not StopArea.is_track(el): - continue - if "nodes" not in el or len(el["nodes"]) < 2: - self.city.error("Cannot find nodes in a railway", el) - continue - nodes = ["n{}".format(n) for n in el["nodes"]] - if m["role"] == "backward": - nodes.reverse() - line_nodes.update(nodes) - if not track: - is_first = True - track.extend(nodes) - else: - new_segment = list(nodes) # copying - if new_segment[0] == track[-1]: - track.extend(new_segment[1:]) - elif new_segment[-1] == track[-1]: - track.extend(reversed(new_segment[:-1])) - elif is_first and track[0] in ( - new_segment[0], - new_segment[-1], - ): - # We can reverse the track and try again - track.reverse() - if new_segment[0] == track[-1]: - track.extend(new_segment[1:]) - else: - track.extend(reversed(new_segment[:-1])) - else: - # Store the track if it is long and clean it - if not warned_about_holes: - self.city.warn( - "Hole in route rails near node {}".format( - track[-1] - ), - self.element, - ) - warned_about_holes = True - if len(track) > len(last_track): - last_track = track - track = [] - is_first = False - if len(track) > len(last_track): - last_track = track - # Remove duplicate points - last_track = [ - last_track[i] - for i in range(0, len(last_track)) - if i == 0 or last_track[i - 1] != last_track[i] - ] - return last_track, line_nodes - - def get_stop_projections(self): - projected = [project_on_line(x.stop, self.tracks) for x in 
self.stops] - - def stop_near_tracks_criterion(stop_index: int): - return ( - projected[stop_index]["projected_point"] is not None - and distance( - self.stops[stop_index].stop, - projected[stop_index]["projected_point"], - ) - <= MAX_DISTANCE_STOP_TO_LINE - ) - - return projected, stop_near_tracks_criterion - - def project_stops_on_line(self): - projected, stop_near_tracks_criterion = self.get_stop_projections() - - projected_stops_data = { - "first_stop_on_rails_index": None, - "last_stop_on_rails_index": None, - "stops_on_longest_line": [], # list [{'route_stop': RouteStop, - # 'coords': (lon, lat), - # 'positions_on_rails': [] } - } - first_index = 0 - while first_index < len(self.stops) and not stop_near_tracks_criterion( - first_index - ): - first_index += 1 - projected_stops_data["first_stop_on_rails_index"] = first_index - - last_index = len(self.stops) - 1 - while last_index > projected_stops_data[ - "first_stop_on_rails_index" - ] and not stop_near_tracks_criterion(last_index): - last_index -= 1 - projected_stops_data["last_stop_on_rails_index"] = last_index - - for i, route_stop in enumerate(self.stops): - if not first_index <= i <= last_index: - continue - - if projected[i]["projected_point"] is None: - self.city.error( - 'Stop "{}" {} is nowhere near the tracks'.format( - route_stop.stoparea.name, route_stop.stop - ), - self.element, - ) - else: - stop_data = { - "route_stop": route_stop, - "coords": None, - "positions_on_rails": None, - } - projected_point = projected[i]["projected_point"] - # We've got two separate stations with a good stretch of - # railway tracks between them. Put these on tracks. 
- d = round(distance(route_stop.stop, projected_point)) - if d > MAX_DISTANCE_STOP_TO_LINE: - self.city.notice( - 'Stop "{}" {} is {} meters from the tracks'.format( - route_stop.stoparea.name, route_stop.stop, d - ), - self.element, - ) - else: - stop_data["coords"] = projected_point - stop_data["positions_on_rails"] = projected[i][ - "positions_on_line" - ] - projected_stops_data["stops_on_longest_line"].append(stop_data) - return projected_stops_data - - def calculate_distances(self): - dist = 0 - vertex = 0 - for i, stop in enumerate(self.stops): - if i > 0: - direct = distance(stop.stop, self.stops[i - 1].stop) - d_line = None - if ( - self.first_stop_on_rails_index - <= i - <= self.last_stop_on_rails_index - ): - d_line = distance_on_line( - self.stops[i - 1].stop, stop.stop, self.tracks, vertex - ) - if d_line and direct - 10 <= d_line[0] <= direct * 2: - vertex = d_line[1] - dist += round(d_line[0]) - else: - dist += round(direct) - stop.distance = dist - - def process_tags(self, master): - relation = self.element - master_tags = {} if not master else master["tags"] - if "ref" not in relation["tags"] and "ref" not in master_tags: - self.city.notice("Missing ref on a route", relation) - self.ref = relation["tags"].get( - "ref", master_tags.get("ref", relation["tags"].get("name", None)) - ) - self.name = relation["tags"].get("name", None) - self.mode = relation["tags"]["route"] - if ( - "colour" not in relation["tags"] - and "colour" not in master_tags - and self.mode != "tram" - ): - self.city.notice("Missing colour on a route", relation) - try: - self.colour = normalize_colour( - relation["tags"].get("colour", master_tags.get("colour", None)) - ) - except ValueError as e: - self.colour = None - self.city.warn(str(e), relation) - try: - self.infill = normalize_colour( - relation["tags"].get( - "colour:infill", master_tags.get("colour:infill", None) - ) - ) - except ValueError as e: - self.infill = None - self.city.warn(str(e), relation) - self.network = 
Route.get_network(relation) - self.interval = Route.get_interval( - relation["tags"] - ) or Route.get_interval(master_tags) - self.start_time, self.end_time = get_start_end_times( - relation["tags"].get( - "opening_hours", master_tags.get("opening_hours", "") - ) - ) - if relation["tags"].get("public_transport:version") == "1": - self.city.warn( - "Public transport version is 1, which means the route " - "is an unsorted pile of objects", - relation, - ) - - def process_stop_members(self): - stations = set() # temporary for recording stations - seen_stops = False - seen_platforms = False - repeat_pos = None - stop_position_elements = [] - for m in self.element["members"]: - if "inactive" in m["role"]: - continue - k = el_id(m) - if k in self.city.stations: - st_list = self.city.stations[k] - st = st_list[0] - if len(st_list) > 1: - self.city.error( - f"Ambiguous station {st.name} in route. Please " - "use stop_position or split interchange stations", - self.element, - ) - el = self.city.elements[k] - actual_role = RouteStop.get_actual_role( - el, m["role"], self.city.modes - ) - if actual_role: - if m["role"] and actual_role not in m["role"]: - self.city.warn( - "Wrong role '{}' for {} {}".format( - m["role"], actual_role, k - ), - self.element, - ) - if repeat_pos is None: - if not self.stops or st not in stations: - stop = RouteStop(st) - self.stops.append(stop) - stations.add(st) - elif self.stops[-1].stoparea.id == st.id: - stop = self.stops[-1] - else: - # We've got a repeat - if ( - (seen_stops and seen_platforms) - or ( - actual_role == "stop" - and not seen_platforms - ) - or ( - actual_role == "platform" - and not seen_stops - ) - ): - # Circular route! 
- stop = RouteStop(st) - self.stops.append(stop) - stations.add(st) - else: - repeat_pos = 0 - if repeat_pos is not None: - if repeat_pos >= len(self.stops): - continue - # Check that the type matches - if (actual_role == "stop" and seen_stops) or ( - actual_role == "platform" and seen_platforms - ): - self.city.error( - 'Found an out-of-place {}: "{}" ({})'.format( - actual_role, el["tags"].get("name", ""), k - ), - self.element, - ) - continue - # Find the matching stop starting with index repeat_pos - while ( - repeat_pos < len(self.stops) - and self.stops[repeat_pos].stoparea.id != st.id - ): - repeat_pos += 1 - if repeat_pos >= len(self.stops): - self.city.error( - "Incorrect order of {}s at {}".format( - actual_role, k - ), - self.element, - ) - continue - stop = self.stops[repeat_pos] - - stop.add(m, self.element, self.city) - if repeat_pos is None: - seen_stops |= stop.seen_stop or stop.seen_station - seen_platforms |= stop.seen_platform - - if StopArea.is_stop(el): - stop_position_elements.append(el) - - continue - - if k not in self.city.elements: - if "stop" in m["role"] or "platform" in m["role"]: - raise CriticalValidationError( - f"{m['role']} {m['type']} {m['ref']} for route " - f"relation {self.element['id']} is not in the dataset" - ) - continue - el = self.city.elements[k] - if "tags" not in el: - self.city.error( - f"Untagged object {k} in a route", self.element - ) - continue - - is_under_construction = False - for ck in CONSTRUCTION_KEYS: - if ck in el["tags"]: - self.city.warn( - f"Under construction {m['role'] or 'feature'} {k} " - "in route. Consider setting 'inactive' role or " - "removing construction attributes", - self.element, - ) - is_under_construction = True - break - if is_under_construction: - continue - - if Station.is_station(el, self.city.modes): - # A station may be not included into this route due to previous - # 'stop area has multiple stations' error. No other error - # message is needed. 
- pass - elif el["tags"].get("railway") in ("station", "halt"): - self.city.error( - "Missing station={} on a {}".format(self.mode, m["role"]), - el, - ) - else: - actual_role = RouteStop.get_actual_role( - el, m["role"], self.city.modes - ) - if actual_role: - self.city.error( - f"{actual_role} {m['type']} {m['ref']} is not " - "connected to a station in route", - self.element, - ) - elif not StopArea.is_track(el): - self.city.warn( - "Unknown member type for {} {} in route".format( - m["type"], m["ref"] - ), - self.element, - ) - return stop_position_elements - - def process_tracks(self, stop_position_elements: list[dict]) -> None: - tracks, line_nodes = self.build_longest_line() - - for stop_el in stop_position_elements: - stop_id = el_id(stop_el) - if stop_id not in line_nodes: - self.city.warn( - 'Stop position "{}" ({}) is not on tracks'.format( - stop_el["tags"].get("name", ""), stop_id - ), - self.element, - ) - - # self.tracks would be a list of (lon, lat) for the longest stretch. - # Can be empty. 
- self.tracks = [el_center(self.city.elements.get(k)) for k in tracks] - if ( - None in self.tracks - ): # usually, extending BBOX for the city is needed - self.tracks = [] - for n in filter(lambda x: x not in self.city.elements, tracks): - self.city.warn( - f"The dataset is missing the railway tracks node {n}", - self.element, - ) - break - - if len(self.stops) > 1: - self.is_circular = ( - self.stops[0].stoparea == self.stops[-1].stoparea - ) - if ( - self.is_circular - and self.tracks - and self.tracks[0] != self.tracks[-1] - ): - self.city.warn( - "Non-closed rail sequence in a circular route", - self.element, - ) - - projected_stops_data = self.project_stops_on_line() - self.check_and_recover_stops_order(projected_stops_data) - self.apply_projected_stops_data(projected_stops_data) - - def apply_projected_stops_data(self, projected_stops_data: dict) -> None: - """Store better stop coordinates and indexes of first/last stops - that lie on a continuous track line, to the instance attributes. - """ - for attr in ("first_stop_on_rails_index", "last_stop_on_rails_index"): - setattr(self, attr, projected_stops_data[attr]) - - for stop_data in projected_stops_data["stops_on_longest_line"]: - route_stop = stop_data["route_stop"] - route_stop.positions_on_rails = stop_data["positions_on_rails"] - if stop_coords := stop_data["coords"]: - route_stop.stop = stop_coords - - def get_extended_tracks(self): - """Amend tracks with points of leading/trailing self.stops - that were not projected onto the longest tracks line. - Return a new array. 
- """ - if self.first_stop_on_rails_index >= len(self.stops): - tracks = [route_stop.stop for route_stop in self.stops] - else: - tracks = ( - [ - route_stop.stop - for i, route_stop in enumerate(self.stops) - if i < self.first_stop_on_rails_index - ] - + self.tracks - + [ - route_stop.stop - for i, route_stop in enumerate(self.stops) - if i > self.last_stop_on_rails_index - ] - ) - return tracks - - def get_truncated_tracks(self, tracks): - """Truncate leading/trailing segments of `tracks` param - that are beyond the first and last stop locations. - Return a new array. - """ - if self.is_circular: - return tracks.copy() - - first_stop_location = find_segment(self.stops[0].stop, tracks, 0) - last_stop_location = find_segment(self.stops[-1].stop, tracks, 0) - - if last_stop_location != (None, None): - seg2, u2 = last_stop_location - if u2 == 0.0: - # Make seg2 the segment the last_stop_location is - # at the middle or end of - seg2 -= 1 - # u2 = 1.0 - if seg2 + 2 < len(tracks): - tracks = tracks[0 : seg2 + 2] # noqa E203 - tracks[-1] = self.stops[-1].stop - - if first_stop_location != (None, None): - seg1, u1 = first_stop_location - if u1 == 1.0: - # Make seg1 the segment the first_stop_location is - # at the beginning or middle of - seg1 += 1 - # u1 = 0.0 - if seg1 > 0: - tracks = tracks[seg1:] - tracks[0] = self.stops[0].stop - - return tracks - - def get_tracks_geometry(self): - tracks = self.get_extended_tracks() - tracks = self.get_truncated_tracks(tracks) - return tracks - - def check_stops_order_by_angle(self) -> tuple[list, list]: - disorder_warnings = [] - disorder_errors = [] - for i, route_stop in enumerate( - islice(self.stops, 1, len(self.stops) - 1), start=1 - ): - angle = angle_between( - self.stops[i - 1].stop, - route_stop.stop, - self.stops[i + 1].stop, - ) - if angle < ALLOWED_ANGLE_BETWEEN_STOPS: - msg = ( - "Angle between stops around " - f'"{route_stop.stoparea.name}" {route_stop.stop} ' - f"is too narrow, {angle} degrees" - ) - if angle < 
DISALLOWED_ANGLE_BETWEEN_STOPS: - disorder_errors.append(msg) - else: - disorder_warnings.append(msg) - return disorder_warnings, disorder_errors - - def check_stops_order_on_tracks_direct(self, stop_sequence) -> str | None: - """Checks stops order on tracks, following stop_sequence - in direct order only. - :param stop_sequence: list of dict{'route_stop', 'positions_on_rails', - 'coords'} for RouteStops that belong to the longest contiguous - sequence of tracks in a route. - :return: error message on the first order violation or None. - """ - allowed_order_violations = 1 if self.is_circular else 0 - max_position_on_rails = -1 - for stop_data in stop_sequence: - positions_on_rails = stop_data["positions_on_rails"] - suitable_occurrence = 0 - while ( - suitable_occurrence < len(positions_on_rails) - and positions_on_rails[suitable_occurrence] - < max_position_on_rails - ): - suitable_occurrence += 1 - if suitable_occurrence == len(positions_on_rails): - if allowed_order_violations > 0: - suitable_occurrence -= 1 - allowed_order_violations -= 1 - else: - route_stop = stop_data["route_stop"] - return ( - "Stops on tracks are unordered near " - f'"{route_stop.stoparea.name}" {route_stop.stop}' - ) - max_position_on_rails = positions_on_rails[suitable_occurrence] - - def check_stops_order_on_tracks(self, projected_stops_data) -> str | None: - """Checks stops order on tracks, trying direct and reversed - order of stops in the stop_sequence. - :param projected_stops_data: info about RouteStops that belong to the - longest contiguous sequence of tracks in a route. May be changed - if tracks reversing is performed. - :return: error message on the first order violation or None. 
- """ - error_message = self.check_stops_order_on_tracks_direct( - projected_stops_data["stops_on_longest_line"] - ) - if error_message: - error_message_reversed = self.check_stops_order_on_tracks_direct( - reversed(projected_stops_data["stops_on_longest_line"]) - ) - if error_message_reversed is None: - error_message = None - self.city.warn( - "Tracks seem to go in the opposite direction to stops", - self.element, - ) - self.tracks.reverse() - new_projected_stops_data = self.project_stops_on_line() - projected_stops_data.update(new_projected_stops_data) - - return error_message - - def check_stops_order(self, projected_stops_data): - ( - angle_disorder_warnings, - angle_disorder_errors, - ) = self.check_stops_order_by_angle() - disorder_on_tracks_error = self.check_stops_order_on_tracks( - projected_stops_data - ) - disorder_warnings = angle_disorder_warnings - disorder_errors = angle_disorder_errors - if disorder_on_tracks_error: - disorder_errors.append(disorder_on_tracks_error) - return disorder_warnings, disorder_errors - - def check_and_recover_stops_order(self, projected_stops_data: dict): - """ - :param projected_stops_data: may change if we need to reverse tracks - """ - disorder_warnings, disorder_errors = self.check_stops_order( - projected_stops_data - ) - if disorder_warnings or disorder_errors: - resort_success = False - if self.city.recovery_data: - resort_success = self.try_resort_stops() - if resort_success: - for msg in disorder_warnings: - self.city.notice(msg, self.element) - for msg in disorder_errors: - self.city.warn( - "Fixed with recovery data: " + msg, self.element - ) - - if not resort_success: - for msg in disorder_warnings: - self.city.notice(msg, self.element) - for msg in disorder_errors: - self.city.error(msg, self.element) - - def try_resort_stops(self): - """Precondition: self.city.recovery_data is not None. 
- Return success of station order recovering.""" - self_stops = {} # station name => RouteStop - for stop in self.stops: - station = stop.stoparea.station - stop_name = station.name - if stop_name == "?" and station.int_name: - stop_name = station.int_name - # We won't programmatically recover routes with repeating stations: - # such cases are rare and deserves manual verification - if stop_name in self_stops: - return False - self_stops[stop_name] = stop - - route_id = (self.colour, self.ref) - if route_id not in self.city.recovery_data: - return False - - stop_names = list(self_stops.keys()) - suitable_itineraries = [] - for itinerary in self.city.recovery_data[route_id]: - itinerary_stop_names = [ - stop["name"] for stop in itinerary["stations"] - ] - if not ( - len(stop_names) == len(itinerary_stop_names) - and sorted(stop_names) == sorted(itinerary_stop_names) - ): - continue - big_station_displacement = False - for it_stop in itinerary["stations"]: - name = it_stop["name"] - it_stop_center = it_stop["center"] - self_stop_center = self_stops[name].stoparea.station.center - if ( - distance(it_stop_center, self_stop_center) - > DISPLACEMENT_TOLERANCE - ): - big_station_displacement = True - break - if not big_station_displacement: - suitable_itineraries.append(itinerary) - - if len(suitable_itineraries) == 0: - return False - elif len(suitable_itineraries) == 1: - matching_itinerary = suitable_itineraries[0] - else: - from_tag = self.element["tags"].get("from") - to_tag = self.element["tags"].get("to") - if not from_tag and not to_tag: - return False - matching_itineraries = [ - itin - for itin in suitable_itineraries - if from_tag - and itin["from"] == from_tag - or to_tag - and itin["to"] == to_tag - ] - if len(matching_itineraries) != 1: - return False - matching_itinerary = matching_itineraries[0] - self.stops = [ - self_stops[stop["name"]] for stop in matching_itinerary["stations"] - ] - return True - - def __len__(self): - return len(self.stops) - - def 
__getitem__(self, i): - return self.stops[i] - - def __iter__(self): - return iter(self.stops) - - def __repr__(self): - return ( - "Route(id={}, mode={}, ref={}, name={}, network={}, interval={}, " - "circular={}, num_stops={}, line_length={} m, from={}, to={}" - ).format( - self.id, - self.mode, - self.ref, - self.name, - self.network, - self.interval, - self.is_circular, - len(self.stops), - self.stops[-1].distance, - self.stops[0], - self.stops[-1], - ) - - -class RouteMaster: - def __init__(self, master=None): - self.routes = [] - self.best = None - self.id = el_id(master) - self.has_master = master is not None - self.interval_from_master = False - if master: - self.ref = master["tags"].get( - "ref", master["tags"].get("name", None) - ) - try: - self.colour = normalize_colour( - master["tags"].get("colour", None) - ) - except ValueError: - self.colour = None - try: - self.infill = normalize_colour( - master["tags"].get("colour:infill", None) - ) - except ValueError: - self.infill = None - self.network = Route.get_network(master) - self.mode = master["tags"].get( - "route_master", None - ) # This tag is required, but okay - self.name = master["tags"].get("name", None) - self.interval = Route.get_interval(master["tags"]) - self.interval_from_master = self.interval is not None - else: - self.ref = None - self.colour = None - self.infill = None - self.network = None - self.mode = None - self.name = None - self.interval = None - - def add(self, route, city): - if not self.network: - self.network = route.network - elif route.network and route.network != self.network: - city.error( - 'Route has different network ("{}") from master "{}"'.format( - route.network, self.network - ), - route.element, - ) - - if not self.colour: - self.colour = route.colour - elif route.colour and route.colour != self.colour: - city.notice( - 'Route "{}" has different colour from master "{}"'.format( - route.colour, self.colour - ), - route.element, - ) - - if not self.infill: - 
self.infill = route.infill - elif route.infill and route.infill != self.infill: - city.notice( - ( - f'Route "{route.infill}" has different infill colour ' - f'from master "{self.infill}"' - ), - route.element, - ) - - if not self.ref: - self.ref = route.ref - elif route.ref != self.ref: - city.notice( - 'Route "{}" has different ref from master "{}"'.format( - route.ref, self.ref - ), - route.element, - ) - - if not self.name: - self.name = route.name - - if not self.mode: - self.mode = route.mode - elif route.mode != self.mode: - city.error( - "Incompatible PT mode: master has {} and route has {}".format( - self.mode, route.mode - ), - route.element, - ) - return - - if not self.interval_from_master and route.interval: - if not self.interval: - self.interval = route.interval - else: - self.interval = min(self.interval, route.interval) - - if not self.has_master and (not self.id or self.id > route.id): - self.id = route.id - - self.routes.append(route) - if not self.best or len(route.stops) > len(self.best.stops): - self.best = route - - def stop_areas(self): - """Returns a list of all stations on all route variants.""" - seen_ids = set() - for route in self.routes: - for stop in route: - st = stop.stoparea - if st.id not in seen_ids: - seen_ids.add(st.id) - yield st - - def __len__(self): - return len(self.routes) - - def __getitem__(self, i): - return self.routes[i] - - def __iter__(self): - return iter(self.routes) - - def __repr__(self): - return ( - f"RouteMaster(id={self.id}, mode={self.mode}, ref={self.ref}, " - f"name={self.name}, network={self.network}, " - f"num_variants={len(self.routes)}" - ) - - -class City: - route_class = Route - - def __init__(self, city_data, overground=False): - self.validate_called = False - self.errors = [] - self.warnings = [] - self.notices = [] - self.try_fill_int_attribute(city_data, "id") - self.name = city_data["name"] - self.country = city_data["country"] - self.continent = city_data["continent"] - self.overground = 
overground - if not overground: - self.try_fill_int_attribute(city_data, "num_stations") - self.try_fill_int_attribute(city_data, "num_lines", "0") - self.try_fill_int_attribute(city_data, "num_light_lines", "0") - self.try_fill_int_attribute(city_data, "num_interchanges", "0") - else: - self.try_fill_int_attribute(city_data, "num_tram_lines", "0") - self.try_fill_int_attribute(city_data, "num_trolleybus_lines", "0") - self.try_fill_int_attribute(city_data, "num_bus_lines", "0") - self.try_fill_int_attribute(city_data, "num_other_lines", "0") - - # Acquiring list of networks and modes - networks = ( - None - if not city_data["networks"] - else city_data["networks"].split(":") - ) - if not networks or len(networks[-1]) == 0: - self.networks = [] - else: - self.networks = set( - filter(None, [x.strip() for x in networks[-1].split(";")]) - ) - if not networks or len(networks) < 2 or len(networks[0]) == 0: - if self.overground: - self.modes = DEFAULT_MODES_OVERGROUND - else: - self.modes = DEFAULT_MODES_RAPID - else: - self.modes = set([x.strip() for x in networks[0].split(",")]) - - # Reversing bbox so it is (xmin, ymin, xmax, ymax) - bbox = city_data["bbox"].split(",") - if len(bbox) == 4: - self.bbox = [float(bbox[i]) for i in (1, 0, 3, 2)] - else: - self.bbox = None - - self.elements = {} # Dict el_id → el - self.stations = defaultdict(list) # Dict el_id → list of StopAreas - self.routes = {} # Dict route_master_ref → RouteMaster - self.masters = {} # Dict el_id of route → route_master - self.stop_areas = defaultdict( - list - ) # El_id → list of stop_area elements it belongs to - self.transfers = [] # List of lists of stop areas - self.station_ids = set() # Set of stations' uid - self.stops_and_platforms = set() # Set of stops and platforms el_id - self.recovery_data = None - - def try_fill_int_attribute( - self, city_data: dict, attr: str, default: str | None = None - ) -> None: - """Try to convert string value to int. 
Conversion is considered - to fail if one of the following is true: - * attr is not empty and data type casting fails; - * attr is empty and no default value is given. - In such cases the city is marked as bad by adding an error - to the city validation log. - """ - attr_value = city_data[attr] - if not attr_value and default is not None: - attr_value = default - - try: - attr_int = int(attr_value) - except ValueError: - print_value = ( - f"{city_data[attr]}" if city_data[attr] else "" - ) - self.error( - f"Configuration error: wrong value for {attr}: {print_value}" - ) - setattr(self, attr, 0) - else: - setattr(self, attr, attr_int) - - @staticmethod - def log_message(message, el): - if el: - tags = el.get("tags", {}) - message += ' ({} {}, "{}")'.format( - el["type"], - el.get("id", el.get("ref")), - tags.get("name", tags.get("ref", "")), - ) - return message - - def notice(self, message, el=None): - """This type of message may point to a potential problem.""" - msg = City.log_message(message, el) - self.notices.append(msg) - - def warn(self, message, el=None): - """A warning is definitely a problem but is doesn't prevent - from building a routing file and doesn't invalidate the city. 
- """ - msg = City.log_message(message, el) - self.warnings.append(msg) - - def error(self, message, el=None): - """Error if a critical problem that invalidates the city""" - msg = City.log_message(message, el) - self.errors.append(msg) - - def contains(self, el): - center = el_center(el) - if center: - return ( - self.bbox[0] <= center[1] <= self.bbox[2] - and self.bbox[1] <= center[0] <= self.bbox[3] - ) - return False - - def add(self, el): - if el["type"] == "relation" and "members" not in el: - return - - self.elements[el_id(el)] = el - if not (el["type"] == "relation" and "tags" in el): - return - - relation_type = el["tags"].get("type") - if relation_type == "route_master": - for m in el["members"]: - if m["type"] != "relation": - continue - - if el_id(m) in self.masters: - self.error("Route in two route_masters", m) - self.masters[el_id(m)] = el - - elif el["tags"].get("public_transport") == "stop_area": - if relation_type != "public_transport": - self.warn( - "stop_area relation with " - f"type={relation_type}, needed type=public_transport", - el, - ) - return - - warned_about_duplicates = False - for m in el["members"]: - stop_areas = self.stop_areas[el_id(m)] - if el in stop_areas and not warned_about_duplicates: - self.warn("Duplicate element in a stop area", el) - warned_about_duplicates = True - else: - stop_areas.append(el) - - def make_transfer(self, sag): - transfer = set() - for m in sag["members"]: - k = el_id(m) - el = self.elements.get(k) - if not el: - # A sag member may validly not belong to the city while - # the sag does - near the city bbox boundary - continue - if "tags" not in el: - self.warn( - "An untagged object {} in a stop_area_group".format(k), sag - ) - continue - if ( - el["type"] != "relation" - or el["tags"].get("type") != "public_transport" - or el["tags"].get("public_transport") != "stop_area" - ): - continue - if k in self.stations: - stoparea = self.stations[k][0] - transfer.add(stoparea) - if stoparea.transfer: - # TODO: 
properly process such cases. - # Counterexample 1: Paris, - # Châtelet subway station <-> - # "Châtelet - Les Halles" railway station <-> - # Les Halles subway station - # Counterexample 2: Saint-Petersburg, transfers - # Витебский вокзал <-> - # Пушкинская <-> - # Звенигородская - self.warn( - "Stop area {} belongs to multiple interchanges".format( - k - ) - ) - stoparea.transfer = el_id(sag) - if len(transfer) > 1: - self.transfers.append(transfer) - - def extract_routes(self): - # Extract stations - processed_stop_areas = set() - for el in self.elements.values(): - if Station.is_station(el, self.modes): - # See PR https://github.com/mapsme/subways/pull/98 - if ( - el["type"] == "relation" - and el["tags"].get("type") != "multipolygon" - ): - rel_type = el["tags"].get("type") - self.warn( - "A railway station cannot be a relation of type " - f"{rel_type}", - el, - ) - continue - st = Station(el, self) - self.station_ids.add(st.id) - if st.id in self.stop_areas: - stations = [] - for sa in self.stop_areas[st.id]: - stations.append(StopArea(st, self, sa)) - else: - stations = [StopArea(st, self)] - - for station in stations: - if station.id not in processed_stop_areas: - processed_stop_areas.add(station.id) - for st_el in station.get_elements(): - self.stations[st_el].append(station) - - # Check that stops and platforms belong to - # a single stop_area - for sp in station.stops | station.platforms: - if sp in self.stops_and_platforms: - self.notice( - f"A stop or a platform {sp} belongs to " - "multiple stop areas, might be correct" - ) - else: - self.stops_and_platforms.add(sp) - - # Extract routes - for el in self.elements.values(): - if Route.is_route(el, self.modes): - if el["tags"].get("access") in ("no", "private"): - continue - route_id = el_id(el) - master = self.masters.get(route_id, None) - if self.networks: - network = Route.get_network(el) - if master: - master_network = Route.get_network(master) - else: - master_network = None - if ( - network not in 
self.networks - and master_network not in self.networks - ): - continue - - route = self.route_class(el, self, master) - if not route.stops: - self.warn("Route has no stops", el) - continue - elif len(route.stops) == 1: - self.warn("Route has only one stop", el) - continue - - k = el_id(master) if master else route.ref - if k not in self.routes: - self.routes[k] = RouteMaster(master) - self.routes[k].add(route, self) - - # Sometimes adding a route to a newly initialized RouteMaster - # can fail - if len(self.routes[k]) == 0: - del self.routes[k] - - # And while we're iterating over relations, find interchanges - if ( - el["type"] == "relation" - and el.get("tags", {}).get("public_transport", None) - == "stop_area_group" - ): - self.make_transfer(el) - - # Filter transfers, leaving only stations that belong to routes - used_stop_areas = set() - for rmaster in self.routes.values(): - for route in rmaster: - used_stop_areas.update([s.stoparea for s in route.stops]) - new_transfers = [] - for transfer in self.transfers: - new_tr = [s for s in transfer if s in used_stop_areas] - if len(new_tr) > 1: - new_transfers.append(new_tr) - self.transfers = new_transfers - - def __iter__(self): - return iter(self.routes.values()) - - @property - def is_good(self): - if not (self.errors or self.validate_called): - raise RuntimeError( - "You mustn't refer to City.is_good property before calling " - "the City.validate() method unless an error already occurred." 
- ) - return len(self.errors) == 0 - - def get_validation_result(self): - result = { - "name": self.name, - "country": self.country, - "continent": self.continent, - "stations_found": getattr(self, "found_stations", 0), - "transfers_found": getattr(self, "found_interchanges", 0), - "unused_entrances": getattr(self, "unused_entrances", 0), - "networks": getattr(self, "found_networks", 0), - } - if not self.overground: - result.update( - { - "subwayl_expected": getattr(self, "num_lines", 0), - "lightrl_expected": getattr(self, "num_light_lines", 0), - "subwayl_found": getattr(self, "found_lines", 0), - "lightrl_found": getattr(self, "found_light_lines", 0), - "stations_expected": getattr(self, "num_stations", 0), - "transfers_expected": getattr(self, "num_interchanges", 0), - } - ) - else: - result.update( - { - "stations_expected": 0, - "transfers_expected": 0, - "busl_expected": getattr(self, "num_bus_lines", 0), - "trolleybusl_expected": getattr( - self, "num_trolleybus_lines", 0 - ), - "traml_expected": getattr(self, "num_tram_lines", 0), - "otherl_expected": getattr(self, "num_other_lines", 0), - "busl_found": getattr(self, "found_bus_lines", 0), - "trolleybusl_found": getattr( - self, "found_trolleybus_lines", 0 - ), - "traml_found": getattr(self, "found_tram_lines", 0), - "otherl_found": getattr(self, "found_other_lines", 0), - } - ) - result["warnings"] = self.warnings - result["errors"] = self.errors - result["notices"] = self.notices - return result - - def count_unused_entrances(self): - global used_entrances - stop_areas = set() - for el in self.elements.values(): - if ( - el["type"] == "relation" - and "tags" in el - and el["tags"].get("public_transport") == "stop_area" - and "members" in el - ): - stop_areas.update([el_id(m) for m in el["members"]]) - unused = [] - not_in_sa = [] - for el in self.elements.values(): - if ( - el["type"] == "node" - and "tags" in el - and el["tags"].get("railway") == "subway_entrance" - ): - i = el_id(el) - if i in 
self.stations: - used_entrances.add(i) - if i not in stop_areas: - not_in_sa.append(i) - if i not in self.stations: - unused.append(i) - self.unused_entrances = len(unused) - self.entrances_not_in_stop_areas = len(not_in_sa) - if unused: - self.notice( - f"{len(unused)} subway entrances are not connected to a " - f"station: {format_elid_list(unused)}" - ) - if not_in_sa: - self.notice( - f"{len(not_in_sa)} subway entrances are not in stop_area " - f"relations: {format_elid_list(not_in_sa)}" - ) - - def check_return_routes(self, rmaster): - variants = {} - have_return = set() - for variant in rmaster: - if len(variant) < 2: - continue - # Using transfer ids because a train can arrive at different - # stations within a transfer. But disregard transfer that may give - # an impression of a circular route (for example, - # Simonis / Elisabeth station and route 2 in Brussels) - if variant[0].stoparea.transfer == variant[-1].stoparea.transfer: - t = (variant[0].stoparea.id, variant[-1].stoparea.id) - else: - t = ( - variant[0].stoparea.transfer or variant[0].stoparea.id, - variant[-1].stoparea.transfer or variant[-1].stoparea.id, - ) - if t in variants: - continue - variants[t] = variant.element - tr = (t[1], t[0]) - if tr in variants: - have_return.add(t) - have_return.add(tr) - - if len(variants) == 0: - self.error( - "An empty route master {}. Please set construction:route " - "if it is under construction".format(rmaster.id) - ) - elif len(variants) == 1: - log_function = ( - self.error if not rmaster.best.is_circular else self.notice - ) - log_function( - "Only one route in route_master. 
" - "Please check if it needs a return route", - rmaster.best.element, - ) - else: - for t, rel in variants.items(): - if t not in have_return: - self.notice("Route does not have a return direction", rel) - - def validate_lines(self): - self.found_light_lines = len( - [x for x in self.routes.values() if x.mode != "subway"] - ) - self.found_lines = len(self.routes) - self.found_light_lines - if self.found_lines != self.num_lines: - self.error( - "Found {} subway lines, expected {}".format( - self.found_lines, self.num_lines - ) - ) - if self.found_light_lines != self.num_light_lines: - self.error( - "Found {} light rail lines, expected {}".format( - self.found_light_lines, self.num_light_lines - ) - ) - - def validate_overground_lines(self): - self.found_tram_lines = len( - [x for x in self.routes.values() if x.mode == "tram"] - ) - self.found_bus_lines = len( - [x for x in self.routes.values() if x.mode == "bus"] - ) - self.found_trolleybus_lines = len( - [x for x in self.routes.values() if x.mode == "trolleybus"] - ) - self.found_other_lines = len( - [ - x - for x in self.routes.values() - if x.mode not in ("bus", "trolleybus", "tram") - ] - ) - if self.found_tram_lines != self.num_tram_lines: - log_function = ( - self.error if self.found_tram_lines == 0 else self.notice - ) - log_function( - "Found {} tram lines, expected {}".format( - self.found_tram_lines, self.num_tram_lines - ), - ) - - def validate(self): - networks = Counter() - self.found_stations = 0 - unused_stations = set(self.station_ids) - for rmaster in self.routes.values(): - networks[str(rmaster.network)] += 1 - if not self.overground: - self.check_return_routes(rmaster) - route_stations = set() - for sa in rmaster.stop_areas(): - route_stations.add(sa.transfer or sa.id) - unused_stations.discard(sa.station.id) - self.found_stations += len(route_stations) - if unused_stations: - self.unused_stations = len(unused_stations) - self.notice( - "{} unused stations: {}".format( - self.unused_stations, 
format_elid_list(unused_stations) - ) - ) - self.count_unused_entrances() - self.found_interchanges = len(self.transfers) - - if self.overground: - self.validate_overground_lines() - else: - self.validate_lines() - - if self.found_stations != self.num_stations: - msg = "Found {} stations in routes, expected {}".format( - self.found_stations, self.num_stations - ) - log_function = ( - self.error - if self.num_stations > 0 - and not ( - 0 - <= (self.num_stations - self.found_stations) - / self.num_stations - <= ALLOWED_STATIONS_MISMATCH - ) - else self.warn - ) - log_function(msg) - - if self.found_interchanges != self.num_interchanges: - msg = "Found {} interchanges, expected {}".format( - self.found_interchanges, self.num_interchanges - ) - log_function = ( - self.error - if self.num_interchanges != 0 - and not ( - (self.num_interchanges - self.found_interchanges) - / self.num_interchanges - <= ALLOWED_TRANSFERS_MISMATCH - ) - else self.warn - ) - log_function(msg) - - self.found_networks = len(networks) - if len(networks) > max(1, len(self.networks)): - n_str = "; ".join( - ["{} ({})".format(k, v) for k, v in networks.items()] - ) - self.notice("More than one network: {}".format(n_str)) - - self.validate_called = True - - def calculate_distances(self) -> None: - for route_master in self: - for route in route_master: - route.calculate_distances() - - -def find_transfers(elements, cities): - transfers = [] - stop_area_groups = [] - for el in elements: - if ( - el["type"] == "relation" - and "members" in el - and el.get("tags", {}).get("public_transport") == "stop_area_group" - ): - stop_area_groups.append(el) - - # StopArea.id uniquely identifies a StopArea. We must ensure StopArea - # uniqueness since one stop_area relation may result in - # several StopArea instances at inter-city interchanges. 
- stop_area_ids = defaultdict(set) # el_id -> set of StopArea.id - stop_area_objects = dict() # StopArea.id -> one of StopArea instances - for city in cities: - for el, st in city.stations.items(): - stop_area_ids[el].update(sa.id for sa in st) - stop_area_objects.update((sa.id, sa) for sa in st) - - for sag in stop_area_groups: - transfer = set() - for m in sag["members"]: - k = el_id(m) - if k not in stop_area_ids: - continue - transfer.update( - stop_area_objects[sa_id] for sa_id in stop_area_ids[k] - ) - if len(transfer) > 1: - transfers.append(transfer) - return transfers - - -def get_unused_entrances_geojson(elements): - global used_entrances - features = [] - for el in elements: - if ( - el["type"] == "node" - and "tags" in el - and el["tags"].get("railway") == "subway_entrance" - ): - if el_id(el) not in used_entrances: - geometry = {"type": "Point", "coordinates": el_center(el)} - properties = { - k: v - for k, v in el["tags"].items() - if k not in ("railway", "entrance") - } - features.append( - { - "type": "Feature", - "geometry": geometry, - "properties": properties, - } - ) - return {"type": "FeatureCollection", "features": features} diff --git a/subways/__init__.py b/subways/__init__.py new file mode 100644 index 00000000..c734b54a --- /dev/null +++ b/subways/__init__.py @@ -0,0 +1,92 @@ +from .consts import ( + ALL_MODES, + CONSTRUCTION_KEYS, + DEFAULT_MODES_RAPID, + DEFAULT_MODES_OVERGROUND, + DISPLACEMENT_TOLERANCE, + MAX_DISTANCE_STOP_TO_LINE, + MODES_OVERGROUND, + MODES_RAPID, + RAILWAY_TYPES, +) +from .css_colours import normalize_colour +from .geom_utils import ( + angle_between, + distance, + distance_on_line, + find_segment, + is_near, + project_on_line, +) +from .osm_element import el_center, el_id +from .overpass import multi_overpass, overpass_request +from .subway_io import ( + dump_yaml, + load_xml, + make_geojson, + read_recovery_data, + write_recovery_data, +) +from .types import ( + CriticalValidationError, + IdT, + LonLat, + 
OsmElementT, + RailT, + TransferT, + TransfersT, +) +from .validation import ( + add_osm_elements_to_cities, + BAD_MARK, + calculate_centers, + DEFAULT_CITIES_INFO_URL, + DEFAULT_SPREADSHEET_ID, + get_cities_info, + prepare_cities, + validate_cities, +) + + +__all__ = [ + "ALL_MODES", + "CONSTRUCTION_KEYS", + "DEFAULT_MODES_RAPID", + "DEFAULT_MODES_OVERGROUND", + "DISPLACEMENT_TOLERANCE", + "MAX_DISTANCE_STOP_TO_LINE", + "MODES_OVERGROUND", + "MODES_RAPID", + "RAILWAY_TYPES", + "angle_between", + "distance", + "distance_on_line", + "find_segment", + "is_near", + "project_on_line", + "normalize_colour", + "el_center", + "el_id", + "overpass_request", + "multi_overpass", + "dump_yaml", + "load_xml", + "make_geojson", + "read_recovery_data", + "write_recovery_data", + "CriticalValidationError", + "IdT", + "LonLat", + "OsmElementT", + "RailT", + "TransferT", + "TransfersT", + "add_osm_elements_to_cities", + "BAD_MARK", + "calculate_centers", + "DEFAULT_CITIES_INFO_URL", + "DEFAULT_SPREADSHEET_ID", + "get_cities_info", + "prepare_cities", + "validate_cities", +] diff --git a/subways/consts.py b/subways/consts.py new file mode 100644 index 00000000..4d75426b --- /dev/null +++ b/subways/consts.py @@ -0,0 +1,26 @@ +MAX_DISTANCE_STOP_TO_LINE = 50 # in meters + +# If an object was moved not too far compared to previous validator run, +# it is likely the same object +DISPLACEMENT_TOLERANCE = 300 # in meters + +MODES_RAPID = {"subway", "light_rail", "monorail", "train"} +MODES_OVERGROUND = {"tram", "bus", "trolleybus", "aerialway", "ferry"} +DEFAULT_MODES_RAPID = {"subway", "light_rail"} +DEFAULT_MODES_OVERGROUND = {"tram"} # TODO: bus and trolleybus? 
+ALL_MODES = MODES_RAPID | MODES_OVERGROUND +RAILWAY_TYPES = { + "rail", + "light_rail", + "subway", + "narrow_gauge", + "funicular", + "monorail", + "tram", +} +CONSTRUCTION_KEYS = ( + "construction", + "proposed", + "construction:railway", + "proposed:railway", +) diff --git a/css_colours.py b/subways/css_colours.py similarity index 98% rename from css_colours.py rename to subways/css_colours.py index 72180547..170d3900 100644 --- a/css_colours.py +++ b/subways/css_colours.py @@ -152,7 +152,7 @@ } -def normalize_colour(c): +def normalize_colour(c: str | None) -> str | None: if not c: return None c = c.strip().lower() diff --git a/subways/geom_utils.py b/subways/geom_utils.py new file mode 100644 index 00000000..30d1a2d7 --- /dev/null +++ b/subways/geom_utils.py @@ -0,0 +1,175 @@ +import math + +from subways.consts import MAX_DISTANCE_STOP_TO_LINE +from subways.types import LonLat, RailT + + +def distance(p1: LonLat, p2: LonLat) -> float: + if p1 is None or p2 is None: + raise Exception( + "One of arguments to distance({}, {}) is None".format(p1, p2) + ) + dx = math.radians(p1[0] - p2[0]) * math.cos( + 0.5 * math.radians(p1[1] + p2[1]) + ) + dy = math.radians(p1[1] - p2[1]) + return 6378137 * math.sqrt(dx * dx + dy * dy) + + +def is_near(p1: LonLat, p2: LonLat) -> bool: + return ( + p1[0] - 1e-8 <= p2[0] <= p1[0] + 1e-8 + and p1[1] - 1e-8 <= p2[1] <= p1[1] + 1e-8 + ) + + +def project_on_segment(p: LonLat, p1: LonLat, p2: LonLat) -> float | None: + """Given three points, return u - the position of projection of + point p onto segment p1p2 regarding point p1 and (p2-p1) direction vector + """ + dp = (p2[0] - p1[0], p2[1] - p1[1]) + d2 = dp[0] * dp[0] + dp[1] * dp[1] + if d2 < 1e-14: + return None + u = ((p[0] - p1[0]) * dp[0] + (p[1] - p1[1]) * dp[1]) / d2 + if not 0 <= u <= 1: + return None + return u + + +def project_on_line(p: LonLat, line: RailT) -> dict: + result = { + # In the first approximation, position on rails is the index of the + # closest vertex of 
line to the point p. Fractional value means that + # the projected point lies on a segment between two vertices. + # More than one value can occur if a route follows the same tracks + # more than once. + "positions_on_line": None, + "projected_point": None, # (lon, lat) + } + + if len(line) < 2: + return result + d_min = MAX_DISTANCE_STOP_TO_LINE * 5 + closest_to_vertex = False + # First, check vertices in the line + for i, vertex in enumerate(line): + d = distance(p, vertex) + if d < d_min: + result["positions_on_line"] = [i] + result["projected_point"] = vertex + d_min = d + closest_to_vertex = True + elif vertex == result["projected_point"]: + # Repeated occurrence of the track vertex in line, like Oslo Line 5 + result["positions_on_line"].append(i) + # And then calculate distances to each segment + for seg in range(len(line) - 1): + # Check bbox for speed + if not ( + ( + min(line[seg][0], line[seg + 1][0]) - MAX_DISTANCE_STOP_TO_LINE + <= p[0] + <= max(line[seg][0], line[seg + 1][0]) + + MAX_DISTANCE_STOP_TO_LINE + ) + and ( + min(line[seg][1], line[seg + 1][1]) - MAX_DISTANCE_STOP_TO_LINE + <= p[1] + <= max(line[seg][1], line[seg + 1][1]) + + MAX_DISTANCE_STOP_TO_LINE + ) + ): + continue + u = project_on_segment(p, line[seg], line[seg + 1]) + if u: + projected_point = ( + line[seg][0] + u * (line[seg + 1][0] - line[seg][0]), + line[seg][1] + u * (line[seg + 1][1] - line[seg][1]), + ) + d = distance(p, projected_point) + if d < d_min: + result["positions_on_line"] = [seg + u] + result["projected_point"] = projected_point + d_min = d + closest_to_vertex = False + elif projected_point == result["projected_point"]: + # Repeated occurrence of the track segment in line, + # like Oslo Line 5 + if not closest_to_vertex: + result["positions_on_line"].append(seg + u) + return result + + +def find_segment( + p: LonLat, line: RailT, start_vertex: int = 0 +) -> tuple[int, float] | tuple[None, None]: + """Returns index of a segment and a position inside it.""" + EPS = 1e-9 
+ for seg in range(start_vertex, len(line) - 1): + if is_near(p, line[seg]): + return seg, 0.0 + if line[seg][0] == line[seg + 1][0]: + if not (p[0] - EPS <= line[seg][0] <= p[0] + EPS): + continue + px = None + else: + px = (p[0] - line[seg][0]) / (line[seg + 1][0] - line[seg][0]) + if px is None or (0 <= px <= 1): + if line[seg][1] == line[seg + 1][1]: + if not (p[1] - EPS <= line[seg][1] <= p[1] + EPS): + continue + py = None + else: + py = (p[1] - line[seg][1]) / (line[seg + 1][1] - line[seg][1]) + if py is None or (0 <= py <= 1): + if py is None or px is None or (px - EPS <= py <= px + EPS): + return seg, px or py + return None, None + + +def distance_on_line( + p1: LonLat, p2: LonLat, line: RailT, start_vertex: int = 0 +) -> tuple[float, int] | None: + """Calculates distance via line between projections + of points p1 and p2. Returns a TUPLE of (d, vertex): + d is the distance and vertex is the number of the second + vertex, to continue calculations for the next point.""" + line_len = len(line) + seg1, pos1 = find_segment(p1, line, start_vertex) + if seg1 is None: + # logging.warn('p1 %s is not projected, st=%s', p1, start_vertex) + return None + seg2, pos2 = find_segment(p2, line, seg1) + if seg2 is None: + if line[0] == line[-1]: + line = line + line[1:] + seg2, pos2 = find_segment(p2, line, seg1) + if seg2 is None: + # logging.warn('p2 %s is not projected, st=%s', p2, start_vertex) + return None + if seg1 == seg2: + return distance(line[seg1], line[seg1 + 1]) * abs(pos2 - pos1), seg1 + if seg2 < seg1: + # Should not happen + raise Exception("Pos1 %s is after pos2 %s", seg1, seg2) + d = 0 + if pos1 < 1: + d += distance(line[seg1], line[seg1 + 1]) * (1 - pos1) + for i in range(seg1 + 1, seg2): + d += distance(line[i], line[i + 1]) + if pos2 > 0: + d += distance(line[seg2], line[seg2 + 1]) * pos2 + return d, seg2 % line_len + + +def angle_between(p1: LonLat, c: LonLat, p2: LonLat) -> float: + a = round( + abs( + math.degrees( + math.atan2(p1[1] - c[1], p1[0] 
def el_id(el: OsmElementT) -> IdT | None:
    """Build a short element id: first letter of the element type
    followed by its "id" (or, if absent, its "ref").

    Returns None for an empty/None element; raises Exception if the
    element has no "type" key.
    """
    if el:
        if "type" in el:
            number = el.get("id", el.get("ref", ""))
            return el["type"][0] + str(number)
        raise Exception("What is this element? {}".format(el))
    return None


def el_center(el: OsmElementT) -> LonLat | None:
    """Return (lon, lat) of an element, taken either from its own
    "lon"/"lat" keys or from its "center" sub-dict; None if neither exists
    or the element is empty.
    """
    if not el:
        return None
    if "lat" in el:
        holder = el
    elif "center" in el:
        holder = el["center"]
    else:
        return None
    return holder["lon"], holder["lat"]


def get_network(relation: OsmElementT) -> str | None:
    """Return the value of the first present tag among "network:metro",
    "network" and "operator" (in this order), or None if none is set.
    """
    tags = relation["tags"]
    candidates = ("network:metro", "network", "operator")
    return next((tags[key] for key in candidates if key in tags), None)
def overpass_request(
    overground: bool, overpass_api: str, bboxes: list[list[float]]
) -> list[OsmElementT]:
    """Run a single Overpass query for the given bboxes.

    :param overground: passed to compose_overpass_request to select
        the set of transport modes
    :param overpass_api: URL of the Overpass API endpoint
    :param bboxes: bounding boxes to query
    :return: the "elements" list of the JSON response
    :raises Exception: if the response status is not 200
    """
    query = compose_overpass_request(overground, bboxes)
    url = f"{overpass_api}?data={urllib.parse.quote(query)}"
    # Use the response as a context manager so that the underlying
    # connection is closed even if the status check or JSON parsing fails.
    with urllib.request.urlopen(url, timeout=1000) as response:
        if (r_code := response.getcode()) != 200:
            raise Exception(f"Failed to query Overpass API: HTTP {r_code}")
        return json.load(response)["elements"]


def multi_overpass(
    overground: bool, overpass_api: str, bboxes: list[list[float]]
) -> list[OsmElementT]:
    """Query Overpass for many bboxes in slices of SLICE_SIZE,
    waiting INTERREQUEST_WAIT seconds between consecutive requests,
    and return the concatenated elements of all responses.
    An empty `bboxes` list yields an empty result without any request.
    """
    SLICE_SIZE = 10
    INTERREQUEST_WAIT = 5  # in seconds
    result = []
    for i in range(0, len(bboxes), SLICE_SIZE):
        if i > 0:
            time.sleep(INTERREQUEST_WAIT)
        bboxes_i = bboxes[i : i + SLICE_SIZE]  # noqa E203
        result.extend(overpass_request(overground, overpass_api, bboxes_i))
    return result
import gtfs, mapsme # noqa F401 +from ._common import transit_to_dict + + +__all__ = ["gtfs", "mapsme", "transit_to_dict"] diff --git a/processors/_common.py b/subways/processors/_common.py similarity index 80% rename from processors/_common.py rename to subways/processors/_common.py index e9337190..55658940 100644 --- a/processors/_common.py +++ b/subways/processors/_common.py @@ -1,21 +1,26 @@ -from typing import List, Set +from __future__ import annotations -from subway_structure import City, el_center, StopArea +import typing + +from subways.osm_element import el_center +from subways.types import TransfersT + +if typing.TYPE_CHECKING: + from subways.structure.city import City DEFAULT_INTERVAL = 2.5 * 60 # seconds KMPH_TO_MPS = 1 / 3.6 # km/h to m/s conversion multiplier +DEFAULT_AVE_VEHICLE_SPEED = 40 * KMPH_TO_MPS # m/s SPEED_ON_TRANSFER = 3.5 * KMPH_TO_MPS # m/s TRANSFER_PENALTY = 30 # seconds -def format_colour(colour): +def format_colour(colour: str | None) -> str | None: """Truncate leading # sign.""" return colour[1:] if colour else None -def transit_to_dict( - cities: List[City], transfers: List[Set[StopArea]] -) -> dict: +def transit_to_dict(cities: list[City], transfers: TransfersT) -> dict: """Get data for good cities as a dictionary.""" data = { "stopareas": {}, # stoparea id => stoparea data @@ -48,6 +53,7 @@ def transit_to_dict( "start_time": route.start_time, "end_time": route.end_time, "interval": route.interval, + "duration": route.duration, "stops": [ { "stoparea_id": route_stop.stoparea.id, @@ -91,18 +97,17 @@ def transit_to_dict( # transfers pairwise_transfers = set() - for stoparea_set in transfers: - stoparea_list = list(stoparea_set) - for first_i in range(len(stoparea_list) - 1): - for second_i in range(first_i + 1, len(stoparea_list)): - stoparea1_id = stoparea_list[first_i].id - stoparea2_id = stoparea_list[second_i].id + for stoparea_id_set in transfers: + stoparea_ids = sorted(stoparea_id_set) + for first_i in range(len(stoparea_ids) 
- 1): + for second_i in range(first_i + 1, len(stoparea_ids)): + stoparea1_id = stoparea_ids[first_i] + stoparea2_id = stoparea_ids[second_i] if all( st_id in data["stopareas"] for st_id in (stoparea1_id, stoparea2_id) ): - id1, id2 = sorted([stoparea1_id, stoparea2_id]) - pairwise_transfers.add((id1, id2)) + pairwise_transfers.add((stoparea1_id, stoparea2_id)) data["transfers"] = pairwise_transfers return data diff --git a/processors/gtfs.py b/subways/processors/gtfs.py similarity index 91% rename from processors/gtfs.py rename to subways/processors/gtfs.py index 5dc39526..df70cc72 100644 --- a/processors/gtfs.py +++ b/subways/processors/gtfs.py @@ -1,23 +1,27 @@ +from __future__ import annotations + import csv +import typing from functools import partial from io import BytesIO, StringIO from itertools import permutations from tarfile import TarFile, TarInfo -from typing import List, Optional, Set from zipfile import ZipFile from ._common import ( + DEFAULT_AVE_VEHICLE_SPEED, DEFAULT_INTERVAL, format_colour, + KMPH_TO_MPS, SPEED_ON_TRANSFER, TRANSFER_PENALTY, transit_to_dict, ) -from subway_structure import ( - City, - distance, - StopArea, -) +from subways.types import TransfersT +from subways.geom_utils import distance + +if typing.TYPE_CHECKING: + from subways.structure.city import City DEFAULT_TRIP_START_TIME = (5, 0) # 05:00 @@ -61,6 +65,7 @@ "trip_route_type", "route_pattern_id", "bikes_allowed", + "average_speed", # extension field (km/h) ], "stops": [ "stop_id", @@ -133,13 +138,13 @@ } -def round_coords(coords_tuple): +def round_coords(coords_tuple: tuple) -> tuple: return tuple( map(lambda coord: round(coord, COORDINATE_PRECISION), coords_tuple) ) -def transit_data_to_gtfs(data): +def transit_data_to_gtfs(data: dict) -> dict: # Keys correspond GTFS file names gtfs_data = {key: [] for key in GTFS_COLUMNS.keys()} @@ -240,11 +245,22 @@ def transit_data_to_gtfs(data): for itinerary in route_master["itineraries"]: shape_id = itinerary["id"][1:] # truncate 
leading 'r' + average_speed = round( + ( + DEFAULT_AVE_VEHICLE_SPEED + if not itinerary["duration"] + else itinerary["stops"][-1]["distance"] + / itinerary["duration"] + ) + / KMPH_TO_MPS, + 1, + ) # km/h trip = { "trip_id": itinerary["id"], "route_id": route_master["id"], "service_id": "always", "shape_id": shape_id, + "average_speed": average_speed, } gtfs_data["trips"].append(trip) @@ -313,14 +329,14 @@ def transit_data_to_gtfs(data): def process( - cities: List[City], - transfers: List[Set[StopArea]], + cities: list[City], + transfers: TransfersT, filename: str, - cache_path: str, -): + cache_path: str | None, +) -> None: """Generate all output and save to file. - :param cities: List of City instances - :param transfers: List of sets of StopArea.id + :param cities: list of City instances + :param transfers: all collected transfers in the world :param filename: Path to file to save the result :param cache_path: Path to json-file with good cities cache or None. """ @@ -344,9 +360,7 @@ def dict_to_row(dict_data: dict, record_type: str) -> list: ] -def make_gtfs( - filename: str, gtfs_data: dict, fmt: Optional[str] = None -) -> None: +def make_gtfs(filename: str, gtfs_data: dict, fmt: str | None = None) -> None: if not fmt: fmt = "tar" if filename.endswith(".tar") else "zip" diff --git a/processors/mapsme.py b/subways/processors/mapsme.py similarity index 80% rename from processors/mapsme.py rename to subways/processors/mapsme.py index b8818ea5..32f5b695 100755 --- a/processors/mapsme.py +++ b/subways/processors/mapsme.py @@ -1,15 +1,20 @@ +from __future__ import annotations + import json import logging import os +import typing from collections import defaultdict - -from subway_structure import ( - DISPLACEMENT_TOLERANCE, - distance, - el_center, - Station, -) +from collections.abc import Callable +from typing import Any, TypeAlias + +from subways.consts import DISPLACEMENT_TOLERANCE +from subways.geom_utils import distance +from subways.osm_element import 
el_center +from subways.structure.station import Station +from subways.types import IdT, LonLat, OsmElementT, TransfersT from ._common import ( + DEFAULT_AVE_VEHICLE_SPEED, DEFAULT_INTERVAL, format_colour, KMPH_TO_MPS, @@ -17,14 +22,20 @@ TRANSFER_PENALTY, ) +if typing.TYPE_CHECKING: + from subways.structure.city import City + from subways.structure.stop_area import StopArea + OSM_TYPES = {"n": (0, "node"), "w": (2, "way"), "r": (3, "relation")} ENTRANCE_PENALTY = 60 # seconds SPEED_TO_ENTRANCE = 5 * KMPH_TO_MPS # m/s -SPEED_ON_LINE = 40 * KMPH_TO_MPS # m/s + +# (stoparea1_uid, stoparea2_uid) -> seconds; stoparea1_uid < stoparea2_uid +TransferTimesT: TypeAlias = dict[tuple[int, int], int] -def uid(elid, typ=None): +def uid(elid: IdT, typ: str | None = None) -> int: t = elid[0] osm_id = int(elid[1:]) if not typ: @@ -37,24 +48,24 @@ def uid(elid, typ=None): class DummyCache: """This class may be used when you need to omit all cache processing""" - def __init__(self, cache_path, cities): + def __init__(self, cache_path: str, cities: list[City]) -> None: pass - def __getattr__(self, name): + def __getattr__(self, name: str) -> Callable[..., None]: """This results in that a call to any method effectively does nothing and does not generate exceptions.""" - def method(*args, **kwargs): + def method(*args, **kwargs) -> None: return None return method -def if_object_is_used(method): +def if_object_is_used(method: Callable) -> Callable: """Decorator to skip method execution under certain condition. 
Relies on "is_used" object property.""" - def inner(self, *args, **kwargs): + def inner(self, *args, **kwargs) -> Any: if not self.is_used: return return method(self, *args, **kwargs) @@ -63,7 +74,7 @@ def inner(self, *args, **kwargs): class MapsmeCache: - def __init__(self, cache_path, cities): + def __init__(self, cache_path: str, cities: list[City]) -> None: if not cache_path: # Cache is not used, # all actions with cache must be silently skipped @@ -88,7 +99,7 @@ def __init__(self, cache_path, cities): self.city_dict = {c.name: c for c in cities} self.good_city_names = {c.name for c in cities if c.is_good} - def _is_cached_city_usable(self, city): + def _is_cached_city_usable(self, city: City) -> bool: """Check if cached stations still exist in osm data and not moved far away. """ @@ -103,8 +114,9 @@ def _is_cached_city_usable(self, city): ): return False station_coords = el_center(city_station) - cached_station_coords = tuple( - cached_stoparea[coord] for coord in ("lon", "lat") + cached_station_coords = ( + cached_stoparea["lon"], + cached_stoparea["lat"], ) displacement = distance(station_coords, cached_station_coords) if displacement > DISPLACEMENT_TOLERANCE: @@ -113,7 +125,9 @@ def _is_cached_city_usable(self, city): return True @if_object_is_used - def provide_stops_and_networks(self, stops, networks): + def provide_stops_and_networks( + self, stops: dict, networks: list[dict] + ) -> None: """Put stops and networks for bad cities into containers passed as arguments.""" for city in self.city_dict.values(): @@ -126,7 +140,7 @@ def provide_stops_and_networks(self, stops, networks): self.recovered_city_names.add(city.name) @if_object_is_used - def provide_transfers(self, transfers): + def provide_transfers(self, transfers: TransferTimesT) -> None: """Add transfers from usable cached cities to 'transfers' dict passed as argument.""" for city_name in self.recovered_city_names: @@ -136,7 +150,7 @@ def provide_transfers(self, transfers): transfers[(stop1_uid, 
stop2_uid)] = transfer_time @if_object_is_used - def initialize_good_city(self, city_name, network): + def initialize_good_city(self, city_name: str, network: dict) -> None: """Create/replace one cache element with new data container. This should be done for each good city.""" self.cache[city_name] = { @@ -147,20 +161,22 @@ def initialize_good_city(self, city_name, network): } @if_object_is_used - def link_stop_with_city(self, stoparea_id, city_name): + def link_stop_with_city(self, stoparea_id: IdT, city_name: str) -> None: """Remember that some stop_area is used in a city.""" stoparea_uid = uid(stoparea_id) self.stop_cities[stoparea_uid].add(city_name) @if_object_is_used - def add_stop(self, stoparea_id, st): + def add_stop(self, stoparea_id: IdT, st: dict) -> None: """Add stoparea to the cache of each city the stoparea is in.""" stoparea_uid = uid(stoparea_id) for city_name in self.stop_cities[stoparea_uid]: self.cache[city_name]["stops"][stoparea_id] = st @if_object_is_used - def add_transfer(self, stoparea1_uid, stoparea2_uid, transfer_time): + def add_transfer( + self, stoparea1_uid: int, stoparea2_uid: int, transfer_time: int + ) -> None: """If a transfer is inside a good city, add it to the city's cache.""" for city_name in ( self.good_city_names @@ -172,7 +188,7 @@ def add_transfer(self, stoparea1_uid, stoparea2_uid, transfer_time): ) @if_object_is_used - def save(self): + def save(self) -> None: try: with open(self.cache_path, "w", encoding="utf-8") as f: json.dump(self.cache, f, ensure_ascii=False) @@ -180,15 +196,18 @@ def save(self): logging.warning("Failed to save cache: %s", str(e)) -def process(cities, transfers, filename, cache_path): +def transit_data_to_mapsme( + cities: list[City], transfers: TransfersT, cache_path: str | None +) -> dict: """Generate all output and save to file. 
:param cities: List of City instances :param transfers: List of sets of StopArea.id - :param filename: Path to file to save the result :param cache_path: Path to json-file with good cities cache or None. """ - def find_exits_for_platform(center, nodes): + def find_exits_for_platform( + center: LonLat, nodes: list[OsmElementT] + ) -> list[OsmElementT]: exits = [] min_distance = None for n in nodes: @@ -209,8 +228,8 @@ def find_exits_for_platform(center, nodes): cache = MapsmeCache(cache_path, cities) - stop_areas = {} # stoparea el_id -> StopArea instance - stops = {} # stoparea el_id -> stop jsonified data + stop_areas: dict[IdT, StopArea] = {} + stops: dict[IdT, dict] = {} # stoparea el_id -> stop jsonified data networks = [] good_cities = [c for c in cities if c.is_good] platform_nodes = {} @@ -239,7 +258,7 @@ def find_exits_for_platform(center, nodes): itin.append( [ uid(stop.stoparea.id), - round(stop.distance / SPEED_ON_LINE), + round(stop.distance / DEFAULT_AVE_VEHICLE_SPEED), ] ) # Make exits from platform nodes, @@ -359,21 +378,22 @@ def find_exits_for_platform(center, nodes): stops[stop_id] = st cache.add_stop(stop_id, st) - pairwise_transfers = ( - {} - ) # (stoparea1_uid, stoparea2_uid) -> time; uid1 < uid2 - for t_set in transfers: - t = list(t_set) - for t_first in range(len(t) - 1): - for t_second in range(t_first + 1, len(t)): - stoparea1 = t[t_first] - stoparea2 = t[t_second] - if stoparea1.id in stops and stoparea2.id in stops: - uid1 = uid(stoparea1.id) - uid2 = uid(stoparea2.id) + pairwise_transfers: TransferTimesT = {} + for stoparea_id_set in transfers: + stoparea_ids = list(stoparea_id_set) + for i_first in range(len(stoparea_ids) - 1): + for i_second in range(i_first + 1, len(stoparea_ids)): + stoparea1_id = stoparea_ids[i_first] + stoparea2_id = stoparea_ids[i_second] + if stoparea1_id in stops and stoparea2_id in stops: + uid1 = uid(stoparea1_id) + uid2 = uid(stoparea2_id) uid1, uid2 = sorted([uid1, uid2]) transfer_time = TRANSFER_PENALTY + 
round( - distance(stoparea1.center, stoparea2.center) + distance( + stop_areas[stoparea1_id].center, + stop_areas[stoparea2_id].center, + ) / SPEED_ON_TRANSFER ) pairwise_transfers[(uid1, uid2)] = transfer_time @@ -382,23 +402,39 @@ def find_exits_for_platform(center, nodes): cache.provide_transfers(pairwise_transfers) cache.save() - pairwise_transfers = [ + pairwise_transfers_list = [ (stop1_uid, stop2_uid, transfer_time) for (stop1_uid, stop2_uid), transfer_time in pairwise_transfers.items() ] result = { "stops": list(stops.values()), - "transfers": pairwise_transfers, + "transfers": pairwise_transfers_list, "networks": networks, } + return result + +def process( + cities: list[City], + transfers: TransfersT, + filename: str, + cache_path: str | None, +) -> None: + """Generate all output and save to file. + :param cities: list of City instances + :param transfers: all collected transfers in the world + :param filename: Path to file to save the result + :param cache_path: Path to json-file with good cities cache or None. 
+ """ if not filename.lower().endswith("json"): filename = f"{filename}.json" + mapsme_transit = transit_data_to_mapsme(cities, transfers, cache_path) + with open(filename, "w", encoding="utf-8") as f: json.dump( - result, + mapsme_transit, f, indent=1, ensure_ascii=False, diff --git a/requirements.txt b/subways/requirements.txt similarity index 100% rename from requirements.txt rename to subways/requirements.txt diff --git a/subways/structure/__init__.py b/subways/structure/__init__.py new file mode 100644 index 00000000..6ef67d13 --- /dev/null +++ b/subways/structure/__init__.py @@ -0,0 +1,17 @@ +from .city import City, get_unused_subway_entrances_geojson +from .route import Route +from .route_master import RouteMaster +from .route_stop import RouteStop +from .station import Station +from .stop_area import StopArea + + +__all__ = [ + "City", + "get_unused_subway_entrances_geojson", + "Route", + "RouteMaster", + "RouteStop", + "Station", + "StopArea", +] diff --git a/subways/structure/city.py b/subways/structure/city.py new file mode 100644 index 00000000..480a0fd6 --- /dev/null +++ b/subways/structure/city.py @@ -0,0 +1,622 @@ +from __future__ import annotations + +from collections import Counter, defaultdict +from collections.abc import Collection, Iterator +from itertools import chain + +from subways.consts import ( + DEFAULT_MODES_OVERGROUND, + DEFAULT_MODES_RAPID, +) +from subways.osm_element import el_center, el_id, get_network +from subways.structure.route import Route +from subways.structure.route_master import RouteMaster +from subways.structure.station import Station +from subways.structure.stop_area import StopArea +from subways.types import ( + IdT, + OsmElementT, + TransfersT, + TransferT, +) + +ALLOWED_STATIONS_MISMATCH = 0.02 # part of total station count +ALLOWED_TRANSFERS_MISMATCH = 0.07 # part of total interchanges count + +used_entrances = set() + + +def format_elid_list(ids: Collection[IdT]) -> str: + msg = ", ".join(sorted(ids)[:20]) + if 
len(ids) > 20: + msg += ", ..." + return msg + + +class City: + route_class = Route + + def __init__(self, city_data: dict, overground: bool = False) -> None: + self.validate_called = False + self.errors: list[str] = [] + self.warnings: list[str] = [] + self.notices: list[str] = [] + self.id = None + self.try_fill_int_attribute(city_data, "id") + self.name = city_data["name"] + self.country = city_data["country"] + self.continent = city_data["continent"] + self.overground = overground + if not overground: + self.try_fill_int_attribute(city_data, "num_stations") + self.try_fill_int_attribute(city_data, "num_lines", "0") + self.try_fill_int_attribute(city_data, "num_light_lines", "0") + self.try_fill_int_attribute(city_data, "num_interchanges", "0") + else: + self.try_fill_int_attribute(city_data, "num_tram_lines", "0") + self.try_fill_int_attribute(city_data, "num_trolleybus_lines", "0") + self.try_fill_int_attribute(city_data, "num_bus_lines", "0") + self.try_fill_int_attribute(city_data, "num_other_lines", "0") + + # Acquiring list of networks and modes + networks = ( + None + if not city_data["networks"] + else city_data["networks"].split(":") + ) + if not networks or len(networks[-1]) == 0: + self.networks = [] + else: + self.networks = set( + filter(None, [x.strip() for x in networks[-1].split(";")]) + ) + if not networks or len(networks) < 2 or len(networks[0]) == 0: + if self.overground: + self.modes = DEFAULT_MODES_OVERGROUND + else: + self.modes = DEFAULT_MODES_RAPID + else: + self.modes = {x.strip() for x in networks[0].split(",")} + + # Reversing bbox so it is (xmin, ymin, xmax, ymax) + bbox = city_data["bbox"].split(",") + if len(bbox) == 4: + self.bbox = [float(bbox[i]) for i in (1, 0, 3, 2)] + else: + self.bbox = None + + self.elements: dict[IdT, OsmElementT] = {} + self.stations: dict[IdT, list[StopArea]] = defaultdict(list) + self.routes: dict[str, RouteMaster] = {} # keys are route_master refs + self.masters: dict[IdT, OsmElementT] = {} # Route id → 
def try_fill_int_attribute(
    self, city_data: dict, attr: str, default: str | None = None
) -> None:
    """Try to convert string value to int. Conversion is considered
    to fail if one of the following is true:
    * attr is not empty and data type casting fails;
    * attr is empty and no default value is given.
    In such cases the city is marked as bad by adding an error
    to the city validation log, and the attribute is set to 0.

    :param city_data: mapping that must contain the `attr` key
    :param attr: name of both the key in `city_data` and the attribute
        to set on self
    :param default: value used instead of an empty `city_data[attr]`
    """
    raw_value = city_data[attr]
    attr_value = raw_value
    if not attr_value and default is not None:
        attr_value = default

    try:
        attr_int = int(attr_value)
    except (TypeError, ValueError):
        # ValueError: empty or non-numeric string;
        # TypeError: value is None (or another non-convertible type)
        print_value = f"{raw_value}" if raw_value else ""
        self.error(
            f"Configuration error: wrong value for {attr}: {print_value}"
        )
        setattr(self, attr, 0)
    else:
        setattr(self, attr, attr_int)
def add(self, el: OsmElementT) -> None:
    """Register an OSM element in the city and index its relation roles.

    Relations without a "members" key are ignored. Every other element
    is stored in ``self.elements`` under its ``el_id``. Tagged relations
    are additionally indexed:
      * ``type=route_master``: each member relation is mapped to this
        master in ``self.masters``; an error is logged when a member
        already has a master (the newer master overwrites the older);
      * ``public_transport=stop_area``: the relation is appended to
        ``self.stop_areas`` for each of its members. A stop_area whose
        ``type`` tag is not ``public_transport`` is only warned about
        and is not indexed.

    :param el: OSM element dict with at least a "type" key.
    """
    if el["type"] == "relation" and "members" not in el:
        return

    self.elements[el_id(el)] = el
    if not (el["type"] == "relation" and "tags" in el):
        return

    relation_type = el["tags"].get("type")
    if relation_type == "route_master":
        for m in el["members"]:
            if m["type"] != "relation":
                continue

            if el_id(m) in self.masters:
                self.error("Route in two route_masters", m)
            self.masters[el_id(m)] = el

    elif el["tags"].get("public_transport") == "stop_area":
        if relation_type != "public_transport":
            self.warn(
                "stop_area relation with "
                f"type={relation_type}, needed type=public_transport",
                el,
            )
            return

        warned_about_duplicates = False
        for m in el["members"]:
            stop_areas = self.stop_areas[el_id(m)]
            if el not in stop_areas:
                stop_areas.append(el)
            elif not warned_about_duplicates:
                # Warn once per stop area. A repeated member must never
                # be appended again: previously, once the warning had
                # been issued, later duplicates fell through to append.
                self.warn("Duplicate element in a stop area", el)
                warned_about_duplicates = True
el["tags"].get("public_transport") != "stop_area" + ): + continue + if k in self.stations: + stoparea = self.stations[k][0] + transfer.add(stoparea) + if stoparea.transfer: + # TODO: properly process such cases. + # Counterexample 1: Paris, + # Châtelet subway station <-> + # "Châtelet - Les Halles" railway station <-> + # Les Halles subway station + # Counterexample 2: Saint-Petersburg, transfers + # Витебский вокзал <-> + # Пушкинская <-> + # Звенигородская + self.warn( + "Stop area {} belongs to multiple interchanges".format( + k + ) + ) + stoparea.transfer = el_id(stoparea_group) + if len(transfer) > 1: + self.transfers.append(transfer) + + def extract_routes(self) -> None: + # Extract stations + processed_stop_areas = set() + for el in self.elements.values(): + if Station.is_station(el, self.modes): + # See PR https://github.com/mapsme/subways/pull/98 + if ( + el["type"] == "relation" + and el["tags"].get("type") != "multipolygon" + ): + rel_type = el["tags"].get("type") + self.warn( + "A railway station cannot be a relation of type " + f"{rel_type}", + el, + ) + continue + st = Station(el, self) + self.station_ids.add(st.id) + if st.id in self.stop_areas: + stations = [] + for sa in self.stop_areas[st.id]: + stations.append(StopArea(st, self, sa)) + else: + stations = [StopArea(st, self)] + + for station in stations: + if station.id not in processed_stop_areas: + processed_stop_areas.add(station.id) + for st_el in station.get_elements(): + self.stations[st_el].append(station) + + # Check that stops and platforms belong to + # a single stop_area + for sp in chain(station.stops, station.platforms): + if sp in self.stops_and_platforms: + self.notice( + f"A stop or a platform {sp} belongs to " + "multiple stop areas, might be correct" + ) + else: + self.stops_and_platforms.add(sp) + + # Extract routes + for el in self.elements.values(): + if Route.is_route(el, self.modes): + if el["tags"].get("access") in ("no", "private"): + continue + route_id = el_id(el) + 
def stopareas(self) -> Iterator[StopArea]:
    """Lazily yield every stop area of the city's route masters once.

    Route masters are taken by iterating over ``self``; a stop area that
    was already yielded (by this or an earlier route master) is skipped,
    so the order is that of first appearance.
    """
    seen = set()
    all_stopareas = chain.from_iterable(
        route_master.stopareas() for route_master in self
    )
    for stoparea in all_stopareas:
        if stoparea in seen:
            continue
        yield stoparea
        seen.add(stoparea)
def get_validation_result(self) -> dict:
    """Build a summary dict of the city validation.

    The dict holds the city name/country/continent, the found counters
    common to all cities, the expected/found counters specific to the
    city kind (rapid transit or overground), and the collected
    warnings, errors and notices. A counter attribute that is not set
    on the instance is reported as 0. For overground cities the
    expected stations and transfers are always 0.
    """

    def counter(attr_name: str):
        return getattr(self, attr_name, 0)

    result = {
        "name": self.name,
        "country": self.country,
        "continent": self.continent,
    }
    for key, attr_name in (
        ("stations_found", "found_stations"),
        ("transfers_found", "found_interchanges"),
        ("unused_entrances", "unused_entrances"),
        ("networks", "found_networks"),
    ):
        result[key] = counter(attr_name)

    if self.overground:
        result["stations_expected"] = 0
        result["transfers_expected"] = 0
        kind_specific = (
            ("busl_expected", "num_bus_lines"),
            ("trolleybusl_expected", "num_trolleybus_lines"),
            ("traml_expected", "num_tram_lines"),
            ("otherl_expected", "num_other_lines"),
            ("busl_found", "found_bus_lines"),
            ("trolleybusl_found", "found_trolleybus_lines"),
            ("traml_found", "found_tram_lines"),
            ("otherl_found", "found_other_lines"),
        )
    else:
        kind_specific = (
            ("subwayl_expected", "num_lines"),
            ("lightrl_expected", "num_light_lines"),
            ("subwayl_found", "found_lines"),
            ("lightrl_found", "found_light_lines"),
            ("stations_expected", "num_stations"),
            ("transfers_expected", "num_interchanges"),
        )
    for key, attr_name in kind_specific:
        result[key] = counter(attr_name)

    # The message lists are shared with the instance, not copied.
    result["warnings"] = self.warnings
    result["errors"] = self.errors
    result["notices"] = self.notices
    return result
def validate_overground_lines(self) -> None:
    """Count route masters per overground mode and check the tram count.

    Sets ``found_tram_lines``, ``found_bus_lines``,
    ``found_trolleybus_lines`` and ``found_other_lines`` (every mode
    other than those three). Only the tram count is compared with the
    expected ``num_tram_lines``: a mismatch is an error when no tram
    lines were found at all and a notice otherwise.
    """
    lines_by_mode = Counter(
        route_master.mode for route_master in self.routes.values()
    )
    self.found_tram_lines = lines_by_mode["tram"]
    self.found_bus_lines = lines_by_mode["bus"]
    self.found_trolleybus_lines = lines_by_mode["trolleybus"]
    self.found_other_lines = len(self.routes) - (
        self.found_tram_lines
        + self.found_bus_lines
        + self.found_trolleybus_lines
    )

    if self.found_tram_lines == self.num_tram_lines:
        return

    if self.found_tram_lines == 0:
        log_function = self.error
    else:
        log_function = self.notice
    log_function(
        "Found {} tram lines, expected {}".format(
            self.found_tram_lines, self.num_tram_lines
        ),
    )
def calculate_distances(self) -> None:
    """Call ``calculate_distances()`` on every route of every
    route master obtained by iterating over the city.
    """
    for route in chain.from_iterable(self):
        route.calculate_distances()
def get_unused_subway_entrances_geojson(elements: list[OsmElementT]) -> dict:
    """Build a GeoJSON FeatureCollection of unused subway entrances.

    A feature is produced for each tagged node with
    ``railway=subway_entrance`` whose id is absent from the module-level
    ``used_entrances``. The feature is a Point at the element center and
    carries all the element tags except "railway" and "entrance".
    """
    global used_entrances
    excluded_tags = ("railway", "entrance")
    features = [
        {
            "type": "Feature",
            "geometry": {"type": "Point", "coordinates": el_center(el)},
            "properties": {
                tag: value
                for tag, value in el["tags"].items()
                if tag not in excluded_tags
            },
        }
        for el in elements
        if el["type"] == "node"
        and "tags" in el
        and el["tags"].get("railway") == "subway_entrance"
        and el_id(el) not in used_entrances
    ]
    return {"type": "FeatureCollection", "features": features}
def osm_interval_to_seconds(interval_str: str) -> int | None:
    """Convert an OSM 'interval'/'headway'/'duration' value to seconds.

    Accepted formats
    (https://wiki.openstreetmap.org/wiki/Key:interval#Format):
        HH:MM:SS,
        HH:MM,
        MM,
        M
    A value without colons is a number of minutes and may exceed 59.
    Returns None for a malformed value, a negative component, minutes
    or seconds of 60 and more in a colon-separated value, and for
    a zero interval.
    """
    parts = interval_str.split(":")
    if len(parts) > 3:
        return None
    try:
        numbers = [int(part) for part in parts]
    except ValueError:
        return None

    if len(numbers) == 1:
        hours, minutes, seconds = 0, numbers[0], 0
    elif len(numbers) == 2:
        hours, minutes, seconds = *numbers, 0
    else:
        hours, minutes, seconds = numbers

    if min(hours, minutes, seconds) < 0:
        return None
    if len(numbers) > 1 and max(minutes, seconds) >= 60:
        return None

    total = seconds + 60 * minutes + 60 * 60 * hours
    return total or None
Collection[str] +) -> int | None: + """Extract time interval value from tags for keys among "keys". + E.g., "interval" and "headway" means the same in OSM. + Examples: + interval=5 => 300 + headway:peak=00:01:30 => 90 + """ + if isinstance(keys, str): + keys = (keys,) + + value = None + for key in keys: + if key in tags: + value = tags[key] + break + if value is None: + for key in keys: + if value: + break + for tag_name in tags: + if tag_name.startswith(key + ":"): + value = tags[tag_name] + break + if not value: + return None + return osm_interval_to_seconds(value) + + +def get_route_interval(tags: dict) -> int | None: + return get_interval_in_seconds_from_tags(tags, ("interval", "headway")) + + +def get_route_duration(tags: dict) -> int | None: + return get_interval_in_seconds_from_tags(tags, "duration") + + +class Route: + """The longest route for a city with a unique ref.""" + + @staticmethod + def is_route(el: OsmElementT, modes: set[str]) -> bool: + if ( + el["type"] != "relation" + or el.get("tags", {}).get("type") != "route" + ): + return False + if "members" not in el: + return False + if el["tags"].get("route") not in modes: + return False + for k in CONSTRUCTION_KEYS: + if k in el["tags"]: + return False + if "ref" not in el["tags"] and "name" not in el["tags"]: + return False + return True + + def stopareas(self) -> Iterator[StopArea]: + yielded_stopareas = set() + for route_stop in self: + stoparea = route_stop.stoparea + if stoparea not in yielded_stopareas: + yield stoparea + yielded_stopareas.add(stoparea) + + def __init__( + self, + relation: OsmElementT, + city: City, + master: OsmElementT | None = None, + ) -> None: + assert Route.is_route( + relation, city.modes + ), f"The relation does not seem to be a route: {relation}" + self.city = city + self.element: OsmElementT = relation + self.id: IdT = el_id(relation) + + self.ref = None + self.name = None + self.mode = None + self.colour = None + self.infill = None + self.network = None + 
self.interval = None + self.duration = None + self.start_time = None + self.end_time = None + self.is_circular = False + self.stops: list[RouteStop] = [] + # Would be a list of (lon, lat) for the longest stretch. Can be empty. + self.tracks = None + # Index of the first stop that is located on/near the self.tracks + self.first_stop_on_rails_index = None + # Index of the last stop that is located on/near the self.tracks + self.last_stop_on_rails_index = None + + self.process_tags(master) + stop_position_elements = self.process_stop_members() + self.process_tracks(stop_position_elements) + + def build_longest_line(self) -> tuple[list[IdT], set[IdT]]: + line_nodes: set[IdT] = set() + last_track: list[IdT] = [] + track: list[IdT] = [] + warned_about_holes = False + for m in self.element["members"]: + el = self.city.elements.get(el_id(m), None) + if not el or not StopArea.is_track(el): + continue + if "nodes" not in el or len(el["nodes"]) < 2: + self.city.error("Cannot find nodes in a railway", el) + continue + nodes: list[IdT] = ["n{}".format(n) for n in el["nodes"]] + if m["role"] == "backward": + nodes.reverse() + line_nodes.update(nodes) + if not track: + is_first = True + track.extend(nodes) + else: + new_segment = list(nodes) # copying + if new_segment[0] == track[-1]: + track.extend(new_segment[1:]) + elif new_segment[-1] == track[-1]: + track.extend(reversed(new_segment[:-1])) + elif is_first and track[0] in ( + new_segment[0], + new_segment[-1], + ): + # We can reverse the track and try again + track.reverse() + if new_segment[0] == track[-1]: + track.extend(new_segment[1:]) + else: + track.extend(reversed(new_segment[:-1])) + else: + # Store the track if it is long and clean it + if not warned_about_holes: + self.city.warn( + "Hole in route rails near node {}".format( + track[-1] + ), + self.element, + ) + warned_about_holes = True + if len(track) > len(last_track): + last_track = track + track = [] + is_first = False + if len(track) > len(last_track): + 
def get_stop_projections(self) -> tuple[list[dict], Callable[[int], bool]]:
    """Project every route stop onto ``self.tracks``.

    :return: a pair of
        * the list of ``project_on_line`` results, one per stop, in
          the order of ``self.stops``;
        * a predicate taking a stop index and telling whether that stop
          has a projected point lying no farther than
          MAX_DISTANCE_STOP_TO_LINE from the stop itself.
    """
    projected = [
        project_on_line(route_stop.stop, self.tracks)
        for route_stop in self.stops
    ]

    def stop_near_tracks_criterion(stop_index: int) -> bool:
        projected_point = projected[stop_index]["projected_point"]
        if projected_point is None:
            return False
        stop_coords = self.stops[stop_index].stop
        return (
            distance(stop_coords, projected_point)
            <= MAX_DISTANCE_STOP_TO_LINE
        )

    return projected, stop_near_tracks_criterion
def calculate_distances(self) -> None:
    """Set `distance` on every stop of the route to the running total
    of the leg lengths from the first stop (which gets 0).

    For each leg the straight-line distance between the two stops is
    computed. If the stop index lies within
    [first_stop_on_rails_index, last_stop_on_rails_index], the distance
    along ``self.tracks`` is requested too and preferred when plausible.
    """
    dist = 0
    # Passed to distance_on_line and updated from its result; presumably
    # the tracks vertex to resume the search from - confirm in geom_utils.
    vertex = 0
    for i, stop in enumerate(self.stops):
        if i > 0:
            direct = distance(stop.stop, self.stops[i - 1].stop)
            d_line = None
            if (
                self.first_stop_on_rails_index
                <= i
                <= self.last_stop_on_rails_index
            ):
                d_line = distance_on_line(
                    self.stops[i - 1].stop, stop.stop, self.tracks, vertex
                )
            # Trust the along-the-tracks value only if it is not shorter
            # than the direct distance by more than 10 and not more than
            # twice as long; otherwise fall back to the direct distance.
            if d_line and direct - 10 <= d_line[0] <= direct * 2:
                vertex = d_line[1]
                dist += round(d_line[0])
            else:
                dist += round(direct)
        stop.distance = dist
get_route_interval(tags) or get_route_interval( + master_tags + ) + self.duration = get_route_duration(tags) or get_route_duration( + master_tags + ) + parsed_time_range = parse_time_range( + tags.get("opening_hours", master_tags.get("opening_hours", "")) + ) + if parsed_time_range: + self.start_time, self.end_time = parsed_time_range + + if tags.get("public_transport:version") == "1": + self.city.warn( + "Public transport version is 1, which means the route " + "is an unsorted pile of objects", + relation, + ) + + def process_stop_members(self) -> list[OsmElementT]: + stations: set[StopArea] = set() # temporary for recording stations + seen_stops = False + seen_platforms = False + repeat_pos = None + stop_position_elements: list[OsmElementT] = [] + for m in self.element["members"]: + if "inactive" in m["role"]: + continue + k = el_id(m) + if k in self.city.stations: + st_list = self.city.stations[k] + st = st_list[0] + if len(st_list) > 1: + self.city.error( + f"Ambiguous station {st.name} in route. Please " + "use stop_position or split interchange stations", + self.element, + ) + el = self.city.elements[k] + actual_role = RouteStop.get_actual_role( + el, m["role"], self.city.modes + ) + if actual_role: + if m["role"] and actual_role not in m["role"]: + self.city.warn( + "Wrong role '{}' for {} {}".format( + m["role"], actual_role, k + ), + self.element, + ) + if repeat_pos is None: + if not self.stops or st not in stations: + stop = RouteStop(st) + self.stops.append(stop) + stations.add(st) + elif self.stops[-1].stoparea.id == st.id: + stop = self.stops[-1] + else: + # We've got a repeat + if ( + (seen_stops and seen_platforms) + or ( + actual_role == "stop" + and not seen_platforms + ) + or ( + actual_role == "platform" + and not seen_stops + ) + ): + # Circular route! 
+ stop = RouteStop(st) + self.stops.append(stop) + stations.add(st) + else: + repeat_pos = 0 + if repeat_pos is not None: + if repeat_pos >= len(self.stops): + continue + # Check that the type matches + if (actual_role == "stop" and seen_stops) or ( + actual_role == "platform" and seen_platforms + ): + self.city.error( + 'Found an out-of-place {}: "{}" ({})'.format( + actual_role, el["tags"].get("name", ""), k + ), + self.element, + ) + continue + # Find the matching stop starting with index repeat_pos + while ( + repeat_pos < len(self.stops) + and self.stops[repeat_pos].stoparea.id != st.id + ): + repeat_pos += 1 + if repeat_pos >= len(self.stops): + self.city.error( + "Incorrect order of {}s at {}".format( + actual_role, k + ), + self.element, + ) + continue + stop = self.stops[repeat_pos] + + stop.add(m, self.element, self.city) + if repeat_pos is None: + seen_stops |= stop.seen_stop or stop.seen_station + seen_platforms |= stop.seen_platform + + if StopArea.is_stop(el): + stop_position_elements.append(el) + + continue + + if k not in self.city.elements: + if "stop" in m["role"] or "platform" in m["role"]: + raise CriticalValidationError( + f"{m['role']} {m['type']} {m['ref']} for route " + f"relation {self.element['id']} is not in the dataset" + ) + continue + el = self.city.elements[k] + if "tags" not in el: + self.city.error( + f"Untagged object {k} in a route", self.element + ) + continue + + is_under_construction = False + for ck in CONSTRUCTION_KEYS: + if ck in el["tags"]: + self.city.warn( + f"Under construction {m['role'] or 'feature'} {k} " + "in route. Consider setting 'inactive' role or " + "removing construction attributes", + self.element, + ) + is_under_construction = True + break + if is_under_construction: + continue + + if Station.is_station(el, self.city.modes): + # A station may be not included in this route due to previous + # 'stop area has multiple stations' error. No other error + # message is needed. 
+ pass + elif el["tags"].get("railway") in ("station", "halt"): + self.city.error( + "Missing station={} on a {}".format(self.mode, m["role"]), + el, + ) + else: + actual_role = RouteStop.get_actual_role( + el, m["role"], self.city.modes + ) + if actual_role: + self.city.error( + f"{actual_role} {m['type']} {m['ref']} is not " + "connected to a station in route", + self.element, + ) + elif not StopArea.is_track(el): + self.city.warn( + "Unknown member type for {} {} in route".format( + m["type"], m["ref"] + ), + self.element, + ) + return stop_position_elements + + def process_tracks( + self, stop_position_elements: list[OsmElementT] + ) -> None: + tracks, line_nodes = self.build_longest_line() + + for stop_el in stop_position_elements: + stop_id = el_id(stop_el) + if stop_id not in line_nodes: + self.city.warn( + 'Stop position "{}" ({}) is not on tracks'.format( + stop_el["tags"].get("name", ""), stop_id + ), + self.element, + ) + + # self.tracks would be a list of (lon, lat) for the longest stretch. + # Can be empty. 
def apply_projected_stops_data(self, projected_stops_data: dict) -> None:
    """Copy the projection results onto the route and its stops.

    Stores the indexes of the first/last stops on rails in the instance
    attributes of the same names. Then, for each entry of
    "stops_on_longest_line", stores its positions on rails in the route
    stop and, when the entry has non-empty coords, replaces the route
    stop coordinates with them.
    """
    self.first_stop_on_rails_index = projected_stops_data[
        "first_stop_on_rails_index"
    ]
    self.last_stop_on_rails_index = projected_stops_data[
        "last_stop_on_rails_index"
    ]

    for stop_data in projected_stops_data["stops_on_longest_line"]:
        route_stop = stop_data["route_stop"]
        route_stop.positions_on_rails = stop_data["positions_on_rails"]
        stop_coords = stop_data["coords"]
        if stop_coords:
            route_stop.stop = stop_coords
def get_truncated_tracks(self, tracks: RailT) -> RailT:
    """Truncate leading/trailing segments of `tracks` param
    that are beyond the first and last stop locations.

    Always returns a new array; the `tracks` argument itself is never
    modified. For a circular route the copy is returned untruncated.
    Otherwise the end points of the result are replaced with
    the coordinates of the last/first stop whenever ``find_segment``
    locates that stop on the tracks.
    """
    # Copy up front: the end points are overwritten in place below, and
    # when no slicing happens that would otherwise write into (or
    # return) the caller's own list, contrary to the contract above.
    tracks = tracks.copy()
    if self.is_circular:
        return tracks

    # Both locations are searched on the untruncated line; cutting
    # the tail first keeps the head segment indexes valid.
    first_stop_location = find_segment(self.stops[0].stop, tracks, 0)
    last_stop_location = find_segment(self.stops[-1].stop, tracks, 0)

    if last_stop_location != (None, None):
        seg2, u2 = last_stop_location
        if u2 == 0.0:
            # Make seg2 the segment the last_stop_location is
            # at the middle or end of
            seg2 -= 1
            # u2 = 1.0
        if seg2 + 2 < len(tracks):
            tracks = tracks[0 : seg2 + 2]  # noqa E203
        tracks[-1] = self.stops[-1].stop

    if first_stop_location != (None, None):
        seg1, u1 = first_stop_location
        if u1 == 1.0:
            # Make seg1 the segment the first_stop_location is
            # at the beginning or middle of
            seg1 += 1
            # u1 = 0.0
        if seg1 > 0:
            tracks = tracks[seg1:]
        tracks[0] = self.stops[0].stop

    return tracks
< ALLOWED_ANGLE_BETWEEN_STOPS: + msg = ( + "Angle between stops around " + f'"{route_stop.stoparea.name}" {route_stop.stop} ' + f"is too narrow, {angle} degrees" + ) + if angle < DISALLOWED_ANGLE_BETWEEN_STOPS: + disorder_errors.append(msg) + else: + disorder_warnings.append(msg) + return disorder_warnings, disorder_errors + + def check_stops_order_on_tracks_direct( + self, stop_sequence: Iterator[dict] + ) -> str | None: + """Checks stops order on tracks, following stop_sequence + in direct order only. + :param stop_sequence: list of dict{'route_stop', 'positions_on_rails', + 'coords'} for RouteStops that belong to the longest contiguous + sequence of tracks in a route. + :return: error message on the first order violation or None. + """ + allowed_order_violations = 1 if self.is_circular else 0 + max_position_on_rails = -1 + for stop_data in stop_sequence: + positions_on_rails = stop_data["positions_on_rails"] + suitable_occurrence = 0 + while ( + suitable_occurrence < len(positions_on_rails) + and positions_on_rails[suitable_occurrence] + < max_position_on_rails + ): + suitable_occurrence += 1 + if suitable_occurrence == len(positions_on_rails): + if allowed_order_violations > 0: + suitable_occurrence -= 1 + allowed_order_violations -= 1 + else: + route_stop = stop_data["route_stop"] + return ( + "Stops on tracks are unordered near " + f'"{route_stop.stoparea.name}" {route_stop.stop}' + ) + max_position_on_rails = positions_on_rails[suitable_occurrence] + + def check_stops_order_on_tracks( + self, projected_stops_data: dict + ) -> str | None: + """Checks stops order on tracks, trying direct and reversed + order of stops in the stop_sequence. + :param projected_stops_data: info about RouteStops that belong to the + longest contiguous sequence of tracks in a route. May be changed + if tracks reversing is performed. + :return: error message on the first order violation or None. 
+ """ + error_message = self.check_stops_order_on_tracks_direct( + projected_stops_data["stops_on_longest_line"] + ) + if error_message: + error_message_reversed = self.check_stops_order_on_tracks_direct( + reversed(projected_stops_data["stops_on_longest_line"]) + ) + if error_message_reversed is None: + error_message = None + self.city.warn( + "Tracks seem to go in the opposite direction to stops", + self.element, + ) + self.tracks.reverse() + new_projected_stops_data = self.project_stops_on_line() + projected_stops_data.update(new_projected_stops_data) + + return error_message + + def check_stops_order( + self, projected_stops_data: dict + ) -> tuple[list[str], list[str]]: + ( + angle_disorder_warnings, + angle_disorder_errors, + ) = self.check_stops_order_by_angle() + disorder_on_tracks_error = self.check_stops_order_on_tracks( + projected_stops_data + ) + disorder_warnings = angle_disorder_warnings + disorder_errors = angle_disorder_errors + if disorder_on_tracks_error: + disorder_errors.append(disorder_on_tracks_error) + return disorder_warnings, disorder_errors + + def check_and_recover_stops_order( + self, projected_stops_data: dict + ) -> None: + """ + :param projected_stops_data: may change if we need to reverse tracks + """ + disorder_warnings, disorder_errors = self.check_stops_order( + projected_stops_data + ) + if disorder_warnings or disorder_errors: + resort_success = False + if self.city.recovery_data: + resort_success = self.try_resort_stops() + if resort_success: + for msg in disorder_warnings: + self.city.notice(msg, self.element) + for msg in disorder_errors: + self.city.warn( + "Fixed with recovery data: " + msg, self.element + ) + + if not resort_success: + for msg in disorder_warnings: + self.city.notice(msg, self.element) + for msg in disorder_errors: + self.city.error(msg, self.element) + + def try_resort_stops(self) -> bool: + """Precondition: self.city.recovery_data is not None. 
+ Return success of station order recovering.""" + self_stops = {} # station name => RouteStop + for stop in self.stops: + station = stop.stoparea.station + stop_name = station.name + if stop_name == "?" and station.int_name: + stop_name = station.int_name + # We won't programmatically recover routes with repeating stations: + # such cases are rare and deserve manual verification + if stop_name in self_stops: + return False + self_stops[stop_name] = stop + + route_id = (self.colour, self.ref) + if route_id not in self.city.recovery_data: + return False + + stop_names = list(self_stops.keys()) + suitable_itineraries = [] + for itinerary in self.city.recovery_data[route_id]: + itinerary_stop_names = [ + stop["name"] for stop in itinerary["stations"] + ] + if not ( + len(stop_names) == len(itinerary_stop_names) + and sorted(stop_names) == sorted(itinerary_stop_names) + ): + continue + big_station_displacement = False + for it_stop in itinerary["stations"]: + name = it_stop["name"] + it_stop_center = it_stop["center"] + self_stop_center = self_stops[name].stoparea.station.center + if ( + distance(it_stop_center, self_stop_center) + > DISPLACEMENT_TOLERANCE + ): + big_station_displacement = True + break + if not big_station_displacement: + suitable_itineraries.append(itinerary) + + if len(suitable_itineraries) == 0: + return False + elif len(suitable_itineraries) == 1: + matching_itinerary = suitable_itineraries[0] + else: + from_tag = self.element["tags"].get("from") + to_tag = self.element["tags"].get("to") + if not from_tag and not to_tag: + return False + matching_itineraries = [ + itin + for itin in suitable_itineraries + if from_tag + and itin["from"] == from_tag + or to_tag + and itin["to"] == to_tag + ] + if len(matching_itineraries) != 1: + return False + matching_itinerary = matching_itineraries[0] + self.stops = [ + self_stops[stop["name"]] for stop in matching_itinerary["stations"] + ] + return True + + def get_end_transfers(self) -> tuple[IdT, IdT]: +
"""Using transfer ids because a train can arrive at different + stations within a transfer. But disregard transfer that may give + an impression of a circular route (for example, + Simonis / Elisabeth station and route 2 in Brussels). + """ + return ( + (self[0].stoparea.id, self[-1].stoparea.id) + if ( + self[0].stoparea.transfer is not None + and self[0].stoparea.transfer == self[-1].stoparea.transfer + ) + else ( + self[0].stoparea.transfer or self[0].stoparea.id, + self[-1].stoparea.transfer or self[-1].stoparea.id, + ) + ) + + def get_transfers_sequence(self) -> list[IdT]: + """Return a list of stoparea or transfer (if not None) ids.""" + transfer_seq = [ + stop.stoparea.transfer or stop.stoparea.id for stop in self + ] + if ( + self[0].stoparea.transfer is not None + and self[0].stoparea.transfer == self[-1].stoparea.transfer + ): + transfer_seq[0], transfer_seq[-1] = self.get_end_transfers() + return transfer_seq + + def __len__(self) -> int: + return len(self.stops) + + def __getitem__(self, i) -> RouteStop: + return self.stops[i] + + def __iter__(self) -> Iterator[RouteStop]: + return iter(self.stops) + + def __repr__(self) -> str: + return ( + "Route(id={}, mode={}, ref={}, name={}, network={}, interval={}, " + "circular={}, num_stops={}, line_length={} m, from={}, to={}" + ).format( + self.id, + self.mode, + self.ref, + self.name, + self.network, + self.interval, + self.is_circular, + len(self.stops), + self.stops[-1].distance, + self.stops[0], + self.stops[-1], + ) diff --git a/subways/structure/route_master.py b/subways/structure/route_master.py new file mode 100644 index 00000000..891ae203 --- /dev/null +++ b/subways/structure/route_master.py @@ -0,0 +1,473 @@ +from __future__ import annotations + +import typing +from collections.abc import Iterator +from typing import TypeVar + +from subways.consts import MAX_DISTANCE_STOP_TO_LINE +from subways.css_colours import normalize_colour +from subways.geom_utils import distance, project_on_line +from 
subways.osm_element import el_id, get_network +from subways.structure.route import get_route_duration, get_route_interval +from subways.structure.stop_area import StopArea +from subways.types import IdT, OsmElementT + +if typing.TYPE_CHECKING: + from subways.structure.city import City + from subways.structure.route_stop import RouteStop + + +SUGGEST_TRANSFER_MIN_DISTANCE = 100 # in meters + +T = TypeVar("T") + + +class RouteMaster: + def __init__(self, city: City, master: OsmElementT = None) -> None: + self.city = city + self.routes = [] + self.best: Route = None # noqa: F821 + self.id: IdT = el_id(master) + self.has_master = master is not None + self.interval_from_master = False + if master: + self.ref = master["tags"].get( + "ref", master["tags"].get("name", None) + ) + try: + self.colour = normalize_colour( + master["tags"].get("colour", None) + ) + except ValueError: + self.colour = None + try: + self.infill = normalize_colour( + master["tags"].get("colour:infill", None) + ) + except ValueError: + self.infill = None + self.network = get_network(master) + self.mode = master["tags"].get( + "route_master", None + ) # This tag is required, but okay + self.name = master["tags"].get("name", None) + self.interval = get_route_interval(master["tags"]) + self.interval_from_master = self.interval is not None + self.duration = get_route_duration(master["tags"]) + else: + self.ref = None + self.colour = None + self.infill = None + self.network = None + self.mode = None + self.name = None + self.interval = None + self.duration = None + + def stopareas(self) -> Iterator[StopArea]: + yielded_stopareas = set() + for route in self: + for stoparea in route.stopareas(): + if stoparea not in yielded_stopareas: + yield stoparea + yielded_stopareas.add(stoparea) + + def add(self, route: Route) -> None: # noqa: F821 + if not self.network: + self.network = route.network + elif route.network and route.network != self.network: + self.city.error( + 'Route has different network ("{}") from 
master "{}"'.format( + route.network, self.network + ), + route.element, + ) + + if not self.colour: + self.colour = route.colour + elif route.colour and route.colour != self.colour: + self.city.notice( + 'Route "{}" has different colour from master "{}"'.format( + route.colour, self.colour + ), + route.element, + ) + + if not self.infill: + self.infill = route.infill + elif route.infill and route.infill != self.infill: + self.city.notice( + ( + f'Route "{route.infill}" has different infill colour ' + f'from master "{self.infill}"' + ), + route.element, + ) + + if not self.ref: + self.ref = route.ref + elif route.ref != self.ref: + self.city.notice( + 'Route "{}" has different ref from master "{}"'.format( + route.ref, self.ref + ), + route.element, + ) + + if not self.name: + self.name = route.name + + if not self.mode: + self.mode = route.mode + elif route.mode != self.mode: + self.city.error( + "Incompatible PT mode: master has {} and route has {}".format( + self.mode, route.mode + ), + route.element, + ) + return + + if not self.interval_from_master and route.interval: + if not self.interval: + self.interval = route.interval + else: + self.interval = min(self.interval, route.interval) + + # Choose minimal id for determinism + if not self.has_master and (not self.id or self.id > route.id): + self.id = route.id + + self.routes.append(route) + if ( + not self.best + or len(route.stops) > len(self.best.stops) + or ( + # Choose route with minimal id for determinism + len(route.stops) == len(self.best.stops) + and route.element["id"] < self.best.element["id"] + ) + ): + self.best = route + + def get_meaningful_routes(self) -> list[Route]: # noqa: F821 + return [route for route in self if len(route) >= 2] + + def find_twin_routes(self) -> dict[Route, Route]: # noqa: F821 + """Two non-circular routes are twins if they have the same end + stations and opposite directions, and the number of stations is + the same or almost the same.
We'll then find stops that are present + in one direction and are missing in the other direction - to warn. + """ + + twin_routes = {} # route => "twin" route + + for route in self.get_meaningful_routes(): + if route.is_circular: + continue # Difficult to calculate. TODO(?) in the future + if route in twin_routes: + continue + + route_transfer_ids = set(route.get_transfers_sequence()) + ends = route.get_end_transfers() + ends_reversed = ends[::-1] + + twin_candidates = [ + r + for r in self + if not r.is_circular + and r not in twin_routes + and r.get_end_transfers() == ends_reversed + # If absolute or relative difference in station count is large, + # possibly it's an express version of a route - skip it. + and ( + abs(len(r) - len(route)) <= 2 + or abs(len(r) - len(route)) / max(len(r), len(route)) + <= 0.2 + ) + ] + + if not twin_candidates: + continue + + twin_route = min( + twin_candidates, + key=lambda r: len( + route_transfer_ids ^ set(r.get_transfers_sequence()) + ), + ) + twin_routes[route] = twin_route + twin_routes[twin_route] = route + + return twin_routes + + def check_return_routes(self) -> None: + """Check if a route has return direction, and if twin routes + miss stations. + """ + meaningful_routes = self.get_meaningful_routes() + + if len(meaningful_routes) == 0: + self.city.error( + f"An empty route master {self.id}. " + "Please set construction:route if it is under construction" + ) + elif len(meaningful_routes) == 1: + log_function = ( + self.city.error + if not self.best.is_circular + else self.city.notice + ) + log_function( + "Only one route in route_master. 
" + "Please check if it needs a return route", + self.best.element, + ) + else: + self.check_return_circular_routes() + self.check_return_noncircular_routes() + + def check_return_noncircular_routes(self) -> None: + routes = [ + route + for route in self.get_meaningful_routes() + if not route.is_circular + ] + all_ends = {route.get_end_transfers(): route for route in routes} + for route in routes: + ends = route.get_end_transfers() + if ends[::-1] not in all_ends: + self.city.notice( + "Route does not have a return direction", route.element + ) + + twin_routes = self.find_twin_routes() + for route1, route2 in twin_routes.items(): + if route1.id > route2.id: + continue # to process a pair of routes only once + # and to ensure the order of routes in the pair + self.alert_twin_routes_differ(route1, route2) + + def check_return_circular_routes(self) -> None: + routes = { + route + for route in self.get_meaningful_routes() + if route.is_circular + } + routes_having_backward = set() + + for route in routes: + if route in routes_having_backward: + continue + transfer_sequence1 = [ + stop.stoparea.transfer or stop.stoparea.id for stop in route + ] + transfer_sequence1.pop() + for potential_backward_route in routes - {route}: + transfer_sequence2 = [ + stop.stoparea.transfer or stop.stoparea.id + for stop in potential_backward_route + ][ + -2::-1 + ] # truncate repeated first stop and reverse + common_subsequence = self.find_common_circular_subsequence( + transfer_sequence1, transfer_sequence2 + ) + if len(common_subsequence) >= 0.8 * min( + len(transfer_sequence1), len(transfer_sequence2) + ): + routes_having_backward.add(route) + routes_having_backward.add(potential_backward_route) + break + + for route in routes - routes_having_backward: + self.city.notice( + "Route does not have a return direction", route.element + ) + + @staticmethod + def find_common_circular_subsequence( + seq1: list[T], seq2: list[T] + ) -> list[T]: + """seq1 and seq2 are supposed to be stops of 
some circular routes. + Prerequisites to rely on the result: + - elements of each sequence are not repeated + - the order of stations is not violated. + Under these conditions we don't need an LCS algorithm. Linear scan is + sufficient. + """ + i1, i2 = -1, -1 + for i1, x in enumerate(seq1): + try: + i2 = seq2.index(x) + except ValueError: + continue + else: + # x is found both in seq1 and seq2 + break + + if i2 == -1: + return [] + + # Shift cyclically so that the common element takes the first position + # both in seq1 and seq2 + seq1 = seq1[i1:] + seq1[:i1] + seq2 = seq2[i2:] + seq2[:i2] + + common_subsequence = [] + i2 = 0 + for x in seq1: + try: + i2 = seq2.index(x, i2) + except ValueError: + continue + common_subsequence.append(x) + i2 += 1 + if i2 >= len(seq2): + break + return common_subsequence + + def alert_twin_routes_differ( + self, + route1: Route, # noqa: F821 + route2: Route, # noqa: F821 + ) -> None: + """Precondition: route1.id < route2.id.""" + ( + stops_missing_from_route1, + stops_missing_from_route2, + stops_that_dont_match, + ) = self.calculate_twin_routes_diff(route1, route2) + + for st in stops_missing_from_route1: + if ( + not route1.are_tracks_complete() + or ( + projected_point := project_on_line( + st.stoparea.center, route1.tracks + )["projected_point"] + ) + is not None + and distance(st.stoparea.center, projected_point) + <= MAX_DISTANCE_STOP_TO_LINE + ): + self.city.notice( + f"Stop {st.stoparea.station.name} {st.stop} is included " + f"in the {route2.id} but not included in {route1.id}", + route1.element, + ) + + for st in stops_missing_from_route2: + if ( + not route2.are_tracks_complete() + or ( + projected_point := project_on_line( + st.stoparea.center, route2.tracks + )["projected_point"] + ) + is not None + and distance(st.stoparea.center, projected_point) + <= MAX_DISTANCE_STOP_TO_LINE + ): + self.city.notice( + f"Stop {st.stoparea.station.name} {st.stop} is included " + f"in the {route1.id} but not included in {route2.id}",
+ route2.element, + ) + + for st1, st2 in stops_that_dont_match: + if ( + st1.stoparea.station == st2.stoparea.station + or distance(st1.stop, st2.stop) < SUGGEST_TRANSFER_MIN_DISTANCE + ): + self.city.notice( + "Should there be one stoparea or a transfer between " + f"{st1.stoparea.station.name} {st1.stop} and " + f"{st2.stoparea.station.name} {st2.stop}?", + route1.element, + ) + + @staticmethod + def calculate_twin_routes_diff( + route1: Route, # noqa: F821 + route2: Route, # noqa: F821 + ) -> tuple: + """Wagner–Fischer algorithm for stops diff in two twin routes.""" + + stops1 = route1.stops + stops2 = route2.stops[::-1] + + def stops_match(stop1: RouteStop, stop2: RouteStop) -> bool: + return ( + stop1.stoparea == stop2.stoparea + or stop1.stoparea.transfer is not None + and stop1.stoparea.transfer == stop2.stoparea.transfer + ) + + d = [[0] * (len(stops2) + 1) for _ in range(len(stops1) + 1)] + d[0] = list(range(len(stops2) + 1)) + for i in range(len(stops1) + 1): + d[i][0] = i + + for i in range(1, len(stops1) + 1): + for j in range(1, len(stops2) + 1): + d[i][j] = ( + d[i - 1][j - 1] + if stops_match(stops1[i - 1], stops2[j - 1]) + else min((d[i - 1][j], d[i][j - 1], d[i - 1][j - 1])) + 1 + ) + + stops_missing_from_route1: list[RouteStop] = [] + stops_missing_from_route2: list[RouteStop] = [] + stops_that_dont_match: list[tuple[RouteStop, RouteStop]] = [] + + i = len(stops1) + j = len(stops2) + while not (i == 0 and j == 0): + action = None + if i > 0 and j > 0: + match = stops_match(stops1[i - 1], stops2[j - 1]) + if match and d[i - 1][j - 1] == d[i][j]: + action = "no" + elif not match and d[i - 1][j - 1] + 1 == d[i][j]: + action = "change" + if not action and i > 0 and d[i - 1][j] + 1 == d[i][j]: + action = "add_2" + if not action and j > 0 and d[i][j - 1] + 1 == d[i][j]: + action = "add_1" + + match action: + case "add_1": + stops_missing_from_route1.append(stops2[j - 1]) + j -= 1 + case "add_2": + stops_missing_from_route2.append(stops1[i - 1]) + i -= 
1 + case _: + if action == "change": + stops_that_dont_match.append( + (stops1[i - 1], stops2[j - 1]) + ) + i -= 1 + j -= 1 + return ( + stops_missing_from_route1, + stops_missing_from_route2, + stops_that_dont_match, + ) + + def __len__(self) -> int: + return len(self.routes) + + def __getitem__(self, i) -> Route: # noqa: F821 + return self.routes[i] + + def __iter__(self) -> Iterator[Route]: # noqa: F821 + return iter(self.routes) + + def __repr__(self) -> str: + return ( + f"RouteMaster(id={self.id}, mode={self.mode}, ref={self.ref}, " + f"name={self.name}, network={self.network}, " + f"num_variants={len(self.routes)}" + ) diff --git a/subways/structure/route_stop.py b/subways/structure/route_stop.py new file mode 100644 index 00000000..c67d597a --- /dev/null +++ b/subways/structure/route_stop.py @@ -0,0 +1,122 @@ +from __future__ import annotations + +import typing + +from subways.osm_element import el_center, el_id +from subways.structure.station import Station +from subways.structure.stop_area import StopArea +from subways.types import LonLat, OsmElementT + +if typing.TYPE_CHECKING: + from subways.structure.city import City + + +class RouteStop: + def __init__(self, stoparea: StopArea) -> None: + self.stoparea: StopArea = stoparea + self.stop: LonLat = None # Stop position, possibly projected + self.distance = 0 # In meters from the start of the route + self.platform_entry = None # Platform el_id + self.platform_exit = None # Platform el_id + self.can_enter = False + self.can_exit = False + self.seen_stop = False + self.seen_platform_entry = False + self.seen_platform_exit = False + self.seen_station = False + + @property + def seen_platform(self) -> bool: + return self.seen_platform_entry or self.seen_platform_exit + + @staticmethod + def get_actual_role( + el: OsmElementT, role: str, modes: set[str] + ) -> str | None: + if StopArea.is_stop(el): + return "stop" + elif StopArea.is_platform(el): + return "platform" + elif Station.is_station(el, modes): + if 
"platform" in role: + return "platform" + else: + return "stop" + return None + + def add(self, member: dict, relation: OsmElementT, city: City) -> None: + el = city.elements[el_id(member)] + role = member["role"] + + if StopArea.is_stop(el): + if "platform" in role: + city.warn("Stop position in a platform role in a route", el) + if el["type"] != "node": + city.error("Stop position is not a node", el) + self.stop = el_center(el) + if "entry_only" not in role: + self.can_exit = True + if "exit_only" not in role: + self.can_enter = True + + elif Station.is_station(el, city.modes): + if el["type"] != "node": + city.notice("Station in route is not a node", el) + + if not self.seen_stop and not self.seen_platform: + self.stop = el_center(el) + self.can_enter = True + self.can_exit = True + + elif StopArea.is_platform(el): + if "stop" in role: + city.warn("Platform in a stop role in a route", el) + if "exit_only" not in role: + self.platform_entry = el_id(el) + self.can_enter = True + if "entry_only" not in role: + self.platform_exit = el_id(el) + self.can_exit = True + if not self.seen_stop: + self.stop = el_center(el) + + multiple_check = False + actual_role = RouteStop.get_actual_role(el, role, city.modes) + if actual_role == "platform": + if role == "platform_entry_only": + multiple_check = self.seen_platform_entry + self.seen_platform_entry = True + elif role == "platform_exit_only": + multiple_check = self.seen_platform_exit + self.seen_platform_exit = True + else: + if role != "platform" and "stop" not in role: + city.warn( + f'Platform "{el["tags"].get("name", "")}" ' + f'({el_id(el)}) with invalid role "{role}" in route', + relation, + ) + multiple_check = self.seen_platform + self.seen_platform_entry = True + self.seen_platform_exit = True + elif actual_role == "stop": + multiple_check = self.seen_stop + self.seen_stop = True + if multiple_check: + log_function = city.error if actual_role == "stop" else city.notice + log_function( + f'Multiple {actual_role}s 
for a station "' + f'{el["tags"].get("name", "")} ' + f"({el_id(el)}) in a route relation", + relation, + ) + + def __repr__(self) -> str: + return ( + "RouteStop(stop={}, pl_entry={}, pl_exit={}, stoparea={})".format( + self.stop, + self.platform_entry, + self.platform_exit, + self.stoparea, + ) + ) diff --git a/subways/structure/station.py b/subways/structure/station.py new file mode 100644 index 00000000..f1cd2faa --- /dev/null +++ b/subways/structure/station.py @@ -0,0 +1,62 @@ +from __future__ import annotations + +import typing + +from subways.consts import ALL_MODES, CONSTRUCTION_KEYS +from subways.css_colours import normalize_colour +from subways.osm_element import el_center, el_id +from subways.types import IdT, OsmElementT + +if typing.TYPE_CHECKING: + from subways.structure.city import City + + +class Station: + def __init__(self, el: OsmElementT, city: City) -> None: + """Call this with a railway=station OSM feature.""" + self.id: IdT = el_id(el) + self.element: OsmElementT = el + self.modes = Station.get_modes(el) + self.name = el["tags"].get("name", "?") + self.int_name = el["tags"].get( + "int_name", el["tags"].get("name:en", None) + ) + try: + self.colour = normalize_colour(el["tags"].get("colour", None)) + except ValueError as e: + self.colour = None + city.warn(str(e), el) + self.center = el_center(el) + if self.center is None: + raise Exception("Could not find center of {}".format(el)) + + @staticmethod + def get_modes(el: OsmElementT) -> set[str]: + modes = {m for m in ALL_MODES if el["tags"].get(m) == "yes"} + if mode := el["tags"].get("station"): + modes.add(mode) + return modes + + @staticmethod + def is_station(el: OsmElementT, modes: set[str]) -> bool: + # public_transport=station is too ambiguous and unspecific to use, + # so we expect it to be backed by railway=station.
+ if ( + "tram" in modes + and el.get("tags", {}).get("railway") == "tram_stop" + ): + return True + if el.get("tags", {}).get("railway") not in ("station", "halt"): + return False + for k in CONSTRUCTION_KEYS: + if k in el["tags"]: + return False + # Not checking for station=train, obviously + if "train" not in modes and Station.get_modes(el).isdisjoint(modes): + return False + return True + + def __repr__(self) -> str: + return "Station(id={}, modes={}, name={}, center={})".format( + self.id, ",".join(self.modes), self.name, self.center + ) diff --git a/subways/structure/stop_area.py b/subways/structure/stop_area.py new file mode 100644 index 00000000..913b2c7b --- /dev/null +++ b/subways/structure/stop_area.py @@ -0,0 +1,191 @@ +from __future__ import annotations + +import typing +from itertools import chain + +from subways.consts import RAILWAY_TYPES +from subways.css_colours import normalize_colour +from subways.geom_utils import distance +from subways.osm_element import el_id, el_center +from subways.structure.station import Station +from subways.types import IdT, OsmElementT + +if typing.TYPE_CHECKING: + from subways.structure.city import City + +MAX_DISTANCE_TO_ENTRANCES = 300 # in meters + + +class StopArea: + @staticmethod + def is_stop(el: OsmElementT) -> bool: + if "tags" not in el: + return False + if el["tags"].get("railway") == "stop": + return True + if el["tags"].get("public_transport") == "stop_position": + return True + return False + + @staticmethod + def is_platform(el: OsmElementT) -> bool: + if "tags" not in el: + return False + if el["tags"].get("railway") in ("platform", "platform_edge"): + return True + if el["tags"].get("public_transport") == "platform": + return True + return False + + @staticmethod + def is_track(el: OsmElementT) -> bool: + if el["type"] != "way" or "tags" not in el: + return False + return el["tags"].get("railway") in RAILWAY_TYPES + + def __init__( + self, + station: Station, + city: City, + stop_area: OsmElementT | 
None = None, + ) -> None: + """Call this with a Station object.""" + + self.element: OsmElementT = stop_area or station.element + self.id: IdT = el_id(self.element) + self.station: Station = station + self.stops = set() # set of el_ids of stop_positions + self.platforms = set() # set of el_ids of platforms + self.exits = set() # el_id of subway_entrance/train_station_entrance + # for leaving the platform + self.entrances = set() # el_id of subway/train_station entrance + # for entering the platform + self.center = None # lon, lat of the station centre point + self.centers = {} # el_id -> (lon, lat) for all elements + self.transfer = None # el_id of a transfer relation + + self.modes = station.modes + self.name = station.name + self.int_name = station.int_name + self.colour = station.colour + + if stop_area: + self.name = stop_area["tags"].get("name", self.name) + self.int_name = stop_area["tags"].get( + "int_name", stop_area["tags"].get("name:en", self.int_name) + ) + try: + self.colour = ( + normalize_colour(stop_area["tags"].get("colour")) + or self.colour + ) + except ValueError as e: + city.warn(str(e), stop_area) + + self._process_members(station, city, stop_area) + else: + self._add_nearby_entrances(station, city) + + if self.exits and not self.entrances: + city.warn( + "Only exits for a station, no entrances", + stop_area or station.element, + ) + if self.entrances and not self.exits: + city.warn("No exits for a station", stop_area or station.element) + + for el in self.get_elements(): + self.centers[el] = el_center(city.elements[el]) + + """Calculate the center point of the station. This algorithm + cannot rely on a station node, since many stop_areas can share one. 
+ Basically it averages center points of all platforms + and stop positions.""" + if len(self.stops) + len(self.platforms) == 0: + self.center = station.center + else: + self.center = [0, 0] + for sp in chain(self.stops, self.platforms): + spc = self.centers[sp] + for i in range(2): + self.center[i] += spc[i] + for i in range(2): + self.center[i] /= len(self.stops) + len(self.platforms) + + def _process_members( + self, station: Station, city: City, stop_area: OsmElementT + ) -> None: + # If we have a stop area, add all elements from it + tracks_detected = False + for m in stop_area["members"]: + k = el_id(m) + m_el = city.elements.get(k) + if not m_el or "tags" not in m_el: + continue + if Station.is_station(m_el, city.modes): + if k != station.id: + city.error("Stop area has multiple stations", stop_area) + elif StopArea.is_stop(m_el): + self.stops.add(k) + elif StopArea.is_platform(m_el): + self.platforms.add(k) + elif (entrance_type := m_el["tags"].get("railway")) in ( + "subway_entrance", + "train_station_entrance", + ): + if m_el["type"] != "node": + city.warn(f"{entrance_type} is not a node", m_el) + if ( + m_el["tags"].get("entrance") != "exit" + and m["role"] != "exit_only" + ): + self.entrances.add(k) + if ( + m_el["tags"].get("entrance") != "entrance" + and m["role"] != "entry_only" + ): + self.exits.add(k) + elif StopArea.is_track(m_el): + tracks_detected = True + + if tracks_detected: + city.warn("Tracks in a stop_area relation", stop_area) + + def _add_nearby_entrances(self, station: Station, city: City) -> None: + center = station.center + for entrance_el in ( + el + for el in city.elements.values() + if "tags" in el + and (entrance_type := el["tags"].get("railway")) + in ("subway_entrance", "train_station_entrance") + ): + entrance_id = el_id(entrance_el) + if entrance_id in city.stop_areas: + continue # This entrance belongs to some stop_area + c_center = el_center(entrance_el) + if ( + c_center + and distance(center, c_center) <= 
MAX_DISTANCE_TO_ENTRANCES + ): + if entrance_el["type"] != "node": + city.warn(f"{entrance_type} is not a node", entrance_el) + etag = entrance_el["tags"].get("entrance") + if etag != "exit": + self.entrances.add(entrance_id) + if etag != "entrance": + self.exits.add(entrance_id) + + def get_elements(self) -> set[IdT]: + result = {self.id, self.station.id} + result.update(self.entrances) + result.update(self.exits) + result.update(self.stops) + result.update(self.platforms) + return result + + def __repr__(self) -> str: + return ( + f"StopArea(id={self.id}, name={self.name}, station={self.station}," + f" transfer={self.transfer}, center={self.center})" + ) diff --git a/subway_io.py b/subways/subway_io.py similarity index 88% rename from subway_io.py rename to subways/subway_io.py index cbd252a0..3980b4fc 100644 --- a/subway_io.py +++ b/subways/subway_io.py @@ -1,15 +1,26 @@ +from __future__ import annotations + import json import logging +import typing from collections import OrderedDict +from io import BufferedIOBase +from typing import Any, TextIO + +from subways.types import OsmElementT + +if typing.TYPE_CHECKING: + from subways.structure.city import City + from subways.structure.stop_area import StopArea -def load_xml(f): +def load_xml(f: BufferedIOBase | str) -> list[OsmElementT]: try: from lxml import etree except ImportError: import xml.etree.ElementTree as etree - elements = [] + elements: list[OsmElementT] = [] for event, element in etree.iterparse(f): if element.tag in ("node", "way", "relation"): @@ -49,7 +60,7 @@ def load_xml(f): _YAML_SPECIAL_SEQUENCES = ("- ", ": ", "? 
") -def _get_yaml_compatible_string(scalar): +def _get_yaml_compatible_string(scalar: Any) -> str: """Enclose string in single quotes in some cases""" string = str(scalar) if string and ( @@ -62,8 +73,8 @@ def _get_yaml_compatible_string(scalar): return string -def dump_yaml(city, f): - def write_yaml(data, f, indent=""): +def dump_yaml(city: City, f: TextIO) -> None: + def write_yaml(data: dict, f: TextIO, indent: str = "") -> None: if isinstance(data, (set, list)): f.write("\n") for i in data: @@ -88,7 +99,7 @@ def write_yaml(data, f, indent=""): routes = [] for route in city: stations = OrderedDict( - [(sa.transfer or sa.id, sa.name) for sa in route.stop_areas()] + [(sa.transfer or sa.id, sa.name) for sa in route.stopareas()] ) rte = { "type": route.mode, @@ -138,10 +149,10 @@ def write_yaml(data, f, indent=""): write_yaml(result, f) -def make_geojson(city, include_tracks_geometry=True): - transfers = set() +def make_geojson(city: City, include_tracks_geometry: bool = True) -> dict: + stopareas_in_transfers: set[StopArea] = set() for t in city.transfers: - transfers.update(t) + stopareas_in_transfers.update(t) features = [] stopareas = set() stops = set() @@ -196,7 +207,7 @@ def make_geojson(city, include_tracks_geometry=True): "name": stoparea.name, "marker-size": "small", "marker-color": "#ff2600" - if stoparea in transfers + if stoparea in stopareas_in_transfers else "#797979", }, } @@ -204,7 +215,7 @@ def make_geojson(city, include_tracks_geometry=True): return {"type": "FeatureCollection", "features": features} -def _dumps_route_id(route_id): +def _dumps_route_id(route_id: tuple[str | None, str | None]) -> str: """Argument is a route_id that depends on route colour and ref. Name can be taken from route_master or can be route's own, we don't take it into consideration. Some of route attributes can be None. 
The function makes @@ -212,13 +223,13 @@ def _dumps_route_id(route_id): return json.dumps(route_id, ensure_ascii=False) -def _loads_route_id(route_id_dump): +def _loads_route_id(route_id_dump: str) -> tuple[str | None, str | None]: """Argument is a json-encoded identifier of a route. Return a tuple (colour, ref).""" return tuple(json.loads(route_id_dump)) -def read_recovery_data(path): +def read_recovery_data(path: str) -> dict: """Recovery data is a json with data from previous transport builds. It helps to recover cities from some errors, e.g. by resorting shuffled stations in routes.""" @@ -246,11 +257,15 @@ def read_recovery_data(path): return data -def write_recovery_data(path, current_data, cities): +def write_recovery_data( + path: str, current_data: dict, cities: list[City] +) -> None: """Updates recovery data with good cities data and writes to file.""" - def make_city_recovery_data(city): - routes = {} + def make_city_recovery_data( + city: City, + ) -> dict[tuple[str | None, str | None], list[dict]]: + routes: dict[tuple[str | None, str | None], list[dict]] = {} for route in city: # Recovery is based primarily on route/station names/refs. # If route's ref/colour changes, the route won't be used. 
diff --git a/subways/tests/README.md b/subways/tests/README.md new file mode 100644 index 00000000..44238098 --- /dev/null +++ b/subways/tests/README.md @@ -0,0 +1,10 @@ +To perform tests, run this command from the top directory +of the repository: + +```bash +export PYTHONPATH=$(pwd) +[ -d "subways/tests/.venv" ] || python3 -m venv subways/tests/.venv +source subways/tests/.venv/bin/activate +pip install -r subways/requirements.txt +python -m unittest discover subways +``` diff --git a/tests/__init__.py b/subways/tests/__init__.py similarity index 100% rename from tests/__init__.py rename to subways/tests/__init__.py diff --git a/tests/assets/cities_info_with_bad_values.csv b/subways/tests/assets/cities_info_with_bad_values.csv similarity index 100% rename from tests/assets/cities_info_with_bad_values.csv rename to subways/tests/assets/cities_info_with_bad_values.csv diff --git a/subways/tests/assets/route_masters.osm b/subways/tests/assets/route_masters.osm new file mode 100644 index 00000000..1d466c8f --- /dev/null +++ b/subways/tests/assets/route_masters.osm @@ -0,0 +1,558 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/subways/tests/assets/tiny_world.osm b/subways/tests/assets/tiny_world.osm new file mode 100644 index 00000000..4cd0631e --- /dev/null +++ b/subways/tests/assets/tiny_world.osm @@ -0,0 +1,245 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/subways/tests/assets/tiny_world_gtfs/agency.txt b/subways/tests/assets/tiny_world_gtfs/agency.txt new file mode 100644 index 00000000..aa8ab794 --- /dev/null +++ b/subways/tests/assets/tiny_world_gtfs/agency.txt @@ -0,0 +1,3 @@ +agency_id,agency_name,agency_url,agency_timezone,agency_lang,agency_phone +1,Intersecting 2 metro lines,,,, +2,One light rail line,,,, diff --git a/subways/tests/assets/tiny_world_gtfs/calendar.txt b/subways/tests/assets/tiny_world_gtfs/calendar.txt new file mode 100644 index 00000000..b01e5ade --- /dev/null +++ b/subways/tests/assets/tiny_world_gtfs/calendar.txt @@ -0,0 +1,2 @@ +service_id,monday,tuesday,wednesday,thursday,friday,saturday,sunday,start_date,end_date +always,1,1,1,1,1,1,1,19700101,30000101 diff --git a/subways/tests/assets/tiny_world_gtfs/frequencies.txt b/subways/tests/assets/tiny_world_gtfs/frequencies.txt new file mode 100644 index 00000000..cbef1723 --- /dev/null +++ b/subways/tests/assets/tiny_world_gtfs/frequencies.txt @@ -0,0 +1,7 @@ +trip_id,start_time,end_time,headway_secs,exact_times 
+r7,05:00:00,25:00:00,150.0, +r8,05:00:00,25:00:00,150.0, +r12,05:00:00,25:00:00,150.0, +r13,05:00:00,25:00:00,150.0, +r9,05:00:00,25:00:00,150.0, +r10,05:00:00,25:00:00,150.0, diff --git a/subways/tests/assets/tiny_world_gtfs/routes.txt b/subways/tests/assets/tiny_world_gtfs/routes.txt new file mode 100644 index 00000000..a5801ef6 --- /dev/null +++ b/subways/tests/assets/tiny_world_gtfs/routes.txt @@ -0,0 +1,4 @@ +route_id,agency_id,route_short_name,route_long_name,route_desc,route_type,route_url,route_color,route_text_color,route_sort_order,route_fare_class,line_id,listed_route +r15,1,1,Blue Line,,1,,0000ff,,,,, +r14,1,2,Red Line,,1,,ff0000,,,,, +r11,2,LR,LR Line,,1,,a52a2a,,,,, diff --git a/subways/tests/assets/tiny_world_gtfs/shapes.txt b/subways/tests/assets/tiny_world_gtfs/shapes.txt new file mode 100644 index 00000000..f79438f3 --- /dev/null +++ b/subways/tests/assets/tiny_world_gtfs/shapes.txt @@ -0,0 +1,15 @@ +shape_id,shape_pt_lat,shape_pt_lon,shape_pt_sequence,shape_dist_traveled +7,0.0,0.0,0, +7,0.0047037,0.0047037,1, +7,0.0099397,0.0099397,2, +8,0.0099397,0.0099397,0, +8,0.0047037,0.0047037,1, +8,0.0,0.0,2, +12,0.01,0.0,0, +12,0.0,0.01,1, +13,0.0,0.01,0, +13,0.01,0.0,1, +9,0.0102531,0.0097675,0, +9,0.0143445,0.0124562,1, +10,0.0143597,0.012321,0, +10,0.0103197,0.0096662,1, diff --git a/subways/tests/assets/tiny_world_gtfs/stop_times.txt b/subways/tests/assets/tiny_world_gtfs/stop_times.txt new file mode 100644 index 00000000..775f35a7 --- /dev/null +++ b/subways/tests/assets/tiny_world_gtfs/stop_times.txt @@ -0,0 +1,17 @@ +trip_id,arrival_time,departure_time,stop_id,stop_sequence,stop_headsign,pickup_type,drop_off_type,shape_dist_traveled,timepoint,checkpoint_id,continuous_pickup,continuous_drop_off +r7,,,n1_plt,0,,,,0,,,, +r7,,,r1_plt,1,,,,741,,,, +r7,,,r3_plt,2,,,,1565,,,, +r8,,,r3_plt,0,,,,0,,,, +r8,,,r1_plt,1,,,,824,,,, +r8,,,n1_plt,2,,,,1565,,,, +r12,,,n4_plt,0,,,,0,,,, +r12,,,r2_plt,1,,,,758,,,, +r12,,,n6_plt,2,,,,1575,,,, 
+r13,,,n6_plt,0,,,,0,,,, +r13,,,r2_plt,1,,,,817,,,, +r13,,,n4_plt,2,,,,1575,,,, +r9,,,r4_plt,0,,,,0,,,, +r9,,,r16_plt,1,,,,545,,,, +r10,,,r16_plt,0,,,,0,,,, +r10,,,r4_plt,1,,,,538,,,, diff --git a/subways/tests/assets/tiny_world_gtfs/stops.txt b/subways/tests/assets/tiny_world_gtfs/stops.txt new file mode 100644 index 00000000..36c9d8eb --- /dev/null +++ b/subways/tests/assets/tiny_world_gtfs/stops.txt @@ -0,0 +1,27 @@ +stop_id,stop_code,stop_name,stop_desc,platform_code,platform_name,stop_lat,stop_lon,zone_id,stop_address,stop_url,level_id,location_type,parent_station,wheelchair_boarding,municipality,on_street,at_street,vehicle_type +n1_egress,n1_egress,Station 1,,,,0.0,0.0,,,,,2,n1_st,,,,, +n1_plt,n1_plt,Station 1,,,,0.0,0.0,,,,,0,n1_st,,,,, +n1_st,n1_st,Station 1,,,,0.0,0.0,,,,,1,,,,,, +n201_r3,n201_r3,Station 3 3-1,,,,0.0096747,0.0100717,,,,,2,r3_st,,,,, +n202_r3,n202_r3,Station 3 3-2,,,,0.0096694,0.010187,,,,,2,r3_st,,,,, +n203_r4,n203_r4,Station 7 7-2,,,,0.0104257,0.0095996,,,,,2,r4_st,,,,, +n204_r4,n204_r4,Station 7 7-1,,,,0.010348,0.0095218,,,,,2,r4_st,,,,, +n205_n4,n205_n4,Station 4 4-1,,,,0.0101548,0.0002012,,,,,2,n4_st,,,,, +n4_plt,n4_plt,Station 4,,,,0.01,0.0,,,,,0,n4_st,,,,, +n4_st,n4_st,Station 4,,,,0.01,0.0,,,,,1,,,,,, +n6_egress,n6_egress,Station 6,,,,0.0,0.01,,,,,2,n6_st,,,,, +n6_plt,n6_plt,Station 6,,,,0.0,0.01,,,,,0,n6_st,,,,, +n6_st,n6_st,Station 6,,,,0.0,0.01,,,,,1,,,,,, +r16_egress,r16_egress,Station 8,,,,0.0143778,0.0124055,,,,,2,r16_st,,,,, +r16_plt,r16_plt,Station 8,,,,0.0143778,0.0124055,,,,,0,r16_st,,,,, +r16_st,r16_st,Station 8,,,,0.0143778,0.0124055,,,,,1,,,,,, +r1_egress,r1_egress,Station 2,,,,0.0047037,0.0047037,,,,,2,r1_st,,,,, +r1_plt,r1_plt,Station 2,,,,0.0047037,0.0047037,,,,,0,r1_st,,,,, +r1_st,r1_st,Station 2,,,,0.0047037,0.0047037,,,,,1,,,,,, +r2_egress,r2_egress,Station 5,,,,0.0051474,0.0047719,,,,,2,r2_st,,,,, +r2_plt,r2_plt,Station 5,,,,0.0051474,0.0047719,,,,,0,r2_st,,,,, +r2_st,r2_st,Station 
5,,,,0.0051474,0.0047719,,,,,1,,,,,, +r3_plt,r3_plt,Station 3,,,,0.0097589,0.0101204,,,,,0,r3_st,,,,, +r3_st,r3_st,Station 3,,,,0.0097589,0.0101204,,,,,1,,,,,, +r4_plt,r4_plt,Station 7,,,,0.0102864,0.0097169,,,,,0,r4_st,,,,, +r4_st,r4_st,Station 7,,,,0.0102864,0.0097169,,,,,1,,,,,, diff --git a/subways/tests/assets/tiny_world_gtfs/transfers.txt b/subways/tests/assets/tiny_world_gtfs/transfers.txt new file mode 100644 index 00000000..db1ad5ad --- /dev/null +++ b/subways/tests/assets/tiny_world_gtfs/transfers.txt @@ -0,0 +1,5 @@ +from_stop_id,to_stop_id,transfer_type,min_transfer_time +r3_st,r4_st,0,106 +r4_st,r3_st,0,106 +r1_st,r2_st,0,81 +r2_st,r1_st,0,81 diff --git a/subways/tests/assets/tiny_world_gtfs/trips.txt b/subways/tests/assets/tiny_world_gtfs/trips.txt new file mode 100644 index 00000000..80615596 --- /dev/null +++ b/subways/tests/assets/tiny_world_gtfs/trips.txt @@ -0,0 +1,7 @@ +route_id,service_id,trip_id,trip_headsign,trip_short_name,direction_id,block_id,shape_id,wheelchair_accessible,trip_route_type,route_pattern_id,bikes_allowed,average_speed +r15,always,r7,,,,,7,,,,,40.0 +r15,always,r8,,,,,8,,,,,40.0 +r14,always,r12,,,,,12,,,,,9.4 +r14,always,r13,,,,,13,,,,,11.8 +r11,always,r9,,,,,9,,,,,6.5 +r11,always,r10,,,,,10,,,,,6.5 diff --git a/subways/tests/assets/twin_routes.osm b/subways/tests/assets/twin_routes.osm new file mode 100644 index 00000000..38cbe6c6 --- /dev/null +++ b/subways/tests/assets/twin_routes.osm @@ -0,0 +1,578 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/subways/tests/assets/twin_routes_with_divergence.osm b/subways/tests/assets/twin_routes_with_divergence.osm new file mode 100644 index 00000000..057cca36 --- /dev/null +++ b/subways/tests/assets/twin_routes_with_divergence.osm @@ -0,0 +1,680 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/sample_data_for_build_tracks.py b/subways/tests/sample_data_for_build_tracks.py similarity index 94% rename from tests/sample_data_for_build_tracks.py rename to subways/tests/sample_data_for_build_tracks.py index ed1b5891..4436db23 100644 --- a/tests/sample_data_for_build_tracks.py +++ b/subways/tests/sample_data_for_build_tracks.py @@ -1,5 +1,6 @@ -sample_networks = { - "Only 2 stations, no rails": { +metro_samples = [ + { + "name": "Only 2 stations, no rails", "xml": """ @@ -37,7 +38,11 @@ """, - "num_stations": 2, + "cities_info": [ + { + "num_stations": 2, + }, + ], "tracks": [], "extended_tracks": [ (0.0, 0.0), @@ -55,7 +60,8 @@ "positions_on_rails": [], }, }, - "Only 2 stations connected with rails": { + { + "name": "Only 2 stations connected with rails", "xml": """ @@ -100,7 +106,11 @@ """, - "num_stations": 2, + "cities_info": [ + { + "num_stations": 2, + }, + ], "tracks": [ (0.0, 0.0), (1.0, 0.0), @@ -124,7 +134,8 @@ "positions_on_rails": [[0], [1]], }, }, - "Only 6 stations, no rails": { + { + "name": "Only 6 stations, no rails", "xml": """ @@ -190,7 +201,11 @@ """, - "num_stations": 6, + "cities_info": [ + { + "num_stations": 6, + }, + ], "tracks": [], "extended_tracks": [ (0.0, 0.0), @@ -212,7 +227,8 @@ "positions_on_rails": [], }, }, - "One rail line connecting all stations": { + { + "name": "One rail line connecting all stations", "xml": """ @@ -289,7 +305,11 @@ """, - "num_stations": 6, + "cities_info": [ + { + "num_stations": 6, + }, + ], "tracks": [ (0.0, 0.0), (1.0, 0.0), @@ -325,7 +345,8 @@ "positions_on_rails": [[0], [1], [2], [3], [4], [5]], }, }, - "One rail line connecting all stations except the last": { + { + "name": "One rail line connecting all stations except the last", "xml": """ 
@@ -401,7 +422,11 @@ """, - "num_stations": 6, + "cities_info": [ + { + "num_stations": 6, + }, + ], "tracks": [ (0.0, 0.0), (1.0, 0.0), @@ -435,7 +460,8 @@ "positions_on_rails": [[0], [1], [2], [3], [4]], }, }, - "One rail line connecting all stations except the fist": { + { + "name": "One rail line connecting all stations except the first", "xml": """ @@ -511,7 +537,11 @@ """, - "num_stations": 6, + "cities_info": [ + { + "num_stations": 6, + }, + ], "tracks": [ (1.0, 0.0), (2.0, 0.0), @@ -545,7 +575,11 @@ "positions_on_rails": [[0], [1], [2], [3], [4]], }, }, - "One rail line connecting all stations except the fist and the last": { + { + "name": ( + "One rail line connecting all stations " + "except the first and the last" + ), "xml": """ @@ -620,7 +654,11 @@ """, - "num_stations": 6, + "cities_info": [ + { + "num_stations": 6, + }, + ], "tracks": [ (1.0, 0.0), (2.0, 0.0), @@ -652,7 +690,8 @@ "positions_on_rails": [[0], [1], [2], [3]], }, }, - "One rail line connecting only 2 first stations": { + { + "name": "One rail line connecting only 2 first stations", "xml": """ @@ -725,7 +764,11 @@ """, - "num_stations": 6, + "cities_info": [ + { + "num_stations": 6, + }, + ], "tracks": [ (0.0, 0.0), (1.0, 0.0), @@ -753,7 +796,8 @@ "positions_on_rails": [[0], [1]], }, }, - "One rail line connecting only 2 last stations": { + { + "name": "One rail line connecting only 2 last stations", "xml": """ @@ -826,7 +870,11 @@ """, - "num_stations": 6, + "cities_info": [ + { + "num_stations": 6, + }, + ], "tracks": [ (4.0, 0.0), (5.0, 0.0), @@ -854,7 +902,8 @@ "positions_on_rails": [[0], [1]], }, }, - "One rail connecting all stations and protruding at both ends": { + { + "name": "One rail connecting all stations and protruding at both ends", "xml": """ @@ -937,7 +986,11 @@ """, - "num_stations": 6, + "cities_info": [ + { + "num_stations": 6, + }, + ], "tracks": [ (-1.0, 0.0), (0.0, 0.0), @@ -977,10 +1030,11 @@ "positions_on_rails": [[1], [2], [3], [4], [5], [6]], }, }, - ( - 
"Several rails with reversed order for backward route, " - "connecting all stations and protruding at both ends" - ): { + { + "name": ( + "Several rails with reversed order for backward route, " + "connecting all stations and protruding at both ends" + ), "xml": """ @@ -1069,7 +1123,11 @@ """, - "num_stations": 6, + "cities_info": [ + { + "num_stations": 6, + }, + ], "tracks": [ (-1.0, 0.0), (0.0, 0.0), @@ -1109,10 +1167,11 @@ "positions_on_rails": [[1], [2], [3], [4], [5], [6]], }, }, - ( - "One rail laying near all stations requiring station projecting, " - "protruding at both ends" - ): { + { + "name": ( + "One rail laying near all stations requiring station projecting, " + "protruding at both ends" + ), "xml": """ @@ -1189,7 +1248,11 @@ """, - "num_stations": 6, + "cities_info": [ + { + "num_stations": 6, + }, + ], "tracks": [ (-1.0, 0.0), (6.0, 0.0), @@ -1227,7 +1290,8 @@ ], }, }, - "One rail laying near all stations except the first and last": { + { + "name": "One rail laying near all stations except the first and last", "xml": """ @@ -1304,7 +1368,11 @@ """, - "num_stations": 6, + "cities_info": [ + { + "num_stations": 6, + }, + ], "tracks": [ (1.0, 0.0), (4.0, 0.0), @@ -1330,7 +1398,8 @@ "positions_on_rails": [[0], [1 / 3], [2 / 3], [1]], }, }, - "Circle route without rails": { + { + "name": "Circle route without rails", "xml": """ @@ -1377,7 +1446,11 @@ """, - "num_stations": 4, + "cities_info": [ + { + "num_stations": 4, + }, + ], "tracks": [], "extended_tracks": [ (0.0, 0.0), @@ -1398,7 +1471,8 @@ "positions_on_rails": [], }, }, - "Circle route with closed rail line connecting all stations": { + { + "name": "Circle route with closed rail line connecting all stations", "xml": """ @@ -1455,7 +1529,11 @@ """, - "num_stations": 4, + "cities_info": [ + { + "num_stations": 4, + }, + ], "tracks": [ (0.0, 0.0), (0.0, 1.0), @@ -1488,4 +1566,4 @@ "positions_on_rails": [[0, 4], [1], [2], [3], [0, 4]], }, }, -} +] diff --git a/tests/assets/kuntsevskaya_transfer.osm 
b/subways/tests/sample_data_for_center_calculation.py similarity index 84% rename from tests/assets/kuntsevskaya_transfer.osm rename to subways/tests/sample_data_for_center_calculation.py index 48bf044e..49ab3688 100644 --- a/tests/assets/kuntsevskaya_transfer.osm +++ b/subways/tests/sample_data_for_center_calculation.py @@ -1,4 +1,7 @@ - +metro_samples = [ + { + "name": "Transfer at Kuntsevskaya", + "xml": """ @@ -80,3 +83,16 @@ +""", # noqa: E501 + "expected_centers": { + "w38836456": {"lat": 55.73064775, "lon": 37.446065950000005}, + "w489951237": {"lat": 55.730760724999996, "lon": 37.44602055}, + "r7588527": {"lat": 55.73066371666667, "lon": 37.44604881666667}, + "r7588528": {"lat": 55.73075192499999, "lon": 37.44609837}, + "r7588561": {"lat": 55.73070782083333, "lon": 37.44607359333334}, + "r13426423": {"lat": 55.730760724999996, "lon": 37.44602055}, + "r100": None, + "r101": None, + }, + }, +] diff --git a/tests/sample_data_for_error_messages.py b/subways/tests/sample_data_for_error_messages.py similarity index 53% rename from tests/sample_data_for_error_messages.py rename to subways/tests/sample_data_for_error_messages.py index 9d5c5fcd..907a077f 100644 --- a/tests/sample_data_for_error_messages.py +++ b/subways/tests/sample_data_for_error_messages.py @@ -1,5 +1,6 @@ -sample_networks = { - "No errors": { +metro_samples = [ + { + "name": "No errors", "xml": """ @@ -38,15 +39,113 @@ """, - "num_stations": 2, - "num_lines": 1, - "num_light_lines": 0, - "num_interchanges": 0, + "cities_info": [ + { + "num_stations": 2, + }, + ], "errors": [], "warnings": [], "notices": [], }, - "Bad station order": { + { + "name": "Station colour tag present/absent, correct/incorrect, on bear station / with stop_area", # noqa E501 + "xml": """ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +""", + "cities_info": [ + { + "num_stations": 6, + }, + ], + "errors": [], + 
"warnings": [ + 'Unknown colour code: incorrect (node 4, "Station 4")', + 'Unknown colour code: incorrect (node 6, "Station 6")', + ], + "notices": [], + }, + { + "name": "Bad station order", "xml": """ @@ -99,20 +198,20 @@ """, - "num_stations": 4, - "num_lines": 1, - "num_light_lines": 0, - "num_interchanges": 0, + "cities_info": [ + { + "num_stations": 4, + }, + ], "errors": [ - 'Angle between stops around "Station 3" (2.0, 0.0) ' - 'is too narrow, 0 degrees (relation 1, "Forward")', - 'Angle between stops around "Station 2" (1.0, 0.0) ' - 'is too narrow, 0 degrees (relation 1, "Forward")', + 'Angle between stops around "Station 3" (2.0, 0.0) is too narrow, 0 degrees (relation 1, "Forward")', # noqa: E501 + 'Angle between stops around "Station 2" (1.0, 0.0) is too narrow, 0 degrees (relation 1, "Forward")', # noqa: E501 ], "warnings": [], "notices": [], }, - "Angle < 20 degrees": { + { + "name": "Angle < 20 degrees", "xml": """ @@ -159,20 +258,20 @@ """, - "num_stations": 3, - "num_lines": 1, - "num_light_lines": 0, - "num_interchanges": 0, + "cities_info": [ + { + "num_stations": 3, + }, + ], "errors": [ - 'Angle between stops around "Station 2" (1.0, 0.0) ' - 'is too narrow, 11 degrees (relation 1, "Forward")', - 'Angle between stops around "Station 2" (1.0, 0.0) ' - 'is too narrow, 11 degrees (relation 2, "Backward")', + 'Angle between stops around "Station 2" (1.0, 0.0) is too narrow, 11 degrees (relation 1, "Forward")', # noqa: E501 + 'Angle between stops around "Station 2" (1.0, 0.0) is too narrow, 11 degrees (relation 2, "Backward")', # noqa: E501 ], "warnings": [], "notices": [], }, - "Angle between 20 and 45 degrees": { + { + "name": "Angle between 20 and 45 degrees", "xml": """ @@ -219,20 +318,20 @@ """, - "num_stations": 3, - "num_lines": 1, - "num_light_lines": 0, - "num_interchanges": 0, + "cities_info": [ + { + "num_stations": 3, + }, + ], "errors": [], "warnings": [], "notices": [ - 'Angle between stops around "Station 2" (1.0, 0.0) ' - 'is too 
narrow, 27 degrees (relation 1, "Forward")', - 'Angle between stops around "Station 2" (1.0, 0.0) ' - 'is too narrow, 27 degrees (relation 2, "Backward")', + 'Angle between stops around "Station 2" (1.0, 0.0) is too narrow, 27 degrees (relation 1, "Forward")', # noqa: E501 + 'Angle between stops around "Station 2" (1.0, 0.0) is too narrow, 27 degrees (relation 2, "Backward")', # noqa: E501 ], }, - "Stops unordered along tracks provided each angle > 45 degrees": { + { + "name": "Unordered stops provided each angle > 45 degrees", "xml": """ @@ -300,17 +399,52 @@ """, - "num_stations": 4, - "num_lines": 1, - "num_light_lines": 0, - "num_interchanges": 0, + "cities_info": [ + { + "num_stations": 4, + }, + ], "errors": [ - 'Stops on tracks are unordered near "Station 2" (1.0, 0.0) ' - '(relation 1, "Forward")', - 'Stops on tracks are unordered near "Station 3" (0.0, 0.5) ' - '(relation 2, "Backward")', + 'Stops on tracks are unordered near "Station 2" (1.0, 0.0) (relation 1, "Forward")', # noqa: E501 + 'Stops on tracks are unordered near "Station 3" (0.0, 0.5) (relation 2, "Backward")', # noqa: E501 ], "warnings": [], "notices": [], }, -} + { + "name": ( + "Many different route masters, both on naked stations and " + "stop_positions/stop_areas/transfers, both linear and circular" + ), + "xml_file": "assets/route_masters.osm", + "cities_info": [ + { + "num_stations": (3 + 3 + 3 + 5 + 3 + 3 + 4 + 3) + + (3 + 3 + 3 + 3 + 3 + 3 + 4), + "num_lines": 8 + 7, + "num_interchanges": 0 + 1, + }, + ], + "errors": [ + 'Only one route in route_master. Please check if it needs a return route (relation 162, "03: 1-2-3")' # noqa: E501 + ], + "warnings": [], + "notices": [ + 'Route does not have a return direction (relation 155, "02: 1-2-3")', # noqa: E501 + 'Route does not have a return direction (relation 158, "02: 1-3 (2)")', # noqa: E501 + 'Only one route in route_master. 
Please check if it needs a return route (relation 159, "C: 1-3-5-1")', # noqa: E501 + 'Route does not have a return direction (relation 163, "04: 1-2-3")', # noqa: E501 + 'Route does not have a return direction (relation 164, "04: 2-1")', # noqa: E501 + 'Stop Station 2 (1.0, 0.0) is included in the r203 but not included in r204 (relation 204, "2: 3-1")', # noqa: E501 + 'Route does not have a return direction (relation 205, "3: 1-2-3")', # noqa: E501 + 'Route does not have a return direction (relation 206, "3: 1-2-3")', # noqa: E501 + 'Route does not have a return direction (relation 207, "4: 4-3-2-1")', # noqa: E501 + 'Route does not have a return direction (relation 208, "4: 1-2-3-4")', # noqa: E501 + 'Route does not have a return direction (relation 209, "5: 1-2-3")', # noqa: E501 + 'Route does not have a return direction (relation 210, "5: 2-1")', # noqa: E501 + 'Only one route in route_master. Please check if it needs a return route (relation 213, "C3: 1-2-3-8-1")', # noqa: E501 + 'Route does not have a return direction (relation 168, "C5: 1-3-5-1")', # noqa: E501 + 'Route does not have a return direction (relation 169, "C5: 3-5-1-3")', # noqa: E501 + ], + }, +] diff --git a/subways/tests/sample_data_for_outputs.py b/subways/tests/sample_data_for_outputs.py new file mode 100644 index 00000000..fd2cf434 --- /dev/null +++ b/subways/tests/sample_data_for_outputs.py @@ -0,0 +1,692 @@ +metro_samples = [ + { + "name": "tiny_world", + "xml_file": """assets/tiny_world.osm""", + "cities_info": [ + { + "id": 1, + "name": "Intersecting 2 metro lines", + "num_stations": 4 + 2, + "num_lines": 2, + "num_interchanges": 1, + "networks": "network-1", + }, + { + "id": 2, + "name": "One light rail line", + "num_stations": 2, + "num_lines": 0, + "num_light_lines": 1, + "networks": "network-2", + }, + ], + "gtfs_dir": "assets/tiny_world_gtfs", + "transfers": [{"r1", "r2"}, {"r3", "r4"}], + "json_dump": """ +{ + "stopareas": { + "n1": { + "id": "n1", + "center": [ + 0, + 0 + ], + 
"name": "Station 1", + "entrances": [] + }, + "r1": { + "id": "r1", + "center": [ + 0.00470373068, + 0.0047037307 + ], + "name": "Station 2", + "entrances": [] + }, + "r3": { + "id": "r3", + "center": [ + 0.01012040581, + 0.0097589171 + ], + "name": "Station 3", + "entrances": [ + { + "id": "n201", + "name": null, + "ref": "3-1", + "center": [0.01007169217, 0.00967473055] + }, + { + "id": "n202", + "name": null, + "ref": "3-2", + "center": [0.01018702716, 0.00966936613] + } + ] + }, + "n4": { + "id": "n4", + "center": [ + 0, + 0.01 + ], + "name": "Station 4", + "entrances": [ + { + "id": "n205", + "name": null, + "ref": "4-1", + "center": [0.000201163, 0.01015484596] + } + ] + }, + "r2": { + "id": "r2", + "center": [ + 0.0047718624, + 0.00514739839 + ], + "name": "Station 5", + "entrances": [] + }, + "n6": { + "id": "n6", + "center": [ + 0.01, + 0 + ], + "name": "Station 6", + "entrances": [] + }, + "r4": { + "id": "r4", + "center": [ + 0.009716854315, + 0.010286367745 + ], + "name": "Station 7", + "entrances": [ + { + "id": "n204", + "name": null, + "ref": "7-1", + "center": [0.00952183932, 0.01034796501] + }, + { + "id": "n203", + "name": null, + "ref": "7-2", + "center": [0.00959962338, 0.01042574907] + } + ] + }, + "r16": { + "id": "r16", + "center": [ + 0.012405493905, + 0.014377764559999999 + ], + "name": "Station 8", + "entrances": [] + } + }, + "networks": { + "Intersecting 2 metro lines": { + "id": 1, + "name": "Intersecting 2 metro lines", + "routes": [ + { + "id": "r15", + "mode": "subway", + "ref": "1", + "name": "Blue Line", + "colour": "#0000ff", + "infill": null, + "itineraries": [ + { + "id": "r7", + "tracks": [ + [ + 0, + 0 + ], + [ + 0.00470373068, + 0.0047037307 + ], + [ + 0.009939661455227341, + 0.009939661455455193 + ] + ], + "start_time": null, + "end_time": null, + "interval": null, + "duration": null, + "stops": [ + { + "stoparea_id": "n1", + "distance": 0 + }, + { + "stoparea_id": "r1", + "distance": 741 + }, + { + "stoparea_id": "r3", + 
"distance": 1565 + } + ] + }, + { + "id": "r8", + "tracks": [ + [ + 0.009939661455227341, + 0.009939661455455193 + ], + [ + 0.00470373068, + 0.0047037307 + ], + [ + 0, + 0 + ] + ], + "start_time": null, + "end_time": null, + "interval": null, + "duration": null, + "stops": [ + { + "stoparea_id": "r3", + "distance": 0 + }, + { + "stoparea_id": "r1", + "distance": 824 + }, + { + "stoparea_id": "n1", + "distance": 1565 + } + ] + } + ] + }, + { + "id": "r14", + "mode": "subway", + "ref": "2", + "name": "Red Line", + "colour": "#ff0000", + "infill": null, + "itineraries": [ + { + "id": "r12", + "tracks": [ + [ + 0, + 0.01 + ], + [ + 0.01, + 0 + ] + ], + "start_time": null, + "end_time": null, + "interval": null, + "duration": 600, + "stops": [ + { + "stoparea_id": "n4", + "distance": 0 + }, + { + "stoparea_id": "r2", + "distance": 758 + }, + { + "stoparea_id": "n6", + "distance": 1575 + } + ] + }, + { + "id": "r13", + "tracks": [ + [ + 0.01, + 0 + ], + [ + 0, + 0.01 + ] + ], + "start_time": null, + "end_time": null, + "interval": null, + "duration": 480, + "stops": [ + { + "stoparea_id": "n6", + "distance": 0 + }, + { + "stoparea_id": "r2", + "distance": 817 + }, + { + "stoparea_id": "n4", + "distance": 1575 + } + ] + } + ] + } + ] + }, + "One light rail line": { + "id": 2, + "name": "One light rail line", + "routes": [ + { + "id": "r11", + "mode": "light_rail", + "ref": "LR", + "name": "LR Line", + "colour": "#a52a2a", + "infill": "#ffffff", + "itineraries": [ + { + "id": "r9", + "tracks": [ + [ + 0.00976752835, + 0.01025306758 + ], + [ + 0.01245616794, + 0.01434446439 + ] + ], + "start_time": null, + "end_time": null, + "interval": null, + "duration": 300, + "stops": [ + { + "stoparea_id": "r4", + "distance": 0 + }, + { + "stoparea_id": "r16", + "distance": 545 + } + ] + }, + { + "id": "r10", + "tracks": [ + [ + 0.012321033122529725, + 0.014359650255679167 + ], + [ + 0.00966618028, + 0.01031966791 + ] + ], + "start_time": null, + "end_time": null, + "interval": null, 
+ "duration": 300, + "stops": [ + { + "stoparea_id": "r16", + "distance": 0 + }, + { + "stoparea_id": "r4", + "distance": 538 + } + ] + } + ] + } + ] + } + }, + "transfers": [ + [ + "r1", + "r2" + ], + [ + "r3", + "r4" + ] + ] +} +""", + "mapsme_output": { + "stops": [ + { + "name": "Station 1", + "int_name": None, + "lat": 0.0, + "lon": 0.0, + "osm_type": "node", + "osm_id": 1, + "id": 8, + "entrances": [ + { + "osm_type": "node", + "osm_id": 1, + "lon": 0.0, + "lat": 0.0, + "distance": 60, + } + ], + "exits": [ + { + "osm_type": "node", + "osm_id": 1, + "lon": 0.0, + "lat": 0.0, + "distance": 60, + } + ], + }, + { + "name": "Station 2", + "int_name": None, + "lat": 0.0047037307, + "lon": 0.00470373068, + "osm_type": "node", + "osm_id": 2, + "id": 14, + "entrances": [ + { + "osm_type": "node", + "osm_id": 2, + "lon": 0.0047209447, + "lat": 0.004686516680000001, + "distance": 60, + } + ], + "exits": [ + { + "osm_type": "node", + "osm_id": 2, + "lon": 0.0047209447, + "lat": 0.004686516680000001, + "distance": 60, + } + ], + }, + { + "name": "Station 3", + "int_name": None, + "lat": 0.0097589171, + "lon": 0.01012040581, + "osm_type": "node", + "osm_id": 3, + "id": 30, + "entrances": [ + { + "osm_type": "node", + "osm_id": 201, + "lon": 0.01007169217, + "lat": 0.00967473055, + "distance": 68, + }, + { + "osm_type": "node", + "osm_id": 202, + "lon": 0.01018702716, + "lat": 0.00966936613, + "distance": 69, + }, + ], + "exits": [ + { + "osm_type": "node", + "osm_id": 201, + "lon": 0.01007169217, + "lat": 0.00967473055, + "distance": 68, + }, + { + "osm_type": "node", + "osm_id": 202, + "lon": 0.01018702716, + "lat": 0.00966936613, + "distance": 69, + }, + ], + }, + { + "name": "Station 4", + "int_name": None, + "lat": 0.01, + "lon": 0.0, + "osm_type": "node", + "osm_id": 4, + "id": 32, + "entrances": [ + { + "osm_type": "node", + "osm_id": 205, + "lon": 0.000201163, + "lat": 0.01015484596, + "distance": 80, + } + ], + "exits": [ + { + "osm_type": "node", + "osm_id": 205, 
+ "lon": 0.000201163, + "lat": 0.01015484596, + "distance": 80, + } + ], + }, + { + "name": "Station 5", + "int_name": None, + "lat": 0.00514739839, + "lon": 0.0047718624, + "osm_type": "node", + "osm_id": 5, + "id": 22, + "entrances": [ + { + "osm_type": "node", + "osm_id": 5, + "lon": 0.0047718624, + "lat": 0.00514739839, + "distance": 60, + } + ], + "exits": [ + { + "osm_type": "node", + "osm_id": 5, + "lon": 0.0047718624, + "lat": 0.00514739839, + "distance": 60, + } + ], + }, + { + "name": "Station 6", + "int_name": None, + "lat": 0.0, + "lon": 0.01, + "osm_type": "node", + "osm_id": 6, + "id": 48, + "entrances": [ + { + "osm_type": "node", + "osm_id": 6, + "lon": 0.01, + "lat": 0.0, + "distance": 60, + } + ], + "exits": [ + { + "osm_type": "node", + "osm_id": 6, + "lon": 0.01, + "lat": 0.0, + "distance": 60, + } + ], + }, + { + "name": "Station 7", + "int_name": None, + "lat": 0.010286367745, + "lon": 0.009716854315, + "osm_type": "node", + "osm_id": 7, + "id": 38, + "entrances": [ + { + "osm_type": "node", + "osm_id": 203, + "lon": 0.00959962338, + "lat": 0.01042574907, + "distance": 75, + }, + { + "osm_type": "node", + "osm_id": 204, + "lon": 0.00952183932, + "lat": 0.01034796501, + "distance": 76, + }, + ], + "exits": [ + { + "osm_type": "node", + "osm_id": 203, + "lon": 0.00959962338, + "lat": 0.01042574907, + "distance": 75, + }, + { + "osm_type": "node", + "osm_id": 204, + "lon": 0.00952183932, + "lat": 0.01034796501, + "distance": 76, + }, + ], + }, + { + "name": "Station 8", + "int_name": None, + "lat": 0.014377764559999999, + "lon": 0.012405493905, + "osm_type": "node", + "osm_id": 8, + "id": 134, + "entrances": [ + { + "osm_type": "node", + "osm_id": 8, + "lon": 0.012391026016666667, + "lat": 0.01436273297, + "distance": 60, + } + ], + "exits": [ + { + "osm_type": "node", + "osm_id": 8, + "lon": 0.012391026016666667, + "lat": 0.01436273297, + "distance": 60, + } + ], + }, + ], + "transfers": [(14, 22, 81), (30, 38, 106)], + "networks": [ + { + 
"network": "Intersecting 2 metro lines", + "routes": [ + { + "type": "subway", + "ref": "1", + "name": "Blue Line", + "colour": "0000ff", + "route_id": 30, + "itineraries": [ + { + "stops": [[8, 0], [14, 67], [30, 141]], + "interval": 150, + }, + { + "stops": [[30, 0], [14, 74], [8, 141]], + "interval": 150, + }, + ], + }, + { + "type": "subway", + "ref": "2", + "name": "Red Line", + "colour": "ff0000", + "route_id": 28, + "itineraries": [ + { + "stops": [[32, 0], [22, 68], [48, 142]], + "interval": 150, + }, + { + "stops": [[48, 0], [22, 74], [32, 142]], + "interval": 150, + }, + ], + }, + ], + "agency_id": 1, + }, + { + "network": "One light rail line", + "routes": [ + { + "type": "light_rail", + "ref": "LR", + "name": "LR Line", + "colour": "ffffff", + "route_id": 22, + "itineraries": [ + { + "stops": [[38, 0], [134, 49]], + "interval": 150, + }, + { + "stops": [[134, 0], [38, 48]], + "interval": 150, + }, + ], + "casing": "a52a2a", + } + ], + "agency_id": 2, + }, + ], + }, + }, +] diff --git a/subways/tests/sample_data_for_twin_routes.py b/subways/tests/sample_data_for_twin_routes.py new file mode 100644 index 00000000..5847632a --- /dev/null +++ b/subways/tests/sample_data_for_twin_routes.py @@ -0,0 +1,78 @@ +metro_samples = [ + { + "name": ( + "Many different routes, both on naked stations and stop_positions/stop_areas/transfers, both linear and circular" # noqa: E501 + ), + "xml_file": "assets/twin_routes.osm", + "cities_info": [ + { + "num_stations": (3 + 4 + 5 + 5) + (3 + 6 + 7 + 5 + 6 + 7 + 7), + "num_lines": 4 + 7, + "num_interchanges": 0 + 2, + }, + ], + "twin_routes": { # route master => twin routes + "r10021": {"r151": "r153", "r153": "r151"}, + "r10022": {}, + "r10023": {}, + "C": {}, + "r10001": {"r201": "r202", "r202": "r201"}, + "r10002": {}, + "r10003": {"r205": "r206", "r206": "r205"}, + "r10004": {}, + "r10005": {}, + "r10006": {}, + "C3": {}, + }, + "errors": [], + "warnings": [], + "notices": [ + 'Route does not have a return direction 
(relation 154, "02: 4-3")', + 'Route does not have a return direction (relation 155, "02: 1-3")', + 'Route does not have a return direction (relation 156, "02: 2-4")', + 'Route does not have a return direction (relation 157, "02: 4-1")', + 'Route does not have a return direction (relation 158, "02: 1-3 (2)")', # noqa: E501 + 'Only one route in route_master. Please check if it needs a return route (relation 159, "C: 1-2-3-4-5-1")', # noqa: E501 + 'Stop Station 4 (3.0, 0.0) is included in the r205 but not included in r206 (relation 206, "3: 7-6-5-3-2-1")', # noqa: E501 + 'Route does not have a return direction (relation 207, "4: 4-3-2-1")', # noqa: E501 + 'Route does not have a return direction (relation 208, "4: 1-2-3-4")', # noqa: E501 + 'Route does not have a return direction (relation 209, "5: 1-2-3-5-6-7")', # noqa: E501 + 'Route does not have a return direction (relation 210, "5: 6-5-3-2-1")', # noqa: E501 + 'Only one route in route_master. Please check if it needs a return route (relation 213, "C3: 1-2-3-5-6-7-8-1")', # noqa: E501 + ], + }, + { + "name": "Twin routes diverging for some extent", + "xml_file": "assets/twin_routes_with_divergence.osm", + "cities_info": [ + { + "num_stations": (22 + 22 + 21 + 21) * 2, + "num_lines": 4 * 2, + "num_interchanges": 0, + }, + ], + "twin_routes": { # route master => twin routes + "r1101": {"r101": "r102", "r102": "r101"}, + "r1102": {"r103": "r104", "r104": "r103"}, + "r1103": {"r105": "r106", "r106": "r105"}, + "r1104": {"r107": "r108", "r108": "r107"}, + "r1201": {"r201": "r202", "r202": "r201"}, + "r1202": {"r203": "r204", "r204": "r203"}, + "r1203": {"r205": "r206", "r206": "r205"}, + "r1204": {"r207": "r208", "r208": "r207"}, + }, + "errors": [], + "warnings": [], + "notices": [ + 'Should there be one stoparea or a transfer between Station 11 (0.1, 0.0) and Station 11(1) (0.1, 0.0003)? 
(relation 101, "1: 1-...-9-10-11-...-20")', # noqa: E501 + 'Should there be one stoparea or a transfer between Station 10 (0.09, 0.0) and Station 10(1) (0.09, 0.0003)? (relation 101, "1: 1-...-9-10-11-...-20")', # noqa: E501 + 'Stop Station 10 (0.09, 0.0) is included in the r105 but not included in r106 (relation 106, "3: 20-...-12-11(1)-9-...-1")', # noqa: E501 + 'Should there be one stoparea or a transfer between Station 11 (0.1, 0.0) and Station 11(1) (0.1, 0.0003)? (relation 105, "3: 1-...-9-10-11-...-20")', # noqa: E501 + 'Stop Station 10 (0.09, 0.0) is included in the r107 but not included in r108 (relation 108, "4: 20-...12-11(2)-9-...-1")', # noqa: E501 + 'Should there be one stoparea or a transfer between Station 11 (0.1, 0.0) and Station 11(1) (0.1, 0.0003)? (relation 201, "11: 1-...-9-10-11-...-20")', # noqa: E501 + 'Should there be one stoparea or a transfer between Station 10 (0.09, 0.0) and Station 10(1) (0.09, 0.0003)? (relation 201, "11: 1-...-9-10-11-...-20")', # noqa: E501 + 'Stop Station 10 (0.09, 0.0) is included in the r205 but not included in r206 (relation 206, "13: 20-...-12-11(1)-9-...-1")', # noqa: E501 + 'Should there be one stoparea or a transfer between Station 11 (0.1, 0.0) and Station 11(1) (0.1, 0.0003)? 
(relation 205, "13: 1-...-9-10-11-...-20")', # noqa: E501 + ], + }, +] diff --git a/tests/test_build_tracks.py b/subways/tests/test_build_tracks.py similarity index 54% rename from tests/test_build_tracks.py rename to subways/tests/test_build_tracks.py index 14ea86b5..2bd4108b 100644 --- a/tests/test_build_tracks.py +++ b/subways/tests/test_build_tracks.py @@ -1,24 +1,13 @@ -""" -To perform tests manually, run this command from the top directory -of the repository: +from subways.tests.sample_data_for_build_tracks import metro_samples +from subways.tests.util import JsonLikeComparisonMixin, TestCase -> python -m unittest discover tests -or simply - -> python -m unittest -""" - - -from tests.sample_data_for_build_tracks import sample_networks -from tests.util import TestCase - - -class TestOneRouteTracks(TestCase): +class TestOneRouteTracks(JsonLikeComparisonMixin, TestCase): """Test tracks extending and truncating on one-route networks""" - def prepare_city_routes(self, network) -> tuple: - city = self.validate_city(network) + def prepare_city_routes(self, metro_sample: dict) -> tuple: + cities, transfers = self.prepare_cities(metro_sample) + city = cities[0] self.assertTrue(city.is_good) @@ -30,56 +19,56 @@ def prepare_city_routes(self, network) -> tuple: return fwd_route, bwd_route - def _test_tracks_extending_for_network(self, network_data): - fwd_route, bwd_route = self.prepare_city_routes(network_data) + def _test_tracks_extending_for_network(self, metro_sample: dict) -> None: + fwd_route, bwd_route = self.prepare_city_routes(metro_sample) self.assertEqual( fwd_route.tracks, - network_data["tracks"], + metro_sample["tracks"], "Wrong tracks", ) extended_tracks = fwd_route.get_extended_tracks() self.assertEqual( extended_tracks, - network_data["extended_tracks"], + metro_sample["extended_tracks"], "Wrong tracks after extending", ) self.assertEqual( bwd_route.tracks, - network_data["tracks"][::-1], + metro_sample["tracks"][::-1], "Wrong backward tracks", ) 
extended_tracks = bwd_route.get_extended_tracks() self.assertEqual( extended_tracks, - network_data["extended_tracks"][::-1], + metro_sample["extended_tracks"][::-1], "Wrong backward tracks after extending", ) - def _test_tracks_truncating_for_network(self, network_data): - fwd_route, bwd_route = self.prepare_city_routes(network_data) + def _test_tracks_truncating_for_network(self, metro_sample: dict) -> None: + fwd_route, bwd_route = self.prepare_city_routes(metro_sample) truncated_tracks = fwd_route.get_truncated_tracks(fwd_route.tracks) self.assertEqual( truncated_tracks, - network_data["truncated_tracks"], + metro_sample["truncated_tracks"], "Wrong tracks after truncating", ) truncated_tracks = bwd_route.get_truncated_tracks(bwd_route.tracks) self.assertEqual( truncated_tracks, - network_data["truncated_tracks"][::-1], + metro_sample["truncated_tracks"][::-1], "Wrong backward tracks after truncating", ) - def _test_stop_positions_on_rails_for_network(self, network_data): - fwd_route, bwd_route = self.prepare_city_routes(network_data) + def _test_stop_positions_on_rails_for_network(self, sample: dict) -> None: + fwd_route, bwd_route = self.prepare_city_routes(sample) for route, route_label in zip( (fwd_route, bwd_route), ("forward", "backward") ): - route_data = network_data[route_label] + route_data = sample[route_label] for attr in ( "first_stop_on_rails_index", @@ -97,21 +86,27 @@ def _test_stop_positions_on_rails_for_network(self, network_data): rs.positions_on_rails for rs in route.stops[first_ind : last_ind + 1] # noqa E203 ] - self.assertListAlmostEqual( + self.assertSequenceAlmostEqual( positions_on_rails, route_data["positions_on_rails"] ) def test_tracks_extending(self) -> None: - for network_name, network_data in sample_networks.items(): - with self.subTest(msg=network_name): - self._test_tracks_extending_for_network(network_data) + for sample in metro_samples: + sample_name = sample["name"] + sample["cities_info"][0]["name"] = sample_name + with 
self.subTest(msg=sample_name): + self._test_tracks_extending_for_network(sample) def test_tracks_truncating(self) -> None: - for network_name, network_data in sample_networks.items(): - with self.subTest(msg=network_name): - self._test_tracks_truncating_for_network(network_data) + for sample in metro_samples: + sample_name = sample["name"] + sample["cities_info"][0]["name"] = sample_name + with self.subTest(msg=sample_name): + self._test_tracks_truncating_for_network(sample) def test_stop_position_on_rails(self) -> None: - for network_name, network_data in sample_networks.items(): - with self.subTest(msg=network_name): - self._test_stop_positions_on_rails_for_network(network_data) + for sample in metro_samples: + sample_name = sample["name"] + sample["cities_info"][0]["name"] = sample_name + with self.subTest(msg=sample_name): + self._test_stop_positions_on_rails_for_network(sample) diff --git a/tests/test_center_calculation.py b/subways/tests/test_center_calculation.py similarity index 55% rename from tests/test_center_calculation.py rename to subways/tests/test_center_calculation.py index 4f01a3cf..5c83627b 100644 --- a/tests/test_center_calculation.py +++ b/subways/tests/test_center_calculation.py @@ -1,28 +1,28 @@ -import json -from pathlib import Path +import io from unittest import TestCase -from process_subways import calculate_centers -from subway_io import load_xml +from subways.validation import calculate_centers +from subways.subway_io import load_xml +from subways.tests.sample_data_for_center_calculation import metro_samples class TestCenterCalculation(TestCase): """Test center calculation. Test data [should] contain among others the following edge cases: - - an empty relation. It's element should not obtain "center" key. - - relation as member of relation, the child relation following the parent - in the OSM XML file. + - an empty relation. Its element should not obtain "center" key. 
+ - relation as member of another relation, the child relation following + the parent in the OSM XML. - relation with incomplete members (broken references). - relations with cyclic references. """ - ASSETS_PATH = Path(__file__).resolve().parent / "assets" - OSM_DATA = str(ASSETS_PATH / "kuntsevskaya_transfer.osm") - CORRECT_CENTERS = str(ASSETS_PATH / "kuntsevskaya_centers.json") - - def test__calculate_centers(self) -> None: - elements = load_xml(self.OSM_DATA) + def test_calculate_centers(self) -> None: + for sample in metro_samples: + with self.subTest(msg=sample["name"]): + self._test_calculate_centers_for_sample(sample) + def _test_calculate_centers_for_sample(self, metro_sample: dict) -> None: + elements = load_xml(io.BytesIO(metro_sample["xml"].encode())) calculate_centers(elements) elements_dict = { @@ -36,12 +36,11 @@ def test__calculate_centers(self) -> None: if "center" in el } - with open(self.CORRECT_CENTERS) as f: - correct_centers = json.load(f) + expected_centers = metro_sample["expected_centers"] - self.assertTrue(set(calculated_centers).issubset(correct_centers)) + self.assertTrue(set(calculated_centers).issubset(expected_centers)) - for k, correct_center in correct_centers.items(): + for k, correct_center in expected_centers.items(): if correct_center is None: self.assertNotIn("center", elements_dict[k]) else: diff --git a/subways/tests/test_error_messages.py b/subways/tests/test_error_messages.py new file mode 100644 index 00000000..d879c85e --- /dev/null +++ b/subways/tests/test_error_messages.py @@ -0,0 +1,36 @@ +import itertools + +from subways.tests.sample_data_for_error_messages import ( + metro_samples as metro_samples_error, +) +from subways.tests.sample_data_for_twin_routes import ( + metro_samples as metro_samples_route_masters, +) +from subways.tests.util import TestCase + + +class TestValidationMessages(TestCase): + """Test that the validator provides expected validation messages + on different types of errors in input OSM data. 
+ """ + + def _test_validation_messages_for_network( + self, metro_sample: dict + ) -> None: + cities, transfers = self.prepare_cities(metro_sample) + city = cities[0] + + for err_level in ("errors", "warnings", "notices"): + self.assertListEqual( + sorted(getattr(city, err_level)), + sorted(metro_sample[err_level]), + ) + + def test_validation_messages(self) -> None: + for sample in itertools.chain( + metro_samples_error, metro_samples_route_masters + ): + if "errors" not in sample: + continue + with self.subTest(msg=sample["name"]): + self._test_validation_messages_for_network(sample) diff --git a/subways/tests/test_find_transfers.py b/subways/tests/test_find_transfers.py new file mode 100644 index 00000000..294304f7 --- /dev/null +++ b/subways/tests/test_find_transfers.py @@ -0,0 +1,30 @@ +from copy import deepcopy + +from subways.tests.sample_data_for_outputs import metro_samples +from subways.tests.util import TestCase, JsonLikeComparisonMixin + + +class TestTransfers(JsonLikeComparisonMixin, TestCase): + """Test that the validator provides expected set of transfers.""" + + def _test__find_transfers__for_sample(self, metro_sample: dict) -> None: + cities, transfers = self.prepare_cities(metro_sample) + expected_transfers = metro_sample["transfers"] + + self.assertSequenceAlmostEqualIgnoreOrder( + expected_transfers, + transfers, + cmp=lambda transfer_as_set: sorted(transfer_as_set), + ) + + def test__find_transfers(self) -> None: + sample1 = metro_samples[0] + + sample2 = deepcopy(metro_samples[0]) + # Make the second city invalid and thus exclude the inter-city transfer + sample2["cities_info"][1]["num_stations"] += 1 + sample2["transfers"] = [{"r1", "r2"}] + + for sample in sample1, sample2: + with self.subTest(msg=sample["name"]): + self._test__find_transfers__for_sample(sample) diff --git a/tests/test_gtfs_processor.py b/subways/tests/test_gtfs_processor.py similarity index 54% rename from tests/test_gtfs_processor.py rename to 
subways/tests/test_gtfs_processor.py index 5a234e86..d5a4dcff 100644 --- a/tests/test_gtfs_processor.py +++ b/subways/tests/test_gtfs_processor.py @@ -1,9 +1,15 @@ -from unittest import TestCase +import csv +from functools import partial +from pathlib import Path -from processors.gtfs import ( +from subways.processors._common import transit_to_dict +from subways.processors.gtfs import ( dict_to_row, GTFS_COLUMNS, + transit_data_to_gtfs, ) +from subways.tests.sample_data_for_outputs import metro_samples +from subways.tests.util import TestCase class TestGTFS(TestCase): @@ -94,3 +100,61 @@ def test__dict_to_row__numeric_values(self) -> None: self.assertListEqual( dict_to_row(shape["shape_data"], "shapes"), shape["answer"] ) + + def test__transit_data_to_gtfs(self) -> None: + for metro_sample in metro_samples: + cities, transfers = self.prepare_cities(metro_sample) + calculated_transit_data = transit_to_dict(cities, transfers) + calculated_gtfs_data = transit_data_to_gtfs( + calculated_transit_data + ) + + control_gtfs_data = self._readGtfs( + Path(__file__).resolve().parent / metro_sample["gtfs_dir"] + ) + self._compareGtfs(calculated_gtfs_data, control_gtfs_data) + + @staticmethod + def _readGtfs(gtfs_dir: Path) -> dict: + gtfs_data = dict() + for gtfs_feature in GTFS_COLUMNS: + with open(gtfs_dir / f"{gtfs_feature}.txt") as f: + reader = csv.reader(f) + next(reader) # read header + rows = list(reader) + gtfs_data[gtfs_feature] = rows + return gtfs_data + + def _compareGtfs( + self, calculated_gtfs_data: dict, control_gtfs_data: dict + ) -> None: + for gtfs_feature in GTFS_COLUMNS: + calculated_rows = sorted( + map( + partial(dict_to_row, record_type=gtfs_feature), + calculated_gtfs_data[gtfs_feature], + ) + ) + control_rows = sorted(control_gtfs_data[gtfs_feature]) + + self.assertEqual(len(calculated_rows), len(control_rows)) + + for i, (calculated_row, control_row) in enumerate( + zip(calculated_rows, control_rows) + ): + self.assertEqual( + len(calculated_row), + 
len(control_row), + f"Different length of {i}-th row of {gtfs_feature}", + ) + for calculated_value, control_value in zip( + calculated_row, control_row + ): + if calculated_value is None: + self.assertEqual(control_value, "", f"in {i}-th row") + else: # convert str to float/int/str + self.assertAlmostEqual( + calculated_value, + type(calculated_value)(control_value), + places=10, + ) diff --git a/subways/tests/test_mapsme_processor.py b/subways/tests/test_mapsme_processor.py new file mode 100644 index 00000000..c77fc6a4 --- /dev/null +++ b/subways/tests/test_mapsme_processor.py @@ -0,0 +1,53 @@ +from operator import itemgetter + +from subways.processors.mapsme import transit_data_to_mapsme +from subways.tests.sample_data_for_outputs import metro_samples +from subways.tests.util import JsonLikeComparisonMixin, TestCase + + +class TestMapsme(JsonLikeComparisonMixin, TestCase): + """Test processors/mapsme.py""" + + def test__transit_data_to_mapsme(self) -> None: + for sample in metro_samples: + with self.subTest(msg=sample["name"]): + self._test__transit_data_to_mapsme__for_sample(sample) + + def _test__transit_data_to_mapsme__for_sample( + self, metro_sample: dict + ) -> None: + cities, transfers = self.prepare_cities(metro_sample) + calculated_mapsme_data = transit_data_to_mapsme( + cities, transfers, cache_path=None + ) + control_mapsme_data = metro_sample["mapsme_output"] + + self.assertSetEqual( + set(control_mapsme_data.keys()), + set(calculated_mapsme_data.keys()), + ) + + self.assertSequenceAlmostEqualIgnoreOrder( + control_mapsme_data["stops"], + calculated_mapsme_data["stops"], + cmp=itemgetter("id"), + unordered_lists={ + "entrances": lambda e: (e["osm_type"], e["osm_id"]), + "exits": lambda e: (e["osm_type"], e["osm_id"]), + }, + ) + + self.assertSequenceAlmostEqualIgnoreOrder( + control_mapsme_data["transfers"], + calculated_mapsme_data["transfers"], + ) + + self.assertSequenceAlmostEqualIgnoreOrder( + control_mapsme_data["networks"], + 
calculated_mapsme_data["networks"], + cmp=itemgetter("network"), + unordered_lists={ + "routes": itemgetter("route_id"), + "itineraries": lambda it: (it["stops"], it["interval"]), + }, + ) diff --git a/subways/tests/test_overpass.py b/subways/tests/test_overpass.py new file mode 100644 index 00000000..beb03ef8 --- /dev/null +++ b/subways/tests/test_overpass.py @@ -0,0 +1,163 @@ +from unittest import TestCase, mock + +from subways.overpass import compose_overpass_request, overpass_request + + +class TestOverpassQuery(TestCase): + def test__compose_overpass_request__no_bboxes(self) -> None: + bboxes = [] + for overground in (True, False): + with self.subTest(msg=f"{overground=}"): + with self.assertRaises(RuntimeError): + compose_overpass_request(overground, bboxes) + + def test__compose_overpass_request__one_bbox(self) -> None: + bboxes = [[1, 2, 3, 4]] + + expected = { + False: ( + "[out:json][timeout:1000];" + "(" + "(" + 'rel[route="light_rail"](1,2,3,4);' + 'rel[route="monorail"](1,2,3,4);' + 'rel[route="subway"](1,2,3,4);' + 'rel[route="train"](1,2,3,4);' + ");" + "rel(br)[type=route_master];" + "node[railway=subway_entrance](1,2,3,4);" + "node[railway=train_station_entrance](1,2,3,4);" + "rel[public_transport=stop_area](1,2,3,4);" + "rel(br)[type=public_transport]" + "[public_transport=stop_area_group];" + ");" + "(._;>>;);" + "out body center qt;" + ), + True: ( + "[out:json][timeout:1000];" + "(" + "(" + 'rel[route="aerialway"](1,2,3,4);' + 'rel[route="bus"](1,2,3,4);' + 'rel[route="ferry"](1,2,3,4);' + 'rel[route="tram"](1,2,3,4);' + 'rel[route="trolleybus"](1,2,3,4);' + ");" + "rel(br)[type=route_master];" + "rel[public_transport=stop_area](1,2,3,4);" + "rel(br)[type=public_transport]" + "[public_transport=stop_area_group];" + ");" + "(._;>>;);" + "out body center qt;" + ), + } + + for overground, expected_answer in expected.items(): + with self.subTest(msg=f"{overground=}"): + self.assertEqual( + expected_answer, + compose_overpass_request(overground, 
bboxes), + ) + + def test__compose_overpass_request__several_bboxes(self) -> None: + bboxes = [[1, 2, 3, 4], [5, 6, 7, 8]] + + expected = { + False: ( + "[out:json][timeout:1000];" + "(" + "(" + 'rel[route="light_rail"](1,2,3,4);' + 'rel[route="monorail"](1,2,3,4);' + 'rel[route="subway"](1,2,3,4);' + 'rel[route="train"](1,2,3,4);' + ");" + "rel(br)[type=route_master];" + "node[railway=subway_entrance](1,2,3,4);" + "node[railway=train_station_entrance](1,2,3,4);" + "rel[public_transport=stop_area](1,2,3,4);" + "rel(br)[type=public_transport][public_transport=stop_area_group];" # noqa E501 + "(" + 'rel[route="light_rail"](5,6,7,8);' + 'rel[route="monorail"](5,6,7,8);' + 'rel[route="subway"](5,6,7,8);' + 'rel[route="train"](5,6,7,8);' + ");" + "rel(br)[type=route_master];" + "node[railway=subway_entrance](5,6,7,8);" + "node[railway=train_station_entrance](5,6,7,8);" + "rel[public_transport=stop_area](5,6,7,8);" + "rel(br)[type=public_transport][public_transport=stop_area_group];" # noqa E501 + ");" + "(._;>>;);" + "out body center qt;" + ), + True: ( + "[out:json][timeout:1000];" + "(" + "(" + 'rel[route="aerialway"](1,2,3,4);' + 'rel[route="bus"](1,2,3,4);' + 'rel[route="ferry"](1,2,3,4);' + 'rel[route="tram"](1,2,3,4);' + 'rel[route="trolleybus"](1,2,3,4);' + ");" + "rel(br)[type=route_master];" + "rel[public_transport=stop_area](1,2,3,4);" + "rel(br)[type=public_transport][public_transport=stop_area_group];" # noqa E501 + "(" + 'rel[route="aerialway"](5,6,7,8);' + 'rel[route="bus"](5,6,7,8);' + 'rel[route="ferry"](5,6,7,8);' + 'rel[route="tram"](5,6,7,8);' + 'rel[route="trolleybus"](5,6,7,8);' + ");" + "rel(br)[type=route_master];" + "rel[public_transport=stop_area](5,6,7,8);" + "rel(br)[type=public_transport][public_transport=stop_area_group];" # noqa E501 + ");" + "(._;>>;);" + "out body center qt;" + ), + } + + for overground, expected_answer in expected.items(): + with self.subTest(msg=f"{overground=}"): + self.assertEqual( + expected_answer, + 
compose_overpass_request(overground, bboxes), + ) + + def test__overpass_request(self) -> None: + overpass_api = "http://overpass.example/" + overground = False + bboxes = [[1, 2, 3, 4]] + expected_url = ( + "http://overpass.example/?data=" + "%5Bout%3Ajson%5D%5Btimeout%3A1000%5D%3B%28%28" + "rel%5Broute%3D%22light_rail%22%5D%281%2C2%2C3%2C4" + "%29%3Brel%5Broute%3D%22monorail%22%5D%281%2C2%2C3%2C4%29%3B" + "rel%5Broute%3D%22subway%22%5D%281%2C2%2C3%2C4%29%3B" + "rel%5Broute%3D%22train%22%5D%281%2C2%2C3%2C4%29%3B%29%3B" + "rel%28br%29%5Btype%3Droute_master%5D%3B" + "node%5Brailway%3Dsubway_entrance%5D%281%2C2%2C3%2C4%29%3B" + "node%5Brailway%3Dtrain_station_entrance%5D%281%2C2%2C3%2C4%29%3B" + "rel%5Bpublic_transport%3Dstop_area%5D%281%2C2%2C3%2C4%29%3B" + "rel%28br%29%5Btype%3Dpublic_transport%5D%5Bpublic_transport%3D" + "stop_area_group%5D%3B%29%3B" + "%28._%3B%3E%3E%3B%29%3Bout%20body%20center%20qt%3B" + ) + + with mock.patch("subways.overpass.json.load") as load_mock: + load_mock.return_value = {"elements": []} + + with mock.patch( + "subways.overpass.urllib.request.urlopen" + ) as urlopen_mock: + urlopen_mock.return_value.getcode.return_value = 200 + + overpass_request(overground, overpass_api, bboxes) + + urlopen_mock.assert_called_once_with(expected_url, timeout=1000) diff --git a/tests/test_prepare_cities.py b/subways/tests/test_prepare_cities.py similarity index 96% rename from tests/test_prepare_cities.py rename to subways/tests/test_prepare_cities.py index 63ddce68..09679c75 100644 --- a/tests/test_prepare_cities.py +++ b/subways/tests/test_prepare_cities.py @@ -2,7 +2,7 @@ from pathlib import Path from unittest import TestCase -from process_subways import prepare_cities +from subways.validation import prepare_cities class TestPrepareCities(TestCase): diff --git a/tests/test_projection.py b/subways/tests/test_projection.py similarity index 86% rename from tests/test_projection.py rename to subways/tests/test_projection.py index b0091aa3..770232c6 100644 
--- a/tests/test_projection.py +++ b/subways/tests/test_projection.py @@ -2,17 +2,23 @@ import itertools import unittest -from subway_structure import project_on_segment +from subways.geom_utils import project_on_segment +from subways.types import LonLat class TestProjection(unittest.TestCase): - """Test subway_structure.project_on_segment function""" + """Test subways.geom_utils.project_on_segment function""" PRECISION = 10 # decimal places in assertAlmostEqual SHIFT = 1e-6 # Small distance between projected point and segment endpoint - def _test_projection_in_bulk(self, points, segments, answers): + def _test_projection_in_bulk( + self, + points: list[LonLat], + segments: list[tuple[LonLat, LonLat]], + answers: list[float | None], + ) -> None: """Test 'project_on_segment' function for array of points and array of parallel segments projections on which are equal. """ @@ -39,7 +45,7 @@ def _test_projection_in_bulk(self, points, segments, answers): f"{segment}: {u} returned, {answer} expected", ) - def test_projection_on_horizontal_segments(self): + def test_projection_on_horizontal_segments(self) -> None: points = [ (-2, 0), (-1 - self.SHIFT, 0), @@ -74,7 +80,7 @@ def test_projection_on_horizontal_segments(self): self._test_projection_in_bulk(points, horizontal_segments, answers) - def test_projection_on_vertical_segments(self): + def test_projection_on_vertical_segments(self) -> None: points = [ (0, -2), (0, -1 - self.SHIFT), @@ -109,7 +115,7 @@ def test_projection_on_vertical_segments(self): self._test_projection_in_bulk(points, vertical_segments, answers) - def test_projection_on_inclined_segment(self): + def test_projection_on_inclined_segment(self) -> None: points = [ (-2, -2), (-1, -1), @@ -128,7 +134,7 @@ def test_projection_on_inclined_segment(self): self._test_projection_in_bulk(points, segments, answers) - def test_projection_with_different_collections(self): + def test_projection_with_different_collections(self) -> None: """The tested function should 
accept points as any consecutive container with index operator. """ @@ -148,7 +154,7 @@ def test_projection_with_different_collections(self): s2 = s2_type(segment_end2) project_on_segment(p, s1, s2) - def test_projection_on_degenerate_segment(self): + def test_projection_on_degenerate_segment(self) -> None: coords = [-1, 0, 1] points = [(x, y) for x, y in itertools.product(coords, coords)] segments = [ diff --git a/subways/tests/test_route.py b/subways/tests/test_route.py new file mode 100644 index 00000000..ec82e41f --- /dev/null +++ b/subways/tests/test_route.py @@ -0,0 +1,141 @@ +from unittest import TestCase + +from subways.structure.route import ( + get_interval_in_seconds_from_tags, + osm_interval_to_seconds, + parse_time_range, +) + + +class TestTimeIntervalsParsing(TestCase): + def test__osm_interval_to_seconds__invalid_value(self) -> None: + intervals = ( + ["", "abc", "x30", "30x", "3x0"] + + ["5:", ":5", "01:05:", ":01:05", "01:01:00:", ":01:01:00"] + + ["01x:05", "01:x5", "x5:01:00", "01:0x:00", "01:01:x"] + + ["-5", "01:-05", "-01:05", "-01:00:00", "01:-01:00", "01:01:-01"] + + ["0", "00:00", "00:00:00"] + + ["00:60", "01:00:60", "01:60:00"] + + ["01:60:61", "01:61:60", "01:61:61"] + ) + for interval in intervals: + with self.subTest(msg=f"value='{interval}'"): + self.assertIsNone(osm_interval_to_seconds(interval)) + + def test__osm_interval_to_seconds__valid_value(self) -> None: + intervals = { + "5": 300, + "65": 3900, + "10:55": 39300, + "02:02:02": 7322, + "2:2:2": 7322, + "00:59": 3540, + "01:00": 3600, + "00:00:50": 50, + "00:10:00": 600, + "01:00:00": 3600, + } + + for interval_str, interval_sec in intervals.items(): + with self.subTest(msg=f"value='{interval_str}'"): + self.assertEqual( + interval_sec, osm_interval_to_seconds(interval_str) + ) + + def test__parse_time_range__invalid_values(self) -> None: + ranges = ( + ["", "a", "ab:cd-ab:cd", "1", "1-2", "01-02"] + + ["24/8", "24/7/365"] + + ["1:00-02:00", "01:0-02:00", "01:00-2:00", 
"01:00-02:0"] + + ["1x:00-02:00", "01:0x-02:00", "01:00-1x:00", "01:00-02:ab"] + + ["-1:00-02:00", "01:-1-02:00", "01:00--2:00", "01:00-02:-1"] + + ["01;00-02:00", "01:00-02;00", "01:00=02:00"] + + ["01:00-#02:00", "01:00 - 02:00"] + + ["01:60-02:05", "01:00-01:61"] + ) + for r in ranges: + with self.subTest(msg=f"value='{r}'"): + self.assertIsNone(parse_time_range(r)) + + def test__parse_time_range__valid_values(self) -> None: + ranges = ( + ["24/7"] + + ["00:00-00:00", "00:01-00:02"] + + ["01:00-02:00", "02:01-01:02"] + + ["02:00-26:59", "12:01-13:59"] + + ["Mo-Fr 06:00-21:30", "06:00-21:30 (weekdays)"] + + ["Mo-Fr 06:00-21:00; Sa-Su 07:00-20:00"] + ) + answers = [ + ((0, 0), (24, 0)), + ((0, 0), (0, 0)), + ((0, 1), (0, 2)), + ((1, 0), (2, 0)), + ((2, 1), (1, 2)), + ((2, 0), (26, 59)), + ((12, 1), (13, 59)), + ((6, 0), (21, 30)), + ((6, 0), (21, 30)), + ((6, 0), (21, 0)), + ] + + for r, answer in zip(ranges, answers): + with self.subTest(msg=f"value='{r}'"): + self.assertTupleEqual(answer, parse_time_range(r)) + + +class TestRouteIntervals(TestCase): + def test__get_interval_in_seconds_from_tags__one_key(self) -> None: + cases = [ + {"tags": {}, "answer": None}, + {"tags": {"a": "1"}, "answer": None}, + {"tags": {"duration": "1"}, "answer": 60}, + {"tags": {"durationxxx"}, "answer": None}, + {"tags": {"xxxduration"}, "answer": None}, + # prefixes not considered + {"tags": {"ru:duration"}, "answer": None}, + # suffixes considered + {"tags": {"duration:peak": "1"}, "answer": 60}, + # bare tag has precedence over suffixed version + {"tags": {"duration:peak": "1", "duration": "2"}, "answer": 120}, + # first suffixed version apply + {"tags": {"duration:y": "1", "duration:x": "2"}, "answer": 60}, + # other tags present + {"tags": {"a": "x", "duration": "1", "b": "y"}, "answer": 60}, + ] + + for case in cases: + with self.subTest(msg=f"{case['tags']}"): + self.assertEqual( + case["answer"], + get_interval_in_seconds_from_tags( + case["tags"], "duration" + ), + ) + + def 
test__get_interval_in_seconds_from_tags__several_keys(self) -> None: + keys = ("interval", "headway") + cases = [ + {"tags": {}, "answer": None}, + # prefixes not considered + {"tags": {"ru:interval"}, "answer": None}, + {"tags": {"interval": "1"}, "answer": 60}, + {"tags": {"headway": "1"}, "answer": 60}, + {"tags": {"interval": "1", "headway": "2"}, "answer": 60}, + # interval has precedence due to its position in 'keys' + {"tags": {"headway": "2", "interval": "1"}, "answer": 60}, + # non-suffixed keys has precedence + {"tags": {"interval:peak": "1", "headway": "2"}, "answer": 120}, + # among suffixed versions, first key in 'keys' is used first + { + "tags": {"headway:peak": "2", "interval:peak": "1"}, + "answer": 60, + }, + ] + + for case in cases: + with self.subTest(msg=f"{case['tags']}"): + self.assertEqual( + case["answer"], + get_interval_in_seconds_from_tags(case["tags"], keys), + ) diff --git a/subways/tests/test_route_master.py b/subways/tests/test_route_master.py new file mode 100644 index 00000000..77ddf213 --- /dev/null +++ b/subways/tests/test_route_master.py @@ -0,0 +1,114 @@ +from subways.structure.route_master import RouteMaster +from subways.tests.sample_data_for_twin_routes import metro_samples +from subways.tests.util import TestCase + + +class TestRouteMaster(TestCase): + def test__find_common_circular_subsequence(self) -> None: + cases = [ + { # the 1st sequence is empty + "sequence1": [], + "sequence2": [1, 2, 3, 4], + "answer": [], + }, + { # the 2nd sequence is empty + "sequence1": [1, 2, 3, 4], + "sequence2": [], + "answer": [], + }, + { # equal sequences + "sequence1": [1, 2, 3, 4], + "sequence2": [1, 2, 3, 4], + "answer": [1, 2, 3, 4], + }, + { # one sequence is a cyclic shift of the other + "sequence1": [1, 2, 3, 4], + "sequence2": [4, 1, 2, 3], + "answer": [1, 2, 3, 4], + }, + { # the 2nd sequence is a subsequence of the 1st; equal ends + "sequence1": [1, 2, 3, 4], + "sequence2": [1, 2, 4], + "answer": [1, 2, 4], + }, + { # the 1st 
sequence is a subsequence of the 2nd; equal ends + "sequence1": [1, 2, 4], + "sequence2": [1, 2, 3, 4], + "answer": [1, 2, 4], + }, + { # the 2nd sequence is an innter subsequence of the 1st + "sequence1": [1, 2, 3, 4], + "sequence2": [2, 3], + "answer": [2, 3], + }, + { # the 1st sequence is an inner subsequence of the 2nd + "sequence1": [2, 3], + "sequence2": [1, 2, 3, 4], + "answer": [2, 3], + }, + { # the 2nd sequence is a continuation of the 1st + "sequence1": [1, 2, 3, 4], + "sequence2": [4, 5, 6], + "answer": [4], + }, + { # the 1st sequence is a continuation of the 2nd + "sequence1": [4, 5, 6], + "sequence2": [1, 2, 3, 4], + "answer": [4], + }, + { # no common elements + "sequence1": [1, 2, 3, 4], + "sequence2": [5, 6, 7], + "answer": [], + }, + { # one sequence is the reversed other + "sequence1": [1, 2, 3, 4], + "sequence2": [4, 3, 2, 1], + "answer": [1, 2], + }, + { # the 2nd is a subsequence of shifted 1st + "sequence1": [1, 2, 3, 4], + "sequence2": [2, 4, 1], + "answer": [1, 2, 4], + }, + { # the 1st is a subsequence of shifted 2nd + "sequence1": [2, 4, 1], + "sequence2": [1, 2, 3, 4], + "answer": [2, 4, 1], + }, + { # mixed case: few common elements + "sequence1": [1, 2, 4], + "sequence2": [2, 3, 4], + "answer": [2, 4], + }, + ] + + for i, case in enumerate(cases): + with self.subTest(f"case#{i}"): + self.assertListEqual( + case["answer"], + RouteMaster.find_common_circular_subsequence( + case["sequence1"], case["sequence2"] + ), + ) + + def _test_find_twin_routes_for_network(self, metro_sample: dict) -> None: + cities, transfers = self.prepare_cities(metro_sample) + city = cities[0] + + self.assertTrue(city.is_good) + + for route_master_id, expected_twin_ids in metro_sample[ + "twin_routes" + ].items(): + route_master = city.routes[route_master_id] + calculated_twins = route_master.find_twin_routes() + calculated_twin_ids = { + r1.id: r2.id for r1, r2 in calculated_twins.items() + } + self.assertDictEqual(expected_twin_ids, calculated_twin_ids) + + 
def test_find_twin_routes(self) -> None: + for sample in metro_samples: + with self.subTest(msg=sample["name"]): + self._test_find_twin_routes_for_network(sample) diff --git a/subways/tests/test_station.py b/subways/tests/test_station.py new file mode 100644 index 00000000..65f4b876 --- /dev/null +++ b/subways/tests/test_station.py @@ -0,0 +1,46 @@ +from unittest import TestCase + +from subways.structure.station import Station + + +class TestStation(TestCase): + def test__get_modes(self) -> None: + cases = [ + {"element": {"tags": {"railway": "station"}}, "modes": set()}, + { + "element": { + "tags": {"railway": "station", "station": "train"} + }, + "modes": {"train"}, + }, + { + "element": {"tags": {"railway": "station", "train": "yes"}}, + "modes": {"train"}, + }, + { + "element": { + "tags": { + "railway": "station", + "station": "subway", + "train": "yes", + } + }, + "modes": {"subway", "train"}, + }, + { + "element": { + "tags": { + "railway": "station", + "subway": "yes", + "train": "yes", + "light_rail": "yes", + "monorail": "yes", + } + }, + "modes": {"subway", "train", "light_rail", "monorail"}, + }, + ] + for case in cases: + element = case["element"] + expected_modes = case["modes"] + self.assertSetEqual(expected_modes, Station.get_modes(element)) diff --git a/subways/tests/test_storage.py b/subways/tests/test_storage.py new file mode 100644 index 00000000..692bddd1 --- /dev/null +++ b/subways/tests/test_storage.py @@ -0,0 +1,42 @@ +import json +from operator import itemgetter + +from subways.processors._common import transit_to_dict +from subways.tests.sample_data_for_outputs import metro_samples +from subways.tests.util import JsonLikeComparisonMixin, TestCase + + +class TestStorage(JsonLikeComparisonMixin, TestCase): + def test_storage(self) -> None: + for sample in metro_samples: + with self.subTest(msg=sample["name"]): + self._test_storage_for_sample(sample) + + def _test_storage_for_sample(self, metro_sample: dict) -> None: + cities, transfers = 
self.prepare_cities(metro_sample) + + calculated_transit_data = transit_to_dict(cities, transfers) + + control_transit_data = json.loads(metro_sample["json_dump"]) + control_transit_data["transfers"] = set( + map(tuple, control_transit_data["transfers"]) + ) + + self._compare_transit_data( + calculated_transit_data, control_transit_data + ) + + def _compare_transit_data( + self, transit_data1: dict, transit_data2: dict + ) -> None: + id_cmp = itemgetter("id") + + self.assertMappingAlmostEqual( + transit_data1, + transit_data2, + unordered_lists={ + "routes": id_cmp, + "itineraries": id_cmp, + "entrances": id_cmp, + }, + ) diff --git a/subways/tests/util.py b/subways/tests/util.py new file mode 100644 index 00000000..73c142c2 --- /dev/null +++ b/subways/tests/util.py @@ -0,0 +1,309 @@ +import io +from collections.abc import Callable, Mapping, Sequence +from pathlib import Path +from typing import Any, TypeAlias, Self +from unittest import TestCase as unittestTestCase + +from subways.structure.city import City, find_transfers +from subways.subway_io import load_xml +from subways.validation import ( + add_osm_elements_to_cities, + validate_cities, + calculate_centers, +) + +TestCaseMixin: TypeAlias = Self | unittestTestCase + + +class TestCase(unittestTestCase): + """TestCase class for testing the Subway Validator""" + + CITY_TEMPLATE = { + "name": "Null Island", + "country": "World", + "continent": "Africa", + "bbox": "-179, -89, 179, 89", + "networks": "", + "num_stations": None, + "num_lines": 1, + "num_light_lines": 0, + "num_interchanges": 0, + } + + @classmethod + def setUpClass(cls) -> None: + cls.city_class = City + + def prepare_cities(self, metro_sample: dict) -> tuple: + """Load cities from file/string, validate them and return cities + and transfers. + """ + + def assign_unique_id(city_info: dict, cities_info: list[dict]) -> None: + """city_info - newly added city, cities_info - already added + cities. 
Check city id uniqueness / assign unique id to the city. + """ + occupied_ids = set(c["id"] for c in cities_info) + if "id" in city_info: + if city_info["id"] in occupied_ids: + raise RuntimeError("Not unique city ids in test data") + else: + city_info["id"] = max(occupied_ids, default=1) + 1 + + cities_given_info = metro_sample["cities_info"] + cities_info = list() + for city_given_info in cities_given_info: + city_info = self.CITY_TEMPLATE.copy() + for attr in city_given_info.keys(): + city_info[attr] = city_given_info[attr] + assign_unique_id(city_info, cities_info) + cities_info.append(city_info) + + if len(set(ci["name"] for ci in cities_info)) < len(cities_info): + raise RuntimeError("Not unique city names in test data") + + cities = list(map(self.city_class, cities_info)) + if "xml" in metro_sample: + xml_file = io.BytesIO(metro_sample["xml"].encode()) + else: + xml_file = ( + Path(__file__).resolve().parent / metro_sample["xml_file"] + ) + elements = load_xml(xml_file) + calculate_centers(elements) + add_osm_elements_to_cities(elements, cities) + validate_cities(cities) + transfers = find_transfers(elements, cities) + return cities, transfers + + +class JsonLikeComparisonMixin: + """Contains auxiliary methods for the TestCase class that allow + to compare json-like structures where some lists do not imply order + and actually represent sets. + Also, all collections compare floats with given precision to any nesting + depth. + """ + + def _assertAnyAlmostEqual( + self: TestCaseMixin, + first: Any, + second: Any, + places: int = 10, + *, + unordered_lists: dict[str, Callable] | None = None, + ignore_keys: set[str] | None = None, + ) -> None: + """Dispatcher method to other "...AlmostEqual" methods + depending on argument types. + + Compare dictionaries/lists recursively, numeric values being compared + approximately. 
+ + :param: first a value of arbitrary type, including collections + :param: second a value of arbitrary type, including collections + :param: places number of fractional digits. Is passed to + the self.assertAlmostEqual() method. + :param: unordered_lists a dict whose keys are names of lists + to be compared without order, values - comparators for + the lists to sort them in an unambiguous order. If a comparator + is None, then the lists are compared as sets. + :param: ignore_keys a set of strs with keys that should be ignored + during recursive comparison of dictionaries. May be used to + elaborate a custom comparison mechanism for some substructures. + :return: None + """ + if all(isinstance(x, Mapping) for x in (first, second)): + self.assertMappingAlmostEqual( + first, + second, + places, + unordered_lists=unordered_lists, + ignore_keys=ignore_keys, + ) + elif all( + isinstance(x, Sequence) and not isinstance(x, (str, bytes)) + for x in (first, second) + ): + self.assertSequenceAlmostEqual( + first, + second, + places, + unordered_lists=unordered_lists, + ignore_keys=ignore_keys, + ) + elif isinstance(first, float) and isinstance(second, float): + self.assertAlmostEqual(first, second, places) + else: + self.assertEqual(first, second) + + def assertSequenceAlmostEqual( + self: TestCaseMixin, + seq1: Sequence, + seq2: Sequence, + places: int = 10, + *, + unordered_lists: dict[str, Callable] | None = None, + ignore_keys: set[str] | None = None, + ) -> None: + """Compare two sequences, items of numeric types being compared + approximately, containers being approx-compared recursively. + + :param: places see _assertAnyAlmostEqual() method + :param: unordered_lists see _assertAnyAlmostEqual() method + :param: ignore_keys see _assertAnyAlmostEqual() method + :return: None + """ + if not (isinstance(seq1, Sequence) and isinstance(seq2, Sequence)): + raise RuntimeError( + f"Not a sequence passed to the '{self.__class__.__name__}." 
+ "assertSequenceAlmostEqual' method" + ) + self.assertEqual(len(seq1), len(seq2)) + for a, b in zip(seq1, seq2): + self._assertAnyAlmostEqual( + a, + b, + places, + unordered_lists=unordered_lists, + ignore_keys=ignore_keys, + ) + + def assertSequenceAlmostEqualIgnoreOrder( + self: TestCaseMixin, + seq1: Sequence, + seq2: Sequence, + cmp: Callable | None = None, + places: int = 10, + *, + unordered_lists: dict[str, Callable] | None = None, + ignore_keys: set[str] | None = None, + ) -> None: + """Compares two sequences as sets, i.e. ignoring order. Nested + lists determined with unordered_lists parameter are also compared + without order. + + :param: cmp if None then compare sequences as sets. If elements are + not hashable then this method is inapplicable and the + sorted (with the comparator) sequences are compared. + :param: places see _assertAnyAlmostEqual() method + :param: unordered_lists see _assertAnyAlmostEqual() method + :param: ignore_keys see _assertAnyAlmostEqual() method + :return: None + """ + if cmp is not None: + v1 = sorted(seq1, key=cmp) + v2 = sorted(seq2, key=cmp) + self.assertSequenceAlmostEqual( + v1, + v2, + places, + unordered_lists=unordered_lists, + ignore_keys=ignore_keys, + ) + else: + self.assertEqual(len(seq1), len(seq2)) + v1 = set(seq1) + v2 = set(seq2) + self.assertSetEqual(v1, v2) + + def assertMappingAlmostEqual( + self: TestCaseMixin, + d1: Mapping, + d2: Mapping, + places: int = 10, + *, + unordered_lists: dict[str, Callable] | None = None, + ignore_keys: set[str] | None = None, + ) -> None: + """Compare dictionaries recursively, numeric values being compared + approximately, some lists being compared without order. 
+ + :param: places see _assertAnyAlmostEqual() method + :param: unordered_lists see _assertAnyAlmostEqual() method + Example 1: + d1 = { + "name_from_unordered_list": [a1, b1, c1], + "some_other_name": [e1, f1, g1], + } + d2 = { + "name_from_unordered_list": [a2, b2, c2], + "some_other_name": [e2, f2, g2], + } + Lists [a1, b1, c1] and [a2, b2, c2] will be compared + without order, lists [e1, f1, g1] and [e2, f2, g2] - + considering the order. + + Example 2: + d1 = { + "name_from_unordered_list": { + "key1": [a1, b1, c1], + "key2": [e1, f1, g1], + }, + "some_other_name": [h1, i1, k1], + } + d2 = { + "name_from_unordered_list": { + "key1": [a2, b2, c2], + "key2": [e2, f2, g2], + }, + "some_other_name": [h2, i2, k2], + } + Lists [a1, b1, c1] and [a2, b2, c2] will be compared + without order, as well as [e1, f1, g1] and + [e2, f2, g2]; lists [h1, i1, k1] and [h2, i2, k2] - + considering the order. + :param: ignore_keys see _assertAnyAlmostEqual() method + :return: None + """ + if not (isinstance(d1, Mapping) and isinstance(d2, Mapping)): + raise RuntimeError( + f"Not a dictionary passed to the '{self.__class__.__name__}." 
+ "assertMappingAlmostEqual' method" + ) + + d1_keys = set(d1.keys()) + d2_keys = set(d2.keys()) + if ignore_keys: + d1_keys -= ignore_keys + d2_keys -= ignore_keys + self.assertSetEqual(d1_keys, d2_keys) + + if unordered_lists is None: + unordered_lists = {} + + for k in d1_keys: + v1 = d1[k] + v2 = d2[k] + if (cmp := unordered_lists.get(k, "")) == "" or not isinstance( + v1, (Sequence, Mapping) + ): + self._assertAnyAlmostEqual( + v1, + v2, + places, + unordered_lists=unordered_lists, + ignore_keys=ignore_keys, + ) + elif isinstance(v1, Sequence): + self.assertSequenceAlmostEqualIgnoreOrder( + v1, + v2, + cmp, + places, + unordered_lists=unordered_lists, + ignore_keys=ignore_keys, + ) + else: + self.assertSetEqual(set(v1.keys()), set(v2.keys())) + for ik in v1.keys(): + iv1 = v1[ik] + iv2 = v2[ik] + self.assertSequenceAlmostEqualIgnoreOrder( + iv1, + iv2, + cmp, + places, + unordered_lists=unordered_lists, + ignore_keys=ignore_keys, + ) diff --git a/subways/types.py b/subways/types.py new file mode 100644 index 00000000..cb1189ae --- /dev/null +++ b/subways/types.py @@ -0,0 +1,14 @@ +from typing import TypeAlias + + +OsmElementT: TypeAlias = dict +IdT: TypeAlias = str # Type of feature ids +TransferT: TypeAlias = set[IdT] # A transfer is a set of StopArea IDs +TransfersT: TypeAlias = list[TransferT] +LonLat: TypeAlias = tuple[float, float] +RailT: TypeAlias = list[LonLat] + + +class CriticalValidationError(Exception): + """Is thrown if an error occurs + that prevents further validation of a city.""" diff --git a/subways/validation.py b/subways/validation.py new file mode 100644 index 00000000..67df60e9 --- /dev/null +++ b/subways/validation.py @@ -0,0 +1,253 @@ +import csv +import logging +import urllib.request +from functools import partial + +from subways.structure.city import City +from subways.types import CriticalValidationError, LonLat, OsmElementT + +DEFAULT_SPREADSHEET_ID = "1SEW1-NiNOnA2qDwievcxYV1FOaQl1mb1fdeyqAxHu3k" +DEFAULT_CITIES_INFO_URL = ( + 
"https://docs.google.com/spreadsheets/d/" + f"{DEFAULT_SPREADSHEET_ID}/export?format=csv" +) +BAD_MARK = "[bad]" + + +def get_way_center( + element: OsmElementT, node_centers: dict[int, LonLat] +) -> LonLat | None: + """ + :param element: dict describing OSM element + :param node_centers: osm_id => LonLat + :return: tuple with center coordinates, or None + """ + + # If elements have been queried via overpass-api with + # 'out center;' clause then ways already have 'center' attribute + if "center" in element: + return element["center"]["lon"], element["center"]["lat"] + + if "nodes" not in element: + return None + + center = [0, 0] + count = 0 + way_nodes = element["nodes"] + way_nodes_len = len(element["nodes"]) + for i, nd in enumerate(way_nodes): + if nd not in node_centers: + continue + # Don't count the first node of a closed way twice + if ( + i == way_nodes_len - 1 + and way_nodes_len > 1 + and way_nodes[0] == way_nodes[-1] + ): + break + center[0] += node_centers[nd][0] + center[1] += node_centers[nd][1] + count += 1 + if count == 0: + return None + element["center"] = {"lat": center[1] / count, "lon": center[0] / count} + return element["center"]["lon"], element["center"]["lat"] + + +def get_relation_center( + element: OsmElementT, + node_centers: dict[int, LonLat], + way_centers: dict[int, LonLat], + relation_centers: dict[int, LonLat], + ignore_unlocalized_child_relations: bool = False, +) -> LonLat | None: + """ + :param element: dict describing OSM element + :param node_centers: osm_id => LonLat + :param way_centers: osm_id => LonLat + :param relation_centers: osm_id => LonLat + :param ignore_unlocalized_child_relations: if a member that is a relation + has no center, skip it and calculate center based on member nodes, + ways and other, "localized" (with known centers), relations + :return: tuple with center coordinates, or None + """ + + # If elements have been queried via overpass-api with + # 'out center;' clause then some relations already have 
'center' + # attribute. But this is not the case for relations composed only + # of other relations (e.g., route_master, stop_area_group or + # stop_area with only members that are multipolygons) + if "center" in element: + return element["center"]["lon"], element["center"]["lat"] + + center = [0, 0] + count = 0 + for m in element.get("members", list()): + m_id = m["ref"] + m_type = m["type"] + if m_type == "relation" and m_id not in relation_centers: + if ignore_unlocalized_child_relations: + continue + else: + # Cannot calculate fair center because the center + # of a child relation is not known yet + return None + member_container = ( + node_centers + if m_type == "node" + else way_centers + if m_type == "way" + else relation_centers + ) + if m_id in member_container: + center[0] += member_container[m_id][0] + center[1] += member_container[m_id][1] + count += 1 + if count == 0: + return None + element["center"] = {"lat": center[1] / count, "lon": center[0] / count} + return element["center"]["lon"], element["center"]["lat"] + + +def calculate_centers(elements: list[OsmElementT]) -> None: + """Adds 'center' key to each way/relation in elements, + except for empty ways or relations. + Relies on nodes-ways-relations order in the elements list. 
+ """ + nodes: dict[int, LonLat] = {} # id => LonLat + ways: dict[int, LonLat] = {} # id => approx center LonLat + relations: dict[int, LonLat] = {} # id => approx center LonLat + + unlocalized_relations: list[OsmElementT] = [] # 'unlocalized' means + # the center of the relation has not been calculated yet + + for el in elements: + if el["type"] == "node": + nodes[el["id"]] = (el["lon"], el["lat"]) + elif el["type"] == "way": + if center := get_way_center(el, nodes): + ways[el["id"]] = center + elif el["type"] == "relation": + if center := get_relation_center(el, nodes, ways, relations): + relations[el["id"]] = center + else: + unlocalized_relations.append(el) + + def iterate_relation_centers_calculation( + ignore_unlocalized_child_relations: bool, + ) -> list[OsmElementT]: + unlocalized_relations_upd = [] + for rel in unlocalized_relations: + if center := get_relation_center( + rel, nodes, ways, relations, ignore_unlocalized_child_relations + ): + relations[rel["id"]] = center + else: + unlocalized_relations_upd.append(rel) + return unlocalized_relations_upd + + # Calculate centers for relations that have no one yet + while unlocalized_relations: + unlocalized_relations_upd = iterate_relation_centers_calculation(False) + progress = len(unlocalized_relations_upd) < len(unlocalized_relations) + if not progress: + unlocalized_relations_upd = iterate_relation_centers_calculation( + True + ) + progress = len(unlocalized_relations_upd) < len( + unlocalized_relations + ) + if not progress: + break + unlocalized_relations = unlocalized_relations_upd + + +def add_osm_elements_to_cities( + osm_elements: list[OsmElementT], cities: list[City] +) -> None: + for el in osm_elements: + for c in cities: + if c.contains(el): + c.add(el) + + +def validate_cities(cities: list[City]) -> list[City]: + """Validate cities. 
Return list of good cities.""" + good_cities = [] + for c in cities: + try: + c.extract_routes() + except CriticalValidationError as e: + logging.error( + "Critical validation error while processing %s: %s", + c.name, + e, + ) + c.error(str(e)) + except AssertionError as e: + logging.error( + "Validation logic error while processing %s: %s", + c.name, + e, + ) + c.error(f"Validation logic error: {e}") + else: + c.validate() + if c.is_good: + c.calculate_distances() + good_cities.append(c) + + return good_cities + + +def get_cities_info( + cities_info_url: str = DEFAULT_CITIES_INFO_URL, +) -> list[dict]: + response = urllib.request.urlopen(cities_info_url) + if ( + not cities_info_url.startswith("file://") + and (r_code := response.getcode()) != 200 + ): + raise Exception( + f"Failed to download cities spreadsheet: HTTP {r_code}" + ) + data = response.read().decode("utf-8") + reader = csv.DictReader( + data.splitlines(), + fieldnames=( + "id", + "name", + "country", + "continent", + "num_stations", + "num_lines", + "num_light_lines", + "num_interchanges", + "bbox", + "networks", + ), + ) + + cities_info = list() + names = set() + next(reader) # skipping the header + for city_info in reader: + if city_info["id"] and city_info["bbox"]: + cities_info.append(city_info) + name = city_info["name"].strip() + if name in names: + logging.warning( + "Duplicate city name in city list: %s", + city_info, + ) + names.add(name) + return cities_info + + +def prepare_cities( + cities_info_url: str = DEFAULT_CITIES_INFO_URL, overground: bool = False +) -> list[City]: + if overground: + raise NotImplementedError("Overground transit not implemented yet") + cities_info = get_cities_info(cities_info_url) + return list(map(partial(City, overground=overground), cities_info)) diff --git a/tests/assets/kuntsevskaya_centers.json b/tests/assets/kuntsevskaya_centers.json deleted file mode 100644 index 36317ec5..00000000 --- a/tests/assets/kuntsevskaya_centers.json +++ /dev/null @@ -1,28 +0,0 @@ 
-{ - "w38836456": { - "lat": 55.73064775, - "lon": 37.446065950000005 - }, - "w489951237": { - "lat": 55.730760724999996, - "lon": 37.44602055 - }, - "r7588527": { - "lat": 55.73066371666667, - "lon": 37.44604881666667 - }, - "r7588528": { - "lat": 55.73075192499999, - "lon": 37.44609837 - }, - "r7588561": { - "lat": 55.73070782083333, - "lon": 37.44607359333334 - }, - "r13426423": { - "lat": 55.730760724999996, - "lon": 37.44602055 - }, - "r100": null, - "r101": null -} diff --git a/tests/test_error_messages.py b/tests/test_error_messages.py deleted file mode 100644 index 12a5583d..00000000 --- a/tests/test_error_messages.py +++ /dev/null @@ -1,22 +0,0 @@ -from tests.sample_data_for_error_messages import sample_networks -from tests.util import TestCase - - -class TestValidationMessages(TestCase): - """Test that the validator provides expected validation messages - on different types of errors in input OSM data. - """ - - def _test_validation_messages_for_network(self, network_data): - city = self.validate_city(network_data) - - for err_level in ("errors", "warnings", "notices"): - self.assertListEqual( - sorted(getattr(city, err_level)), - sorted(network_data[err_level]), - ) - - def test_validation_messages(self) -> None: - for network_name, network_data in sample_networks.items(): - with self.subTest(msg=network_name): - self._test_validation_messages_for_network(network_data) diff --git a/tests/util.py b/tests/util.py deleted file mode 100644 index efab8c22..00000000 --- a/tests/util.py +++ /dev/null @@ -1,49 +0,0 @@ -import io -from unittest import TestCase as unittestTestCase - -from subway_io import load_xml -from subway_structure import City - - -class TestCase(unittestTestCase): - """TestCase class for testing the Subway Validator""" - - CITY_TEMPLATE = { - "id": 1, - "name": "Null Island", - "country": "World", - "continent": "Africa", - "bbox": "-179, -89, 179, 89", - "networks": "", - "num_stations": None, - "num_lines": 1, - "num_light_lines": 0, - 
"num_interchanges": 0, - } - - def validate_city(self, network) -> City: - city_data = self.CITY_TEMPLATE.copy() - for attr in self.CITY_TEMPLATE.keys(): - if attr in network: - city_data[attr] = network[attr] - - city = City(city_data) - elements = load_xml(io.BytesIO(network["xml"].encode("utf-8"))) - for el in elements: - city.add(el) - city.extract_routes() - city.validate() - return city - - def assertListAlmostEqual(self, list1, list2, places=10) -> None: - if not (isinstance(list1, list) and isinstance(list2, list)): - raise RuntimeError( - f"Not lists passed to the '{self.__class__.__name__}." - "assertListAlmostEqual' method" - ) - self.assertEqual(len(list1), len(list2)) - for a, b in zip(list1, list2): - if isinstance(a, list) and isinstance(b, list): - self.assertListAlmostEqual(a, b, places) - else: - self.assertAlmostEqual(a, b, places) diff --git a/checkers/common.py b/tools/checkers/common.py similarity index 100% rename from checkers/common.py rename to tools/checkers/common.py diff --git a/checkers/compare_city_caches.py b/tools/checkers/compare_city_caches.py similarity index 100% rename from checkers/compare_city_caches.py rename to tools/checkers/compare_city_caches.py diff --git a/checkers/compare_json_outputs.py b/tools/checkers/compare_json_outputs.py similarity index 100% rename from checkers/compare_json_outputs.py rename to tools/checkers/compare_json_outputs.py diff --git a/mapsme_json_to_cities.py b/tools/legacy/mapsme_json_to_cities.py similarity index 79% rename from mapsme_json_to_cities.py rename to tools/legacy/mapsme_json_to_cities.py index 1c69a77e..65cbf5f3 100644 --- a/mapsme_json_to_cities.py +++ b/tools/legacy/mapsme_json_to_cities.py @@ -1,7 +1,19 @@ +""" +Generate sorted list of all cities, with [bad] mark for bad cities. + +!!! Deprecated for use in validation cycle. +Use "scripts/process_subways.py --dump-city-list " instead. 
+""" + + import argparse import json -from process_subways import DEFAULT_CITIES_INFO_URL, get_cities_info +from subways.validation import ( + BAD_MARK, + DEFAULT_CITIES_INFO_URL, + get_cities_info, +) if __name__ == "__main__": @@ -11,7 +23,7 @@ used by subway render to generate the list of network at frontend. It uses two sources: a mapsme.json validator output with good networks, and a google spreadsheet with networks for the - process_subways.download_cities() function.""" + subways.validation.get_cities_info() function.""" ), formatter_class=argparse.RawTextHelpFormatter, ) @@ -56,7 +68,7 @@ if ci["name"] in good_cities: lines.append(f"{ci['name']}, {ci['country']}") elif with_bad: - lines.append(f"{ci['name']}, {ci['country']} (Bad)") + lines.append(f"{ci['name']}, {ci['country']} {BAD_MARK}") for line in sorted(lines): print(line) diff --git a/make_all_metro_poly.py b/tools/make_poly/make_all_metro_poly.py similarity index 95% rename from make_all_metro_poly.py rename to tools/make_poly/make_all_metro_poly.py index e8450a24..88f9b8aa 100644 --- a/make_all_metro_poly.py +++ b/tools/make_poly/make_all_metro_poly.py @@ -3,7 +3,7 @@ from shapely import unary_union from shapely.geometry import MultiPolygon, Polygon -from process_subways import DEFAULT_CITIES_INFO_URL, get_cities_info +from subways.validation import DEFAULT_CITIES_INFO_URL, get_cities_info def make_disjoint_metro_polygons(cities_info_url: str) -> None: diff --git a/tools/make_poly/requirements.txt b/tools/make_poly/requirements.txt new file mode 100644 index 00000000..03bc2d90 --- /dev/null +++ b/tools/make_poly/requirements.txt @@ -0,0 +1,4 @@ +shapely==2.0.4 + +# Fixate versions of indirect requirements +NumPy==2.0.0 diff --git a/tools/make_poly/tests/__init__.py b/tools/make_poly/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/assets/cities_info_1city.csv b/tools/make_poly/tests/assets/cities_info_1city.csv similarity index 100% rename from 
tests/assets/cities_info_1city.csv rename to tools/make_poly/tests/assets/cities_info_1city.csv diff --git a/tests/assets/cities_info_2cities.csv b/tools/make_poly/tests/assets/cities_info_2cities.csv similarity index 100% rename from tests/assets/cities_info_2cities.csv rename to tools/make_poly/tests/assets/cities_info_2cities.csv diff --git a/tests/test_make_all_metro_poly.py b/tools/make_poly/tests/test_make_all_metro_poly.py similarity index 94% rename from tests/test_make_all_metro_poly.py rename to tools/make_poly/tests/test_make_all_metro_poly.py index dac8dae5..d6df8311 100644 --- a/tests/test_make_all_metro_poly.py +++ b/tools/make_poly/tests/test_make_all_metro_poly.py @@ -1,6 +1,6 @@ import contextlib import io -import os +from pathlib import Path from unittest import TestCase from make_all_metro_poly import make_disjoint_metro_polygons @@ -63,9 +63,8 @@ class TestMakeAllMetroPoly(TestCase): def test_make_disjoint_metro_polygons(self) -> None: for case in cases: with self.subTest(msg=case["csv_file"]): - file_url = ( - f"file://{os.getcwd()}/tests/assets/{case['csv_file']}" - ) + assets_dir = Path(__file__).resolve().parent / "assets" + file_url = f"file://{assets_dir}/{case['csv_file']}" stream = io.StringIO() with contextlib.redirect_stdout(stream): make_disjoint_metro_polygons(file_url) diff --git a/stop_areas/make_stop_areas.py b/tools/stop_areas/make_stop_areas.py similarity index 100% rename from stop_areas/make_stop_areas.py rename to tools/stop_areas/make_stop_areas.py diff --git a/stop_areas/make_tram_areas.py b/tools/stop_areas/make_tram_areas.py similarity index 100% rename from stop_areas/make_tram_areas.py rename to tools/stop_areas/make_tram_areas.py diff --git a/stop_areas/requirements.txt b/tools/stop_areas/requirements.txt similarity index 100% rename from stop_areas/requirements.txt rename to tools/stop_areas/requirements.txt diff --git a/stop_areas/serve.py b/tools/stop_areas/serve.py similarity index 100% rename from 
stop_areas/serve.py rename to tools/stop_areas/serve.py diff --git a/stop_areas/templates/index.html b/tools/stop_areas/templates/index.html similarity index 100% rename from stop_areas/templates/index.html rename to tools/stop_areas/templates/index.html diff --git a/v2h_templates.py b/tools/v2h/v2h_templates.py similarity index 100% rename from v2h_templates.py rename to tools/v2h/v2h_templates.py diff --git a/validation_to_html.py b/tools/v2h/validation_to_html.py similarity index 97% rename from validation_to_html.py rename to tools/v2h/validation_to_html.py index f772a4f5..42158f89 100755 --- a/validation_to_html.py +++ b/tools/v2h/validation_to_html.py @@ -7,9 +7,9 @@ import os import re from collections import defaultdict -from typing import Any, Optional +from typing import Any -from process_subways import DEFAULT_SPREADSHEET_ID +from subways.validation import DEFAULT_SPREADSHEET_ID from v2h_templates import ( COUNTRY_CITY, COUNTRY_FOOTER, @@ -22,8 +22,7 @@ class CityData: - def __init__(self, city: Optional[str] = None) -> None: - self.city = city is not None + def __init__(self, city: dict | None = None) -> None: self.data = { "good_cities": 0, "total_cities": 1 if city else 0, @@ -93,7 +92,7 @@ def format(self, s: str) -> str: return s -def tmpl(s: str, data: Optional[CityData] = None, **kwargs) -> str: +def tmpl(s: str, data: CityData | None = None, **kwargs) -> str: if data: s = data.format(s) if kwargs: