Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Tulare Lake Basin to makefile #20

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added data/raw/TLB_HUC06_TulareLakeBasin.zip
Binary file not shown.
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
awscli
click
coverage
descartes
flake8
geopandas
jupyterlab
Expand Down
2 changes: 1 addition & 1 deletion src/active_violations.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from src.config import DATA_DIRECTORY
from src import utils

logger = logging.getLogger()
logger = logging.getLogger(__name__)


def get_last_ended_action(group):
Expand Down
1 change: 1 addition & 0 deletions src/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@
HR2W_EXCEEDANCE_URL = "https://www.waterboards.ca.gov/water_issues/programs/hr2w/docs/data/hr2w_web_data_active_2-2019.xlsx"
HR2W_RETURN_TO_COMPLIANCE_URL = "https://www.waterboards.ca.gov/water_issues/programs/hr2w/docs/data/hr2w_web_data_rtc_2-2019.xlsx"
WATER_SYSTEM_LOCATIONS_URL = "https://www.waterboards.ca.gov/water_issues/programs/hr2w/docs/data/ec_summary_feb2019.zip"
TULARE_LAKE_BASIN_SHP_PATH = DATA_DIRECTORY / "raw" / "TLB_HUC06_TulareLakeBasin.zip"
55 changes: 14 additions & 41 deletions src/data/make_dataset.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,6 @@
# -*- coding: utf-8 -*-
import shapefile
from json import dumps
import urllib
from pathlib import Path
import io
import zipfile
import click
import io
import logging
import pandas as pd
import requests
Expand All @@ -15,8 +10,10 @@
DATA_DIRECTORY,
HR2W_EXCEEDANCE_URL,
HR2W_RETURN_TO_COMPLIANCE_URL,
TULARE_LAKE_BASIN_SHP_PATH,
WATER_SYSTEM_LOCATIONS_URL,
)
from src import utils


def make_hr2w_data():
Expand All @@ -42,44 +39,17 @@ def make_hr2w_data():
df.to_csv(DATA_DIRECTORY / "interim" / "hr2w_return_to_compliance.csv", index=False)


def make_water_system_latlon():
def make_water_system_locations():
# from https://stackoverflow.com/questions/43119040/shapefile-into-geojson-conversion-python-3
data = requests.get(WATER_SYSTEM_LOCATIONS_URL)
zpfile = zipfile.ZipFile(io.BytesIO(data.content))

output_directory = Path(
DATA_DIRECTORY
/ "raw"
/ Path(urllib.parse.urlparse(WATER_SYSTEM_LOCATIONS_URL).path).stem
)
output_directory.mkdir(parents=True, exist_ok=True)

zpfile.extractall(output_directory)
output_path = DATA_DIRECTORY / "interim" / "water_system_locations.geojson"
utils.convert_shp_to_geojson(io.BytesIO(data.content), output_path)

shp_filename = [
p.filename for p in zpfile.infolist() if p.filename.endswith(".shp")
][0]

reader = shapefile.Reader(str(output_directory / shp_filename))
fields = reader.fields[1:]

field_names = [field[0] for field in fields]
features = []
for record in reader.shapeRecords():
atr = dict(zip(field_names, record.record))
features.append(
{
"type": "Feature",
"geometry": record.shape.__geo_interface__,
"properties": atr,
}
)

# write the GeoJSON file
with open(DATA_DIRECTORY / "interim" / "water_system_latlon.geojson", "w") as f:
f.write(
dumps({"type": "FeatureCollection", "features": features}, indent=2) + "\n"
)
def make_tulare_lake_basin_location():
    """Write the Tulare Lake Basin boundary as GeoJSON.

    Hands the zipped shapefile at ``TULARE_LAKE_BASIN_SHP_PATH`` to
    ``utils.convert_shp_to_geojson`` and stores the result as
    ``tulare_lake_basin.geojson`` in the ``interim`` data directory.
    """
    utils.convert_shp_to_geojson(
        TULARE_LAKE_BASIN_SHP_PATH,
        DATA_DIRECTORY / "interim" / "tulare_lake_basin.geojson",
    )


@click.command()
Expand All @@ -92,8 +62,11 @@ def main():
logger.info("making hr2w dataset")
make_hr2w_data()

logger.info("making water system latitude/longitude dataset")
make_water_system_latlon()
logger.info("making water system location dataset")
make_water_system_locations()

logger.info("making Tulare Lake Basin location")
make_tulare_lake_basin_location()


if __name__ == "__main__":
Expand Down
49 changes: 49 additions & 0 deletions src/utils.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,57 @@
import io
import json
from pathlib import Path
import math
import shapefile
import shapefile
import tempfile
import zipfile

millnames = ["", " thousand", " million", " billion", " trillion"]


def convert_shp_to_geojson(input_path, output_path):
    """Convert a zipped shapefile to a GeoJSON file.

    The archive is unpacked into a temporary directory, the first ``.shp``
    member (in archive order) is read with ``shapefile.Reader``, and every
    shape/record pair is written out as one ``Feature`` of a single
    ``FeatureCollection``.

    Parameters
    ----------
    input_path : str, path-like or binary file stream
        Zip archive containing the shapefile; passed straight to
        ``zipfile.ZipFile``.
    output_path : str or path-like
        Destination of the GeoJSON file. It is only written once the whole
        shapefile has been read successfully.

    Raises
    ------
    IndexError
        If the archive contains no member whose name ends in ``.shp``.
    """
    # The archive and the scratch directory are context-managed so both are
    # released even when reading the shapefile fails part-way through.
    with zipfile.ZipFile(input_path) as zfile:
        shp_filename = next(
            (p.filename for p in zfile.infolist() if p.filename.endswith(".shp")),
            None,
        )
        if shp_filename is None:
            # IndexError is kept for compatibility with the previous
            # ``[...][0]`` lookup, but it is now raised before anything is
            # extracted and states what is actually wrong.
            raise IndexError("no .shp member found in shapefile archive")

        with tempfile.TemporaryDirectory() as temp_directory:
            zfile.extractall(temp_directory)
            shp_path = Path(temp_directory) / shp_filename

            # Close the reader before the directory is removed so no file in
            # it is still open at deletion time.
            with shapefile.Reader(str(shp_path)) as reader:
                # Skip the first field descriptor (pyshp's deletion-flag
                # marker) so names line up with the record values.
                field_names = [field[0] for field in reader.fields[1:]]
                features = [
                    {
                        "type": "Feature",
                        "geometry": record.shape.__geo_interface__,
                        "properties": dict(zip(field_names, record.record)),
                    }
                    for record in reader.shapeRecords()
                ]

    # write the GeoJSON file
    with open(output_path, "w") as f:
        f.write(
            json.dumps({"type": "FeatureCollection", "features": features}, indent=2)
            + "\n"
        )


def millify(n):
"""Human-readable large numbers

Expand Down