docs: add docstrings (#49)
* docs: add docstrings

Closes #22

* address PR comments

- Add a TODO comment on the "D" exclusion for the `tests` folder
- Elaborate on the docstring in `wind_up/interface.py`
- Rename `math_funcs.py` to `circular_math.py` and update its module docstring
samuelwnaylor authored Jan 27, 2025
1 parent 15c7f2f commit 2fe80db
Showing 41 changed files with 746 additions and 274 deletions.
1 change: 1 addition & 0 deletions examples/__init__.py
@@ -0,0 +1 @@
"""Examples of using wind-up."""
3 changes: 3 additions & 0 deletions examples/helpers.py
@@ -1,3 +1,5 @@
"""Helper functions for the examples."""

from __future__ import annotations

import logging
@@ -19,6 +21,7 @@


def setup_logger(log_fpath: Path, level: int = logging.INFO) -> None:
"""Initializes the logger with a file handler and a console handler."""
log_formatter_file = logging.Formatter("%(asctime)s [%(levelname)-8s] %(message)s")
root_logger = logging.getLogger()
root_logger.setLevel(level)
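For readers skimming the new docstring on `setup_logger`, this is the general file-plus-console pattern it refers to — a minimal sketch using only the standard library, with the handler wiring assumed rather than copied from wind-up (only the format string above comes from the diff):

```python
import logging
from pathlib import Path


def setup_logger_sketch(log_fpath: Path, level: int = logging.INFO) -> None:
    """Attach a file handler and a console handler to the root logger."""
    formatter = logging.Formatter("%(asctime)s [%(levelname)-8s] %(message)s")
    root_logger = logging.getLogger()
    root_logger.setLevel(level)

    file_handler = logging.FileHandler(log_fpath)  # persist log records to disk
    file_handler.setFormatter(formatter)
    root_logger.addHandler(file_handler)

    console_handler = logging.StreamHandler()  # echo the same records to the console
    console_handler.setFormatter(formatter)
    root_logger.addHandler(console_handler)
```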
12 changes: 8 additions & 4 deletions examples/kelmarsh_kaggle.py
@@ -1,7 +1,8 @@
"""Example submission for https://www.kaggle.com/competitions/predict-the-wind-speed-at-a-wind-turbine/.
wind-up is used for feature engineering, in particular estimating wind speed based on power
and directional detrending."""
and directional detrending.
"""

from __future__ import annotations

@@ -247,7 +248,8 @@ def make_windup_features(analysis_name: str) -> None:
def save_t1_detrend_dfs(assessment_inputs: AssessmentInputs) -> None:
"""Save the detrended dataframes for Kelmarsh 1 and the reference turbines.
note most of this logic is copied from wind_up/main_analysis.py"""
note most of this logic is copied from wind_up/main_analysis.py
"""
wf_df = assessment_inputs.wf_df
cfg = assessment_inputs.cfg
plot_cfg = assessment_inputs.plot_cfg
@@ -465,7 +467,8 @@ def sun_alt(
) -> float:
"""Calculate sun altitude for a given row in a DataFrame.
This code was adapted from https://github.com/NREL/flasc"""
This code was adapted from https://github.com/NREL/flasc
"""
observer.lat = str(latitude)
observer.long = str(longitude)
observer.date = row[utc_timestamp_col] + time_shift
@@ -484,7 +487,8 @@ def add_sun_alt_to_df(
) -> pd.DataFrame:
"""Calculate sun altitude for a given row in a DataFrame.
This code was adapted from https://github.com/NREL/flasc"""
This code was adapted from https://github.com/NREL/flasc
"""
out_df = input_df.copy()
observer = ephem.Observer()
return out_df.assign(
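The `sun_alt` and `add_sun_alt_to_df` docstrings above describe an `ephem`-based sun-altitude calculation adapted from flasc. A self-contained sketch of the same idea, with made-up coordinates and timestamp standing in for the values the real functions read from the DataFrame row and config:

```python
import math

import ephem

observer = ephem.Observer()
observer.lat = "52.4"               # ephem expects degrees passed as strings
observer.long = "-0.9"
observer.date = "2025/1/27 12:00"   # UTC timestamp

sun = ephem.Sun()
sun.compute(observer)

altitude_deg = math.degrees(float(sun.alt))  # ephem angles are in radians
print(f"sun altitude: {altitude_deg:.1f} deg")
```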
4 changes: 1 addition & 3 deletions examples/smarteole_example.py
@@ -48,9 +48,7 @@
def unpack_smarteole_scada(
timebase_s: int, scada_data_file: Path | str | IO[bytes] = DEFAULT_SCADA_FILE_PATH
) -> pd.DataFrame:
"""
Function that translates 1-minute SCADA data to x minute data in the wind-up expected format
"""
"""Function that translates 1-minute SCADA data to x minute data in the wind-up expected format"""

def _separate_turbine_id_from_field(x: str) -> tuple[str, str]:
parts = x.split("_")
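The reworked `unpack_smarteole_scada` docstring summarises a 1-minute-to-x-minute translation. As a rough illustration of that kind of resampling (the 10-minute default and the assumption of a DatetimeIndex with numeric columns are mine, not the function's actual logic):

```python
import pandas as pd


def resample_scada_sketch(one_minute_df: pd.DataFrame, timebase_s: int = 600) -> pd.DataFrame:
    """Aggregate 1-minute SCADA data onto a coarser timebase (10 minutes by default)."""
    return one_minute_df.resample(pd.Timedelta(seconds=timebase_s)).mean()
```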
6 changes: 2 additions & 4 deletions examples/wedowind_example.py
@@ -208,8 +208,7 @@ def create_fake_wedowind_reanalysis_dataset(start_datetime: dt.datetime) -> Rean


def establish_wedowind_key_dates(scada_df: pd.DataFrame) -> KeyDates:
"""
Extracts important dates from the SCADA data. These dates may then be used in the WindUpConfig.
"""Extracts important dates from the SCADA data. These dates may then be used in the WindUpConfig.
Args:
scada_df:
@@ -258,8 +257,7 @@ def establish_wedowind_key_dates(scada_df: pd.DataFrame) -> KeyDates:
def generate_custom_exploratory_plots(
scada_df: pd.DataFrame, assumed_rated_power_kw: float, rotor_diameter_m: int, out_dir: Path
) -> Path:
"""
These custom plots are to help with SCADA data exploration.
"""These custom plots are to help with SCADA data exploration.
It was created because it was unclear how the SCADA data is related to the metadata so helped in looking for wakes
in the data.
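The `establish_wedowind_key_dates` docstring above describes deriving analysis dates from the SCADA data for use in the `WindUpConfig`. A deliberately simplified sketch of that idea — the real function returns a richer `KeyDates` object, so the tuple here is purely illustrative:

```python
import pandas as pd


def key_dates_sketch(scada_df: pd.DataFrame) -> tuple[pd.Timestamp, pd.Timestamp]:
    """Return the first and last timestamps covered by the SCADA data."""
    # assumes a DatetimeIndex; the real code derives several analysis dates from these bounds
    return scada_df.index.min(), scada_df.index.max()
```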
14 changes: 11 additions & 3 deletions pyproject.toml
@@ -82,11 +82,12 @@ select = ["ALL"] # https://beta.ruff.rs/docs/rules/
ignore = [
"ANN204", # `__init__` doesn't need annotations
"S301", # `pickle` and modules that wrap it can be unsafe when used to deserialize untrusted data, possible security issue
"D", # docstring checks
"PGH004", # Use specific rule codes when using `noqa`. Seems to give false alarms
"COM812", # can conflict with ruff formatter
"ISC001", # can conflict with ruff formatter
"G004", # logging statement can use f-string
"D203", # `incorrect-blank-line-before-class` (D203) and `no-blank-line-before-class` (D211) are incompatible
"D213", # `multi-line-summary-first-line` (D212) and `multi-line-summary-second-line` (D213) are incompatible
]

[tool.ruff.lint.mccabe]
@@ -99,11 +100,18 @@ max-args = 17 # try to bring this down to 5

[tool.ruff.lint.per-file-ignores]
"wind_up/models.py" = ["N805", "PERF401"] # try to eliminate this
"tests/**/*.py" = ["S101", "PLR2004"] # allow `assert` and magic values in tests
"tests/**/*.py" = [
"S101", # allow `assert`
"PLR2004", # allow magic values
"D", # TODO: remove this
]
"tests/test_smart_data.py" = ["DTZ001"] # SMART functions use tz naive datetimes
"**/__init__.py" = ["F401"] # ignore unused imports in __init__.py
"examples/**/*.py" = ["T20"] # allow print in examples
"examples/**/*.py" = ["T20", "D"]
"examples/**/kelmarsh_kaggle.py" = ["S101","PLR0915","PD013","N806"]
"wind_up/smart_data.py" = ["D"] # RES specific data loading
"wind_up/plots/*.py" = ["D"] # TODO: remove this
"examples/*.ipynb" = ["D"] # Jupyter notebooks

[tool.mypy]
plugins = ["pydantic.mypy"]
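Ignoring D203 and D213 (while keeping D211 and D212) pins down one docstring style: no blank line between a class statement and its docstring, and the summary on the first line of a multi-line docstring. A hypothetical snippet that satisfies the enforced style, consistent with the docstrings added in this commit:

```python
class TurbineRecordExample:  # hypothetical class, shown only to illustrate the style
    """Container for one turbine's SCADA record.

    The summary sits on the first line of the multi-line docstring (D212), and no
    blank line separates the class statement from the docstring (D211).
    """
```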
6 changes: 3 additions & 3 deletions tests/test_main_analysis.py
@@ -5,7 +5,7 @@
from pandas.testing import assert_frame_equal

from wind_up.constants import TIMESTAMP_COL
from wind_up.main_analysis import toggle_pairing_filter
from wind_up.main_analysis import _toggle_pairing_filter


def test_toggle_pairing_filter_method_none() -> None:
@@ -35,7 +35,7 @@ def test_toggle_pairing_filter_method_none() -> None:
index=post_tstamps,
)

filt_pre_df, filt_post_df = toggle_pairing_filter(
filt_pre_df, filt_post_df = _toggle_pairing_filter(
pre_df=pre_df,
post_df=post_df,
pairing_filter_method="none",
@@ -107,7 +107,7 @@ def copy_of_make_extended_time_index(
exp_filt_post_df = b[
[x in copy_of_make_extended_time_index(a.index, pd.Timedelta("10min"), tolerance_minutes * 60) for x in b.index]
]
filt_pre_df, filt_post_df = toggle_pairing_filter(
filt_pre_df, filt_post_df = _toggle_pairing_filter(
pre_df=pre_df,
post_df=post_df,
pairing_filter_method="any_within_timedelta",
2 changes: 1 addition & 1 deletion tests/test_math_funcs.py
@@ -5,7 +5,7 @@
import pytest
from pandas.testing import assert_series_equal

from wind_up.math_funcs import circ_diff
from wind_up.circular_math import circ_diff

test_circ_diff_data = [
(0, 0, 0),
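With `math_funcs.py` renamed to `circular_math.py`, the test now imports `circ_diff` from the new module path, as shown above. For orientation, a hedged sketch of what a circular-difference helper typically computes — the wrap convention below is the common one and is not claimed to be wind-up's exact implementation:

```python
def circ_diff_sketch(angle1: float, angle2: float) -> float:
    """Signed difference angle1 - angle2 in degrees, wrapped into [-180, 180)."""
    return (angle1 - angle2 + 180.0) % 360.0 - 180.0


assert circ_diff_sketch(0, 0) == 0       # consistent with the (0, 0, 0) test case above
assert circ_diff_sketch(350, 10) == -20  # 350 deg and 10 deg are only 20 deg apart on the circle
```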
10 changes: 5 additions & 5 deletions tests/test_northing.py
@@ -5,8 +5,8 @@

from wind_up.constants import REANALYSIS_WD_COL
from wind_up.models import WindUpConfig
from wind_up.northing import apply_northing_corrections, calc_max_abs_north_errs
from wind_up.scada_funcs import scada_multi_index
from wind_up.northing import _calc_max_abs_north_errs, apply_northing_corrections
from wind_up.scada_funcs import _scada_multi_index


def test_apply_northing_corrections(test_lsa_t13_config: WindUpConfig) -> None:
@@ -19,7 +19,7 @@ def test_apply_northing_corrections(test_lsa_t13_config: WindUpConfig) -> None:
wtg_df = test_df.copy()
wtg_df["TurbineName"] = wtg_name
wf_df = pd.concat([wf_df, wtg_df])
wf_df = scada_multi_index(wf_df)
wf_df = _scada_multi_index(wf_df)
wf_df_after_northing = apply_northing_corrections(wf_df, cfg=cfg, north_ref_wd_col=REANALYSIS_WD_COL, plot_cfg=None)

median_yaw_before_northing = wf_df.groupby("TurbineName")["YawAngleMean"].median()
@@ -32,10 +32,10 @@
assert median_yaw_after_northing["LSA_T13"] == pytest.approx(235.22855377197266)
assert median_yaw_after_northing["LSA_T14"] == pytest.approx(224.92881774902344)

abs_north_errs_before_northing = calc_max_abs_north_errs(
abs_north_errs_before_northing = _calc_max_abs_north_errs(
wf_df, north_ref_wd_col=REANALYSIS_WD_COL, timebase_s=cfg.timebase_s
)
abs_north_errs_after_northing = calc_max_abs_north_errs(
abs_north_errs_after_northing = _calc_max_abs_north_errs(
wf_df_after_northing, north_ref_wd_col=REANALYSIS_WD_COL, timebase_s=cfg.timebase_s
)
assert abs_north_errs_before_northing.min() == pytest.approx(7.88920288085938)
8 changes: 4 additions & 4 deletions tests/test_optimize_northing.py
@@ -8,7 +8,7 @@
from tests.conftest import TEST_DATA_FLD
from wind_up.constants import RAW_DOWNTIME_S_COL, RAW_POWER_COL, RAW_YAWDIR_COL, TIMESTAMP_COL
from wind_up.models import WindUpConfig
from wind_up.optimize_northing import auto_northing_corrections, clip_wtg_north_table
from wind_up.optimize_northing import _clip_wtg_north_table, auto_northing_corrections
from wind_up.reanalysis_data import ReanalysisDataset, add_reanalysis_data


@@ -40,7 +40,7 @@ def test_clip_wtg_north_table_entries_before() -> None:
"north_offset": [1, 2, 3],
},
)
actual_wtg_north_table = clip_wtg_north_table(initial_wtg_north_table, wtg_df=wtg_df)
actual_wtg_north_table = _clip_wtg_north_table(initial_wtg_north_table, wtg_df=wtg_df)
assert_frame_equal(actual_wtg_north_table, expected_wtg_north_table)


@@ -72,7 +72,7 @@ def test_clip_wtg_north_table_entry_exactly_at_start() -> None:
"north_offset": [1, 2, 3],
},
)
actual_wtg_north_table = clip_wtg_north_table(initial_wtg_north_table, wtg_df=wtg_df)
actual_wtg_north_table = _clip_wtg_north_table(initial_wtg_north_table, wtg_df=wtg_df)
assert_frame_equal(actual_wtg_north_table, expected_wtg_north_table)


@@ -101,7 +101,7 @@ def test_clip_wtg_north_table_entry_after_start() -> None:
"north_offset": [0],
},
)
actual_wtg_north_table = clip_wtg_north_table(initial_wtg_north_table, wtg_df=wtg_df)
actual_wtg_north_table = _clip_wtg_north_table(initial_wtg_north_table, wtg_df=wtg_df)
assert_frame_equal(actual_wtg_north_table, expected_wtg_north_table)


4 changes: 2 additions & 2 deletions tests/test_pp_analysis.py
@@ -5,7 +5,7 @@
from pandas.testing import assert_frame_equal

from wind_up.models import WindUpConfig
from wind_up.pp_analysis import pre_post_pp_analysis_with_reversal
from wind_up.pp_analysis import _pre_post_pp_analysis_with_reversal


def test_pre_post_pp_analysis_with_reversal(test_lsa_t13_config: WindUpConfig) -> None:
@@ -22,7 +22,7 @@ def test_pre_post_pp_analysis_with_reversal(test_lsa_t13_config: WindUpConfig) -
lt_wtg_df_filt = pd.read_parquet(Path(__file__).parents[0] / "test_data/LSA_T13_lt_wtg_df_filt.parquet")
test_df = pd.read_parquet(Path(__file__).parents[0] / "test_data/LSA_T13_test_df.parquet")
expected_df = pd.read_parquet(Path(__file__).parents[0] / "test_data/pre_post_pp_analysis_expected_df.parquet")
pp_results, actual_df = pre_post_pp_analysis_with_reversal(
pp_results, actual_df = _pre_post_pp_analysis_with_reversal(
cfg=cfg,
test_wtg=test_wtg,
ref_name=ref_name,
50 changes: 25 additions & 25 deletions tests/test_scada_funcs.py
@@ -7,18 +7,18 @@
from wind_up.constants import TIMESTAMP_COL
from wind_up.models import WindUpConfig
from wind_up.scada_funcs import (
add_pw_clipped,
filter_bad_pw_ws,
filter_downtime,
filter_exclusions,
filter_missing_rpm_or_pt,
filter_rpm_and_pt,
filter_rpm_and_pt_oor_one_ttype,
filter_stuck_data,
filter_wrong_yaw,
filter_yaw_exclusions,
scada_multi_index,
wrap_yaw_and_pitch,
_add_pw_clipped,
_filter_bad_pw_ws,
_filter_downtime,
_filter_exclusions,
_filter_missing_rpm_or_pt,
_filter_rpm_and_pt,
_filter_rpm_and_pt_oor_one_ttype,
_filter_stuck_data,
_filter_wrong_yaw,
_filter_yaw_exclusions,
_scada_multi_index,
_wrap_yaw_and_pitch,
)


@@ -45,7 +45,7 @@ def test_filter_stuck_data() -> None:
expected.iloc[-2, :] = np.nan

# Run the function
actual = filter_stuck_data(input_df)
actual = _filter_stuck_data(input_df)

# Check the output is as expected
assert_frame_equal(actual, expected)
@@ -64,7 +64,7 @@ def test_filter_bad_pw_ws() -> None:
},
index=idx,
)
adf = filter_bad_pw_ws(adf, max_rated_power=2000)
adf = _filter_bad_pw_ws(adf, max_rated_power=2000)
edf = pd.DataFrame(
data={
"ActivePowerMean": [np.nan, np.nan, np.nan, np.nan, 4000, np.nan, -1000, np.nan, np.nan],
@@ -89,7 +89,7 @@ def test_wrap_yaw_and_pitch() -> None:
},
index=idx,
)
adf = wrap_yaw_and_pitch(adf)
adf = _wrap_yaw_and_pitch(adf)
edf = pd.DataFrame(
data={
"YawAngleMean": [179, 180, 181, 359, 0, 1, 359, 0, 1],
@@ -113,7 +113,7 @@ def test_filter_wrong_yaw() -> None:
},
index=idx,
)
adf = filter_wrong_yaw(adf)
adf = _filter_wrong_yaw(adf)
edf = pd.DataFrame(
data={
"YawAngleMean": [180, 180, 180, np.nan, np.nan, np.nan, np.nan, 180, 180],
@@ -141,7 +141,7 @@ def test_filter_exclusions() -> None:
("MRG_T02", pd.Timestamp("2021-01-01 00:29:00", tz="UTC"), pd.Timestamp("2021-01-01 00:31:00", tz="UTC")),
("ALL", pd.Timestamp("2021-01-01 00:30:00", tz="UTC"), pd.Timestamp("2022-02-02 00:00:00", tz="UTC")),
]
adf = filter_exclusions(adf.copy(), exclusion_periods_utc)
adf = _filter_exclusions(adf.copy(), exclusion_periods_utc)
expected_values = [
np.nan,
np.nan,
@@ -188,7 +188,7 @@ def test_filter_yaw_exclusions() -> None:
("MRG_T02", pd.Timestamp("2021-01-01 00:29:00", tz="UTC"), pd.Timestamp("2021-01-01 00:31:00", tz="UTC")),
("ALL", pd.Timestamp("2021-01-01 00:30:00", tz="UTC"), pd.Timestamp("2022-02-02 00:00:00", tz="UTC")),
]
adf = filter_yaw_exclusions(adf.copy(), yaw_data_exclusions_utc)
adf = _filter_yaw_exclusions(adf.copy(), yaw_data_exclusions_utc)
edf = pd.DataFrame(
data={
"ActivePowerMean": [1.0] * len(idx),
@@ -230,7 +230,7 @@ def test_filter_downtime() -> None:
},
index=idx,
)
adf = filter_downtime(adf)
adf = _filter_downtime(adf)
edf = pd.DataFrame(
data={
"ShutdownDuration": [np.nan, np.nan, 0, np.nan],
@@ -255,7 +255,7 @@ def test_filter_missing_rpm_or_pt() -> None:
},
index=idx,
)
adf = filter_missing_rpm_or_pt(adf)
adf = _filter_missing_rpm_or_pt(adf)
edf = pd.DataFrame(
data={
"GenRpmMean": [np.nan, np.nan, np.nan, -1],
@@ -281,7 +281,7 @@ def test_filter_rpm_and_pt_oor_one_ttype() -> None:
},
index=idx,
)
adf, na_rows = filter_rpm_and_pt_oor_one_ttype(adf, rpm_lower=800, rpm_upper=1600, pt_lower=-10, pt_upper=40)
adf, na_rows = _filter_rpm_and_pt_oor_one_ttype(adf, rpm_lower=800, rpm_upper=1600, pt_lower=-10, pt_upper=40)
edf = pd.DataFrame(
data={
"GenRpmMean": [np.nan, 800, 1600, np.nan, np.nan, 1000, 1000, np.nan],
@@ -308,7 +308,7 @@ def test_add_pw_clipped(test_marge_config: WindUpConfig) -> None:
cfg = test_marge_config
cfg.asset.wtgs[2].turbine_type.rated_power_kw = 100
cfg.asset.wtgs = cfg.asset.wtgs[:3]
adf = add_pw_clipped(adf, wtgs=cfg.asset.wtgs)
adf = _add_pw_clipped(adf, wtgs=cfg.asset.wtgs)
edf = pd.DataFrame(
data={
"ActivePowerMean": [-1, 0, 1901, -1, np.nan, 1900, -1, 1900, 1901],
@@ -324,9 +324,9 @@ def test_filter_rpm_and_pt(test_marge_config: WindUpConfig) -> None:
adf = pd.read_parquet(
Path(__file__).parents[0] / "test_data/smart_data/Marge Wind Farm/Marge Wind Farm_20230101_20230103.parquet"
)
adf = scada_multi_index(adf)
adf = add_pw_clipped(adf, wtgs=cfg.asset.wtgs)
df_ = filter_rpm_and_pt(
adf = _scada_multi_index(adf)
adf = _add_pw_clipped(adf, wtgs=cfg.asset.wtgs)
df_ = _filter_rpm_and_pt(
input_df=adf,
cfg=cfg,
plot_cfg=None,
4 changes: 2 additions & 2 deletions tests/test_waking_state.py
@@ -8,7 +8,7 @@

from wind_up.constants import RAW_DOWNTIME_S_COL, RAW_POWER_COL, RAW_WINDSPEED_COL, TIMESTAMP_COL
from wind_up.models import WindUpConfig
from wind_up.scada_funcs import scada_multi_index
from wind_up.scada_funcs import _scada_multi_index
from wind_up.waking_state import (
add_waking_scen,
add_waking_state,
@@ -220,7 +220,7 @@ def test_add_waking_state(test_lsa_t13_config: WindUpConfig) -> None:
test_df = pd.read_parquet(Path(__file__).parents[0] / "test_data/LSA_T13_test_df.parquet")
test_df.columns = test_df.columns.str.replace("test_", "")
test_df["TurbineName"] = "LSA_T01"
wf_df = scada_multi_index(test_df)
wf_df = _scada_multi_index(test_df)
expected_df = wf_df.copy()
wf_df = wf_df.drop(columns=["waking", "not_waking", "unknown_waking"])
# remove all turbines from cfg apart from LSA_T01