docs: add docstrings (#49)
* docs: add docstrings

Closes #22

* address PR comments

- Add a TODO comment on the "D" exclusion for the `tests` folder
- Elaborate on the docstring in `wind_up/interface.py`
- Rename `math_funcs.py` to `circular_math.py` and update its module docstring
samuelwnaylor authored Jan 27, 2025
1 parent 15c7f2f commit 2fe80db
Showing 41 changed files with 746 additions and 274 deletions.
1 change: 1 addition & 0 deletions examples/__init__.py
@@ -0,0 +1 @@
"""Examples of using wind-up."""
3 changes: 3 additions & 0 deletions examples/helpers.py
@@ -1,3 +1,5 @@
"""Helper functions for the examples."""

from __future__ import annotations

import logging
@@ -19,6 +21,7 @@


def setup_logger(log_fpath: Path, level: int = logging.INFO) -> None:
"""Initializes the logger with a file handler and a console handler."""
log_formatter_file = logging.Formatter("%(asctime)s [%(levelname)-8s] %(message)s")
root_logger = logging.getLogger()
root_logger.setLevel(level)
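For readers skimming the new docstring on `setup_logger`, this is the general file-plus-console pattern it refers to — a minimal sketch using only the standard library, with the handler wiring assumed rather than copied from wind-up (only the format string above comes from the diff):

```python
import logging
from pathlib import Path


def setup_logger_sketch(log_fpath: Path, level: int = logging.INFO) -> None:
    """Attach a file handler and a console handler to the root logger."""
    formatter = logging.Formatter("%(asctime)s [%(levelname)-8s] %(message)s")
    root_logger = logging.getLogger()
    root_logger.setLevel(level)

    file_handler = logging.FileHandler(log_fpath)  # persist log records to disk
    file_handler.setFormatter(formatter)
    root_logger.addHandler(file_handler)

    console_handler = logging.StreamHandler()  # echo the same records to the console
    console_handler.setFormatter(formatter)
    root_logger.addHandler(console_handler)
```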
12 changes: 8 additions & 4 deletions examples/kelmarsh_kaggle.py
@@ -1,7 +1,8 @@
"""Example submission for https://www.kaggle.com/competitions/predict-the-wind-speed-at-a-wind-turbine/.
wind-up is used for feature engineering, in particular estimating wind speed based on power
and directional detrending."""
and directional detrending.
"""

from __future__ import annotations

@@ -247,7 +248,8 @@ def make_windup_features(analysis_name: str) -> None:
def save_t1_detrend_dfs(assessment_inputs: AssessmentInputs) -> None:
"""Save the detrended dataframes for Kelmarsh 1 and the reference turbines.
note most of this logic is copied from wind_up/main_analysis.py"""
note most of this logic is copied from wind_up/main_analysis.py
"""
wf_df = assessment_inputs.wf_df
cfg = assessment_inputs.cfg
plot_cfg = assessment_inputs.plot_cfg
@@ -465,7 +467,8 @@ def sun_alt(
) -> float:
"""Calculate sun altitude for a given row in a DataFrame.
This code was adapted from https://github.com/NREL/flasc"""
This code was adapted from https://github.com/NREL/flasc
"""
observer.lat = str(latitude)
observer.long = str(longitude)
observer.date = row[utc_timestamp_col] + time_shift
@@ -484,7 +487,8 @@ def add_sun_alt_to_df(
) -> pd.DataFrame:
"""Calculate sun altitude for a given row in a DataFrame.
This code was adapted from https://github.com/NREL/flasc"""
This code was adapted from https://github.com/NREL/flasc
"""
out_df = input_df.copy()
observer = ephem.Observer()
return out_df.assign(
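The `sun_alt` and `add_sun_alt_to_df` docstrings above describe an `ephem`-based sun-altitude calculation adapted from flasc. A self-contained sketch of the same idea, with made-up coordinates and timestamp standing in for the values the real functions read from the DataFrame row and config:

```python
import math

import ephem

observer = ephem.Observer()
observer.lat = "52.4"               # ephem expects degrees passed as strings
observer.long = "-0.9"
observer.date = "2025/1/27 12:00"   # UTC timestamp

sun = ephem.Sun()
sun.compute(observer)

altitude_deg = math.degrees(float(sun.alt))  # ephem angles are in radians
print(f"sun altitude: {altitude_deg:.1f} deg")
```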
4 changes: 1 addition & 3 deletions examples/smarteole_example.py
@@ -48,9 +48,7 @@
def unpack_smarteole_scada(
timebase_s: int, scada_data_file: Path | str | IO[bytes] = DEFAULT_SCADA_FILE_PATH
) -> pd.DataFrame:
"""
Function that translates 1-minute SCADA data to x minute data in the wind-up expected format
"""
"""Function that translates 1-minute SCADA data to x minute data in the wind-up expected format"""

def _separate_turbine_id_from_field(x: str) -> tuple[str, str]:
parts = x.split("_")
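The reworked `unpack_smarteole_scada` docstring summarises a 1-minute-to-x-minute translation. As a rough illustration of that kind of resampling (the 10-minute default and the assumption of a DatetimeIndex with numeric columns are mine, not the function's actual logic):

```python
import pandas as pd


def resample_scada_sketch(one_minute_df: pd.DataFrame, timebase_s: int = 600) -> pd.DataFrame:
    """Aggregate 1-minute SCADA data onto a coarser timebase (10 minutes by default)."""
    return one_minute_df.resample(pd.Timedelta(seconds=timebase_s)).mean()
```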
6 changes: 2 additions & 4 deletions examples/wedowind_example.py
@@ -208,8 +208,7 @@ def create_fake_wedowind_reanalysis_dataset(start_datetime: dt.datetime) -> Rean


def establish_wedowind_key_dates(scada_df: pd.DataFrame) -> KeyDates:
"""
Extracts important dates from the SCADA data. These dates may then be used in the WindUpConfig.
"""Extracts important dates from the SCADA data. These dates may then be used in the WindUpConfig.
Args:
scada_df:
@@ -258,8 +257,7 @@ def establish_wedowind_key_dates(scada_df: pd.DataFrame) -> KeyDates:
def generate_custom_exploratory_plots(
scada_df: pd.DataFrame, assumed_rated_power_kw: float, rotor_diameter_m: int, out_dir: Path
) -> Path:
"""
These custom plots are to help with SCADA data exploration.
"""These custom plots are to help with SCADA data exploration.
It was created because it was unclear how the SCADA data is related to the metadata so helped in looking for wakes
in the data.
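The `establish_wedowind_key_dates` docstring above describes deriving analysis dates from the SCADA data for use in the `WindUpConfig`. A deliberately simplified sketch of that idea — the real function returns a richer `KeyDates` object, so the tuple here is purely illustrative:

```python
import pandas as pd


def key_dates_sketch(scada_df: pd.DataFrame) -> tuple[pd.Timestamp, pd.Timestamp]:
    """Return the first and last timestamps covered by the SCADA data."""
    # assumes a DatetimeIndex; the real code derives several analysis dates from these bounds
    return scada_df.index.min(), scada_df.index.max()
```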
14 changes: 11 additions & 3 deletions pyproject.toml
@@ -82,11 +82,12 @@ select = ["ALL"] # https://beta.ruff.rs/docs/rules/
ignore = [
"ANN204", # `__init__` doesn't need annotations
"S301", # `pickle` and modules that wrap it can be unsafe when used to deserialize untrusted data, possible security issue
"D", # docstring checks
"PGH004", # Use specific rule codes when using `noqa`. Seems to give false alarms
"COM812", # can conflict with ruff formatter
"ISC001", # can conflict with ruff formatter
"G004", # logging statement can use f-string
"D203", # `incorrect-blank-line-before-class` (D203) and `no-blank-line-before-class` (D211) are incompatible
"D213", # `multi-line-summary-first-line` (D212) and `multi-line-summary-second-line` (D213) are incompatible
]

[tool.ruff.lint.mccabe]
@@ -99,11 +100,18 @@ max-args = 17 # try to bring this down to 5

[tool.ruff.lint.per-file-ignores]
"wind_up/models.py" = ["N805", "PERF401"] # try to eliminate this
"tests/**/*.py" = ["S101", "PLR2004"] # allow `assert` and magic values in tests
"tests/**/*.py" = [
"S101", # allow `assert`
"PLR2004", # allow magic values
"D", # TODO: remove this
]
"tests/test_smart_data.py" = ["DTZ001"] # SMART functions use tz naive datetimes
"**/__init__.py" = ["F401"] # ignore unused imports in __init__.py
"examples/**/*.py" = ["T20"] # allow print in examples
"examples/**/*.py" = ["T20", "D"]
"examples/**/kelmarsh_kaggle.py" = ["S101","PLR0915","PD013","N806"]
"wind_up/smart_data.py" = ["D"] # RES specific data loading
"wind_up/plots/*.py" = ["D"] # TODO: remove this
"examples/*.ipynb" = ["D"] # Jupyter notebooks

[tool.mypy]
plugins = ["pydantic.mypy"]
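Ignoring D203 and D213 (while keeping D211 and D212) pins down one docstring style: no blank line between a class statement and its docstring, and the summary on the first line of a multi-line docstring. A hypothetical snippet that satisfies the enforced style, consistent with the docstrings added in this commit:

```python
class TurbineRecordExample:  # hypothetical class, shown only to illustrate the style
    """Container for one turbine's SCADA record.

    The summary sits on the first line of the multi-line docstring (D212), and no
    blank line separates the class statement from the docstring (D211).
    """
```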
6 changes: 3 additions & 3 deletions tests/test_main_analysis.py
@@ -5,7 +5,7 @@
from pandas.testing import assert_frame_equal

from wind_up.constants import TIMESTAMP_COL
from wind_up.main_analysis import toggle_pairing_filter
from wind_up.main_analysis import _toggle_pairing_filter


def test_toggle_pairing_filter_method_none() -> None:
@@ -35,7 +35,7 @@ def test_toggle_pairing_filter_method_none() -> None:
index=post_tstamps,
)

filt_pre_df, filt_post_df = toggle_pairing_filter(
filt_pre_df, filt_post_df = _toggle_pairing_filter(
pre_df=pre_df,
post_df=post_df,
pairing_filter_method="none",
@@ -107,7 +107,7 @@ def copy_of_make_extended_time_index(
exp_filt_post_df = b[
[x in copy_of_make_extended_time_index(a.index, pd.Timedelta("10min"), tolerance_minutes * 60) for x in b.index]
]
filt_pre_df, filt_post_df = toggle_pairing_filter(
filt_pre_df, filt_post_df = _toggle_pairing_filter(
pre_df=pre_df,
post_df=post_df,
pairing_filter_method="any_within_timedelta",
2 changes: 1 addition & 1 deletion tests/test_math_funcs.py
@@ -5,7 +5,7 @@
import pytest
from pandas.testing import assert_series_equal

from wind_up.math_funcs import circ_diff
from wind_up.circular_math import circ_diff

test_circ_diff_data = [
(0, 0, 0),
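With `math_funcs.py` renamed to `circular_math.py`, the test now imports `circ_diff` from the new module path, as shown above. For orientation, a hedged sketch of what a circular-difference helper typically computes — the wrap convention below is the common one and is not claimed to be wind-up's exact implementation:

```python
def circ_diff_sketch(angle1: float, angle2: float) -> float:
    """Signed difference angle1 - angle2 in degrees, wrapped into [-180, 180)."""
    return (angle1 - angle2 + 180.0) % 360.0 - 180.0


assert circ_diff_sketch(0, 0) == 0       # consistent with the (0, 0, 0) test case above
assert circ_diff_sketch(350, 10) == -20  # 350 deg and 10 deg are only 20 deg apart on the circle
```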
10 changes: 5 additions & 5 deletions tests/test_northing.py
@@ -5,8 +5,8 @@

from wind_up.constants import REANALYSIS_WD_COL
from wind_up.models import WindUpConfig
from wind_up.northing import apply_northing_corrections, calc_max_abs_north_errs
from wind_up.scada_funcs import scada_multi_index
from wind_up.northing import _calc_max_abs_north_errs, apply_northing_corrections
from wind_up.scada_funcs import _scada_multi_index


def test_apply_northing_corrections(test_lsa_t13_config: WindUpConfig) -> None:
@@ -19,7 +19,7 @@ def test_apply_northing_corrections(test_lsa_t13_config: WindUpConfig) -> None:
wtg_df = test_df.copy()
wtg_df["TurbineName"] = wtg_name
wf_df = pd.concat([wf_df, wtg_df])
wf_df = scada_multi_index(wf_df)
wf_df = _scada_multi_index(wf_df)
wf_df_after_northing = apply_northing_corrections(wf_df, cfg=cfg, north_ref_wd_col=REANALYSIS_WD_COL, plot_cfg=None)

median_yaw_before_northing = wf_df.groupby("TurbineName")["YawAngleMean"].median()
@@ -32,10 +32,10 @@
assert median_yaw_after_northing["LSA_T13"] == pytest.approx(235.22855377197266)
assert median_yaw_after_northing["LSA_T14"] == pytest.approx(224.92881774902344)

abs_north_errs_before_northing = calc_max_abs_north_errs(
abs_north_errs_before_northing = _calc_max_abs_north_errs(
wf_df, north_ref_wd_col=REANALYSIS_WD_COL, timebase_s=cfg.timebase_s
)
abs_north_errs_after_northing = calc_max_abs_north_errs(
abs_north_errs_after_northing = _calc_max_abs_north_errs(
wf_df_after_northing, north_ref_wd_col=REANALYSIS_WD_COL, timebase_s=cfg.timebase_s
)
assert abs_north_errs_before_northing.min() == pytest.approx(7.88920288085938)
8 changes: 4 additions & 4 deletions tests/test_optimize_northing.py
@@ -8,7 +8,7 @@
from tests.conftest import TEST_DATA_FLD
from wind_up.constants import RAW_DOWNTIME_S_COL, RAW_POWER_COL, RAW_YAWDIR_COL, TIMESTAMP_COL
from wind_up.models import WindUpConfig
from wind_up.optimize_northing import auto_northing_corrections, clip_wtg_north_table
from wind_up.optimize_northing import _clip_wtg_north_table, auto_northing_corrections
from wind_up.reanalysis_data import ReanalysisDataset, add_reanalysis_data


@@ -40,7 +40,7 @@ def test_clip_wtg_north_table_entries_before() -> None:
"north_offset": [1, 2, 3],
},
)
actual_wtg_north_table = clip_wtg_north_table(initial_wtg_north_table, wtg_df=wtg_df)
actual_wtg_north_table = _clip_wtg_north_table(initial_wtg_north_table, wtg_df=wtg_df)
assert_frame_equal(actual_wtg_north_table, expected_wtg_north_table)


@@ -72,7 +72,7 @@ def test_clip_wtg_north_table_entry_exactly_at_start() -> None:
"north_offset": [1, 2, 3],
},
)
actual_wtg_north_table = clip_wtg_north_table(initial_wtg_north_table, wtg_df=wtg_df)
actual_wtg_north_table = _clip_wtg_north_table(initial_wtg_north_table, wtg_df=wtg_df)
assert_frame_equal(actual_wtg_north_table, expected_wtg_north_table)


@@ -101,7 +101,7 @@ def test_clip_wtg_north_table_entry_after_start() -> None:
"north_offset": [0],
},
)
actual_wtg_north_table = clip_wtg_north_table(initial_wtg_north_table, wtg_df=wtg_df)
actual_wtg_north_table = _clip_wtg_north_table(initial_wtg_north_table, wtg_df=wtg_df)
assert_frame_equal(actual_wtg_north_table, expected_wtg_north_table)


4 changes: 2 additions & 2 deletions tests/test_pp_analysis.py
@@ -5,7 +5,7 @@
from pandas.testing import assert_frame_equal

from wind_up.models import WindUpConfig
from wind_up.pp_analysis import pre_post_pp_analysis_with_reversal
from wind_up.pp_analysis import _pre_post_pp_analysis_with_reversal


def test_pre_post_pp_analysis_with_reversal(test_lsa_t13_config: WindUpConfig) -> None:
@@ -22,7 +22,7 @@ def test_pre_post_pp_analysis_with_reversal(test_lsa_t13_config: WindUpConfig) -
lt_wtg_df_filt = pd.read_parquet(Path(__file__).parents[0] / "test_data/LSA_T13_lt_wtg_df_filt.parquet")
test_df = pd.read_parquet(Path(__file__).parents[0] / "test_data/LSA_T13_test_df.parquet")
expected_df = pd.read_parquet(Path(__file__).parents[0] / "test_data/pre_post_pp_analysis_expected_df.parquet")
pp_results, actual_df = pre_post_pp_analysis_with_reversal(
pp_results, actual_df = _pre_post_pp_analysis_with_reversal(
cfg=cfg,
test_wtg=test_wtg,
ref_name=ref_name,
50 changes: 25 additions & 25 deletions tests/test_scada_funcs.py
@@ -7,18 +7,18 @@
from wind_up.constants import TIMESTAMP_COL
from wind_up.models import WindUpConfig
from wind_up.scada_funcs import (
add_pw_clipped,
filter_bad_pw_ws,
filter_downtime,
filter_exclusions,
filter_missing_rpm_or_pt,
filter_rpm_and_pt,
filter_rpm_and_pt_oor_one_ttype,
filter_stuck_data,
filter_wrong_yaw,
filter_yaw_exclusions,
scada_multi_index,
wrap_yaw_and_pitch,
_add_pw_clipped,
_filter_bad_pw_ws,
_filter_downtime,
_filter_exclusions,
_filter_missing_rpm_or_pt,
_filter_rpm_and_pt,
_filter_rpm_and_pt_oor_one_ttype,
_filter_stuck_data,
_filter_wrong_yaw,
_filter_yaw_exclusions,
_scada_multi_index,
_wrap_yaw_and_pitch,
)


@@ -45,7 +45,7 @@ def test_filter_stuck_data() -> None:
expected.iloc[-2, :] = np.nan

# Run the function
actual = filter_stuck_data(input_df)
actual = _filter_stuck_data(input_df)

# Check the output is as expected
assert_frame_equal(actual, expected)
@@ -64,7 +64,7 @@ def test_filter_bad_pw_ws() -> None:
},
index=idx,
)
adf = filter_bad_pw_ws(adf, max_rated_power=2000)
adf = _filter_bad_pw_ws(adf, max_rated_power=2000)
edf = pd.DataFrame(
data={
"ActivePowerMean": [np.nan, np.nan, np.nan, np.nan, 4000, np.nan, -1000, np.nan, np.nan],
@@ -89,7 +89,7 @@ def test_wrap_yaw_and_pitch() -> None:
},
index=idx,
)
adf = wrap_yaw_and_pitch(adf)
adf = _wrap_yaw_and_pitch(adf)
edf = pd.DataFrame(
data={
"YawAngleMean": [179, 180, 181, 359, 0, 1, 359, 0, 1],
@@ -113,7 +113,7 @@ def test_filter_wrong_yaw() -> None:
},
index=idx,
)
adf = filter_wrong_yaw(adf)
adf = _filter_wrong_yaw(adf)
edf = pd.DataFrame(
data={
"YawAngleMean": [180, 180, 180, np.nan, np.nan, np.nan, np.nan, 180, 180],
@@ -141,7 +141,7 @@ def test_filter_exclusions() -> None:
("MRG_T02", pd.Timestamp("2021-01-01 00:29:00", tz="UTC"), pd.Timestamp("2021-01-01 00:31:00", tz="UTC")),
("ALL", pd.Timestamp("2021-01-01 00:30:00", tz="UTC"), pd.Timestamp("2022-02-02 00:00:00", tz="UTC")),
]
adf = filter_exclusions(adf.copy(), exclusion_periods_utc)
adf = _filter_exclusions(adf.copy(), exclusion_periods_utc)
expected_values = [
np.nan,
np.nan,
@@ -188,7 +188,7 @@ def test_filter_yaw_exclusions() -> None:
("MRG_T02", pd.Timestamp("2021-01-01 00:29:00", tz="UTC"), pd.Timestamp("2021-01-01 00:31:00", tz="UTC")),
("ALL", pd.Timestamp("2021-01-01 00:30:00", tz="UTC"), pd.Timestamp("2022-02-02 00:00:00", tz="UTC")),
]
adf = filter_yaw_exclusions(adf.copy(), yaw_data_exclusions_utc)
adf = _filter_yaw_exclusions(adf.copy(), yaw_data_exclusions_utc)
edf = pd.DataFrame(
data={
"ActivePowerMean": [1.0] * len(idx),
@@ -230,7 +230,7 @@ def test_filter_downtime() -> None:
},
index=idx,
)
adf = filter_downtime(adf)
adf = _filter_downtime(adf)
edf = pd.DataFrame(
data={
"ShutdownDuration": [np.nan, np.nan, 0, np.nan],
@@ -255,7 +255,7 @@ def test_filter_missing_rpm_or_pt() -> None:
},
index=idx,
)
adf = filter_missing_rpm_or_pt(adf)
adf = _filter_missing_rpm_or_pt(adf)
edf = pd.DataFrame(
data={
"GenRpmMean": [np.nan, np.nan, np.nan, -1],
@@ -281,7 +281,7 @@ def test_filter_rpm_and_pt_oor_one_ttype() -> None:
},
index=idx,
)
adf, na_rows = filter_rpm_and_pt_oor_one_ttype(adf, rpm_lower=800, rpm_upper=1600, pt_lower=-10, pt_upper=40)
adf, na_rows = _filter_rpm_and_pt_oor_one_ttype(adf, rpm_lower=800, rpm_upper=1600, pt_lower=-10, pt_upper=40)
edf = pd.DataFrame(
data={
"GenRpmMean": [np.nan, 800, 1600, np.nan, np.nan, 1000, 1000, np.nan],
@@ -308,7 +308,7 @@ def test_add_pw_clipped(test_marge_config: WindUpConfig) -> None:
cfg = test_marge_config
cfg.asset.wtgs[2].turbine_type.rated_power_kw = 100
cfg.asset.wtgs = cfg.asset.wtgs[:3]
adf = add_pw_clipped(adf, wtgs=cfg.asset.wtgs)
adf = _add_pw_clipped(adf, wtgs=cfg.asset.wtgs)
edf = pd.DataFrame(
data={
"ActivePowerMean": [-1, 0, 1901, -1, np.nan, 1900, -1, 1900, 1901],
@@ -324,9 +324,9 @@ def test_filter_rpm_and_pt(test_marge_config: WindUpConfig) -> None:
adf = pd.read_parquet(
Path(__file__).parents[0] / "test_data/smart_data/Marge Wind Farm/Marge Wind Farm_20230101_20230103.parquet"
)
adf = scada_multi_index(adf)
adf = add_pw_clipped(adf, wtgs=cfg.asset.wtgs)
df_ = filter_rpm_and_pt(
adf = _scada_multi_index(adf)
adf = _add_pw_clipped(adf, wtgs=cfg.asset.wtgs)
df_ = _filter_rpm_and_pt(
input_df=adf,
cfg=cfg,
plot_cfg=None,
4 changes: 2 additions & 2 deletions tests/test_waking_state.py
@@ -8,7 +8,7 @@

from wind_up.constants import RAW_DOWNTIME_S_COL, RAW_POWER_COL, RAW_WINDSPEED_COL, TIMESTAMP_COL
from wind_up.models import WindUpConfig
from wind_up.scada_funcs import scada_multi_index
from wind_up.scada_funcs import _scada_multi_index
from wind_up.waking_state import (
add_waking_scen,
add_waking_state,
@@ -220,7 +220,7 @@ def test_add_waking_state(test_lsa_t13_config: WindUpConfig) -> None:
test_df = pd.read_parquet(Path(__file__).parents[0] / "test_data/LSA_T13_test_df.parquet")
test_df.columns = test_df.columns.str.replace("test_", "")
test_df["TurbineName"] = "LSA_T01"
wf_df = scada_multi_index(test_df)
wf_df = _scada_multi_index(test_df)
expected_df = wf_df.copy()
wf_df = wf_df.drop(columns=["waking", "not_waking", "unknown_waking"])
# remove all turbines from cfg apart from LSA_T01