Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cli/pvlive #54

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/open_data_pvnet/configs/met_office_uk_data_config.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
general:
description: Config for accessing Met Office UK Deterministic NWP data
provider: "met_office"
name: PVNet current (Met Office UK)
destination_platform: "huggingface"
destination_dataset_id: "openclimatefix/met-office-uk-deterministic-solar"
Expand Down
10 changes: 10 additions & 0 deletions src/open_data_pvnet/configs/pvlive_data_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Configuration for the PVLive collection pipeline (collect_pvlive_data.py).
general:
description: Config for accessing PV-Live data
provider: "pv_live"  # selects the pv_live branch in data_uploader._validate_config
name: pvlive
destination_platform: "huggingface"
destination_dataset_id: "Ali-ws/ocf-pvlive"  # Hugging Face dataset repo to upload to


input_data:
local_data: "tmp/data/pvlive"  # local staging directory for the NetCDF/zarr output
2 changes: 1 addition & 1 deletion src/open_data_pvnet/nwp/met_office.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

# Define the configuration paths for UK and Global Met Office data.
# NOTE: the "uk" entry must point at the Met Office UK config — pointing it at
# the PVLive config breaks this module, because the Met Office pipeline reads
# the input_data.nwp.met_office keys that the PVLive config does not define.
CONFIG_PATHS = {
    "uk": PROJECT_BASE / "src/open_data_pvnet/configs/met_office_uk_data_config.yaml",
    "global": PROJECT_BASE / "src/open_data_pvnet/configs/met_office_global_data_config.yaml",
}

Expand Down
7 changes: 7 additions & 0 deletions src/open_data_pvnet/scripts/archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from open_data_pvnet.nwp.met_office import process_met_office_data
from open_data_pvnet.nwp.gfs import process_gfs_data
from open_data_pvnet.nwp.dwd import process_dwd_data
from open_data_pvnet.scripts.collect_pvlive_data import process_pvlive_data

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -61,5 +62,11 @@ def handle_archive(
f"Processing DWD data for {year}-{month:02d}-{day:02d} at hour {hour:02d} with overwrite={overwrite}"
)
process_dwd_data(year, month, day, hour, overwrite=overwrite)
elif provider == "pvlive":
logger.info(
f"Processing PVLive data for {year}-{month:02d}-{day:02d} with overwrite={overwrite}"
)
process_pvlive_data(year, month, day, hour, region, overwrite=overwrite, archive_type=archive_type)

else:
raise NotImplementedError(f"Provider {provider} not yet implemented")
83 changes: 70 additions & 13 deletions src/open_data_pvnet/scripts/collect_pvlive_data.py
Original file line number Diff line number Diff line change
@@ -1,32 +1,89 @@
import pandas as pd
import logging
from open_data_pvnet.utils.env_loader import PROJECT_BASE
from pathlib import Path
from datetime import datetime
from fetch_pvlive_data import PVLiveData
from open_data_pvnet.utils.config_loader import load_config
from open_data_pvnet.utils.data_uploader import upload_to_huggingface
from open_data_pvnet.utils.data_converters import convert_nc_to_zarr
import pytz
import xarray as xr
import numpy as np
import os

logger = logging.getLogger(__name__)

pv = PVLiveData()
# Define the configuration paths for UK and Global
CONFIG_PATHS = {
"uk": PROJECT_BASE / "src/open_data_pvnet/configs/pvlive_data_config.yaml",
}

start = datetime(2020, 1, 1, 0, 0, 0, 0, tzinfo=pytz.UTC)
end = datetime(2025, 1, 1, 1, 0, 0, 0, tzinfo=pytz.UTC)
def collect_pvlive_data(
    year: int,
    month: int,
    day: int,
    hour: int,
    overwrite: bool = False,
):
    """Fetch PVLive generation data for one timestamp and store it as NetCDF.

    Args:
        year: Year of the requested timestamp.
        month: Month of the requested timestamp.
        day: Day of the requested timestamp.
        hour: Hour (UTC) of the requested timestamp.
        overwrite: When False and the target file already exists, skip the
            download entirely and return None.

    Returns:
        The path of the written NetCDF file, or None when the file already
        exists and ``overwrite`` is False.
    """
    config_path = CONFIG_PATHS["uk"]
    config = load_config(config_path)
    logger.info(f"Loaded configuration from {config_path}")

    local_path = PROJECT_BASE / config["input_data"]["local_data"] / "target_data.nc"

    # Check for an existing file BEFORE the expensive download/conversion,
    # not after — otherwise the data is fetched and then thrown away.
    if not overwrite and os.path.exists(local_path):
        logger.info(f"File {local_path} already exists and overwrite is set to False.")
        return None

    logger.info(f"Downloading PVlive data to {local_path}")

    pv = PVLiveData()

    # NOTE(review): start == end requests a single instant; confirm this is the
    # intended window rather than, e.g., a one-hour range.
    start = datetime(year, month, day, hour, 0, 0, tzinfo=pytz.utc)
    end = datetime(year, month, day, hour, 0, 0, tzinfo=pytz.utc)

    data = pv.get_data_between(start=start, end=end, extra_fields="capacity_mwp")
    df = pd.DataFrame(data)

    # Normalize timestamps to timezone-naive UTC so NetCDF serialization works.
    df["datetime_gmt"] = pd.to_datetime(df["datetime_gmt"], utc=True)
    df["datetime_gmt"] = df["datetime_gmt"].dt.tz_convert(None)

    ds = xr.Dataset.from_dataframe(df)
    ds["datetime_gmt"] = ds["datetime_gmt"].astype(np.datetime64)

    os.makedirs(os.path.dirname(local_path), exist_ok=True)
    ds.to_netcdf(local_path)

    logger.info(f"PVlive data stored successfully in {local_path}")

    return local_path


def process_pvlive_data(
    year: int,
    month: int,
    day: int,
    hour: int,
    region: str = "uk",
    overwrite: bool = False,
    archive_type: str = "zarr",
):
    """Collect PVLive data, convert it to zarr, and upload it to Hugging Face.

    Args:
        year: Year of the requested timestamp.
        month: Month of the requested timestamp.
        day: Day of the requested timestamp.
        hour: Hour (UTC) of the requested timestamp.
        region: Accepted for compatibility with the dispatcher in archive.py,
            which passes it positionally; only the UK config is used here.
        overwrite: Forwarded to collection, conversion, and upload steps.
        archive_type: Accepted for compatibility with the dispatcher in
            archive.py; only zarr output is currently produced.
    """
    local_path = collect_pvlive_data(year, month, day, hour, overwrite)
    if not local_path:
        logger.error("Failed to collect PVlive data.")
        return

    # Convert every NetCDF file in the staging directory into a zarr store
    # placed alongside it, then upload the directory.
    data_dir = Path(local_path).parent
    output_dir = data_dir / "zarr"
    convert_nc_to_zarr(data_dir, output_dir, overwrite)

    upload_to_huggingface(
        config_path=CONFIG_PATHS["uk"],
        folder_name=data_dir,
        year=year,
        month=month,
        day=day,
        overwrite=overwrite,
    )

    logger.info(
        f"PVlive data for {year}-{month:02d}-{day:02d} at hour {hour:02d} uploaded successfully."
    )


if __name__ == "__main__":
process_pvlive_data(2021, 1, 1, 0, overwrite=True)
8 changes: 5 additions & 3 deletions src/open_data_pvnet/utils/data_uploader.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,16 @@
def _validate_config(config):
"""Validate configuration and return required values."""
repo_id = config.get("general", {}).get("destination_dataset_id")
provider = config.get("general", {}).get("provider")
if not repo_id:
raise ValueError("No destination_dataset_id found in the configuration file.")

local_output_dir = config["input_data"]["nwp"]["met_office"]["local_output_dir"]
if provider =="met_office":
local_output_dir = config["input_data"]["nwp"]["met_office"]["local_output_dir"]
elif provider == "pv_live":
local_output_dir = config["input_data"]["local_data"]
zarr_base_path = Path(local_output_dir) / "zarr"
return repo_id, zarr_base_path


def _validate_token():
"""Validate Hugging Face token and return API instance."""
hf_token = os.getenv("HUGGINGFACE_TOKEN")
Expand Down