Skip to content
This repository was archived by the owner on Sep 7, 2023. It is now read-only.

Issue/perceiver update #102

Open
wants to merge 19 commits into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions configs/experiment/perceiver_conv3d_sat_nwp.yaml
Original file line number Diff line number Diff line change
@@ -14,6 +14,7 @@ defaults:
# this allows you to overwrite only specified parameters

seed: 518
# Checkpoint handed to load_from_checkpoint in training.py (Lightning writes checkpoints with a .ckpt extension)
load_model: /home/ec2-user/github/predict_pv_yield/logs/runs/2021-12-13/14-52-11/checkpoints/epoch_006.ckpt

trainer:
min_epochs: 1
8 changes: 5 additions & 3 deletions configs/model/perceiver_conv3d_sat_nwp.yaml
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
_target_: predict_pv_yield.models.perceiver.perceiver_conv3d_nwp_sat.Model

forecast_minutes: 30
history_minutes: 60
batch_size: 32
forecast_minutes: 120
history_minutes: 30
batch_size: 16
num_latents: 24
latent_dim: 24
embedding_dem: 0
output_variable: gsp_yield
conv3d_channels: 8
use_future_satellite_images: 0
include_pv_or_gsp_yield_history: False
include_pv_yield_history: True
16 changes: 16 additions & 0 deletions experiments/2021-12/2021-12-01.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
1. Compare training without future satellite data against training with future satellite data

For conv3d model

With: https://app.neptune.ai/o/OpenClimateFix/org/predict-pv-yield/e/PRED-550/charts
Not: https://app.neptune.ai/o/OpenClimateFix/org/predict-pv-yield/e/PRED-549/charts

Validation Error
With: 0.0665
Not: 0.0670

data:
nwp
sat (no hrv, no future)
no gsp history
pv history
13 changes: 13 additions & 0 deletions experiments/2021-12/2021-12-02.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
https://app.neptune.ai/o/OpenClimateFix/org/predict-pv-yield/e/PRED-571/monitoring

run Perceiver model

data
satdata (no future)
nwp
no gsp
historical pv

epoch time ~ 1.5 hours

Validation
24 changes: 18 additions & 6 deletions predict_pv_yield/models/base_model.py
Original file line number Diff line number Diff line change
@@ -153,6 +153,12 @@ def training_step(self, batch, batch_idx):
return self._training_or_validation_step(batch, tag="Train")

def validation_step(self, batch: BatchML, batch_idx):
    """Handle one validation batch by delegating to ``validation_or_test_step``.

    Args:
        batch: the batch to evaluate; forwarded unchanged.
        batch_idx: index of the batch; forwarded unchanged.

    The delegate's return value is discarded, so this hook returns ``None``.
    """
    self.validation_or_test_step(batch=batch, batch_idx=batch_idx)

def test_step(self, batch: BatchML, batch_idx):
    """Handle one test batch by delegating to ``validation_or_test_step``.

    Args:
        batch: the batch to evaluate; forwarded unchanged.
        batch_idx: index of the batch; forwarded unchanged.

    The delegate's return value is discarded, so this hook returns ``None``.
    """
    self.validation_or_test_step(batch=batch, batch_idx=batch_idx)

def validation_or_test_step(self, batch: BatchML, batch_idx):

if type(batch) == dict:
batch = BatchML(**batch)
@@ -220,18 +226,18 @@ def validation_step(self, batch: BatchML, batch_idx):
pass

# save validation results
capacity = batch.gsp.gsp_capacity[:,-self.forecast_len_30:,0].cpu().numpy()
capacity = batch.gsp.gsp_capacity[0 : self.batch_size,-self.forecast_len_30:,0].cpu().numpy()
predictions = model_output.cpu().numpy()
truths = batch.gsp.gsp_yield[:, -self.forecast_len_30:, 0].cpu().numpy()
truths = batch.gsp.gsp_yield[0 : self.batch_size, -self.forecast_len_30:, 0].cpu().numpy()
predictions = predictions * capacity
truths = truths * capacity

results = make_validation_results(truths_mw=truths,
predictions_mw=predictions,
capacity_mwp=capacity,
gsp_ids=batch.gsp.gsp_id[:, 0].cpu(),
gsp_ids=batch.gsp.gsp_id[0:self.batch_size, 0].cpu(),
batch_idx=batch_idx,
t0_datetimes_utc=pd.to_datetime(batch.metadata.t0_datetime_utc))
t0_datetimes_utc=pd.to_datetime(batch.metadata.t0_datetime_utc)[0:self.batch_size])

# append so in 'validation_epoch_end' the file is saved
if batch_idx == 0:
@@ -249,8 +255,14 @@ def validation_epoch_end(self, outputs):
current_epoch=self.current_epoch,
logger=self.logger)

def test_step(self, batch, batch_idx):
self._training_or_validation_step(batch, tag="Test")
def test_epoch_end(self, outputs):
    """Log that the test epoch finished and save the stored results.

    Passes ``self.results_dfs``, ``self.results_file_name``,
    ``self.current_epoch`` and ``self.logger`` to
    ``save_validation_results_to_logger``.

    Args:
        outputs: per-step outputs supplied by the caller; not used here.
    """
    # `logger` here is the module-level logger, distinct from `self.logger` below.
    logger.info("Test epoch end")

    save_validation_results_to_logger(
        results_dfs=self.results_dfs,
        results_file_name=self.results_file_name,
        current_epoch=self.current_epoch,
        logger=self.logger,
    )

def configure_optimizers(self):
optimizer = torch.optim.Adam(self.parameters(), lr=0.0005)
54 changes: 42 additions & 12 deletions predict_pv_yield/models/perceiver/perceiver_conv3d_nwp_sat.py
Original file line number Diff line number Diff line change
@@ -72,7 +72,9 @@ def __init__(
embedding_dem: int = 16,
output_variable: str = "pv_yield",
conv3d_channels: int = 16,
use_future_satellite_images: bool = True, # option not to use future sat images
use_future_satellite_images: bool = False, # option not to use future sat images
include_pv_or_gsp_yield_history: bool = False,
include_pv_yield_history: bool = True,  # option to append the PV yield history to the encoder RNN input
):
"""
Idea is to have a conv3d (+max pool) layer before both sat and nwp data go into perceiver model.
@@ -86,6 +88,8 @@ def __init__(
self.embedding_dem = embedding_dem
self.output_variable = output_variable
self.use_future_satellite_images = use_future_satellite_images
self.include_pv_yield_history = include_pv_yield_history
self.include_pv_or_gsp_yield_history = include_pv_or_gsp_yield_history

super().__init__()

@@ -115,10 +119,16 @@ def __init__(
if self.embedding_dem:
self.pv_system_id_embedding = nn.Embedding(num_embeddings=940, embedding_dim=self.embedding_dem)

rnn_input_size = FC_OUTPUT_SIZE
if self.include_pv_or_gsp_yield_history:
rnn_input_size += 1
if self.include_pv_yield_history:
rnn_input_size += 128

# TODO: Get rid of RNNs!
self.encoder_rnn = nn.GRU(
# plus 1 for history
input_size=FC_OUTPUT_SIZE + 1,
input_size=rnn_input_size,
hidden_size=RNN_HIDDEN_SIZE,
num_layers=2,
batch_first=True,
@@ -169,8 +179,17 @@ def forward(self, x):

nwp_data = nwp_data.reshape(new_batch_size, nwp_width, nwp_height, n_nwp_chans)

assert nwp_width == width, f'widths should be the same({nwp_width},{width})'
assert nwp_height == height, f'heights should be the same({nwp_height},{height})'
# In v15 the satellite width and height are much smaller than the NWP ones, so zero-pad the sat data up to the NWP size. There should be a better way.
sat_data_zeros = torch.zeros(size=(new_batch_size, nwp_width - width, height, n_chans),
device=sat_data.device)
sat_data = torch.cat([sat_data, sat_data_zeros], dim=1)
sat_data_zeros = torch.zeros(size=(new_batch_size, nwp_width, nwp_height - height, n_chans),
device=sat_data.device)
sat_data = torch.cat([sat_data, sat_data_zeros], dim=2)
new_batch_size, sat_width, sat_height, sat_n_chans = sat_data.shape

# The padded satellite data is concatenated with the NWP data along the channel axis,
# so both spatial dimensions must agree (width against width, height against height).
assert nwp_width == sat_width, f'widths should be the same({nwp_width},{sat_width})'
assert nwp_height == sat_height, f'heights should be the same({nwp_height},{sat_height})'

data = torch.cat((sat_data, nwp_data), dim=-1)

@@ -214,14 +233,25 @@ def forward(self, x):
dim=2,
)

if self.output_variable == 'pv_yield':
# take the history of the pv yield of this system,
pv_yield_history = x.pv.pv_yield[0 : self.batch_size][:, : self.history_len_5 + 1, 0].unsqueeze(-1).float()
encoder_input = torch.cat((rnn_input[:, : self.history_len_5 + 1], pv_yield_history), dim=2)
elif self.output_variable == 'gsp_yield':
# take the history of the gsp yield of this system,
gsp_history = x.gsp.gsp_yield[0: self.batch_size][:, : self.history_len_30 + 1, 0].unsqueeze(-1).float()
encoder_input = torch.cat((rnn_input[:, : self.history_len_30 + 1], gsp_history), dim=2)
# Build the encoder RNN input from rnn_input plus optional yield-history channels.
# NOTE(review): encoder_input is only assigned inside the flag-guarded branches below; if both
# include_pv_or_gsp_yield_history and include_pv_yield_history are falsy it looks like it is never
# defined before encoder_rnn is called — confirm this combination is rejected elsewhere.
if self.include_pv_or_gsp_yield_history:
if self.output_variable == 'pv_yield':
# take the history of the pv yield of this system,
pv_yield_history = x.pv.pv_yield[0 : self.batch_size][:, : self.history_len_5 + 1, 0].unsqueeze(-1).float()
encoder_input = torch.cat((rnn_input[:, : self.history_len_5 + 1], pv_yield_history), dim=2)
elif self.output_variable == 'gsp_yield':
# take the history of the gsp yield of this system,
gsp_history = x.gsp.gsp_yield[0: self.batch_size][:, : self.history_len_30 + 1, 0].unsqueeze(-1).float()
encoder_input = torch.cat((rnn_input[:, : self.history_len_30 + 1], gsp_history), dim=2)

# add the pv yield history. This can be used if trying to predict gsp
if self.include_pv_yield_history:
pv_yield_history = (
x.pv.pv_yield[:self.batch_size].nan_to_num(nan=0.0).float()
)
# remove future pv: zero every time step after index history_len_5 so only history is kept
pv_yield_history[:, self.history_len_5 + 1:] = 0.0

# NOTE(review): this reassigns encoder_input from the full rnn_input, replacing whatever the
# include_pv_or_gsp_yield_history branch built above; with both flags on, the single-channel
# history appears to be dropped while rnn_input_size in __init__ still adds 1 + 128 — confirm.
encoder_input = torch.cat((rnn_input, pv_yield_history), dim=2)

encoder_output, encoder_hidden = self.encoder_rnn(encoder_input)
decoder_output, _ = self.decoder_rnn(rnn_input[:, -self.forecast_len :], encoder_hidden)
6 changes: 4 additions & 2 deletions predict_pv_yield/training.py
Original file line number Diff line number Diff line change
@@ -77,15 +77,17 @@ def train(config: DictConfig) -> Optional[float]:

# Train the model
log.info("Starting training!")
if 'validate_only' in config:
# Choose what to do with the model: restore a checkpoint, validate only, or train.
# Membership must be tested with `in`; `'load_model' is config` compares object identity
# between a str and the config and is always False, so the checkpoint was never loaded.
if 'load_model' in config:
    model = model.load_from_checkpoint(checkpoint_path=config['load_model'])
elif 'validate_only' in config:
    trainer.validate(model=model, datamodule=datamodule)
else:
    trainer.fit(model=model, datamodule=datamodule)

# Evaluate model on test set, using the best model achieved during training
if config.get("test_after_training") and not config.trainer.get("fast_dev_run"):
log.info("Starting testing!")
trainer.test()
trainer.test(model=model, datamodule=datamodule)

# Make sure everything closed properly
log.info("Finalizing!")