Source code for pownet.stochastic.solar

""" solar.py: Model for solar time series data"""

import numpy as np
import pandas as pd
from pmdarima import auto_arima
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.seasonal import STL, DecomposeResult

from .timeseries_model import TimeSeriesModel

import logging

logger = logging.getLogger(__name__)


[docs] def process_solar_series(solar_ts: pd.Series, suntimes: pd.DataFrame) -> pd.Series: # Remove negative values output_df = solar_ts.copy() # Convert sunrise and sunset to datetime suntimes["sunrise"] = pd.to_datetime(suntimes["sunrise"]) suntimes["sunset"] = pd.to_datetime(suntimes["sunset"]) output_df = output_df.to_frame().join(suntimes) output_df.loc[ (output_df.index.hour < output_df["sunrise"].dt.hour) | (output_df.index.hour > output_df["sunset"].dt.hour), "value", ] = 0 # Any negative value is zero output_df["value"] = np.maximum(output_df["value"], 0) # Drop sunrise and sunset columns output_df = output_df.drop(columns=["sunrise", "sunset"]) # All negative values are set to zero output_df["value"] = np.maximum(output_df["value"], 0) return output_df
[docs] class SolarTSModel(TimeSeriesModel): def __init__( self, ) -> None: super().__init__() self._monthly_models: dict[int, SARIMAX] = {} self._predictions: pd.Series = pd.Series() self._pred_residuals: pd.Series = pd.Series() # Specific model parameters self.stl_seasonal_value = 24 * 7 - 1 # Must be an odd number self.stl_period_value = 24 self.monthly_stl_results: dict[int, DecomposeResult] = {} @property def monthly_models(self) -> dict: if not self._is_fitted: raise ValueError("Model must be fitted first!") return self._monthly_models @property def predictions(self) -> pd.Series: if not self._is_fitted: raise ValueError("Model must be fitted first!") return self._predictions @property def pred_residuals(self) -> pd.Series: if not self._is_fitted: raise ValueError("Model must be fitted first!") return self._pred_residuals def _fit( self, target_column: str, arima_order: tuple[int, int, int], seasonal_order: tuple[int, int, int, int] = None, exog_vars: list[str] = None, ) -> None: # Check that self.data has sunrise and sunset columns if "sunrise" not in self.data.columns or "sunset" not in self.data.columns: raise ValueError("Data should have columns 'sunrise' and 'sunset'") self._pred_residuals = pd.Series() for month in self.months: logger.info(f"Fitting SARIMAX model for month {month}") monthly_y = self.data.loc[self.data.index.month == month, target_column] # We are interested in fitting a time series model to the # trend + residuals of the STL decomposition # The choice of seasonal argument is arbitrary to weekly patterns stl_model = STL( monthly_y, seasonal=self.stl_seasonal_value, period=self.stl_period_value, ) stl_result = stl_model.fit() monthly_yt = monthly_y - stl_result.seasonal # SARIMAX exog_data = ( self.data.loc[self.data.index.month == month, exog_vars] if exog_vars else None ) sarimax_model = SARIMAX( monthly_yt, exog=exog_data, order=arima_order, seasonal_order=seasonal_order, ).fit(disp=True) # Store the models, and residuals self._monthly_models[month] = sarimax_model self.monthly_stl_results[month] = stl_result if self._pred_residuals.empty: self._pred_residuals = sarimax_model.resid else: self._pred_residuals = pd.concat( [self._pred_residuals, sarimax_model.resid] ) self._pred_residuals.name = "value" def _predict(self) -> pd.Series: self._predictions = pd.Series() for month in self.months: sarimax_model = self._monthly_models[month] # The SARIMAX model predicts yt, which is the trend and residuals of LOESS monthly_yt_pred = sarimax_model.predict() monthly_y_pred = monthly_yt_pred + self.monthly_stl_results[month].seasonal monthly_y_pred.name = "value" if self._predictions.empty: self._predictions = monthly_y_pred else: self._predictions = pd.concat([self._predictions, monthly_y_pred]) # Post-processing involves removing negative values and # setting irradiance to zero during night hours suntimes = self.data.loc[:, ["sunrise", "sunset"]] self._predictions = process_solar_series(self._predictions, suntimes) return self._predictions def _get_synthetic(self, exog_data: pd.DataFrame, seed: int) -> pd.Series: synthetic_y = pd.Series() for month in self.months: sarimax_model = self._monthly_models[month] stl_result = self.monthly_stl_results[month] # Find the maximum number of days in the month max_day = self.data.loc[self.data.index.month == month].shape[0] // 24 for day in range(1, max_day + 1): # Extract the start and end time for the day for indexing # the synthetic data start_time = self.data.loc[ (self.data.index.month == month) & (self.data.index.day == day) ].index[0] end_time = start_time + pd.Timedelta("23h") # This is the trend + residuals of the STL decomposition daily_exog_data = None if exog_data is not None: daily_exog_data = exog_data.loc[start_time:end_time, :] daily_yt_syn = sarimax_model.simulate( exog=daily_exog_data, nsimulations=24, anchor="end", random_state=seed, ) # Add the seasonal component from the STL decomposition to # get the synthetic data (irradiance) daily_y_syn = ( daily_yt_syn.values + stl_result.seasonal.loc[start_time:end_time].values ) daily_y_syn = pd.Series( daily_y_syn, name="value", index=pd.date_range(start_time, end_time, freq="h"), ) if synthetic_y.empty: synthetic_y = daily_y_syn else: synthetic_y = pd.concat([synthetic_y, daily_y_syn]) # Post-processing involves removing negative values and # setting irradiance to zero during night hours suntimes = self.data.loc[:, ["sunrise", "sunset"]] synthetic_y = process_solar_series(synthetic_y, suntimes) return synthetic_y def _find_best_model( self, target_column: str, exog_vars: list[str], month_to_use: int, seed: int, suppress_warnings: bool, ) -> tuple[tuple[int, int, int], tuple[int, int, int, int]]: monthly_y = self.data.loc[self.data.index.month == month_to_use, target_column] monthly_exog = None if exog_vars: monthly_exog = self.data.loc[ self.data.index.month == month_to_use, exog_vars ] # Find monthly_yt stl_model = STL( monthly_y, seasonal=self.stl_seasonal_value, period=self.stl_period_value, ) stl_result = stl_model.fit() monthly_yt = monthly_y - stl_result.seasonal # Find the best model best_model = auto_arima( monthly_yt, X=monthly_exog, start_p=0, start_q=0, max_p=2, max_d=2, max_q=2, seasonal=False, information_criterion="aic", stepwise=True, suppress_warnings=suppress_warnings, error_action="warn", random_state=seed, ) return best_model.order, best_model.seasonal_order