Source code for pownet.stochastic.demand

""" demand.py: Model for solar time series data"""

import numpy as np
import pandas as pd
from pmdarima import auto_arima
from statsmodels.regression.linear_model import OLS
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.seasonal import STL, DecomposeResult

from .timeseries_model import TimeSeriesModel

import logging

logger = logging.getLogger(__name__)


[docs] class DemandTSModel(TimeSeriesModel): def __init__( self, ) -> None: super().__init__() self._monthly_models: dict[int, SARIMAX] = {} self._predictions: pd.Series = pd.Series() self._pred_residuals: pd.Series = pd.Series() # Specific model parameters self.monthly_reg_models: dict[int, OLS] = {} self.stl_seasonal_value = 24 * 7 - 1 # Must be an odd number self.stl_period_value = 24 self.monthly_stl_results: dict[int, DecomposeResult] = {} @property def monthly_models(self) -> dict: if not self._is_fitted: raise ValueError("Model must be fitted first!") return self._monthly_models @property def predictions(self) -> pd.Series: if not self._is_fitted: raise ValueError("Model must be fitted first!") return self._predictions @property def pred_residuals(self) -> pd.Series: if not self._is_fitted: raise ValueError("Model must be fitted first!") return self._pred_residuals def _fit( self, target_column: str, exog_vars: list[str], arima_order: tuple[int, int, int], seasonal_order: tuple[int, int, int, int] = None, ) -> None: """Steps include 1) Fit a regression to get yt 2) Fit an STL to the regression residuals to get ytt 3) Fit a SARIMAX model to ytt Note that the user must at lease use temperature as a predictor """ self._pred_residuals = pd.Series() for month in self.months: logger.info(f"Fitting SARIMAX model for month {month}") # Subset the data for the month monthly_y = self.data.loc[self.data.index.month == month, target_column] monthly_exog = self.data.loc[ self.data.index.month == month, exog_vars ].astype(float) # Regression model reg_model = OLS(monthly_y, monthly_exog).fit() monthly_yt = reg_model.resid # STL model stl_model = STL( monthly_yt, seasonal=self.stl_seasonal_value, period=self.stl_period_value, ) stl_result = stl_model.fit() # SARIMAX model monthly_ytt = monthly_yt - stl_result.seasonal - stl_result.trend sarimax_model = SARIMAX( monthly_ytt, order=arima_order, seasonal_order=seasonal_order, ).fit(disp=True) # Store the models, and residuals self.monthly_reg_models[month] = reg_model self._monthly_models[month] = sarimax_model self.monthly_stl_results[month] = stl_result if self._pred_residuals.empty: self._pred_residuals = sarimax_model.resid else: self._pred_residuals = pd.concat( [self._pred_residuals, sarimax_model.resid] ) self._pred_residuals.name = "value" def _predict(self) -> pd.Series: self._predictions = pd.Series() for month in self.months: monthly_exog = self.data.loc[ self.data.index.month == month, self.exog_vars ].astype(float) sarimax_model = self._monthly_models[month] # SARIMAX model ytt, which is the trend and residuals of LOESS monthly_ytt_pred = sarimax_model.predict() monthly_yt_pred = ( monthly_ytt_pred + self.monthly_stl_results[month].seasonal ) # Regression model to get y reg_pred = self.monthly_reg_models[month].predict(monthly_exog) monthly_y_pred = monthly_yt_pred + reg_pred monthly_y_pred.name = "value" if self._predictions.empty: self._predictions = monthly_y_pred else: self._predictions = pd.concat([self._predictions, monthly_y_pred]) return self._predictions def _get_synthetic(self, exog_data: pd.DataFrame, seed: int) -> pd.Series: synthetic_y = pd.Series() for month in self.months: # Models are fitted for each month stl_result = self.monthly_stl_results[month] sarimax_model = self._monthly_models[month] monthly_exog = exog_data.loc[exog_data.index.month == month] # Create a synthetic time series for each day for day in monthly_exog.index.day.unique(): # Regression model start_time = monthly_exog.loc[monthly_exog.index.day == day].index[0] end_time = start_time + pd.Timedelta(hours=23) daily_exog = monthly_exog.loc[start_time:end_time] reg_pred = self.monthly_reg_models[month].predict(exog=daily_exog) reg_pred = pd.Series(reg_pred, index=daily_exog.index) # Bootstrap the SARIMAX predictions within 95% confidence interval sarimax_pred = sarimax_model.predict() sarimax_pred_ci = np.percentile(sarimax_pred, [2.5, 97.5]) sarimax_bootstrap = np.random.choice( sarimax_pred_ci, size=len(daily_exog) ) sarimax_bootstrap = pd.Series(sarimax_bootstrap, index=daily_exog.index) # Recover electricity demand by adding predictions from the regression model, # predictions from SARIMAX, and also the STL's seasonal and trend components daily_stl_seasonal = stl_result.seasonal.loc[start_time:end_time] daily_stl_trend = stl_result.trend.loc[start_time:end_time] daily_syn_y = ( sarimax_bootstrap + reg_pred + daily_stl_seasonal + daily_stl_trend ) daily_syn_y.name = "value" if synthetic_y.empty: synthetic_y = daily_syn_y else: synthetic_y = pd.concat([synthetic_y, daily_syn_y]) return synthetic_y def _find_best_model( self, target_column: str, exog_vars: list[str], month_to_use: int, seed: int, suppress_warnings: bool, ) -> tuple[tuple[int, int, int], tuple[int, int, int, int]]: monthly_y = self.data.loc[self.data.index.month == month_to_use, target_column] monthly_exog = self.data.loc[ self.data.index.month == month_to_use, exog_vars ].astype(float) # Regression model reg_model = OLS(monthly_y, monthly_exog).fit() monthly_yt = reg_model.resid # STL model stl_model = STL( monthly_yt, seasonal=self.stl_seasonal_value, period=self.stl_period_value, ) stl_result = stl_model.fit() # SARIMAX model monthly_ytt = monthly_yt - stl_result.seasonal - stl_result.trend best_model = auto_arima( monthly_ytt, X=monthly_exog, start_p=0, start_q=0, max_p=2, max_d=2, max_q=2, seasonal=False, information_criterion="aic", stepwise=True, suppress_warnings=suppress_warnings, error_action="warn", random_state=seed, ) return best_model.order, best_model.seasonal_order