Building a Forecast

Simple Forecast Methods

Implementation

# Load the fpp2 package: forecasting functions and example data sets
library(fpp2)

# Build a forecast with each of the simple methods above
milknaive <- naive(condmilk, h = 12)  # Naive forecast, 12 months ahead
milkmean <- meanf(condmilk, h = 12)   # Mean forecast, 12 months ahead

# Trivial forecast: predict 0 for each of the next 12 months
trivialFC <- rep(0, 12)
milktrivial <- ts(trivialFC, start = c(1981, 1), frequency = 12)

Plot

# Overlay the observed series with each of the simple forecasts
simple_fc_plot <- autoplot(condmilk, series = "Observed") +
  autolayer(milknaive, PI = FALSE, series = "Naive") +
  autolayer(milktrivial, series = "Trivial") +
  autolayer(milkmean, PI = FALSE, series = "Mean") +
  ggtitle("Forecasts for Monthly Milk Stocks") +
  guides(colour = guide_legend(title = "Series and Forecasts"))
print(simple_fc_plot)

Methods with parameters

\[\{f(\mathcal{Y}_T,\theta,\text{other stuff})\}_{\theta\in\Theta}\]

Examples of Parametric Methods

Structured parametric models

Choosing parameters

Implementation

# Fit an AR(1) with no constant term, then forecast 12 months ahead
milkAR1selection <- Arima(condmilk, order = c(1, 0, 0), include.mean = FALSE)
milkAR1 <- forecast(milkAR1selection, h = 12)

# Fit an AR(4) including a constant, then forecast 12 months ahead
milkAR4selection <- Arima(condmilk, order = c(4, 0, 0))
milkAR4 <- forecast(milkAR4selection, h = 12)

# Simple exponential smoothing forecast, 12 months ahead
milkexpsm <- ses(condmilk, h = 12)

Plot

# Overlay the observed series with each parametric forecast
parametric_fc_plot <- autoplot(condmilk, series = "Observed") +
  autolayer(milkAR1, PI = FALSE, series = "AR(1) no constant") +
  autolayer(milkAR4, PI = FALSE, series = "AR(4)") +
  autolayer(milkexpsm, PI = FALSE, series = "Simple Exponential Smoothing") +
  ggtitle("Forecasts for Monthly Milk Stocks") +
  guides(colour = guide_legend(title = "Series and Forecasts"))
print(parametric_fc_plot)

Parameters Chosen

# Estimated smoothing parameter alpha and initial level l_0
# from the simple exponential smoothing fit
milkexpsm$model$fit$par
## [1]  0.99990 81.29198
# Estimated AR(1) coefficient (no constant was included in this fit)
milkAR1selection$coef
##       ar1 
## 0.9783295
# Estimated AR(4) coefficients; "intercept" is the fitted mean,
# not the constant term (see the transformation used later)
milkAR4selection$coef
##         ar1         ar2         ar3         ar4   intercept 
##  0.98568158 -0.09175605 -0.11115186 -0.27093352 95.99392283

Implementation

# Seasonal naive forecast, 12 months ahead
milksnaive <- snaive(condmilk, h = 12)
# Random walk with drift forecast, 12 months ahead
milkdrift <- rwf(condmilk, drift = TRUE, h = 12)

Plot

# Overlay the observed series with the seasonal naive and drift forecasts
seasonal_fc_plot <- autoplot(condmilk, series = "Observed") +
  autolayer(milksnaive, PI = FALSE, series = "Seasonal Naive") +
  autolayer(milkdrift, PI = FALSE, series = "Drift") +
  ggtitle("Forecasts for Monthly Milk Stocks") +
  guides(colour = guide_legend(title = "Series and Forecasts"))
print(seasonal_fc_plot)

Adding External Data

Random methods

Exercise: Try it yourself

Choices: Finding a “Good” method

Loss Functions

Examples

Evaluating Loss

Evaluating Parametric Methods

Evaluation

# Forecast function for time series cross-validation:
# refit an AR(4) on the window x and forecast h steps ahead
far4 <- function(x, h) {
  forecast(Arima(x, order = c(4, 0, 0)), h = h)
}
# One-step-ahead rolling forecast errors over the whole series
AR4forecasterrors <- tsCV(condmilk, far4, h = 1)

# Rolling-forecast RMSE and MAE
# (na.rm drops the NA errors from windows too short to fit the model)
RMSEfc <- sqrt(mean((AR4forecasterrors)^2, na.rm = TRUE))
MAEfc <- mean(abs(AR4forecasterrors), na.rm = TRUE)

# In-sample (residual) RMSE and MAE from the full-sample AR(4) fit,
# for comparison against the rolling errors
RMSEres <- sqrt(mean((milkAR4selection$residuals)^2, na.rm = TRUE))
MAEres <- mean(abs(milkAR4selection$residuals), na.rm = TRUE)

library(knitr)
library(kableExtra)

# Side-by-side table of both error measures
mse_tbl <- data.frame(
  Method = c("Residual", "Rolling"),
  RMSE = c(RMSEres, RMSEfc),
  MAE = c(MAEres, MAEfc)
)

kable(mse_tbl) %>%
  kable_styling(full_width = FALSE) %>%  # spell out FALSE: F is reassignable
  column_spec(1, border_right = TRUE)
Method RMSE MAE
Residual 12.73835 8.905466
Rolling 15.42559 10.235622

Train vs Test Set

Train vs Test Set

# Split the series: training data through Dec 1977, test data from Jan 1978 on
milktrain <- window(condmilk, end = c(1977, 12))
milktest <- window(condmilk, start = c(1978, 1))
milkAR4train <- Arima(milktrain, order = c(4, 0, 0))

# Constructing the forecast on test data requires some ugly code: hidden
S <- length(milktrain)          # Length of training data series
testlength <- length(milktest)  # Length of test data series
p <- 4                          # Number of lags used in autoregression

# Arima reports the mean rather than the constant term in our notation,
# so transform: theta0 = (1 - sum of AR coefficients) * mean
theta0 <- (1 - sum(milkAR4train$coef[1:p])) * milkAR4train$coef[p + 1]

# One-step-ahead predictions over the test period, using the selected
# parameters and the p most recent observed values as regressors.
# Preallocate the result vector instead of growing it with c() in the loop.
milkAR4testpred <- numeric(testlength)
for (t in seq_len(testlength)) {
  milkAR4testpred[t] <-
    milkAR4train$coef[p:1] %*% condmilk[(S - p + t):(S + t - 1)] + theta0
}
# Convert predictions to a time series aligned with the test set
milkAR4testforecasts <- ts(milkAR4testpred, start = c(1978, 1), frequency = 12)

# Test-set RMSE and MAE
RMSE <- sqrt(mean((milkAR4testforecasts - milktest)^2))
MAE <- mean(abs(milkAR4testforecasts - milktest))

err_tbl <- data.frame(
  Method = c("Test Set Error"),
  RMSE = c(RMSE),
  MAE = c(MAE)
)

kable(err_tbl) %>%
  kable_styling(full_width = FALSE) %>%  # spell out FALSE/TRUE, not F/T
  column_spec(1, border_right = TRUE)
Method RMSE MAE
Test Set Error 7.602968 5.952242

Plot

# Show training data, test data, and the AR(4) test-set forecast together
train_test_plot <- autoplot(milktrain, series = "Training Set Data") +
  autolayer(milktest, series = "Test Set Data") +
  autolayer(milkAR4testforecasts, series = "Forecast") +
  ggtitle("Forecasts for Monthly Milk Stocks") +
  guides(colour = guide_legend(title = "Series and Forecasts"))
print(train_test_plot)

The problem of induction

David Hume. Source: Wikimedia

David Hume. Source: Wikimedia

The problem of induction

Next topic