Model Combination

Combining Forecasts

A Unified Framework

Model Selection

Model Averaging - Equal Weights

Model Averaging - Risk Minimization

Alternate Selection Approaches

Model Combination

Application: Model Combination for Oil Price Forecasting

Model predictions and combinations

library(knitr)
library(kableExtra)
library(fredr) #FRED data
library(fpp2) #Forecasting
library(tseries) #Time series commands, including ARCH models
library(rugarch) #Many versions of ARCH models and other variance fitting models
library(rstan) #Bayesian estimation
library(loo) #Bayesian Model selection
library(bridgesampling) #Bayesian model comparison and averaging
library(mgcv) #Contains methods for fitting constrained regression, including best weighted average problems
library(gridExtra) #Graph Display

fredr_set_key("8782f247febb41f291821950cf9118b6") #Key I obtained for this class

WTI<-fredr(series_id = "MCOILWTICO",units="chg") #Monthly Growth in West Texas Intermediate Crude Oil Price
      #Measure of Oil Prices at point for import to US, before refining

#Format the series as monthly time series object, starting at the first date
oilgrowth<-window(ts(WTI$value,frequency=12,start=c(1986,1),names="OilPrice"),start=c(1986,2))

# Fit a set of models to the series

# ARMA(1,1)
armaog<-arima(oilgrowth,order=c(1,0,1))

# ARMA(1,1) Forecast

armaogfc<-forecast(armaog,h=5)

#ARCH(2)
#y_t=e_t(b_1+b_2y_{t-1}^2+b_3y_{t-2}^2)

#Use "garch" command in tseries library to fit by maximum likelihood
#archog<-garch(oilgrowth,order=c(0,2))

#Use rugarch library to fit ARCH(2) model with mean given by AR(1)

spec <- ugarchspec(variance.model = list(model = "sGARCH", 
                                         garchOrder = c(0,2), 
                                         submodel = NULL, 
                                         external.regressors = NULL, 
                                         variance.targeting = FALSE), 

                   mean.model     = list(armaOrder = c(1, 0), 
                                         external.regressors = NULL, 
                                         distribution.model = "norm", 
                                         start.pars = list(), 
                                         fixed.pars = list()))

# Fit model by MLE
archarog <- ugarchfit(spec = spec, data = oilgrowth, solver.control = list(trace=0))

# Produce forecasts
archarogfc <- ugarchforecast(archarog, n.ahead=5)

#Use rugarch library to fit ARCH(2) model with mean given by constant

spec1 <- ugarchspec(variance.model = list(model = "sGARCH", 
                                         garchOrder = c(0,2), 
                                         submodel = NULL, 
                                         external.regressors = NULL, 
                                         variance.targeting = FALSE), 

                   mean.model     = list(armaOrder = c(0, 0), 
                                         external.regressors = NULL, 
                                         distribution.model = "norm", 
                                         start.pars = list(), 
                                         fixed.pars = list()))

# Fit model by MLE
archog <- ugarchfit(spec = spec1, data = oilgrowth, solver.control = list(trace=0))

# Produce forecasts
archogfc <- ugarchforecast(archog, n.ahead=5)

# Produce series of predictions

#Predicted values from ARMA model
armaogpred<-oilgrowth-armaog$residuals 
archogpred<-oilgrowth-archog@fit$residuals
archarogpred<-oilgrowth-archarog@fit$residuals

# Compare RMSE
armaogerr<-accuracy(armaogpred,oilgrowth)
archogerr<-accuracy(archogpred,oilgrowth)
archarogerr<-accuracy(archarogpred,oilgrowth)

s0<-c(1,0,0) #ARMA model minimizes MSE

# Model combination

#Unrestricted combination is linear regression
mcombo<-lm(oilgrowth~0+armaogpred+archogpred+archarogpred)

combopredict<-ts(predict(mcombo),frequency=12,start=c(1986,2))

s2<-mcombo$coefficients

#Restricted combo to weighted average fit by constrained least squares
#See ?pcls help for syntax: below code ensures coefficients sum to 1 and are non-negative
M<-list(X=matrix(0,length(oilgrowth),3),p=c(0.33,0.33,0.34),off=array(0,0),S=list(),
Ain=matrix(0,3,3),bin=c(0,0,0),C=matrix(1,1,3),sp=array(0,0),y=oilgrowth,w=oilgrowth*0+1)
M$X[,1]<-armaogpred
M$X[,2]<-archogpred
M$X[,3]<-archarogpred
M$Ain[1,1]<-1
M$Ain[2,2]<-1
M$Ain[3,3]<-1
#Find constrained least squares solution
pcls(M)->M$p

s1<-M$p #MSE optimizing weighted average of 3 models
#Optimal average
maverage<-s1[1]*armaogpred+s1[2]*archogpred+s1[3]*archarogpred

#Plot series and predictions
autoplot(oilgrowth)+autolayer(armaogpred)+autolayer(archogpred)+
  autolayer(archarogpred)+autolayer(maverage)+autolayer(combopredict)+ggtitle("Oil Price Growth and Predicted Values")+
  ylab("Change, Dollars per Barrel")

Cross Validated Predictions and Optimal Combinations

## Produce time series CV forecast errors from each method

farma <- function(x, h){forecast(Arima(x, order=c(1,0,1)), h=h)}
e1 <- tsCV(oilgrowth, farma, h=1)

#Produce predictions from rolling forecasts
armacvpred<-window(oilgrowth,start=c(1987,1),end=c(2019,1))-window(e1,start=c(1987,1),end=c(2019,1))

# rugarch library has its own function for tsCV forecasts, called ugrachroll
# For computational speed, choose new parameters only every 5 data points

#For basic ARCH(2) model
mod1 = ugarchroll(spec1, data = oilgrowth, n.ahead = 1, 
n.start = 10,  refit.every = 5, refit.window = "recursive", 
solver = "hybrid", fit.control = list(), calculate.VaR = FALSE,
keep.coef = TRUE)

#Mean predictions
archcvpred<-ts(mod1@forecast$density$Mu,frequency=12,start=c(1986,12))

#For AR(1)-ARCH(2) model
mod = ugarchroll(spec, data = oilgrowth, n.ahead = 1, 
n.start = 10,  refit.every = 5, refit.window = "recursive", 
solver = "hybrid", fit.control = list(), calculate.VaR = FALSE,
keep.coef = TRUE)

#Mean predictions
archarcvpred<-ts(mod@forecast$density$Mu,frequency=12,start=c(1986,12))

#Truncate series to window of predictions
ogwindow<-window(oilgrowth,start=c(1987,1),end=c(2019,1))
archcv<-window(archcvpred,start=c(1987,1),end=c(2019,1))
archarcv<-window(archarcvpred,start=c(1987,1),end=c(2019,1))

# Compare RMSE
armacverr<-accuracy(armacvpred,ogwindow)
archcverr<-accuracy(archcv,ogwindow)
archarcverr<-accuracy(archarcv,ogwindow)

s0cv<-c(1,0,0) #ARMA model still minimizes MSE

#Unrestricted combination is linear regression
mcombocv<-lm(ogwindow~0+armacvpred+archcv+archarcv)

combocvpredict<-ts(predict(mcombocv),frequency=12,start=c(1987,1))

s2cv<-mcombocv$coefficients

#Restricted combo to weighted average fit by constrained least squares
#See ?pcls help for syntax: below code ensures coefficients sum to 1 and are non-negative
M2<-list(X=matrix(0,length(ogwindow),3),p=c(0.33,0.33,0.34),off=array(0,0),S=list(),
Ain=matrix(0,3,3),bin=c(0,0,0),C=matrix(1,1,3),sp=array(0,0),y=ogwindow,w=ogwindow*0+1)
M2$X[,1]<-armacvpred
M2$X[,2]<-archcv
M2$X[,3]<-archarcv
M2$Ain[1,1]<-1
M2$Ain[2,2]<-1
M2$Ain[3,3]<-1
#Find constrained least squares solution
pcls(M2)->M2$p

s1cv<-M2$p #MSE optimizing weighted average of 3 models

#Optimal average
maveragecv<-s1cv[1]*armacvpred+s1cv[2]*archcv+s1cv[3]*archarcv

#Plot Forecasts and combinations

autoplot(ogwindow)+autolayer(armacvpred)+autolayer(archcv)+autolayer(archarcv)+
  autolayer(maveragecv)+autolayer(combocvpredict)+ggtitle("Oil Price Growth and CV Predicted Values")+
  ylab("Change, Dollars per Barrel")

Combination Results

Weight<-c("ARMA(1,1)","ARCH(2)","AR(1)ARCH(2)")
seltable<-data.frame(Weight,s0,s1,s2,s0cv,s1cv,s2cv)
colnames(seltable)<-c("Model","Selection","Average","Combination",
                       "Selection (CV)","Average (CV)","Combination (CV)")

kable(seltable,
  caption="Optimal Model Weights by Method") %>%
  kable_styling(bootstrap_options = "striped")
Optimal Model Weights by Method
Model Selection Average Combination Selection (CV) Average (CV) Combination (CV)
armaogpred ARMA(1,1) 1 0.998383724 1.9312723 1 0.50420777 0.5019803
archogpred ARCH(2) 0 0.001616276 0.0818078 0 0.04215205 -2.2076654
archarogpred AR(1)ARCH(2) 0 0.000000000 -0.9439869 0 0.45364018 0.4383453

Bayesian Approach

Bayesian Model Averaging

Bayes Factors

Limitations of Bayes Factors

Combining Bayesian Forecasts

Application: Bayesian Evaluation of Oil Price Models

options(mc.cores = parallel::detectCores()) #Use parallel computing when running MCMC
rstan_options(auto_write = TRUE) #Do not recompile Stan code every time file is saved

#Write code for AR(1)-ARCH(2) model in Stan
#Follows Stan User's Guide Ch 2.1-2.2, with modifications
stan_code_ararch<-"
data {
  int<lower=0> T;
  real r[T];
}
parameters {
  real mu;
  real<lower=-1,upper=1> ar1;
  real<lower=0> omega;
  real<lower=0,upper=1> beta1;
  real<lower=0,upper=(1-beta1)> beta2;

}
transformed parameters {
  real<lower=0> sigma[T-3];
  real lagval[T-1];
  for (t in 1:(T-1))
    lagval[t] = mu + ar1*r[t];
  for (t in 4:T)
    sigma[t-3] = sqrt(omega
                     + beta1 * pow(r[t-1] - lagval[t-2], 2)
                     + beta2 * pow(r[t-2] - lagval[t-3], 2));
}
model {
  // Priors: N(0,1) on everything: use target syntax because otherwise likelihood calculated only up to constant
  target += normal_lpdf(mu | 0, 1);
  target += normal_lpdf(ar1 | 0, 1);
  target += normal_lpdf(omega | 0, 1);
  target += normal_lpdf(beta1 | 0, 1);
  target += normal_lpdf(beta2 | 0, 1);
  // Likelihood: Normal, with mean given by AR, variance by ARCH
  target += normal_lpdf(r[4:T] | lagval[3:(T-1)], sigma);
  // Equivalent to r[4:T] ~ normal(lagval[3:(T-1)], sigma); for posterior, but not likelihood
}

generated quantities {
real r_tilde; // Samples from posterior predictive distribution

r_tilde = normal_rng(mu + ar1*r[T],sqrt(omega
                     + beta1 * pow(r[T] - mu - ar1*r[T-1], 2)
                     + beta2 * pow(r[T-1] - mu - ar1*r[T-2], 2)));  //Simulate draw from period T+1 conditional likelihood 

// vector[T-3] log_lik; //Value of log likelihood at parameter draws
// for (t in 4:T) log_lik[t] = normal_lpdf(r[t] | lagval[t-1], sigma[t-3]);
}"


#Write code for ARCH(2) model in Stan
#Follows Stan User's Guide Ch 2.1-2.2, with modifications
#Sampling seems to fail for this model: not sure why yet
stan_code_arch<-"
data {
  int<lower=0> T;
  real r[T];
}
parameters {
  real mu;
  real<lower=0> omega;
  real<lower=0,upper=1> beta1;
  real<lower=0,upper=(1-beta1)> beta2;

}
transformed parameters {
  real<lower=0> sigma[T-3];
  real lagval[T-1];
  for (t in 1:(T-1))
    lagval[t] = mu;
  for (t in 4:T)
    sigma[t-3] = sqrt(omega
                     + beta1 * pow(r[t-1] - lagval[t-2], 2)
                     + beta2 * pow(r[t-2] - lagval[t-3], 2));
}
model {
  // Priors: N(0,1) on everything: use target syntax because otherwise likelihood calculated only up to constant
  target += normal_lpdf(mu | 0, 1);
  target += normal_lpdf(omega | 0, 1);
  target += normal_lpdf(beta1 | 0, 1);
  target += normal_lpdf(beta2 | 0, 1);
  // Likelihood: Normal, with mean given by AR, variance by ARCH
  target += normal_lpdf(r[4:T] | lagval[3:(T-1)], sigma);
  // Equivalent to r[4:T] ~ normal(lagval[3:(T-1)], sigma); for posterior, but not likelihood
}

generated quantities {
real r_tilde; // Samples from posterior predictive distribution

r_tilde = normal_rng(mu,sqrt(omega
                     + beta1 * pow(r[T] - mu, 2)
                     + beta2 * pow(r[T-1] - mu, 2)));  //Simulate draw from period T+1 conditional likelihood 

// vector[T-3] log_lik; //Value of log likelihood at parameter draws
// for (t in 4:T) log_lik[t] = normal_lpdf(r[t] | lagval[t-1], sigma[t-3]);
}"


#AR(3) Model for comparison
stan_code_ar<-"
data {
  int<lower=0> T;
  vector[T] r;
}
parameters {
  real alpha;
  real beta;
  real gamma;
  real delta;
  real<lower=0> sigma;
}
model {
  target += normal_lpdf(alpha | 0, 1);
  target += normal_lpdf(beta | 0, 1);
  target += normal_lpdf(gamma | 0, 1);
  target += normal_lpdf(delta | 0, 1);
  target += normal_lpdf(r[4:T] | alpha+beta*r[3:(T-1)]+gamma*r[2:(T-2)]+delta*r[1:(T-3)], sigma);
}
generated quantities {
real r_tilde; // Samples from posterior predictive distribution

r_tilde = normal_rng(alpha+beta*r[T]+gamma*r[T-1]+delta*r[T-2],sigma);  //Simulate draw from period T+1 conditional likelihood 
}"

#Steal code for ARMA(1,1) from Stan manual
#Fun fact: this code has major problems with sampling, making MCMC results unreliable
#All diagnostics suggest not to trust it, so exclude it from out set of models
stan_code_arma<-"
data {
  int<lower=1> T;            // num observations
  real r[T];                 // observed outputs
}
parameters {
  real mu;                   // mean coeff
  real phi;                  // autoregression coeff
  real theta;                // moving avg coeff
  real<lower=0> sigma;       // noise scale
}
transformed parameters {
  vector[T] nu;              // prediction for time t
  vector[T] err;             // error for time t
  nu[1] = mu + phi * mu;    // assume err[0] == 0
  err[1] = r[1] - nu[1];
  for (t in 2:T) {
    nu[t] = mu + phi * r[t-1] + theta * err[t-1];
    err[t] = r[t] - nu[t];
  }
}
model {
  target += normal_lpdf(mu | 0, 1);         // priors
  target += normal_lpdf(phi | 0, 1);
  target += normal_lpdf(theta | 0, 1);
  target += normal_lpdf(sigma | 0, 1);
  target += normal_lpdf(err | 0, sigma);    // likelihood
}
generated quantities {
real r_tilde; // Samples from posterior predictive distribution

r_tilde = normal_rng(mu + phi * r[T] + theta * err[T],sigma);  //Simulate draw from period T+1 conditional likelihood 
}
"

stan_model_ararch<-stan_model(model_code = stan_code_ararch,model_name = "ARARCHModel") 
stan_model_arch<-stan_model(model_code = stan_code_arch,model_name = "ARCHModel") 
stan_model_ar<-stan_model(model_code = stan_code_ar,model_name = "ARModel") 
#stan_model_arma<-stan_model(model_code = stan_code_arma,model_name = "ARMAModel") 
Tlength<-length(oilgrowth)
# Format data as list to feed to model
stan_data<-list(T=Tlength, r=oilgrowth)

# Run the code to draw samples from the posteriors
fit_ararch<-sampling(object = stan_model_ararch,data = stan_data, chains = 4, iter = 2000, seed = 4567)
fit_arch<-sampling(object = stan_model_arch,data = stan_data, chains = 4, iter = 2000, seed = 4567)
fit_ar<-sampling(object = stan_model_ar,data = stan_data, chains = 4, iter = 2000, seed = 4567)
#fit_arma<-sampling(object = stan_model_arma,data = stan_data, chains = 4, iter = 2000, seed = 4567, 
#                   control=list(adapt_delta=0.94,max_treedepth=16))

#Display parameter estimates
#print(fit_ararch,pars=c("mu","ar1","omega","beta1","beta2","lp__"),digits_summary=4)
#print(fit_arch,pars=c("mu","omega","beta1","beta2","lp__"),digits_summary=4)
#print(fit_ar,pars=c("alpha","beta","gamma","delta","lp__"),digits_summary=4)

#print(fit_arma,pars=c("mu","phi","theta","sigma","lp__"),digits_summary=4)

#Display posterior predictive distributions
postpredplot<-list()
postpredplot[[1]]<-stan_hist(fit_ararch,pars="r_tilde",bins=70)+ggtitle("AR-ARCH")+xlab("Oil Price Growth")
postpredplot[[2]]<-stan_hist(fit_arch,pars="r_tilde",bins=70)+ggtitle("ARCH")+xlab("Oil Price Growth")
postpredplot[[3]]<-stan_hist(fit_ar,pars="r_tilde",bins=70)+ggtitle("AR(3)")+xlab("Oil Price Growth")

#stan_hist(fit_arma,pars="r_tilde",bins=70)+ggtitle("Posterior Density y_{T+1}, ARMA model")+xlab("Oil Price Growth")

# Compute log marginal likelihood via bridge sampling for models using "bridgesampling" library
M3.bridge <- bridge_sampler(fit_ararch, silent = TRUE)
M2.bridge <- bridge_sampler(fit_arch, silent = TRUE)
M1.bridge <- bridge_sampler(fit_ar, silent = TRUE)
#Obtain Log marginal likelihood
ml3<-M3.bridge$logml
ml2<-M2.bridge$logml
ml1<-M1.bridge$logml

modelposterior<-post_prob(M1.bridge,M2.bridge,M3.bridge)

Marginal Likelihoods and T+1 Posterior Predictive distributions

marglikelihood<-c(ml1,ml2,ml3)
models<-c("AR(3)","ARCH(2)","AR(1)-ARCH(2)")

bayestable<-data.frame(models,marglikelihood,modelposterior)

colnames(bayestable)<-c("Model","Marginal Likelihood","Posterior Model Weight")

kable(bayestable,
  caption="Bayesian Combinations of Oil Price Growth Models") %>%
  kable_styling(bootstrap_options = "striped")
Bayesian Combinations of Oil Price Growth Models
Model Marginal Likelihood Posterior Model Weight
M1.bridge AR(3) -1133.720 0
M2.bridge ARCH(2) -1094.782 0
M3.bridge AR(1)-ARCH(2) -1077.153 1
grid.arrange(grobs=postpredplot,nrow=1,ncol=3)

Dynamic Model Combination

Online Approaches

Conclusions

References