- Moving Average Probability Model
- Properties
- Relation to Autoregression

- Extensions
- ARIMA model
- Seasonal ARIMA Model

- Multivariate Models
- Application: Macro Forecasting

- Russian economist Eugen Slutsky investigated how regular economic patterns could arise from pure randomness
- He took a long string of random numbers from the lottery, and took the average of the first 10
- Then he shifted the window over by 1, taking an average of the 2nd through 11th numbers
- He repeated this through the full string, producing a sequence of overlapping averages, a **moving average**

- In this process, he produced a sequence that looked remarkably like the fluctuations observed in economic series
- He theorized that such a mechanism, where sequences of purely random shocks are combined to produce a single number, might describe the origins of economic cycles
- Today, *moving average* refers to a process generated by overlapping weighted combinations of random variables
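Slutsky's experiment is easy to replicate; a minimal sketch in base R, with uniform random draws standing in for the lottery numbers:

```r
# Pure noise stands in for the lottery draws; averaging over a sliding
# 10-number window yields a much smoother, cycle-like series
set.seed(123)
e <- runif(200)                                        # raw random numbers
slutsky <- stats::filter(e, rep(1/10, 10), sides = 1)  # 10-period moving average
# The averaged series varies far less step to step than the raw noise
sd(diff(slutsky), na.rm = TRUE) < sd(diff(e))          # TRUE
# plot(slutsky, type = "l") # uncomment to see the cycle-like fluctuations
```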

- Consider a sequence of mean 0, uncorrelated **shocks** \(e_t\), \(t=1,\ldots,T\), which are *not* observed
- \(E[e_t]=0\), \(E[e_te_{t-h}]=0\) for all \(t\), for all \(h\neq0\)

- The observed data is still \(\{y_t\}_{t=1}^T\)
- The **Moving Average** model (of order q) says that \(y_t\) is a weighted sum of present and past shocks
- For all \(t\), \(y_t=e_t+\sum_{j=1}^{q}\theta_{j}e_{t-j}\)

- Coefficients \(\theta=\{\theta_j\}_{j=1}^{q}\) determine relationship between observations over time

- In lag polynomial notation, let \(\theta(L)=1+\sum_{j=1}^{q}\theta_jL^j\), then \(y_t=\theta(L)e_t\)
- To produce a full likelihood, strengthen assumption to \(e_t\overset{iid}{\sim}f()\), usually \(N(0,\sigma_e^2)\)
- Compare to the **Autoregression** model: \(y_t\) is a weighted sum of a present shock and past *observations*
- For all \(t\), \(y_t=e_t+\sum_{j=1}^{p}b_{j}y_{t-j}\)
- Lag polynomial representation \(b(L)y_t=e_t\) has lags on the left side instead of the right

- The appeal of the MA model comes from the fact that although \(e_t\) is never directly seen, the properties of the data are characterized explicitly by the model
- In particular, the Autocovariance function is determined by parameters \(\theta\)
- With \(\theta_0:=1\), \(\gamma(h):=Cov(y_{t},y_{t-h})=Cov(\sum_{j=0}^{q}\theta_{j}e_{t-j},\sum_{k=0}^{q}\theta_{k}e_{t-h-k})=\sigma_e^2\sum_{j=0}^{q-h}\theta_{j}\theta_{j+h}\) for \(0\leq h\leq q\)
- In words, covariances are determined by the moving average components shared between times

- Consider, e.g. MA(1), \(y_t=e_t+\theta_1e_{t-1}\)
- \(\gamma(0)=Var(y_t)=\sigma_e^2(1+\theta_1^2)\), \(\gamma(1)=Cov(y_t,y_{t-1})=\sigma_e^2\theta_1\), \(\gamma(j)=0\), for all \(j\geq 2\)
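These formulas can be checked against base R's `ARMAacf`, which computes the model-implied autocorrelations \(\rho(h)=\gamma(h)/\gamma(0)\); for \(\theta_1=1/2\), \(\rho(1)=\theta_1/(1+\theta_1^2)=0.4\) and \(\rho(h)=0\) for \(h\geq2\):

```r
# Model-implied autocorrelations for MA(1) with theta_1 = 0.5:
# rho(1) = theta_1/(1 + theta_1^2) = 0.4, rho(h) = 0 for h >= 2
rho <- ARMAacf(ma = 0.5, lag.max = 3)
rho # lags 0 through 3: 1.0, 0.4, 0.0, 0.0
```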

- Last property, that the autocovariance function drops to 0 at lag \(q+1\), is true for any \(MA(q)\)
- Beyond the window of length q, the moving averages do not overlap, and observations are uncorrelated

- The MA(q) model captures short term correlations up to horizon \(q\): the predicted mean reverts to 0 in finite time
- By allowing long enough \(q\), can, with many coefficients, describe very general patterns of relationships
- As the MA model is linear, like the AR model, it does not allow for general patterns of higher-order properties like conditional variance, skewness, etc.

- Difficulty of models with latent variables is that one cannot learn about the \(e_t\) directly, but only their properties
- With two or more random components determining one observation, might not be able to distinguish which one was the source of any change
- This is called an **identification** problem in econometrics

- Consider MA(1) processes \(y_t=e_t+\frac{1}{2}e_{t-1}\), \(e_t\overset{iid}{\sim}N(0,\sigma_{e}^2)\) and \(y_t=u_t+2u_{t-1}\), \(u_t\overset{iid}{\sim}N(0,\frac{\sigma_{e}^2}{4})\)
- We see ACF of first is \(\gamma(0)=\frac{5}{4}\sigma_e^2\), \(\gamma(1)=\frac{\sigma_e^2}{2}\)
- ACF of second process is exactly the same, so by normality, distribution is also exactly the same
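The equivalence can be verified numerically: `ARMAacf` returns identical autocorrelation functions for the two parameterizations (the shock variances differ, but they scale out of the ACF):

```r
# theta_1 = 1/2 and its flipped root theta_1 = 2 imply identical
# autocorrelations: rho(1) = 0.5/1.25 = 2/5 = 0.4 in both cases
rho_half <- ARMAacf(ma = 0.5, lag.max = 2)
rho_two  <- ARMAacf(ma = 2,   lag.max = 2)
isTRUE(all.equal(rho_half, rho_two)) # TRUE
```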

- This is an example of a general phenomenon: factorizing \(\theta(L)=\Pi_{j=1}^{q}(1+t_jL)\) with inverse roots \(t_j\), there is an equivalent representation \(\widetilde{\theta}(L)=\Pi_{j=1}^{q}(1+\frac{1}{t_j}L)\) with flipped roots and a different \(\sigma^2_e\)
- Does this make a difference? Not for forecasting: properties of series the same either way
- Can simply restrict interest to the representation with all inverse roots \(t_j\) inside the unit circle (the **invertible** representation), for Bayesian or statistical approach

- General lesson is that when building model out of unobserved parts, might not be able to learn all about them

- Properties of MA model can be seen by repeated substitution
- Consider MA(1) \(y_t=e_t+\theta_1e_{t-1}\) rearrange as \(e_t=y_{t}-\theta_1e_{t-1}\)

- Can substitute in past values \(e_{t-1}\) into this formula repeatedly
- \(e_t=y_{t}-\theta_1(y_{t-1}-\theta_1e_{t-2})=y_{t}-\theta_1y_{t-1}+\theta^2_1e_{t-2}\)
- \(=y_{t}-\theta_1y_{t-1}+\theta^2_1(y_{t-2}-\theta_1e_{t-3})=y_{t}-\theta_1y_{t-1}+\theta^2_1y_{t-2}-\theta^3_1e_{t-3}=\ldots\)

- Continuing indefinitely, have, in lag polynomial notation, \((1+\sum_{j=1}^{\infty}(-\theta_1)^jL^j)y_t=e_t\)
- This is exactly an (infinite order) autoregression model

- This equivalence holds in general: a finite order invertible MA model is an infinite order AR model
- Intuition: because \(e_t\) never seen exactly, must use all past information in \(\mathcal{Y}_t\) to predict it
- Observable implication: PACF will decay to 0 smoothly, rather than dropping off like AR
- Equivalence also holds in reverse: a finite order AR model is an infinite order MA model
- Repeatedly substituting, AR(1) \(y_t=b_1y_{t-1}+e_t\) becomes \(y_t=e_t+\sum_{j=1}^{\infty}b^j_1e_{t-j}\)

- Can use either, but one representation may be much less complex
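Base R's `ARMAtoMA` carries out this conversion; for an AR(1) with \(b_1=1/2\), the implied MA coefficients are \(b_1^j\), matching the substitution above:

```r
# Infinite MA representation of an AR(1) with b_1 = 0.5:
# the implied MA coefficients are psi_j = b_1^j
psi <- ARMAtoMA(ar = 0.5, lag.max = 5)
isTRUE(all.equal(psi, 0.5^(1:5))) # TRUE
```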

- Because the shocks are not observed, likelihood formula for MA model surprisingly complicated
- In general, no closed form formula exists for conditional likelihood

- Reason is that \(y_t\) and \(y_{t-1}\) both depend on \(e_{t-1}\), but to know what part of \(y_{t-1}\) comes from \(e_{t-1}\) need to know what comes from \(e_{t-2}\), which affects \(y_{t-2}\) which requires… etc
- A variety of solutions exist which nevertheless allow valid estimates
- Likelihood can be constructed by a *recursive* algorithm, step by step
- Requires conditioning each period, which requires using Bayes rule, which requires integration
- But if \(e_t\sim N(0,\sigma_e^2)\), there is a fast and exact recursive algorithm

- Typical approach: use likelihood from normal case even if you don’t think distribution is normal
- This is called a **quasi-** or **pseudo-**likelihood, and \(\theta\) can be estimated by maximizing it: this is the default method of R's `arima` command
- Or use penalized estimation, or do Bayesian inference with it

- Alternative 1: Choose parameters \(\widehat{\theta}\) to match estimated ACF to model-implied ACF
- Utilizes fact that only covariances are modeled by process, doesn’t need normality

- Alternative 2: Convert to infinite AR form and estimate by least squares, truncating at some finite order
- Not exact, but since decay usually exponential, truncated part is close to negligible and prediction becomes very easy: just use AR formulas
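A minimal sketch of the (quasi-)likelihood approach on simulated data, with an assumed true \(\theta_1=0.5\); base R's `arima` maximizes the Gaussian likelihood via an exact state-space recursion:

```r
# Simulate an MA(1) with assumed true theta_1 = 0.5, then estimate by
# maximizing the Gaussian likelihood, as arima() does by default
set.seed(42)
y <- arima.sim(model = list(ma = 0.5), n = 2000)
fit <- arima(y, order = c(0, 0, 1), include.mean = FALSE)
coef(fit)["ma1"] # close to the true value 0.5
```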

- To efficiently match all features of correlation patterns, can also use *both* AR and MA terms
- An Autoregressive Moving Average model of orders p and q, or **ARMA(p,q)** model, has form, for all \(t\), \[y_{t}=\sum_{j=1}^{p}b_jy_{t-j}+e_{t}+\sum_{k=1}^{q}\theta_{k}e_{t-k}\]
- Where \(e_t\) is still a mean 0 white noise sequence

- In lag polynomial notation \((1-\sum_{j=1}^{p}b_jL^j)y_t=(1+\sum_{k=1}^{q}\theta_kL^k)e_t\)
- By same equivalency results, an ARMA model is equivalent to an infinite AR model or an infinite MA model
- Can denote infinite MA representation as \(y_t=\psi(L)e_t:=\frac{\theta(L)}{b(L)}e_t\)
- ARMA model allows finite representation for very general patterns
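`ARMAtoMA` also handles the ARMA case, computing \(\psi(L)=\theta(L)/b(L)\); for illustrative values \(b_1=0.5\), \(\theta_1=0.3\), matching coefficients in \(\psi(L)(1-b_1L)=(1+\theta_1L)\) gives \(\psi_1=b_1+\theta_1\) and \(\psi_j=b_1\psi_{j-1}\) thereafter:

```r
# psi(L) = theta(L)/b(L) for ARMA(1,1) with b_1 = 0.5, theta_1 = 0.3:
# psi_1 = 0.5 + 0.3 = 0.8, then each psi_j is half the previous one
psi <- ARMAtoMA(ar = 0.5, ma = 0.3, lag.max = 3)
isTRUE(all.equal(psi, c(0.8, 0.4, 0.2))) # TRUE
```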

- Estimation again usually by normal (quasi-)likelihood, with recursive algorithm

- In addition to invertibility condition for MA roots, have one more equivalency
- If factorizations of \(b(L)\) and \(\theta(L)\) share a root, can factor out from both sides and get model with exact same predictions
- Not a problem for prediction: just present in factorized form
- Estimation can behave weirdly when roots are “close” (worse approximation, etc)

- Condition for stationarity of an ARMA model is that the AR part satisfy the conditions for stationarity of an AR model
- All roots of the lag polynomial \(b(z)=1-\sum_{j=1}^{p}b_jz^j\) are outside the unit circle

- In the case of d unit roots, differencing \(y_t\) d times can restore stationarity
- If \(\Delta^d y_t\) is an ARMA(p,q) process, \(y_t\) is called an **ARIMA(p,d,q)** process
- ARIMA model allows long run random trend, plus very general short run patterns
- Exactly the same unit root tests as in the AR case apply: run Phillips-Perron, KPSS, or ADF to determine d

- `auto.arima` executes the following steps
- Test \(y_t\) for unit roots by KPSS test, differencing if found to be nonstationary, repeating until stationarity
- Represent ARMA (quasi)likelihood by recursive algorithm at different orders \((p,q)\)
- Use AICc to choose orders, then estimate \(b,\theta\) by maximizing (quasi)likelihood
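The selection loop can be sketched in base R; this is a simplified stand-in for `auto.arima`, with `PP.test` from `stats` replacing the KPSS test and plain AIC replacing AICc, since neither needs extra packages:

```r
set.seed(7)
y <- cumsum(arima.sim(model = list(ar = 0.5), n = 500)) # simulated integrated series
# Step 1: difference until a unit root test no longer indicates nonstationarity
d <- 0
while (PP.test(y)$p.value > 0.05) {
  y <- diff(y)
  d <- d + 1
}
# Steps 2-3: fit candidate (p,q) orders, pick the information-criterion minimizer
fits <- expand.grid(p = 0:2, q = 0:2)
fits$aic <- apply(fits, 1, function(o)
  tryCatch(AIC(arima(y, order = c(o["p"], 0, o["q"]))),
           error = function(e) Inf))
best <- fits[which.min(fits$aic), ]
c(d = d, p = best$p, q = best$q) # chosen specification
```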

- This takes statistical approach to prediction: looks for best model of data
- Performs well for mean forecasting over models close to ARIMA class

- Bayesian approach requires priors over all AR and MA coefficients
- Often use infinite AR representation for simplicity: use priors to discipline large number of coefficients

- Use ARIMA around a trend model to account for deterministic growth, seasonality, etc.
- Can also extend the model to add *non-deterministic* seasonal patterns
- For a series of frequency \(m\), may have particular relationships across intervals of length \(m\) or regular subsets
- Can create seasonal versions of ARIMA models by allowing relationships across \(m\) lags
- Useful for modeling commonly seen spikes in ACF at seasonal intervals
- Eg. strong December sales one year may be followed by strong December sales next year, on average

- Seasonal AR(1) is \(y_t=B_1y_{t-m}+e_t\), seasonal MA(1) is \(y_t=e_t+\Theta_1e_{t-m}\), seasonal first difference is \(y_t-y_{t-m}\)
- Can combine and extend to higher orders to create a Seasonal ARIMA(P,D,Q)
- \((1-\sum_{j=1}^{P}B_jL^{mj})(1-L^m)^Dy_t=(1+\sum_{k=1}^{Q}\Theta_kL^{mk})e_t\)

- Can add on top of standard ARIMA to match seasonal and cyclical patterns by multiplying lag polynomials
- Seasonal ARIMA(p,d,q)(P,D,Q) takes form \((1-\sum_{n=1}^{p}b_nL^{n})(1-\sum_{j=1}^{P}B_jL^{mj})(1-L)^d(1-L^m)^Dy_t=(1+\sum_{k=1}^{q}\theta_kL^{k})(1+\sum_{k=1}^{Q}\Theta_kL^{mk})e_t\)
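Base R's `arima` fits this multiplicative form directly through its `seasonal` argument; a sketch on simulated quarterly data (\(m=4\)) with illustrative true coefficients \(b_1=0.5\), \(B_1=0.3\):

```r
# Simulate (1 - 0.5L)(1 - 0.3L^4)y_t = e_t: expanding the product gives
# recursive filter coefficients (0.5, 0, 0, 0.3, -0.15)
set.seed(11)
e <- rnorm(400)
y <- ts(stats::filter(e, filter = c(0.5, 0, 0, 0.3, -0.15),
                      method = "recursive"), frequency = 4)
fit <- arima(y, order = c(1, 0, 0),
             seasonal = list(order = c(1, 0, 0), period = 4),
             include.mean = FALSE)
round(coef(fit), 2) # near the true b_1 = 0.5 and B_1 = 0.3
```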

- Estimation similar to standard ARIMA: test for integration order, use quasi-maximum likelihood to fit coefficients
- Seasonal components are permitted by default in `auto.arima`: can exclude if series known to be deseasonalized

- Let’s take the 7 series from last class forecasting exercise, following Litterman (1986)
- GNP Growth, Inflation, Unemployment, M1 Money Stock, Private Fixed Investment, Commercial Paper Interest Rates, and Inventory Growth quarterly from 1971-2020Q1

- Choose level of differencing using tests, then compare (by AICc) AR only, MA only, and ARMA choices for each series
- Implement by `auto.arima` with restrictions to AR order p or MA order q
- Series restricted to AR or MA only appear to need more parameters than if allowed to use both
- Unrestricted model for inflation is ARIMA(0,1,1), but if MA terms are excluded, AR(4) is selected: many AR coefficients needed to approximate the MA property
- Unemployment shows the reverse: ARIMA(1,1,0) is chosen, but MA(3) is needed to match if the AR order is restricted to 0

- Resulting forecasts of most series similar across specifications due to rapid mean reversion
- MA reverts to mean (or trend if series integrated or trending) in finite time, AR reverts over infinite time, but distance decays exponentially fast
- Difference can be more substantial if mean reversion slow, with AR near but not at unit root
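The different reversion speeds show up directly in forecast paths; a sketch on simulated series (true coefficients 0.5 and 0.9, assumed for illustration): an MA(1) forecast sits exactly at the mean from horizon 2 on, while a persistent AR(1) forecast only shrinks toward it geometrically.

```r
# Fit an MA(1) and a persistent AR(1) to simulated data, then compare
# how their multi-step forecasts revert to the (zero) mean
set.seed(3)
ma_fit <- arima(arima.sim(model = list(ma = 0.5), n = 500),
                order = c(0, 0, 1), include.mean = FALSE)
ar_fit <- arima(arima.sim(model = list(ar = 0.9), n = 500),
                order = c(1, 0, 0), include.mean = FALSE)
ma_fc <- predict(ma_fit, n.ahead = 5)$pred # at the mean (0) from horizon 2 on
ar_fc <- predict(ar_fit, n.ahead = 5)$pred # shrinks geometrically toward 0
```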

```
#Libraries
library(fredr) # Data from FRED API
library(fpp2) #Forecasting and Plotting tools
library(vars) #Vector Autoregressions
library(knitr) #Use knitr to make tables
library(kableExtra) #Extra options for tables
library(dplyr) #Data Manipulation
library(tseries) #Time series functions including stationarity tests
library(gridExtra) #Graph Display
# Package "BMR" for BVAR estimation is not on CRAN, but is instead maintained by an individual
# It must be installed directly from the Github repo: uncomment the following code to do so
# library(devtools) #Library to allow downloading packages from Github
# install_github("kthohr/BMR")
# library(BMR) #Bayesian Macroeconometrics in R
# Note that if running this code on Kaggle, internet access must be enabled to download and install the package
# If installed locally, there may be difficulties due to differences in your local environment (in particular, versions of C++)
# For this reason, relying on local installation is not recommended unless you have a spare afternoon to dig through help files
#An alternative, similar library, is BVAR: it is on CRAN
library(BVAR) #Bayesian Vector Autoregressions
##Obtain and transform NIPA Data (cf Lecture 08)
fredr_set_key("8782f247febb41f291821950cf9118b6") #Key I obtained for this class
## Load Series: Series choices and names as in Litterman (1986)
RGNP<-fredr(series_id = "GNPC96",
observation_start = as.Date("1971-04-01"),
observation_end = as.Date("2020-01-01"),
vintage_dates = as.Date("2021-03-29"),
units="cch") #Real Gross National Product, log change
INFLA<-fredr(series_id = "GNPDEF",
observation_start = as.Date("1971-04-01"),
observation_end = as.Date("2020-01-01"),
vintage_dates = as.Date("2021-03-29"),
units="cch") #GNP Deflator, log change
UNEMP<-fredr(series_id = "UNRATE",
observation_start = as.Date("1971-04-01"),
observation_end = as.Date("2020-01-01"),
vintage_dates = as.Date("2021-03-29"),
frequency="q") #Unemployment Rate, quarterly
M1<-fredr(series_id = "M1SL",
observation_start = as.Date("1971-04-01"),
observation_end = as.Date("2020-01-01"),
vintage_dates = as.Date("2021-03-29"),
frequency="q",
units="log") #Log M1 Money Stock, quarterly
INVEST<-fredr(series_id = "GPDI",
observation_start = as.Date("1971-04-01"),
observation_end = as.Date("2020-01-01"),
vintage_dates = as.Date("2021-03-29"),
units="log") #Log Gross Domestic Private Investment
# The 4-6 month commercial paper rate series used in Litterman (1986) has been discontinued:
# For sample continuity, we merge the series for 3 month commercial paper rates from 1971-1997 with the 3 month non-financial commercial paper rate series
# This series also has last start date, so it dictates start date for series
CPRATE1<-fredr(series_id = "WCP3M",
observation_start = as.Date("1971-04-01"),
observation_end = as.Date("1996-10-01"),
vintage_dates = as.Date("2021-03-29"),
frequency="q") #3 Month commercial paper rate, quarterly, 1971-1997
CPRATE2<-fredr(series_id = "CPN3M",
observation_start = as.Date("1997-01-01"),
observation_end = as.Date("2020-01-01"),
vintage_dates = as.Date("2021-03-29"),
frequency="q") #3 Month AA nonfinancial commercial paper rate, quarterly, 1997-2018
CPRATE<-full_join(CPRATE1,CPRATE2) #Merge 2 series to create continuous 3 month commercial paper rate series from 1971-2018
CBI<-fredr(series_id = "CBI",
observation_start = as.Date("1971-04-01"),
observation_end = as.Date("2020-01-01"),
vintage_dates = as.Date("2021-03-29")) #Change in Private Inventories
#Format the series as quarterly time series objects, starting at the first date
rgnp<-ts(RGNP$value,frequency = 4,start=c(1971,2),names="Real Gross National Product")
infla<-ts(INFLA$value,frequency = 4,start=c(1971,2),names="Inflation")
unemp<-ts(UNEMP$value,frequency = 4,start=c(1971,2),names="Unemployment")
m1<-ts(M1$value,frequency = 4,start=c(1971,2),names="Money Stock")
invest<-ts(INVEST$value,frequency = 4,start=c(1971,2),names="Private Investment")
cprate<-ts(CPRATE$value,frequency = 4,start=c(1971,2),names="Commercial Paper Rate")
cbi<-ts(CBI$value,frequency = 4,start=c(1971,2),names="Change in Inventories")
#Express as a data frame
macrodata<-data.frame(rgnp,infla,unemp,m1,invest,cprate,cbi)
nlags<-6 # Number of lags to use
nseries<-length(macrodata[1,]) #Number of series used
Series<-c("Real GNP Growth","Inflation","Unemployment","Money Stock","Private Investment","Commercial Paper Rate","Change in Inventories")
```

```
#Use auto.arima to choose AR order after KPSS test without trend
#Do this also for MA, and for ARMA
ARIstatmodels<-list()
IMAstatmodels<-list()
ARIMAstatmodels<-list()
Integrationorder<-list()
ARorder<-list()
MAorder<-list()
ARorder2<-list()
MAorder2<-list()
for (i in 1:nseries){
  ARIstatmodels[[i]]<-auto.arima(macrodata[,i],max.q=0,seasonal=FALSE) #Apply auto.arima set to (nonseasonal) ARI only
  IMAstatmodels[[i]]<-auto.arima(macrodata[,i],max.p=0,seasonal=FALSE) #Apply auto.arima set to (nonseasonal) IMA only
  ARIMAstatmodels[[i]]<-auto.arima(macrodata[,i],seasonal=FALSE) #Apply auto.arima set to (nonseasonal) ARIMA
  Integrationorder[i]<-ARIMAstatmodels[[i]]$arma[6] #Integration order chosen (uses KPSS Test)
  ARorder[i]<-ARIstatmodels[[i]]$arma[1] #AR order chosen in AR only (uses AICc)
  MAorder[i]<-IMAstatmodels[[i]]$arma[2] #MA order chosen in MA only (uses AICc)
  ARorder2[i]<-ARIMAstatmodels[[i]]$arma[1] #AR order chosen in ARMA (uses AICc)
  MAorder2[i]<-ARIMAstatmodels[[i]]$arma[2] #MA order chosen in ARMA (uses AICc)
}
```

```
armamodels<-data.frame(as.numeric(Integrationorder),as.numeric(ARorder),
as.numeric(MAorder),as.numeric(ARorder2),as.numeric(MAorder2))
rownames(armamodels)<-Series
colnames(armamodels)<-c("d","p (AR only)","q (MA only)","p (ARMA)","q (ARMA)")
armamodels %>%
kable(caption="Autoregression, Moving Average, and ARMA Models") %>%
kable_styling(bootstrap_options = "striped")
```

| | d | p (AR only) | q (MA only) | p (ARMA) | q (ARMA) |
|---|---|---|---|---|---|
| Real GNP Growth | 0 | 2 | 3 | 1 | 1 |
| Inflation | 1 | 4 | 1 | 0 | 1 |
| Unemployment | 1 | 1 | 3 | 1 | 0 |
| Money Stock | 1 | 2 | 3 | 1 | 1 |
| Private Investment | 1 | 1 | 1 | 1 | 0 |
| Commercial Paper Rate | 1 | 3 | 2 | 0 | 2 |
| Change in Inventories | 1 | 0 | 0 | 0 | 0 |

```
#Construct Forecasts of Each Series by Univariate ARI, IMA, ARIMA models, with 95% confidence intervals
ARIfcsts<-list()
ARIMAfcsts<-list()
IMAfcsts<-list()
for (i in 1:nseries) {
  ARIfcsts[[i]]<-forecast::forecast(ARIstatmodels[[i]],h=20,level=95)
  ARIMAfcsts[[i]]<-forecast::forecast(ARIMAstatmodels[[i]],h=20,level=95)
  IMAfcsts[[i]]<-forecast::forecast(IMAstatmodels[[i]],h=20,level=95)
}
forecastplots<-list()
for (i in 1:nseries){
  pastwindow<-window(macrodata[,i],start=c(2000,1))
  #Plot all forecasts
  forecastplots[[i]]<-autoplot(pastwindow)+
    autolayer(ARIMAfcsts[[i]],alpha=0.4,series="ARIMA")+
    autolayer(ARIfcsts[[i]],alpha=0.4,series="ARI")+
    autolayer(IMAfcsts[[i]],alpha=0.4,series="IMA")+
    labs(x="Date",y=colnames(macrodata)[i],title=Series[i])
}
grid.arrange(grobs=forecastplots,nrow=4,ncol=2)
```