tsCV
) \widehat{R}^{tsCV}(\widehat{f})=\frac{1}{T-h}\sum_{t=1}^{T-h}\ell(y_{t+h},\widehat{f}(\mathcal{Y}_{t}))Arima
, VAR
, ets
, etc, or ex post by CV
auto.arima
, to choose between orders of ARVARselect
computes AIC/BIC for VAR over loss \log\det(\sum_{t=1}^{T-h}(y_{t+h}-\widehat{y}_{t})(y_{t+h}-\widehat{y}_{t})^\prime)glmnet
cv.glmnet
implements cross validation parameter selection, but not time series versionsmooth.spline
as well as in specialized functions in libraries gam
or mgcv
library(fpp2) #Forecasting
library(glmnet) #Lasso and Ridge
library(vars) #Vector Autoregressions
library(fredr) #Access FRED Data
##Obtain and transform NIPA Data (cf Lecture 07)
fredr_set_key("8782f247febb41f291821950cf9118b6") #Key I obtained for this class
#US GDP components
GDP<-fredr(series_id = "GDP",
observation_start = as.Date("1947-01-01"),
observation_end=as.Date("2021-03-07"),
vintage_dates = as.Date("2021-03-07")) #Gross Domestic Product
#US GDP components from NIPA tables (cf http://www.bea.gov/national/pdf/nipaguid.pdf)
PCEC<-fredr(series_id = "PCEC",
observation_start = as.Date("1947-01-01"),
observation_end=as.Date("2021-03-07"),
vintage_dates = as.Date("2021-03-07")) #Personal consumption expenditures
FPI<-fredr(series_id = "FPI",
observation_start = as.Date("1947-01-01"),
observation_end=as.Date("2021-03-07"),
vintage_dates = as.Date("2021-03-07")) #Fixed Private Investment
CBI<-fredr(series_id = "CBI",
observation_start = as.Date("1947-01-01"),
observation_end=as.Date("2021-03-07"),
vintage_dates = as.Date("2021-03-07")) #Change in Private Inventories
NETEXP<-fredr(series_id = "NETEXP",
observation_start = as.Date("1947-01-01"),
observation_end=as.Date("2021-03-07"),
vintage_dates = as.Date("2021-03-07")) #Net Exports of Goods and Services
GCE<-fredr(series_id = "GCE",
observation_start = as.Date("1947-01-01"),
observation_end=as.Date("2021-03-07"),
vintage_dates = as.Date("2021-03-07")) #Government Consumption Expenditures and Gross Investment
#Format the series as quarterly time series objects, starting at the first date
gdp<-ts(GDP$value,frequency = 4,start=c(1947,1),names="Gross Domestic Product")
pcec<-ts(PCEC$value,frequency = 4,start=c(1947,1),names="Consumption")
fpi<-ts(FPI$value,frequency = 4,start=c(1947,1),names="Fixed Investment")
cbi<-ts(CBI$value,frequency = 4,start=c(1947,1),names="Inventory Growth")
invest<-fpi+cbi #Private Investment
netexp<-ts(NETEXP$value,frequency = 4,start=c(1947,1),names="Net Exports")
gce<-ts(GCE$value,frequency = 4,start=c(1947,1),names="Government Spending")
#Convert to log differences to ensure stationarity and collect in frame
NIPAdata<-ts(data.frame(diff(log(gdp)),diff(log(pcec)),diff(log(invest)),diff(log(gce))),frequency = 4,start=c(1947,2))
# Construct lags
l1NIPA<-window(stats::lag(NIPAdata,-1),start=c(1949,2),end=c(2020,4))
l2NIPA<-window(stats::lag(NIPAdata,-2),start=c(1949,2),end=c(2020,4))
l3NIPA<-window(stats::lag(NIPAdata,-3),start=c(1949,2),end=c(2020,4))
l4NIPA<-window(stats::lag(NIPAdata,-4),start=c(1949,2),end=c(2020,4))
l5NIPA<-window(stats::lag(NIPAdata,-5),start=c(1949,2),end=c(2020,4))
l6NIPA<-window(stats::lag(NIPAdata,-6),start=c(1949,2),end=c(2020,4))
l7NIPA<-window(stats::lag(NIPAdata,-7),start=c(1949,2),end=c(2020,4))
l8NIPA<-window(stats::lag(NIPAdata,-8),start=c(1949,2),end=c(2020,4))
#convert to matrices
xvarsdf<-data.frame(l1NIPA,l2NIPA,l3NIPA,l4NIPA,l5NIPA,l6NIPA,l7NIPA,l8NIPA)
xvars<-as.matrix(xvarsdf)
NIPAwindow<-matrix(window(NIPAdata,start=c(1949,2),end=c(2020,4)),length(l1NIPA[,1]),4)
#Spline models, yeah?
library(gam) #loads smoothing spline package
library(mgcv) #A slightly different smoothing spline package
lagDlogGDP<-xvars[,1]
DlogGDP<-NIPAwindow[,1]
#Smoothing spline Regression of GDP on lagged GDP
# Default behavior fits penalty parameter by generalized Cross Validation
# Predict GDP growth using smoothing spline in lagged GDP growth
gdp.spl<-smooth.spline(lagDlogGDP,DlogGDP)
splinepredictions<-predict(gdp.spl)
#Build Plot
gdpdata<-data.frame(xvars[,1],NIPAwindow[,1],splinepredictions$x,splinepredictions$y)
colnames(gdpdata)<-c("LDlGDP","DlGDP","xspline","yspline")
ggplot(gdpdata,aes(x=LDlGDP,y=DlGDP))+geom_point()+
geom_line(aes(x=xspline,y=yspline),color="red",size=1)+
geom_smooth(method=lm,se=FALSE,color="blue")+
ggtitle("Smoothing Spline and Linear Fits of This vs Last Quarter's GDP Growth",
subtitle="Spline bends to fit far off points")+
xlab("Change in Log GDP, t-1")+ylab("Change in Log GDP, t")
# An alternate approach which uses "gam" command to fit spline
# Advantage is that it's built in to ggplot
# Disadvantage is that it uses only default options, so not customizable
# Default values here selct much higher smoothing, and give close to linear fit
# ggplot(gdpdata,aes(x=LDlGDP,y=DlGDP))+geom_point()+
# ggtitle("Spline and OLS Fit of This vs Last Quarter's GDP Growth",
# subtitle="There are two lines which exactly overlap")+
# xlab("Change in Log GDP, t-1")+ylab("Change in Log GDP, t")+
# geom_smooth(method=gam,se=FALSE,color="blue")+
# geom_smooth(method=lm,se=FALSE,color="red")
library(dplyr) #Data manipulation
library(knitr) #Use knitr to make tables
library(kableExtra) #Extra options for tables
library(tibble) #More data manipulation
#Calculate information criteria
IC<-VARselect(NIPAdata,lag.max=8)
# Fit Optimal VARs by AIC, BIC
AICVAR<-VAR(NIPAdata,p=IC$selection[1])
BICVAR<-VAR(NIPAdata,p=IC$selection[3])
#Construct 1 period ahead foecasts
AICpred<-forecast(AICVAR,h=1)$forecast
BICpred<-forecast(BICVAR,h=1)$forecast
#Extract point forecasts
AICfcsts<-select(data.frame(AICpred),ends_with("Forecast"))
BICfcsts<-select(data.frame(BICpred),ends_with("Forecast"))
#Fit LASSO VAR in all variables up to 8 lags
lassovar<-glmnet(xvars,NIPAwindow,family="mgaussian")
#Show range of lambda values from unpenalized to where all variables gone
#lassovar$lambda
#Show coefficients for large value of lambda (heavy penalization, most coefs gone)
#coef(lassovar,s=0.02)
#Show coefficients for intermediate value of lambda (medium penalization, some coefs gone)
#coef(lassovar,s=0.008)
#Show coefficients for small value of lambda (little penalization, most coefs remain)
#coef(lassovar,s=0.001)
## Fit LASSO VAR with 10-fold cross-validation
lassovarCV<-cv.glmnet(xvars,NIPAwindow,family="mgaussian")
#Lambda giving minimum 10-fold Cross Validation Error
CVlambda<-lassovarCV$lambda.min
#Baroque construction to take last period and format it for predict command
#Somehow need 2 rows in prediction value or it throws a fit
#Presumably because matrix coerced to vector and then it thinks it can't multiply anymore
maxT<-length(NIPAwindow[,1]) #index of last period
newX<-t(as.matrix(data.frame(as.vector(t(NIPAwindow[(maxT-8):(maxT-1),])), as.vector(t(NIPAwindow[(maxT-7):maxT,])))))
#Predict 2021Q1 series
cvlassopredictions<-window(ts(predict(lassovarCV,newx = newX)[,,1],start=c(2020,4),frequency=4),start=c(2021,1))
#Fit L2 Penalized VAR in all variables up to 8 lags
ridgevar<-glmnet(xvars,NIPAwindow,alpha=0,family="mgaussian")
#Show range of lambda values from unpenalized to where all variables gone
#ridgevar$lambda
#Show coefficients for large value of lambda (heavy penalization, coefs shrink a lot)
#coef(ridgevar,s=10)
#Show coefficients for intermediate value of lambda (medium penalization, coefs shrink a little)
#coef(ridgevar,s=1)
#Show coefficients for small value of lambda (little penalization, coefs close to OLS values)
#coef(ridgevar,s=0.01)
## Fit Ridge VAR with 10-fold cross-validation
ridgevarCV<-cv.glmnet(xvars,NIPAwindow,alpha=0,family="mgaussian")
#Lambda giving minimum 10-fold Cross Validation Error
CVlambda2<-ridgevarCV$lambda.min
#Predict 2021Q1 series
cvridgepredictions<-window(ts(predict(ridgevarCV,newx = newX)[,,1],start=c(2020,4),frequency=4),start=c(2021,1))
# #Plot Data Series
# autoplot(window(NIPAdata,start=c(2012,4)))+
# autolayer(cvlassopredictions)+
# autolayer(cvridgepredictions)
# autolayer(AICpred$diff.log.gdp..$mean)+
# autolayer(BICpred$diff.log.gdp..$mean)
#Collect 2021Q1 Forecasts into a data frame
var1table<-data.frame(
t(cvridgepredictions),
t(cvlassopredictions),
t(AICfcsts),
t(BICfcsts)
)
rownames(var1table)<-c("GDP","Consumption","Investment","Government")
#Make Table of Estimated Coefficients
kable(var1table,
col.names=c("Ridge","LASSO","AIC","BIC"),
caption="2021Q1 Forecasts from Different Methods") %>%
kable_styling(bootstrap_options = "striped", font_size = 16)
Ridge | LASSO | AIC | BIC | |
---|---|---|---|---|
GDP | 0.0151859 | 0.0151859 | 0.0061177 | 0.0165646 |
Consumption | 0.0153646 | 0.0153646 | 0.0092199 | 0.0179683 |
Investment | 0.0157096 | 0.0157096 | 0.0118236 | 0.0245480 |
Government | 0.0152194 | 0.0152194 | 0.0247882 | 0.0108015 |
#Make Coefficient Table
zzz<-data.frame(as.matrix(coef(ridgevarCV)$y1),
as.matrix(coef(ridgevarCV)$y2),
as.matrix(coef(ridgevarCV)$y3),
as.matrix(coef(ridgevarCV)$y4))
rownames(zzz)<-rownames(data.frame(as.matrix(coef(ridgevarCV)$y1)))
zzz %>%
mutate_all(function(x) {
cell_spec(x, bold = T,
color = spec_color(x))
}) %>%
kable(escape=F,
col.names=c("GDP","C","I","G"),
caption="Coefficients in Ridge Regression") %>%
kable_styling(bootstrap_options = "striped", font_size=10)
GDP | C | I | G | |
---|---|---|---|---|
(Intercept) | 0.0151858897298007 | 0.0153645580354301 | 0.0157096142355653 | 0.0152194105076093 |
diff.log.gdp.. | 2.30031255499118e-37 | 9.38827596242842e-38 | 5.67710819257864e-37 | 4.1908660740873e-37 |
diff.log.pcec.. | 2.03414871590585e-37 | -1.42003271917175e-38 | 1.03518480719846e-36 | 2.43101694930497e-37 |
diff.log.invest.. | 5.81179178431625e-38 | 4.89847015090077e-38 | 1.72474357338571e-37 | 2.89445812951768e-38 |
diff.log.gce.. | 8.14333315056698e-38 | 4.65662452126473e-38 | -4.97908409989161e-37 | 6.06876408802504e-37 |
diff.log.gdp…1 | 2.66230340418565e-37 | 2.17899432587549e-37 | 5.89720423452404e-38 | 5.11288435463506e-37 |
diff.log.pcec…1 | 2.45438578253927e-37 | 2.72574002810306e-37 | -7.46099917642788e-38 | 4.1874800901528e-37 |
diff.log.invest…1 | 6.16468295468514e-38 | 3.76230895303787e-38 | 1.43513553628022e-37 | 4.89935776717903e-38 |
diff.log.gce…1 | 2.88850420892817e-38 | -2.0343767595241e-39 | -4.35758744418575e-37 | 4.39598403688305e-37 |
diff.log.gdp…2 | 1.69764387502009e-37 | 1.38669330320158e-37 | -5.53541951770884e-37 | 6.43937167156774e-37 |
diff.log.pcec…2 | 2.57387614260426e-37 | 2.31769546123258e-37 | -1.50074670774808e-37 | 4.96432317583687e-37 |
diff.log.invest…2 | 2.05248586117648e-38 | 8.68966908404487e-40 | -4.0244191573353e-38 | 8.19254014008568e-38 |
diff.log.gce…2 | -1.52460714936195e-38 | 5.71964171443974e-38 | -6.40050604726444e-37 | 3.00746325218341e-37 |
diff.log.gdp…3 | 1.02868823860545e-37 | 1.79075449325731e-37 | -8.72166554492289e-37 | 6.23993172561969e-37 |
diff.log.pcec…3 | 1.91306415158673e-37 | 1.56530919140519e-37 | -1.39775744944728e-37 | 4.57958482381477e-37 |
diff.log.invest…3 | 3.99606222132951e-39 | 2.86479073484879e-38 | -2.46853969613774e-37 | 1.07679973010426e-37 |
diff.log.gce…3 | -1.70497629422387e-38 | 3.24506547360457e-38 | -2.91745675031562e-37 | 1.4731800906771e-37 |
diff.log.gdp…4 | -9.47944652278077e-39 | 8.20701352695042e-38 | -9.8328564921561e-37 | 3.94870645320733e-37 |
diff.log.pcec…4 | 1.41634175467069e-37 | 3.09685052861973e-37 | -9.95791656490379e-37 | 4.61782046707173e-37 |
diff.log.invest…4 | -2.58637064333019e-38 | -3.58456162420379e-38 | -1.53548027819855e-37 | 6.26430258320072e-38 |
diff.log.gce…4 | 4.46754017745955e-38 | 9.68030481339711e-38 | -1.69478106557769e-38 | 3.76103120629892e-38 |
diff.log.gdp…5 | 6.29617484125634e-38 | 1.51638436741035e-37 | -5.19448139677564e-37 | 2.21421305380236e-37 |
diff.log.pcec…5 | 1.29010668326766e-37 | 1.52005958450989e-37 | -1.5993318690095e-37 | 2.33780106386364e-37 |
diff.log.invest…5 | -1.59227211270025e-38 | 4.86621283586907e-39 | -1.84306909715284e-37 | 4.02670363270379e-38 |
diff.log.gce…5 | 1.06431563250505e-37 | 1.13658269140945e-37 | 3.1415103584003e-37 | 5.11064147737253e-38 |
diff.log.gdp…6 | 9.21687780778919e-38 | 1.81069175743602e-37 | -2.22856911137888e-37 | 1.45859585321776e-37 |
diff.log.pcec…6 | 1.92215622922207e-37 | 3.056783686707e-37 | -2.1135329401155e-37 | 2.35697554061445e-37 |
diff.log.invest…6 | -1.47522987657778e-38 | -6.03934804215541e-39 | -5.93676171946803e-38 | 3.79610675225019e-39 |
diff.log.gce…6 | 1.01173813993219e-37 | 8.62521387934146e-38 | 3.50630401000713e-37 | 3.64942205833305e-38 |
diff.log.gdp…7 | 1.01868469749412e-37 | 1.87365890427143e-37 | -1.60968447243311e-37 | 8.74068306105839e-38 |
diff.log.pcec…7 | 1.30416714458953e-37 | 1.9113863912783e-37 | -7.5849159796442e-38 | 1.84329801657064e-37 |
diff.log.invest…7 | 1.09898866000851e-38 | 2.19118970648559e-38 | 4.91704674317236e-39 | -1.70444094452624e-38 |
diff.log.gce…7 | 6.045842474227e-38 | 6.63072766928136e-38 | 2.19700121521296e-38 | 7.83684941744905e-38 |
#Make Coefficient Table
zzz<-data.frame(as.matrix(coef(lassovarCV)$y1),
as.matrix(coef(lassovarCV)$y2),
as.matrix(coef(lassovarCV)$y3),
as.matrix(coef(lassovarCV)$y4))
rownames(zzz)<-rownames(data.frame(as.matrix(coef(lassovarCV)$y1)))
zzz %>%
mutate_all(function(x) {
cell_spec(x, bold = T,
color = if_else(x!=0,"red","black"))
}) %>%
kable(escape=F,
col.names=c("GDP","C","I","G"),
caption="Coefficients in LASSO Regression") %>%
kable_styling(bootstrap_options = "striped", font_size=10)
GDP | C | I | G | |
---|---|---|---|---|
(Intercept) | 0.0151858897298007 | 0.0153645580354301 | 0.0157096142355653 | 0.0152194105076093 |
diff.log.gdp.. | 0 | 0 | 0 | 0 |
diff.log.pcec.. | 0 | 0 | 0 | 0 |
diff.log.invest.. | 0 | 0 | 0 | 0 |
diff.log.gce.. | 0 | 0 | 0 | 0 |
diff.log.gdp…1 | 0 | 0 | 0 | 0 |
diff.log.pcec…1 | 0 | 0 | 0 | 0 |
diff.log.invest…1 | 0 | 0 | 0 | 0 |
diff.log.gce…1 | 0 | 0 | 0 | 0 |
diff.log.gdp…2 | 0 | 0 | 0 | 0 |
diff.log.pcec…2 | 0 | 0 | 0 | 0 |
diff.log.invest…2 | 0 | 0 | 0 | 0 |
diff.log.gce…2 | 0 | 0 | 0 | 0 |
diff.log.gdp…3 | 0 | 0 | 0 | 0 |
diff.log.pcec…3 | 0 | 0 | 0 | 0 |
diff.log.invest…3 | 0 | 0 | 0 | 0 |
diff.log.gce…3 | 0 | 0 | 0 | 0 |
diff.log.gdp…4 | 0 | 0 | 0 | 0 |
diff.log.pcec…4 | 0 | 0 | 0 | 0 |
diff.log.invest…4 | 0 | 0 | 0 | 0 |
diff.log.gce…4 | 0 | 0 | 0 | 0 |
diff.log.gdp…5 | 0 | 0 | 0 | 0 |
diff.log.pcec…5 | 0 | 0 | 0 | 0 |
diff.log.invest…5 | 0 | 0 | 0 | 0 |
diff.log.gce…5 | 0 | 0 | 0 | 0 |
diff.log.gdp…6 | 0 | 0 | 0 | 0 |
diff.log.pcec…6 | 0 | 0 | 0 | 0 |
diff.log.invest…6 | 0 | 0 | 0 | 0 |
diff.log.gce…6 | 0 | 0 | 0 | 0 |
diff.log.gdp…7 | 0 | 0 | 0 | 0 |
diff.log.pcec…7 | 0 | 0 | 0 | 0 |
diff.log.invest…7 | 0 | 0 | 0 | 0 |
diff.log.gce…7 | 0 | 0 | 0 | 0 |
#Make Coefficient Table
zzz<-data.frame(AICVAR$varresult$diff.log.gdp..$coefficients,
AICVAR$varresult$diff.log.pcec..$coefficients,
AICVAR$varresult$diff.log.invest..$coefficients,
AICVAR$varresult$diff.log.gce..$coefficients)
rownames(zzz)<-rownames(data.frame(AICVAR$varresult$diff.log.gdp..$coefficients))
zzz %>%
mutate_all(function(x) {
cell_spec(x, bold = T,
color = if_else(x!=0,"red","black"))
}) %>%
kable(escape=F,
col.names=c("GDP","C","I","G"),
caption="Coefficients in AIC Regression") %>%
kable_styling(bootstrap_options = "striped", font_size=10)
GDP | C | I | G | |
---|---|---|---|---|
diff.log.gdp…l1 | -0.746943763057611 | -0.339280717024153 | -4.06998274098274 | -0.115524482447805 |
diff.log.pcec…l1 | 0.542887399420515 | 0.0603927127377116 | 3.52746849769727 | 0.172366474214833 |
diff.log.invest…l1 | 0.147665750520901 | 0.0996000486935814 | 0.628142284010407 | 0.0593200663481155 |
diff.log.gce…l1 | 0.232979968469864 | 0.127013734113583 | 0.546793026132765 | 0.490074188116684 |
diff.log.gdp…l2 | 0.148276807314082 | 0.0030153149531751 | 0.700719436813809 | 0.296184284489155 |
diff.log.pcec…l2 | -0.0468729316703431 | 0.0689250929776537 | -0.53805307285928 | -0.0349076307322074 |
diff.log.invest…l2 | 0.0142184321886127 | 0.0236461267511497 | -0.0136170202386893 | -0.0316570502447788 |
diff.log.gce…l2 | -0.025225073658209 | -0.102578974706382 | -0.00283444415118627 | 0.0579397109188032 |
diff.log.gdp…l3 | 0.233299129276807 | 0.193302574074163 | 0.303322310559933 | 0.0908340876606647 |
diff.log.pcec…l3 | -0.0516608151840398 | -0.007015396497794 | -0.1401673205575 | -0.160195069905048 |
diff.log.invest…l3 | -0.0585268233889642 | -0.0584938941415848 | -0.155597845827494 | 0.0250097255157367 |
diff.log.gce…l3 | -0.0913173183824062 | -0.0280589876912641 | -0.420294633255246 | 0.063747863705081 |
diff.log.gdp…l4 | 0.301371080744814 | 0.120097659148702 | 1.60230119894812 | 0.327298748721874 |
diff.log.pcec…l4 | 0.130764968897992 | 0.185885995556202 | 0.0787419661514829 | -0.248639740861761 |
diff.log.invest…l4 | -0.0700006765330574 | -0.0166441973406796 | -0.417800332681899 | -0.00209708631479729 |
diff.log.gce…l4 | -0.146455512729859 | -0.0406473284896966 | -0.67189744156677 | -0.0470418491889946 |
diff.log.gdp…l5 | -0.610591245995868 | -0.169926856449345 | -2.29212310983113 | -0.447928309402065 |
diff.log.pcec…l5 | 0.468231178107616 | 0.357972406028735 | 0.894382816968065 | 0.306927940613055 |
diff.log.invest…l5 | 0.0432689297333812 | -0.0198516764329586 | 0.192061121492337 | 0.0570281247813538 |
diff.log.gce…l5 | 0.186550699795729 | 0.125842023517093 | 0.595685589600826 | 0.0211641039656517 |
const | 0.00567042047478156 | 0.00620177914475714 | 0.00935819315081179 | 0.00184789666280374 |
#Make Coefficient Table
zzz<-data.frame(BICVAR$varresult$diff.log.gdp..$coefficients,
BICVAR$varresult$diff.log.pcec..$coefficients,
BICVAR$varresult$diff.log.invest..$coefficients,
BICVAR$varresult$diff.log.gce..$coefficients)
rownames(zzz)<-rownames(data.frame(BICVAR$varresult$diff.log.gdp..$coefficients))
zzz %>%
mutate_all(function(x) {
cell_spec(x, bold = T,
color = if_else(x!=0,"red","black"))
}) %>%
kable(escape=F,
col.names=c("GDP","C","I","G"),
caption="Coefficients in BIC Regression") %>%
kable_styling(bootstrap_options = "striped", font_size=10)
GDP | C | I | G | |
---|---|---|---|---|
diff.log.gdp…l1 | -0.616078505592147 | -0.216952693351494 | -4.11590175514894 | 0.0806110991176697 |
diff.log.pcec…l1 | 0.51185181145174 | 0.0369735342765242 | 3.65466265933299 | -0.00896787215583153 |
diff.log.invest…l1 | 0.138957227319627 | 0.0887645774343753 | 0.661995796320753 | 0.0499547661798207 |
diff.log.gce…l1 | 0.186098309220188 | 0.10353264493876 | 0.136023745308116 | 0.614133345716592 |
const | 0.0116164872700755 | 0.0150636667863203 | 0.00984807889662198 | 0.00412528923826631 |