tsCV
) \widehat{R}^{tsCV}(\widehat{f})=\frac{1}{T-h}\sum_{t=1}^{T-h}\ell(y_{t+h},\widehat{f}(\mathcal{Y}_{t}))Arima
, VAR
, ets
, etc, or ex post by CV
auto.arima
, to choose between orders of ARVARselect
computes AIC/BIC for VAR over loss \log\det(\sum_{t=1}^{T-h}(y_{t+h}-\widehat{y}_{t})(y_{t+h}-\widehat{y}_{t})^\prime)glmnet
cv.glmnet
implements cross validation parameter selection, but not time series versionsmooth.spline
as well as in specialized functions in libraries gam
or mgcv
library(fpp2) #Forecasting
library(glmnet) #Lasso and Ridge
library(vars) #Vector Autoregressions
library(fredr) #Access FRED Data
##Obtain and transform NIPA Data (cf Lecture 08)
fredr_set_key("8782f247febb41f291821950cf9118b6") #Key I obtained for this class
#US GDP components
GDP<-fredr(series_id = "GDP",
observation_start = as.Date("1947-01-01")) #Gross Domestic Product
#US GDP components from NIPA tables (cf http://www.bea.gov/national/pdf/nipaguid.pdf)
PCEC<-fredr(series_id = "PCEC",
observation_start = as.Date("1947-01-01")) #Personal consumption expenditures
FPI<-fredr(series_id = "FPI",
observation_start = as.Date("1947-01-01")) #Fixed Private Investment
CBI<-fredr(series_id = "CBI",
observation_start = as.Date("1947-01-01")) #Change in Private Inventories
NETEXP<-fredr(series_id = "NETEXP",
observation_start = as.Date("1947-01-01")) #Net Exports of Goods and Services
GCE<-fredr(series_id = "GCE",
observation_start = as.Date("1947-01-01")) #Government Consumption Expenditures and Gross Investment
#Format the series as quarterly time series objects, starting at the first date
gdp<-ts(GDP$value,frequency = 4,start=c(1947,1),names="Gross Domestic Product")
pcec<-ts(PCEC$value,frequency = 4,start=c(1947,1),names="Consumption")
fpi<-ts(FPI$value,frequency = 4,start=c(1947,1),names="Fixed Investment")
cbi<-ts(CBI$value,frequency = 4,start=c(1947,1),names="Inventory Growth")
invest<-fpi+cbi #Private Investment
netexp<-ts(NETEXP$value,frequency = 4,start=c(1947,1),names="Net Exports")
gce<-ts(GCE$value,frequency = 4,start=c(1947,1),names="Government Spending")
#Convert to log differences to ensure stationarity and collect in frame
NIPAdata<-ts(data.frame(diff(log(gdp)),diff(log(pcec)),diff(log(invest)),diff(log(gce))),frequency = 4,start=c(1947,2))
# Construct lags
l1NIPA<-window(stats::lag(NIPAdata,-1),start=c(1949,2),end=c(2018,3))
l2NIPA<-window(stats::lag(NIPAdata,-2),start=c(1949,2),end=c(2018,3))
l3NIPA<-window(stats::lag(NIPAdata,-3),start=c(1949,2),end=c(2018,3))
l4NIPA<-window(stats::lag(NIPAdata,-4),start=c(1949,2),end=c(2018,3))
l5NIPA<-window(stats::lag(NIPAdata,-5),start=c(1949,2),end=c(2018,3))
l6NIPA<-window(stats::lag(NIPAdata,-6),start=c(1949,2),end=c(2018,3))
l7NIPA<-window(stats::lag(NIPAdata,-7),start=c(1949,2),end=c(2018,3))
l8NIPA<-window(stats::lag(NIPAdata,-8),start=c(1949,2),end=c(2018,3))
#convert to matrices
xvarsdf<-data.frame(l1NIPA,l2NIPA,l3NIPA,l4NIPA,l5NIPA,l6NIPA,l7NIPA,l8NIPA)
xvars<-as.matrix(xvarsdf)
NIPAwindow<-matrix(window(NIPAdata,start=c(1949,2),end=c(2018,3)),length(l1NIPA[,1]),4)
#Spline models, yeah?
library(gam) #loads smoothing spline package
lagDlogGDP<-xvars[,1]
DlogGDP<-NIPAwindow[,1]
#Smoothing spline Regression of GDP on lagged GDP
# Default behavior fits penalty parameter by generalized Cross Validation
# Predict GDP growth using smoothing spline in lagged GDP growth
gdp.spl<-smooth.spline(lagDlogGDP,DlogGDP)
splinepredictions<-predict(gdp.spl)
#Build Plot
gdpdata<-data.frame(xvars[,1],NIPAwindow[,1],splinepredictions$x,splinepredictions$y)
colnames(gdpdata)<-c("LDlGDP","DlGDP","xspline","yspline")
ggplot(gdpdata,aes(x=LDlGDP,y=DlGDP))+geom_point()+
geom_line(aes(x=xspline,y=yspline),color="red",size=1)+
ggtitle("Smoothing Spline Fit of This vs Last Quarter's GDP Growth",
subtitle="Almost exactly linear due to large optimal penalty for nonlinearity")+
xlab("Change in Log GDP, t-1")+ylab("Change in Log GDP, t")
# An alternate approach which uses "gam" command to fit spline
# Advantage is that it's built in to ggplot
# Disadvantage is that it uses only default options, so not customizable
# In this case, results are exactly the same
# ggplot(gdpdata,aes(x=LDlGDP,y=DlGDP))+geom_point()+
# ggtitle("Spline and OLS Fit of This vs Last Quarter's GDP Growth",
# subtitle="There are two lines which exactly overlap")+
# xlab("Change in Log GDP, t-1")+ylab("Change in Log GDP, t")+
# geom_smooth(method=gam,se=FALSE,color="blue")+
# geom_smooth(method=lm,se=FALSE,color="red")
library(dplyr) #Data manipulation
library(knitr) #Use knitr to make tables
library(kableExtra) #Extra options for tables
library(tibble) #More data manipulation
#Calculate information criteria
IC<-VARselect(NIPAdata,lag.max=8)
# Fit Optimal VARs by AIC, BIC
AICVAR<-VAR(NIPAdata,p=IC$selection[1])
BICVAR<-VAR(NIPAdata,p=IC$selection[3])
#Construct 1 period ahead foecasts
AICpred<-forecast(AICVAR,h=1)$forecast
BICpred<-forecast(BICVAR,h=1)$forecast
#Extract point forecasts
AICfcsts<-select(data.frame(AICpred),ends_with("Forecast"))
BICfcsts<-select(data.frame(BICpred),ends_with("Forecast"))
#Fit LASSO VAR in all variables up to 8 lags
lassovar<-glmnet(xvars,NIPAwindow,family="mgaussian")
#Show range of lambda values from unpenalized to where all variables gone
#lassovar$lambda
#Show coefficients for large value of lambda (heavy penalization, most coefs gone)
#coef(lassovar,s=0.02)
#Show coefficients for intermediate value of lambda (medium penalization, some coefs gone)
#coef(lassovar,s=0.008)
#Show coefficients for small value of lambda (little penalization, most coefs remain)
#coef(lassovar,s=0.001)
## Fit LASSO VAR with 10-fold cross-validation
lassovarCV<-cv.glmnet(xvars,NIPAwindow,family="mgaussian")
#Lambda giving minimum 10-fold Cross Validation Error
CVlambda<-lassovarCV$lambda.min
#Baroque construction to take last period and format it for predict command
#Somehow need 2 rows in prediction value or it throws a fit
#Presumably because matrix coerced to vector and then it thinks it can't multiply anymore
newX<-t(as.matrix(data.frame(as.vector(t(NIPAwindow[270:277,])), as.vector(t(NIPAwindow[271:278,])))))
#Predict 2018Q3 series
cvlassopredictions<-window(ts(predict(lassovarCV,newx = newX)[,,1],start=c(2018,3),frequency=4),start=c(2018,4))
#Fit L2 Penalized VAR in all variables up to 8 lags
ridgevar<-glmnet(xvars,NIPAwindow,alpha=0,family="mgaussian")
#Show range of lambda values from unpenalized to where all variables gone
#ridgevar$lambda
#Show coefficients for large value of lambda (heavy penalization, coefs shrink a lot)
#coef(ridgevar,s=10)
#Show coefficients for intermediate value of lambda (medium penalization, coefs shrink a little)
#coef(ridgevar,s=1)
#Show coefficients for small value of lambda (little penalization, coefs close to OLS values)
#coef(ridgevar,s=0.01)
## Fit Ridge VAR with 10-fold cross-validation
ridgevarCV<-cv.glmnet(xvars,NIPAwindow,alpha=0,family="mgaussian")
#Lambda giving minimum 10-fold Cross Validation Error
CVlambda2<-ridgevarCV$lambda.min
#Predict 2018Q3 series
cvridgepredictions<-window(ts(predict(ridgevarCV,newx = newX)[,,1],start=c(2018,3),frequency=4),start=c(2018,4))
# #Plot Data Series
# autoplot(window(NIPAdata,start=c(2012,4)))+
# autolayer(cvlassopredictions)+
# autolayer(cvridgepredictions)
# autolayer(AICpred$diff.log.gdp..$mean)+
# autolayer(BICpred$diff.log.gdp..$mean)
#Collect 2018Q4 Forecasts into a data frame
var1table<-data.frame(
t(cvridgepredictions),
t(cvlassopredictions),
t(AICfcsts),
t(BICfcsts)
)
rownames(var1table)<-c("GDP","Consumption","Investment","Government")
#Make Table of Estimated Coefficients
kable(var1table,
col.names=c("Ridge","LASSO","AIC","BIC"),
caption="2018Q4 Forecasts from Different Methods") %>%
kable_styling(bootstrap_options = "striped", font_size = 16)
Ridge | LASSO | AIC | BIC | |
---|---|---|---|---|
GDP | 0.0135764 | 0.0136399 | 0.0114672 | 0.0146535 |
Consumption | 0.0145109 | 0.0150744 | 0.0114084 | 0.0137662 |
Investment | 0.0141675 | 0.0096423 | 0.0147074 | 0.0200288 |
Government | 0.0112001 | 0.0129239 | 0.0109967 | 0.0142961 |
#Make Coefficient Table
zzz<-data.frame(as.matrix(coef(ridgevarCV)$y1),
as.matrix(coef(ridgevarCV)$y2),
as.matrix(coef(ridgevarCV)$y3),
as.matrix(coef(ridgevarCV)$y4))
rownames(zzz)<-rownames(data.frame(as.matrix(coef(ridgevarCV)$y1)))
zzz %>%
mutate_all(function(x) {
cell_spec(x, bold = T,
color = spec_color(x))
}) %>%
kable(escape=F,
col.names=c("GDP","C","I","G"),
caption="Coefficients in Ridge Regression") %>%
kable_styling(bootstrap_options = "striped", font_size=10)
GDP | C | I | G |
---|---|---|---|
0.00786403925303112 | 0.00788037586534336 | 0.0148332433942633 | 0.00238697200569118 |
0.0865957812778728 | 0.0551696965390487 | 0.244556969538606 | 0.0942090494910388 |
0.109959994411296 | 0.0140186300705327 | 0.652435086528036 | 0.0271147781051111 |
0.0123857804601417 | 0.0146716727038759 | 0.0263673253409791 | 0.00451694512000495 |
0.0106451182691282 | -0.000384426841756296 | -0.10326596491934 | 0.129021260675576 |
0.0621757521642605 | 0.0507324374153117 | 0.0591602476118126 | 0.0915652117483974 |
0.0685132890145487 | 0.0872907695940012 | 0.0201799922326903 | 0.0757765565334758 |
0.00980081641152812 | 0.00612229583374719 | 0.0264123256900968 | 0.00341895148384484 |
-0.00091364858455345 | -0.0183671961947785 | -0.0452636148290377 | 0.0728980049798622 |
0.00222284364167427 | 0.000153134673311346 | -0.128645457778679 | 0.0690242604269709 |
0.0281940450636521 | 0.0281374237646931 | -0.0171630376438789 | 0.0241099553055666 |
-0.000742106997763265 | -0.00393726953375673 | -0.0148823646684543 | 0.00991134660015443 |
-0.0117103950975583 | 0.00390704975726952 | -0.124060760551806 | 0.042365378160212 |
0.00466436718790951 | 0.0198055138385727 | -0.11583119132355 | 0.0696018152897233 |
0.0261427471133706 | 0.0118711044130767 | 0.0680154676403516 | 0.0163018495414982 |
-0.00167161870530863 | 0.00573285857652937 | -0.0541920520539569 | 0.0168684912766791 |
-0.00858261369081641 | -0.000107206981960806 | -0.0348551138082085 | 0.0112101644913383 |
-0.0232727042683797 | -0.00429662421540572 | -0.167820490545611 | 0.0172672049947349 |
0.00391586808909369 | 0.0427292362405572 | -0.200572063884623 | 0.0349239157634509 |
-0.00664742359679361 | -0.0104016875491473 | -0.0190333240075376 | 0.00595507032903712 |
0.00538793204213564 | 0.0128902984939667 | 0.02284614151604 | -0.0114685479470082 |
0.00544131900827601 | 0.0144369960515328 | -0.0303801828996868 | -0.00363175835548809 |
0.0164253253291509 | 0.018465576565109 | 0.0345073433829858 | -0.0126745355744008 |
-0.00190203067553394 | 5.72598991601035e-05 | -0.0190653286699528 | 0.00404537508886109 |
0.0197841229410908 | 0.0160085909139529 | 0.0842755170708496 | 0.00830309472111045 |
0.0120147540294824 | 0.0243376461687585 | -0.0165184864873954 | 0.00719547800775078 |
0.040296252839528 | 0.0596415983729323 | 0.02774714220003 | 0.0114532053990684 |
-0.0024274892654373 | -0.0029465709365635 | 0.00107392698826947 | -0.00148227405584982 |
0.0143235423939472 | 0.0095686832241577 | 0.0646786514144629 | 0.00189470282102794 |
0.00654277888835529 | 0.0260428281389034 | -0.0835351764832077 | 0.00556255654741 |
0.00740736376961867 | 0.0193445802289786 | -0.0299715588564189 | 0.0107470604622051 |
0.00244101702686358 | 0.00315971586949353 | 0.00301646433765767 | -0.00377054987395623 |
-0.0048771079424408 | -0.000351952561224234 | -0.0574670100128757 | 0.010569599656017 |
#Make Coefficient Table
zzz<-data.frame(as.matrix(coef(lassovarCV)$y1),
as.matrix(coef(lassovarCV)$y2),
as.matrix(coef(lassovarCV)$y3),
as.matrix(coef(lassovarCV)$y4))
rownames(zzz)<-rownames(data.frame(as.matrix(coef(lassovarCV)$y1)))
zzz %>%
mutate_all(function(x) {
cell_spec(x, bold = T,
color = if_else(x!=0,"red","black"))
}) %>%
kable(escape=F,
col.names=c("GDP","C","I","G"),
caption="Coefficients in LASSO Regression") %>%
kable_styling(bootstrap_options = "striped", font_size=10)
GDP | C | I | G |
---|---|---|---|
0.0100302004924729 | 0.0129798197145501 | 0.00168310308800569 | 0.00756634626175604 |
0 | 0 | 0 | 0 |
0.349959458943998 | 0.130476409727349 | 1.56554483539441 | 0.175646554336095 |
0 | 0 | 0 | 0 |
0.0189475717511366 | 0.00825380129907898 | -0.200786258486333 | 0.211575231022215 |
0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 |
0.00317825905700754 | 0.00415960881665426 | -0.0480514970684653 | 0.026735352282484 |
0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 |
-0.000579291079324646 | 0.00107644280868163 | -0.0116456790667297 | 0.00480000882876038 |
0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 |
0.000140488939523953 | 0.00533714789277771 | -0.0489610167268404 | 0.0198565317923048 |
0 | 0 | 0 | 0 |
-0.0248114736550768 | 0.00358810219897533 | -0.246221267350789 | 0.0430723482194091 |
0.00499345322235313 | 0.0241926374897687 | -0.102657716554927 | 0.0268686037123245 |
0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 |
#Make Coefficient Table
zzz<-data.frame(AICVAR$varresult$diff.log.gdp..$coefficients,
AICVAR$varresult$diff.log.pcec..$coefficients,
AICVAR$varresult$diff.log.invest..$coefficients,
AICVAR$varresult$diff.log.gce..$coefficients)
rownames(zzz)<-rownames(data.frame(AICVAR$varresult$diff.log.gdp..$coefficients))
zzz %>%
mutate_all(function(x) {
cell_spec(x, bold = T,
color = if_else(x!=0,"red","black"))
}) %>%
kable(escape=F,
col.names=c("GDP","C","I","G"),
caption="Coefficients in AIC Regression") %>%
kable_styling(bootstrap_options = "striped", font_size=10)
GDP | C | I | G |
---|---|---|---|
0.0142423802331418 | 0.335379954233719 | -1.66567424374834 | -0.0458342811491882 |
0.412581978510042 | -0.145870908410638 | 3.63202838343984 | 0.143896666113282 |
0.0274378660517431 | -0.0156497514807466 | 0.287226644977188 | 0.042002491417178 |
0.0643892203157539 | -0.0233881252807245 | 0.0174160286406101 | 0.472850970012959 |
0.229659888723582 | 0.269399415045273 | -0.143262388462611 | 0.351609194937956 |
0.0702970032085799 | 0.173522014804767 | -0.0975467195269845 | 0.0158414129010053 |
-0.0171415190305722 | -0.0323539077927016 | 0.0475055185498714 | -0.0412019143794856 |
-0.0420896176116723 | -0.158539060486606 | 0.177660920656683 | 0.0443322148452024 |
-0.0530503159805621 | -0.105453086053541 | -0.417634348755004 | 0.0518474932711676 |
0.0417016846902634 | 0.159668297181366 | -0.283642111390404 | -0.128775449751194 |
-0.0209115301435752 | -0.0200053835083251 | -0.0549504273458671 | 0.030162245635792 |
-0.0244971810971676 | 0.0310810988172909 | -0.186714858168813 | 0.072437242832677 |
0.194846774896595 | -0.0456390218211937 | 1.69673325899881 | 0.291405949868535 |
0.0438158991226434 | 0.118599212590941 | -0.318354688568522 | -0.266886732092782 |
-0.0436234814162472 | 0.0124385525781957 | -0.371750797774807 | 0.00287302373823534 |
-0.0939728362880406 | 0.0256517076994152 | -0.641177916328012 | -0.0349723904845601 |
-0.375654283804274 | 0.0714799076733018 | -1.78345775966185 | -0.427496739634625 |
0.214299611678092 | 0.0808915731483531 | 0.421133953050024 | 0.275045376137453 |
0.0179499285089324 | -0.0456210321684576 | 0.139224678770525 | 0.055414493021536 |
0.115882613339792 | 0.0566722429832534 | 0.423603489217224 | 0.0137362064712486 |
0.00327979811668631 | 0.00411587418405196 | 0.00146212088776681 | 0.00125004595756258 |
#Make Coefficient Table
zzz<-data.frame(BICVAR$varresult$diff.log.gdp..$coefficients,
BICVAR$varresult$diff.log.pcec..$coefficients,
BICVAR$varresult$diff.log.invest..$coefficients,
BICVAR$varresult$diff.log.gce..$coefficients)
rownames(zzz)<-rownames(data.frame(BICVAR$varresult$diff.log.gdp..$coefficients))
zzz %>%
mutate_all(function(x) {
cell_spec(x, bold = T,
color = if_else(x!=0,"red","black"))
}) %>%
kable(escape=F,
col.names=c("GDP","C","I","G"),
caption="Coefficients in BIC Regression") %>%
kable_styling(bootstrap_options = "striped", font_size=10)
GDP | C | I | G |
---|---|---|---|
0.121952811213594 | 0.443954168101468 | -1.96535758395511 | 0.127651524807346 |
0.407141812161502 | -0.1140622757727 | 3.58351024967781 | 0.00635501513565612 |
0.0354252888148711 | -0.00443586493533627 | 0.360026691647868 | 0.0435240022800619 |
0.00925600061424106 | -0.0503410167955548 | -0.389919533571099 | 0.598248368556429 |
0.00650643901357845 | 0.0114194256502141 | -0.00914962148014678 | 0.0035845689397845 |
checkresiduals
to inspect and compare various measures of distancecheckresiduals(BICVAR$varresult$diff.log.gdp..) #GDP Growth forecast residual diagnostic
##
## Breusch-Godfrey test for serial correlation of order up to 10
##
## data: Residuals
## LM test = 36.01, df = 10, p-value = 8.386e-05