Regularization

Risk Minimization

Structural Risk Minimization

Cross Validation

Cross validation over multiple hypothesis classes

Evaluating Cross Validation

Information Criteria

Evaluation and implementation

Parameter size based penalties

Evaluation and Implementation

What kinds of model classes?

Function based penalties

Smoothing Splines: GDP growth from Lagged GDP growth

library(fpp2) #Forecasting
library(glmnet) #Lasso and Ridge
library(vars) #Vector Autoregressions
library(fredr) #Access FRED Data

##Obtain and transform NIPA Data (cf Lecture 08)

fredr_set_key("8782f247febb41f291821950cf9118b6") #Key I obtained for this class

#US GDP components 
GDP<-fredr(series_id = "GDP",
           observation_start = as.Date("1947-01-01")) #Gross Domestic Product

#US GDP components from NIPA tables (cf http://www.bea.gov/national/pdf/nipaguid.pdf)
PCEC<-fredr(series_id = "PCEC",
            observation_start = as.Date("1947-01-01")) #Personal consumption expenditures
FPI<-fredr(series_id = "FPI",
           observation_start = as.Date("1947-01-01")) #Fixed Private Investment
CBI<-fredr(series_id = "CBI",
           observation_start = as.Date("1947-01-01")) #Change in Private Inventories
NETEXP<-fredr(series_id = "NETEXP",
              observation_start = as.Date("1947-01-01")) #Net Exports of Goods and Services
GCE<-fredr(series_id = "GCE",
           observation_start = as.Date("1947-01-01")) #Government Consumption Expenditures and Gross Investment

#Format the series as quarterly time series objects, starting at the first date
gdp<-ts(GDP$value,frequency = 4,start=c(1947,1),names="Gross Domestic Product") 
pcec<-ts(PCEC$value,frequency = 4,start=c(1947,1),names="Consumption")
fpi<-ts(FPI$value,frequency = 4,start=c(1947,1),names="Fixed Investment")
cbi<-ts(CBI$value,frequency = 4,start=c(1947,1),names="Inventory Growth")
invest<-fpi+cbi #Private Investment
netexp<-ts(NETEXP$value,frequency = 4,start=c(1947,1),names="Net Exports")
gce<-ts(GCE$value,frequency = 4,start=c(1947,1),names="Government Spending")

#Convert to log differences to ensure stationarity and collect in frame
NIPAdata<-ts(data.frame(diff(log(gdp)),diff(log(pcec)),diff(log(invest)),diff(log(gce))),frequency = 4,start=c(1947,2))

# Construct lags

l1NIPA<-window(stats::lag(NIPAdata,-1),start=c(1949,2),end=c(2018,3))
l2NIPA<-window(stats::lag(NIPAdata,-2),start=c(1949,2),end=c(2018,3))
l3NIPA<-window(stats::lag(NIPAdata,-3),start=c(1949,2),end=c(2018,3))
l4NIPA<-window(stats::lag(NIPAdata,-4),start=c(1949,2),end=c(2018,3))
l5NIPA<-window(stats::lag(NIPAdata,-5),start=c(1949,2),end=c(2018,3))
l6NIPA<-window(stats::lag(NIPAdata,-6),start=c(1949,2),end=c(2018,3))
l7NIPA<-window(stats::lag(NIPAdata,-7),start=c(1949,2),end=c(2018,3))
l8NIPA<-window(stats::lag(NIPAdata,-8),start=c(1949,2),end=c(2018,3))

#convert to matrices
xvarsdf<-data.frame(l1NIPA,l2NIPA,l3NIPA,l4NIPA,l5NIPA,l6NIPA,l7NIPA,l8NIPA)
xvars<-as.matrix(xvarsdf)
NIPAwindow<-matrix(window(NIPAdata,start=c(1949,2),end=c(2018,3)),length(l1NIPA[,1]),4)

#Spline models, yeah?
library(gam) #loads smoothing spline package
lagDlogGDP<-xvars[,1]
DlogGDP<-NIPAwindow[,1]
#Smoothing spline Regression of GDP on lagged GDP
# Default behavior fits penalty parameter by generalized Cross Validation
# Predict GDP growth using smoothing spline in lagged GDP growth
gdp.spl<-smooth.spline(lagDlogGDP,DlogGDP)
splinepredictions<-predict(gdp.spl)
#Build Plot
gdpdata<-data.frame(xvars[,1],NIPAwindow[,1],splinepredictions$x,splinepredictions$y)
colnames(gdpdata)<-c("LDlGDP","DlGDP","xspline","yspline")

ggplot(gdpdata,aes(x=LDlGDP,y=DlGDP))+geom_point()+
  geom_line(aes(x=xspline,y=yspline),color="red",size=1)+
  ggtitle("Smoothing Spline Fit of This vs Last Quarter's GDP Growth",
          subtitle="Almost exactly linear due to large optimal penalty for nonlinearity")+
  xlab("Change in Log GDP, t-1")+ylab("Change in Log GDP, t")

# An alternate approach which uses "gam" command to fit spline
# Advantage is that it's built in to ggplot
# Disadvantage is that it uses only default options, so not customizable
# In this case, results are exactly the same

# ggplot(gdpdata,aes(x=LDlGDP,y=DlGDP))+geom_point()+
#   ggtitle("Spline and OLS Fit of This vs  Last Quarter's GDP Growth",
#           subtitle="There are two lines which exactly overlap")+
#   xlab("Change in Log GDP, t-1")+ylab("Change in Log GDP, t")+
#   geom_smooth(method=gam,se=FALSE,color="blue")+
#   geom_smooth(method=lm,se=FALSE,color="red")

Exotic penalties

Penalties versus Bounds

Regularization for GDP Forecasting

library(dplyr) #Data manipulation
library(knitr) #Use knitr to make tables
library(kableExtra) #Extra options for tables
library(tibble) #More data manipulation
#Calculate information criteria
IC<-VARselect(NIPAdata,lag.max=8)

# Fit Optimal VARs by AIC, BIC
AICVAR<-VAR(NIPAdata,p=IC$selection[1])
BICVAR<-VAR(NIPAdata,p=IC$selection[3])

#Construct 1 period ahead foecasts
AICpred<-forecast(AICVAR,h=1)$forecast
BICpred<-forecast(BICVAR,h=1)$forecast

#Extract point forecasts
AICfcsts<-select(data.frame(AICpred),ends_with("Forecast"))
BICfcsts<-select(data.frame(BICpred),ends_with("Forecast"))


#Fit LASSO VAR in all variables up to 8 lags
lassovar<-glmnet(xvars,NIPAwindow,family="mgaussian")

#Show range of lambda values from unpenalized to where all variables gone
#lassovar$lambda 

#Show coefficients for large value of lambda (heavy penalization, most coefs gone)
#coef(lassovar,s=0.02)

#Show coefficients for intermediate value of lambda (medium penalization, some coefs gone)
#coef(lassovar,s=0.008)

#Show coefficients for small value of lambda (little penalization, most coefs remain)
#coef(lassovar,s=0.001)

## Fit LASSO VAR with 10-fold cross-validation
lassovarCV<-cv.glmnet(xvars,NIPAwindow,family="mgaussian")

#Lambda giving minimum 10-fold Cross Validation Error
CVlambda<-lassovarCV$lambda.min

#Baroque construction to take last period and format it for predict command
#Somehow need 2 rows in prediction value or it throws a fit 
#Presumably because matrix coerced to vector and then it thinks it can't multiply anymore
newX<-t(as.matrix(data.frame(as.vector(t(NIPAwindow[270:277,])), as.vector(t(NIPAwindow[271:278,])))))

#Predict 2018Q3 series
cvlassopredictions<-window(ts(predict(lassovarCV,newx = newX)[,,1],start=c(2018,3),frequency=4),start=c(2018,4))

#Fit L2 Penalized VAR in all variables up to 8 lags
ridgevar<-glmnet(xvars,NIPAwindow,alpha=0,family="mgaussian")

#Show range of lambda values from unpenalized to where all variables gone
#ridgevar$lambda 

#Show coefficients for large value of lambda (heavy penalization, coefs shrink a lot)
#coef(ridgevar,s=10)

#Show coefficients for intermediate value of lambda (medium penalization, coefs shrink a little)
#coef(ridgevar,s=1)

#Show coefficients for small value of lambda (little penalization, coefs close to OLS values)
#coef(ridgevar,s=0.01)

## Fit Ridge VAR with 10-fold cross-validation
ridgevarCV<-cv.glmnet(xvars,NIPAwindow,alpha=0,family="mgaussian")

#Lambda giving minimum 10-fold Cross Validation Error
CVlambda2<-ridgevarCV$lambda.min

#Predict 2018Q3 series
cvridgepredictions<-window(ts(predict(ridgevarCV,newx = newX)[,,1],start=c(2018,3),frequency=4),start=c(2018,4))

# #Plot Data Series
# autoplot(window(NIPAdata,start=c(2012,4)))+
#   autolayer(cvlassopredictions)+
#   autolayer(cvridgepredictions)

#  autolayer(AICpred$diff.log.gdp..$mean)+
 # autolayer(BICpred$diff.log.gdp..$mean)
#Collect 2018Q4 Forecasts into a data frame
var1table<-data.frame(
  t(cvridgepredictions),
  t(cvlassopredictions),
  t(AICfcsts),
  t(BICfcsts)
)
rownames(var1table)<-c("GDP","Consumption","Investment","Government")
#Make Table of Estimated Coefficients
kable(var1table,
  col.names=c("Ridge","LASSO","AIC","BIC"),
  caption="2018Q4 Forecasts from Different Methods") %>%
  kable_styling(bootstrap_options = "striped", font_size = 16)
2018Q4 Forecasts from Different Methods
Ridge LASSO AIC BIC
GDP 0.0135764 0.0136399 0.0114672 0.0146535
Consumption 0.0145109 0.0150744 0.0114084 0.0137662
Investment 0.0141675 0.0096423 0.0147074 0.0200288
Government 0.0112001 0.0129239 0.0109967 0.0142961

Ridge Coefficients

#Make Coefficient Table
zzz<-data.frame(as.matrix(coef(ridgevarCV)$y1),
                as.matrix(coef(ridgevarCV)$y2),
                as.matrix(coef(ridgevarCV)$y3),
                as.matrix(coef(ridgevarCV)$y4))


rownames(zzz)<-rownames(data.frame(as.matrix(coef(ridgevarCV)$y1)))
zzz %>%
mutate_all(function(x) {
    cell_spec(x, bold = T,
              color = spec_color(x))
  }) %>%
  kable(escape=F,
  col.names=c("GDP","C","I","G"),
  caption="Coefficients in Ridge Regression") %>%
  kable_styling(bootstrap_options = "striped", font_size=10)
Coefficients in Ridge Regression
GDP C I G
0.00786403925303112 0.00788037586534336 0.0148332433942633 0.00238697200569118
0.0865957812778728 0.0551696965390487 0.244556969538606 0.0942090494910388
0.109959994411296 0.0140186300705327 0.652435086528036 0.0271147781051111
0.0123857804601417 0.0146716727038759 0.0263673253409791 0.00451694512000495
0.0106451182691282 -0.000384426841756296 -0.10326596491934 0.129021260675576
0.0621757521642605 0.0507324374153117 0.0591602476118126 0.0915652117483974
0.0685132890145487 0.0872907695940012 0.0201799922326903 0.0757765565334758
0.00980081641152812 0.00612229583374719 0.0264123256900968 0.00341895148384484
-0.00091364858455345 -0.0183671961947785 -0.0452636148290377 0.0728980049798622
0.00222284364167427 0.000153134673311346 -0.128645457778679 0.0690242604269709
0.0281940450636521 0.0281374237646931 -0.0171630376438789 0.0241099553055666
-0.000742106997763265 -0.00393726953375673 -0.0148823646684543 0.00991134660015443
-0.0117103950975583 0.00390704975726952 -0.124060760551806 0.042365378160212
0.00466436718790951 0.0198055138385727 -0.11583119132355 0.0696018152897233
0.0261427471133706 0.0118711044130767 0.0680154676403516 0.0163018495414982
-0.00167161870530863 0.00573285857652937 -0.0541920520539569 0.0168684912766791
-0.00858261369081641 -0.000107206981960806 -0.0348551138082085 0.0112101644913383
-0.0232727042683797 -0.00429662421540572 -0.167820490545611 0.0172672049947349
0.00391586808909369 0.0427292362405572 -0.200572063884623 0.0349239157634509
-0.00664742359679361 -0.0104016875491473 -0.0190333240075376 0.00595507032903712
0.00538793204213564 0.0128902984939667 0.02284614151604 -0.0114685479470082
0.00544131900827601 0.0144369960515328 -0.0303801828996868 -0.00363175835548809
0.0164253253291509 0.018465576565109 0.0345073433829858 -0.0126745355744008
-0.00190203067553394 5.72598991601035e-05 -0.0190653286699528 0.00404537508886109
0.0197841229410908 0.0160085909139529 0.0842755170708496 0.00830309472111045
0.0120147540294824 0.0243376461687585 -0.0165184864873954 0.00719547800775078
0.040296252839528 0.0596415983729323 0.02774714220003 0.0114532053990684
-0.0024274892654373 -0.0029465709365635 0.00107392698826947 -0.00148227405584982
0.0143235423939472 0.0095686832241577 0.0646786514144629 0.00189470282102794
0.00654277888835529 0.0260428281389034 -0.0835351764832077 0.00556255654741
0.00740736376961867 0.0193445802289786 -0.0299715588564189 0.0107470604622051
0.00244101702686358 0.00315971586949353 0.00301646433765767 -0.00377054987395623
-0.0048771079424408 -0.000351952561224234 -0.0574670100128757 0.010569599656017

LASSO Coefficients

#Make Coefficient Table
zzz<-data.frame(as.matrix(coef(lassovarCV)$y1),
                as.matrix(coef(lassovarCV)$y2),
                as.matrix(coef(lassovarCV)$y3),
                as.matrix(coef(lassovarCV)$y4))

rownames(zzz)<-rownames(data.frame(as.matrix(coef(lassovarCV)$y1)))
zzz %>%
mutate_all(function(x) {
    cell_spec(x, bold = T,
              color = if_else(x!=0,"red","black"))
  }) %>%
  kable(escape=F,
  col.names=c("GDP","C","I","G"),
  caption="Coefficients in LASSO Regression") %>%
  kable_styling(bootstrap_options = "striped", font_size=10)
Coefficients in LASSO Regression
GDP C I G
0.0100302004924729 0.0129798197145501 0.00168310308800569 0.00756634626175604
0 0 0 0
0.349959458943998 0.130476409727349 1.56554483539441 0.175646554336095
0 0 0 0
0.0189475717511366 0.00825380129907898 -0.200786258486333 0.211575231022215
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0.00317825905700754 0.00415960881665426 -0.0480514970684653 0.026735352282484
0 0 0 0
0 0 0 0
-0.000579291079324646 0.00107644280868163 -0.0116456790667297 0.00480000882876038
0 0 0 0
0 0 0 0
0.000140488939523953 0.00533714789277771 -0.0489610167268404 0.0198565317923048
0 0 0 0
-0.0248114736550768 0.00358810219897533 -0.246221267350789 0.0430723482194091
0.00499345322235313 0.0241926374897687 -0.102657716554927 0.0268686037123245
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0

AIC Coefficients

#Make Coefficient Table
zzz<-data.frame(AICVAR$varresult$diff.log.gdp..$coefficients,
                AICVAR$varresult$diff.log.pcec..$coefficients,
                AICVAR$varresult$diff.log.invest..$coefficients,
                AICVAR$varresult$diff.log.gce..$coefficients)

rownames(zzz)<-rownames(data.frame(AICVAR$varresult$diff.log.gdp..$coefficients))
zzz %>%
mutate_all(function(x) {
    cell_spec(x, bold = T,
              color = if_else(x!=0,"red","black"))
  }) %>%
  kable(escape=F,
  col.names=c("GDP","C","I","G"),
  caption="Coefficients in AIC Regression") %>%
  kable_styling(bootstrap_options = "striped", font_size=10)
Coefficients in AIC Regression
GDP C I G
0.0142423802331418 0.335379954233719 -1.66567424374834 -0.0458342811491882
0.412581978510042 -0.145870908410638 3.63202838343984 0.143896666113282
0.0274378660517431 -0.0156497514807466 0.287226644977188 0.042002491417178
0.0643892203157539 -0.0233881252807245 0.0174160286406101 0.472850970012959
0.229659888723582 0.269399415045273 -0.143262388462611 0.351609194937956
0.0702970032085799 0.173522014804767 -0.0975467195269845 0.0158414129010053
-0.0171415190305722 -0.0323539077927016 0.0475055185498714 -0.0412019143794856
-0.0420896176116723 -0.158539060486606 0.177660920656683 0.0443322148452024
-0.0530503159805621 -0.105453086053541 -0.417634348755004 0.0518474932711676
0.0417016846902634 0.159668297181366 -0.283642111390404 -0.128775449751194
-0.0209115301435752 -0.0200053835083251 -0.0549504273458671 0.030162245635792
-0.0244971810971676 0.0310810988172909 -0.186714858168813 0.072437242832677
0.194846774896595 -0.0456390218211937 1.69673325899881 0.291405949868535
0.0438158991226434 0.118599212590941 -0.318354688568522 -0.266886732092782
-0.0436234814162472 0.0124385525781957 -0.371750797774807 0.00287302373823534
-0.0939728362880406 0.0256517076994152 -0.641177916328012 -0.0349723904845601
-0.375654283804274 0.0714799076733018 -1.78345775966185 -0.427496739634625
0.214299611678092 0.0808915731483531 0.421133953050024 0.275045376137453
0.0179499285089324 -0.0456210321684576 0.139224678770525 0.055414493021536
0.115882613339792 0.0566722429832534 0.423603489217224 0.0137362064712486
0.00327979811668631 0.00411587418405196 0.00146212088776681 0.00125004595756258

BIC Coefficients

#Make Coefficient Table
zzz<-data.frame(BICVAR$varresult$diff.log.gdp..$coefficients,
                BICVAR$varresult$diff.log.pcec..$coefficients,
                BICVAR$varresult$diff.log.invest..$coefficients,
                BICVAR$varresult$diff.log.gce..$coefficients)

rownames(zzz)<-rownames(data.frame(BICVAR$varresult$diff.log.gdp..$coefficients))
zzz %>%
mutate_all(function(x) {
    cell_spec(x, bold = T,
              color = if_else(x!=0,"red","black"))
  }) %>%
  kable(escape=F,
  col.names=c("GDP","C","I","G"),
  caption="Coefficients in BIC Regression") %>%
  kable_styling(bootstrap_options = "striped", font_size=10)
Coefficients in BIC Regression
GDP C I G
0.121952811213594 0.443954168101468 -1.96535758395511 0.127651524807346
0.407141812161502 -0.1140622757727 3.58351024967781 0.00635501513565612
0.0354252888148711 -0.00443586493533627 0.360026691647868 0.0435240022800619
0.00925600061424106 -0.0503410167955548 -0.389919533571099 0.598248368556429
0.00650643901357845 0.0114194256502141 -0.00914962148014678 0.0035845689397845

Evaluating Autoregressive Forecasts

Application to GDP Series in BIC VAR Prediction

checkresiduals(BICVAR$varresult$diff.log.gdp..) #GDP Growth forecast residual diagnostic

## 
##  Breusch-Godfrey test for serial correlation of order up to 10
## 
## data:  Residuals
## LM test = 36.01, df = 10, p-value = 8.386e-05

Conclusions

References