Statistical Learning: Review

Empirical Risk Minimization

When is uniform error small?

Distribution conditions
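For reference, the two distribution conditions relied on below are (covariance) stationarity and weak dependence. One common way to state them:

$$
E[y_t]=\mu \ \text{ and } \ \mathrm{Cov}(y_t,y_{t+h})=\gamma(h) \ \text{ for all } t \ \ (\text{stationarity}), \qquad \gamma(h)\to 0 \ \text{ as } h\to\infty \ \ (\text{weak dependence}).
$$

The simulations that follow illustrate series that satisfy these conditions and series that fail them in different ways.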

Nonstationary Series: Simulations

library(knitr) #Tables
library(fpp2) #Forecast commands
library(kableExtra) #More tables
library(tidyverse) #Data manipulation
library(gridExtra) #Graph Display


tl<-240 #Length of simulated series
set.seed(55) #Initialize random number generator for reproducibility
e1<-ts(rnorm(tl)) #standard normal shocks
e2<-ts(rnorm(tl)) #standard normal shocks
e3<-ts(rnorm(tl)) #standard normal shocks
e4<-ts(rnorm(tl)) #standard normal shocks
rho<-0.7 #AR1 parameter
#AR2 parameters
b0<-3 
b1<-0.4
b2<-0.3
ss<-0.98 #Large AR(1) parameter, close to the unit root
yAR1<-numeric(tl)
yAR1[1]<-(1/(1-rho))*rnorm(1) #Initialize at random draw
yRW<-numeric(tl)
yslowAR<-numeric(tl)

for(s in 1:(tl-1)){
  #Stationary AR(1)
  yAR1[s+1]<-rho*yAR1[s]+e1[s]
  #Stationary but close to nonstationary AR(1)
  yslowAR[s+1]<-ss*yslowAR[s]+e4[s]
  #Random walk 
  yRW[s+1]<-yRW[s]+e3[s]  
}

yAR2<-numeric(tl)
yAR2[1]<-3/0.3 #Initialize at unconditional mean b0/(1-b1-b2)=10
yAR2[2]<-3/0.3 #Initialize at unconditional mean

for(s in 1:(tl-2)){
  #Stationary AR(2)
  yAR2[s+2]=b0+b1*yAR2[s+1]+b2*yAR2[s]+e2[s]
}
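The initialization value 3/0.3 is the unconditional mean of this stationary AR(2): taking expectations on both sides of the recursion and solving gives

$$
\mu = b_0 + b_1\mu + b_2\mu \quad\Rightarrow\quad \mu=\frac{b_0}{1-b_1-b_2}=\frac{3}{1-0.4-0.3}=10 .
$$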

trend<-0.05*seq(1,tl) #Increasing trend
season<-cos((2*pi/12)*seq(1,tl)) #Seasonal pattern
#Jump at particular time
breakseries<-ts(numeric(tl),start=c(1990,1),frequency=12)
for(s in 1:(tl)){
  if(s>180){
    breakseries[s]<-6
  }
}


#Represent as time series
yAR1<-ts(yAR1, start=c(1990,1),frequency=12)
yRW<-ts(yRW, start=c(1990,1),frequency=12)
yAR2<-ts(yAR2,start=c(1990,1),frequency=12)
yslowAR<-ts(yslowAR,start=c(1990,1),frequency=12)

#Construct nonstationary series
ytrend<-yAR2+trend
yseason<-yAR1+3*season
ybreak<-yAR1+breakseries

tsgraphs<-list()

tsgraphs[[1]]<-autoplot(ytrend)+
  labs(y="Value",title="Series with Trend")
tsgraphs[[2]]<-autoplot(yseason)+
  labs(y="Value",title="Series with Seasonal Pattern")
tsgraphs[[3]]<-autoplot(ybreak)+
  labs(y="Value",title="Series with Structural Break")
tsgraphs[[4]]<-autoplot(yRW)+
  labs(y="Value",title="Random Walk",subtitle="Nonstationary because variance grows")

grid.arrange(grobs=tsgraphs,nrow=2,ncol=2) #Arrange in 2x2 grid
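The subtitle on the random walk panel can be verified directly: yRW starts at 0 and accumulates iid shocks, so

$$
y_t=\sum_{s=1}^{t-1}e_s \quad\Rightarrow\quad \mathrm{Var}(y_t)=(t-1)\,\sigma_e^2 ,
$$

meaning the variance grows linearly in t rather than staying constant, which rules out stationarity.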

Stationary Series: Simulations

autoplot(yAR1,series="AR(1), b=0.7")+autolayer(yAR2,series="AR(2)")+autolayer(yslowAR,series="AR(1), b=0.98")+
  labs(y="Value",title="Stationary Series")+
  guides(colour=guide_legend(title="Series"))

Evaluating Stationarity and Weak Dependence

ACFs of Example Series
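For reference, ggAcf plots the sample autocorrelation function, the estimated correlation between the series and its h-th lag (computed with the usual 1/T divisor used by R's acf):

$$
\hat{\rho}(h)=\frac{\hat{\gamma}(h)}{\hat{\gamma}(0)}, \qquad \hat{\gamma}(h)=\frac{1}{T}\sum_{t=1}^{T-h}(y_t-\bar{y})(y_{t+h}-\bar{y}) .
$$

For a stationary, weakly dependent series these correlations should die out as h grows; the panels below show how different failures of those conditions appear in the ACF.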

ACFplots<-list()
ACFplots[[1]]<-ggAcf(yAR1)+
  labs(title="Stationary AR(1)",subtitle="Exponential decay")
ACFplots[[2]]<-ggAcf(yAR2)+
  labs(title="Stationary AR(2)",subtitle="Exponential decay")
ACFplots[[3]]<-ggAcf(yslowAR)+
  labs(title="Barely Stationary AR(1)",subtitle="Slow but eventual decay")
ACFplots[[4]]<-ggAcf(yRW)+
  labs(title="Random Walk",subtitle="Linear decay")
ACFplots[[5]]<-ggAcf(ybreak)+
  labs(title="Series with Break",subtitle="Linear decay")
ACFplots[[6]]<-ggAcf(yseason)+
  labs(title="Deterministic Seasonal Series",subtitle="Spikes do not decay")
ACFplots[[7]]<-ggAcf(ytrend)+
  labs(title="Trending Series",subtitle="Linear Decay")
grid.arrange(grobs=ACFplots,nrow=4,ncol=2)

ACF of US Dollar Exchange Rates in the 1970s

library(fpp2)
library(fredr) #Access FRED, the Federal Reserve Economic Data
#Access the data via the API from R, rather than downloading it manually from the website
#You need to request a key: you can do so at https://research.stlouisfed.org/docs/api/api_key.html
fredr_set_key("8782f247febb41f291821950cf9118b6") #Key I obtained for this class
#Get 1970s data
#Let's get the (main) series for the Yen-Dollar Exchange rate
EXJPUS<-fredr(series_id = "EXJPUS",observation_end=as.Date("1982-01-04")) 
#You can also download this series at https://fred.stlouisfed.org/series/DEXJPUS
#Let's get the (main) series for the Dollar-Pound Exchange rate
EXUSUK<-fredr(series_id = "EXUSUK",observation_end=as.Date("1982-01-04"))
#Let's get the (main) series for the German Deutschmark-Dollar Exchange rate
EXGEUS<-fredr(series_id = "EXGEUS",observation_end=as.Date("1982-01-04"))
#Convert to time series and normalize so the 1971-1 value is 1
#EXUSUK is quoted as dollars per pound, so it is inverted to put all three series in units of foreign currency per dollar
ukus<-ts(EXUSUK$value[1]/EXUSUK$value,start=c(1971,1),frequency=12)
jpus<-ts(EXJPUS$value/EXJPUS$value[1],start=c(1971,1),frequency=12)
geus<-ts(EXGEUS$value/EXGEUS$value[1],start=c(1971,1),frequency=12)

currACFplots<-list()
currACFplots[[1]]<-ggAcf(jpus)+
  labs(title="Autocorrelation Function of Dollar-Yen Exchange Rate",subtitle = "Linear rate of decline is characteristic of nonstationary series")
currACFplots[[2]]<-ggAcf(ukus)+
  labs(title="Autocorrelation Function of Dollar-Pound Exchange Rate",subtitle = "Linear rate of decline is characteristic of nonstationary series")
currACFplots[[3]]<-ggAcf(geus)+
  labs(title="Autocorrelation Function of Dollar-Deutschmark Exchange Rate",subtitle = "Linear rate of decline is characteristic of nonstationary series")
grid.arrange(grobs=currACFplots)
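As a quick illustrative follow-up (a sketch, reusing the jpus series constructed above): if an exchange rate behaves like a random walk, its month-over-month changes should be close to white noise, so the ACF of the differenced series should show little remaining structure.

#Illustrative diagnostic: ACF of the first differences of the normalized yen-dollar rate
#Differences close to white noise would be consistent with a random walk in levels
ggAcf(diff(jpus))+
  labs(title="ACF of First Differences, Dollar-Yen Exchange Rate",
       subtitle="Near-zero autocorrelations would be consistent with a random walk in levels")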

Conditions on Hypothesis Class

Controlling Uniform Error: Theory

Controlling Generalization Error: Practical Advice

What happens if these conditions fail?

Simulations Showing Overfitting as Function of Complexity

Simulation Results

#Tlist<-c(50,100,300,500,1000)
Tlist<-c(80) #Sample size(s) of the simulated series
plist<-c(1,2,3,5,8,12,18,25,30,35,40,45) #Autoregressive orders (model complexity) to fit
nsims<-500 #Number of simulated datasets

generalizationerror<-matrix(nrow=length(Tlist),ncol=length(plist)) #Average generalization error by sample size and order

for(T in 1:length(Tlist)){
  empiricalrisk<-matrix(nrow=nsims,ncol=length(plist))
  fcast<-matrix(nrow=nsims,ncol=length(plist))
  for(n in 1:nsims){
    datasim<-ts(rnorm(Tlist[T])) #True DGP is iid N(0,1), so the optimal forecast is 0
    
    for(p in 1:length(plist)){
      arfc<-ar(datasim,aic = FALSE,order.max = plist[p]) #Fit AR(p) with the order fixed at plist[p]
      fcast[n,p]<-forecast(arfc,1)$mean #One-step-ahead point forecast
      empiricalrisk[n,p]<-mean((arfc$resid)^2,na.rm =TRUE) #In-sample (empirical) mean squared error
    }
  }
  approxrisk<-1+colMeans(fcast^2) 
      #Population risk is average over data sets of conditional risk given data
      #Conditional Risk given data is just mean squared error over y of fixed forecast yhat 
      #E_p(y-yhat)^2=1+yhat^2 using fact that true y is iid N(0,1)
  averageemprisk<-colMeans(empiricalrisk)
  generalizationerror[T,]<-approxrisk-averageemprisk
}


#Per-simulation generalization error: each order's approximate population risk minus its empirical risks
#(broadcast approxrisk across rows so each column is matched to its own model order)
generrordist<-matrix(approxrisk,nrow=nsims,ncol=length(plist),byrow=TRUE)-empiricalrisk
generrorq90<-numeric(length(plist))

for(p in 1:length(plist)){
       generrorq90[p]<-quantile(generrordist[,p],0.9) #90% quantile of generalization error for model p
}

oraclerisk<-numeric(length(plist))+1 #Oracle risk=1 for all models, because true data iid N(0,1), so optimal forecast is 0, with MSE 1

#Collect into dataset format for plotting
generalizationgap<-as.vector(generalizationerror[1,])
errormeasures<-data.frame(plist,approxrisk,averageemprisk,generalizationgap,generrorq90,oraclerisk)
errortable<-errormeasures %>%
  gather(approxrisk,averageemprisk,generalizationgap,generrorq90,oraclerisk,key="RiskMeasure",value="Risk")

#Plot Results
ggplot(errortable,aes(x=plist,y=Risk,color=RiskMeasure))+geom_point(size=2)+
  scale_color_discrete(labels =
        c("(Approximate) Population Risk", "Average Empirical Risk",
        "Average Generalization Error","90% Quantile of Generalization Error", "Oracle Risk")) +
  labs(title="Empirical vs Population Risk for AR(d) Models",
       subtitle="T=80, True Data IID N(0,1), 500 draws of dataset simulated",
       x="Order of Autoregressive Process",
       y="Average Square Loss",
       color="Risk Measure")

Interpretation

Generalization Error Control: Alternatives

Validation in practice

Meese and Rogoff, Revisited

What does this tell us?

Interpretation of Results

Next class

References