Outline

Uncertainty

Example: risk-sensitive decisions

Distribution Prediction

Direct Approach to Distribution Prediction

Confidence Intervals by Quantile Regression

CDF Forecasting by Distribution Regression

Cautions for Direct Forecasts

Extremal Forecasts and Catastrophic Risk

Handling Extremal Forecasts

Example: S&P500 Distribution Forecasts

library(fpp2) #Forecasting
library(quantreg) #Quantile Regression
library(fredr) #Access FRED Data
library(zoo) #Irregular time series tools
library(knitr) #Tables
library(kableExtra) #Fancy Tables

fredr_set_key("8782f247febb41f291821950cf9118b6") #Key I obtained for this class

#Warning: Available FRED S&P 500 data covers a rolling 10-year window
# If you rerun this code in the future using FRED, it WILL fail, because the earliest available data point moves forward
# This appears to be a licensing restriction on the index data
# Use the saved data frame instead of the online source to replicate results

## Obtain and transform SP500 Data
#Original fredr approach
#SPreturns<-fredr(series_id = "SP500",units="pch",observation_end=as.Date("2021-3-15")) #Percent Daily Change in S&P500 Index

#Saved File Obtained From FRED on 3/15/2021
load("Data/SPreturns.RData")

#Format as "zoo" object: irregularly spaced time series
#Spacing not regular because SP500 traded only on non-holiday weekdays
spz<-zoo(x=SPreturns$value,order.by=SPreturns$date)  

autoplot(spz)+
  ggtitle("S&P 500 Daily Percent Returns Since 2011")+
  xlab("Date")+ylab("Percent Return")

Quantile and Distribution Regression Results

#Restrict to shared dates
sdate<-"2011-03-30" #Start date of shared sample
edate<-"2021-03-15" #End date of sample
spr<-window(spz,start=sdate,end=edate) 

#Construct lagged series and nonlinear transforms
spl1<-window(stats::lag(spz,-1),start=sdate,end=edate)
spl2<-window(stats::lag(spz,-2),start=sdate,end=edate)
spl3<-window(stats::lag(spz,-3),start=sdate,end=edate)
spl4<-window(stats::lag(spz,-4),start=sdate,end=edate)
spl5<-window(stats::lag(spz,-5),start=sdate,end=edate)
spl6<-window(stats::lag(spz,-6),start=sdate,end=edate)
spl7<-window(stats::lag(spz,-7),start=sdate,end=edate)
spl8<-window(stats::lag(spz,-8),start=sdate,end=edate)
spl9<-window(stats::lag(spz,-9),start=sdate,end=edate)
spl10<-window(stats::lag(spz,-10),start=sdate,end=edate)
qspl1<-window(stats::lag(spz,-1)^2,start=sdate,end=edate)
qspl2<-window(stats::lag(spz,-2)^2,start=sdate,end=edate)
qspl3<-window(stats::lag(spz,-3)^2,start=sdate,end=edate)
qspl4<-window(stats::lag(spz,-4)^2,start=sdate,end=edate)
qspl5<-window(stats::lag(spz,-5)^2,start=sdate,end=edate)
qspl6<-window(stats::lag(spz,-6)^2,start=sdate,end=edate)
qspl7<-window(stats::lag(spz,-7)^2,start=sdate,end=edate)
qspl8<-window(stats::lag(spz,-8)^2,start=sdate,end=edate)
qspl9<-window(stats::lag(spz,-9)^2,start=sdate,end=edate)
qspl10<-window(stats::lag(spz,-10)^2,start=sdate,end=edate)
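
The twenty lag constructions above can also be generated programmatically; a minimal sketch that produces the same spl1-spl10 and qspl1-qspl10 objects:

#Compact equivalent of the block above: build each lag and its square in a loop
for (k in 1:10) {
  assign(paste0("spl",k), window(stats::lag(spz,-k),start=sdate,end=edate))
  assign(paste0("qspl",k), window(stats::lag(spz,-k)^2,start=sdate,end=edate))
}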

#Concatenate into data frame
SPInfo<-data.frame(spr,spl1,spl2,spl3,spl4,spl5,spl6,spl7,spl8,spl9,spl10,
                   qspl1,qspl2,qspl3,qspl4,qspl5,qspl6,qspl7,qspl8,qspl9,qspl10)

#Create regression formula containing all terms
lnam <- paste0("spl", 1:10)
qlnam <- paste0("qspl", 1:10)
fmla <- as.formula(paste("spr ~", paste(c(lnam, qlnam), collapse="+")))

spquantile<-rq(fmla,tau=c(0.01,0.05,0.1,0.25,0.5,0.75,0.9,0.95,0.99),data=SPInfo) #One quantile regression fit per tau: coef(spquantile) has a column per quantile

today<-length(spr) #Last observation

todayslags<-data.frame(spr[today],spl1[today],spl2[today],spl3[today],spl4[today],spl5[today],spl6[today],spl7[today],spl8[today],spl9[today],
                       spr[today]^2,qspl1[today],qspl2[today],qspl3[today],qspl4[today],qspl5[today],qspl6[today],qspl7[today],qspl8[today],qspl9[today])
#Rename so today's values line up with tomorrow's lag structure: today's return becomes spl1, yesterday's becomes spl2, etc.
names(todayslags)<-c("spl1","spl2","spl3","spl4","spl5","spl6","spl7","spl8","spl9","spl10",
                     "qspl1","qspl2","qspl3","qspl4","qspl5","qspl6","qspl7","qspl8","qspl9","qspl10")

#If any days are missing in recent sample, eg due to holidays, replace NAs with 0s: eg
#todayslags$spl1<-0
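
A generic version of that fix, replacing every missing lag at once (a sketch; only needed when recent trading days are missing):

#Replace any NA lag values in the prediction row with 0
#todayslags[is.na(todayslags)]<-0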


qpredictions<-predict(spquantile,newdata=todayslags) #Matrix of quantile forecasts: one column per tau
rownames(qpredictions)<-c("2021-03-16") #Forecasts made using data on the 15th, for date of the 16th
## Distribution Regression

library(dplyr) #Manipulate data frame

#Thresholds at which the conditional CDF is estimated: -2,-1,-0.5,-0.2,0,0.2,0.5,1,2

#Adjust formula: binary outcome spr1 on the same lag regressors
fmla <- as.formula(paste("spr1 ~", paste(c(lnam, qlnam), collapse="+")))

SPInfo<-mutate(SPInfo,spr1=ifelse(spr<=-2,1,0))
spdr1<-glm(fmla,family=binomial(link="logit"), data=SPInfo)
#Predictions should be of type "response" to get probabilities
pred1<-predict(spdr1,newdata=todayslags,type="response")

SPInfo<-mutate(SPInfo,spr1=ifelse(spr<=-1,1,0))
spdr2<-glm(fmla,family=binomial(link="logit"), data=SPInfo)
pred2<-predict(spdr2,newdata=todayslags,type="response")

SPInfo<-mutate(SPInfo,spr1=ifelse(spr<=-0.5,1,0))
spdr3<-glm(fmla,family=binomial(link="logit"), data=SPInfo)
pred3<-predict(spdr3,newdata=todayslags,type="response")

SPInfo<-mutate(SPInfo,spr1=ifelse(spr<=-0.2,1,0))
spdr4<-glm(fmla,family=binomial(link="logit"), data=SPInfo)
pred4<-predict(spdr4,newdata=todayslags,type="response")

SPInfo<-mutate(SPInfo,spr1=ifelse(spr<=0,1,0))
spdr5<-glm(fmla,family=binomial(link="logit"), data=SPInfo)
pred5<-predict(spdr5,newdata=todayslags,type="response")

SPInfo<-mutate(SPInfo,spr1=ifelse(spr<=0.2,1,0))
spdr6<-glm(fmla,family=binomial(link="logit"), data=SPInfo)
pred6<-predict(spdr6,newdata=todayslags,type="response")

SPInfo<-mutate(SPInfo,spr1=ifelse(spr<=0.5,1,0))
spdr7<-glm(fmla,family=binomial(link="logit"), data=SPInfo)
pred7<-predict(spdr7,newdata=todayslags,type="response")

SPInfo<-mutate(SPInfo,spr1=ifelse(spr<=1,1,0))
spdr8<-glm(fmla,family=binomial(link="logit"), data=SPInfo)
pred8<-predict(spdr8,newdata=todayslags,type="response")

SPInfo<-mutate(SPInfo,spr1=ifelse(spr<=2,1,0))
spdr9<-glm(fmla,family=binomial(link="logit"), data=SPInfo)
pred9<-predict(spdr9,newdata=todayslags,type="response")
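
The nine blocks above repeat one pattern; a compact sketch that yields the same probabilities (predvec2 here would match predvec constructed below):

#Loop version: one logit distribution regression per threshold
cutoffs<-c(-2,-1,-0.5,-0.2,0,0.2,0.5,1,2)
predvec2<-sapply(cutoffs,function(cc){
  SPInfo$spr1<-ifelse(SPInfo$spr<=cc,1,0) #Indicator: return at or below threshold
  fit<-glm(fmla,family=binomial(link="logit"),data=SPInfo)
  predict(fit,newdata=todayslags,type="response") #Conditional probability forecast
})
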
#Make forecasts into data frames and plot
predvec<-c(pred1,pred2,pred3,pred4,pred5,pred6,pred7,pred8,pred9)
spreturnvals<-c(-2,-1,-0.5,-0.2,0,0.2,0.5,1,2) 
dtype<-c("DistributionRegression","DistributionRegression","DistributionRegression","DistributionRegression",
        "DistributionRegression","DistributionRegression","DistributionRegression","DistributionRegression",
        "DistributionRegression") 


distfcst<-data.frame(spreturnvals,predvec,dtype)
colnames(distfcst)<-c("Returns","Probabilities","Type")

#Make data.frame of quantile regression forecasts
tauvec<-c(0.01,0.05,0.1,0.25,0.5,0.75,0.9,0.95,0.99)
rtype<-c("QuantileRegression","QuantileRegression","QuantileRegression","QuantileRegression",
         "QuantileRegression","QuantileRegression","QuantileRegression","QuantileRegression",
         "QuantileRegression")
qregfcst<-data.frame(t(as.matrix(qpredictions)),tauvec,rtype)
colnames(qregfcst)<-c("Returns","Probabilities","Type")

#Merge quantile and distribution forecasts for plotting
fcsts<-full_join(distfcst,qregfcst)

#Plot Both forecasts on same plot
ggplot(fcsts,aes(x=Returns,y=Probabilities,color=Type))+geom_point()+geom_line()+
  ggtitle("Distribution and Quantile Regression Forecasts of S&P500 Returns for 3-16-2021",
          subtitle="Quantile regression fixes Y values and estimates X, Distribution regression the reverse")+
  xlab("Percent Return")+ylab("Conditional Cumulative Probability")

Forecast Error Based Approach

Prediction Intervals from Forecast Error

Normal Residuals Case
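
A minimal sketch of the normal-residuals interval construction (yhat and res are hypothetical placeholders for a point forecast and in-sample residuals; 1.96 is the two-sided 95% normal quantile):

#95% prediction interval under normal, homoskedastic residuals
interval95<-function(yhat,res){yhat+c(-1,1)*1.96*sd(res,na.rm=TRUE)}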

Non-independent residuals

Cautions for Indirect Forecasts

Application: GDP Growth Forecasting, Again

library(vars) #Run Vector Autoregression
library(tseries) #Perform tests, including Jarque Bera Test for Normality

#US GDP components 
GDP<-fredr(series_id = "GDP",
           observation_start = as.Date("1947-01-01"),
           observation_end=as.Date("2021-03-15"), 
           vintage_dates = as.Date("2021-03-15")) #Gross Domestic Product

#US GDP components from NIPA tables (cf http://www.bea.gov/national/pdf/nipaguid.pdf)
PCEC<-fredr(series_id = "PCEC",
            observation_start = as.Date("1947-01-01"),
           observation_end=as.Date("2021-03-15"), 
           vintage_dates = as.Date("2021-03-15")) #Personal consumption expenditures
FPI<-fredr(series_id = "FPI",
           observation_start = as.Date("1947-01-01"),
           observation_end=as.Date("2021-03-15"), 
           vintage_dates = as.Date("2021-03-15")) #Fixed Private Investment
CBI<-fredr(series_id = "CBI",
           observation_start = as.Date("1947-01-01"),
           observation_end=as.Date("2021-03-15"), 
           vintage_dates = as.Date("2021-03-15")) #Change in Private Inventories
NETEXP<-fredr(series_id = "NETEXP",
              observation_start = as.Date("1947-01-01"),
           observation_end=as.Date("2021-03-15"), 
           vintage_dates = as.Date("2021-03-15")) #Net Exports of Goods and Services
GCE<-fredr(series_id = "GCE",
           observation_start = as.Date("1947-01-01"),
           observation_end=as.Date("2021-03-15"), 
           vintage_dates = as.Date("2021-03-15")) #Government Consumption Expenditures and Gross Investment

#Format the series as quarterly time series objects, starting at the first date
gdp<-ts(GDP$value,frequency = 4,start=c(1947,1),names="Gross Domestic Product") 
pcec<-ts(PCEC$value,frequency = 4,start=c(1947,1),names="Consumption")
fpi<-ts(FPI$value,frequency = 4,start=c(1947,1),names="Fixed Investment")
cbi<-ts(CBI$value,frequency = 4,start=c(1947,1),names="Inventory Growth")
invest<-fpi+cbi #Private Investment
netexp<-ts(NETEXP$value,frequency = 4,start=c(1947,1),names="Net Exports")
gce<-ts(GCE$value,frequency = 4,start=c(1947,1),names="Government Spending")

#Convert to log differences (approximate growth rates) so the series are closer to stationary, and collect in a frame
NIPAdata<-ts(data.frame(diff(log(gdp)),diff(log(pcec)),diff(log(invest)),diff(log(gce))),frequency = 4,start=c(1947,2))

# Construct lags

l1NIPA<-window(stats::lag(NIPAdata,-1),start=c(1949,2),end=c(2020,4))
l2NIPA<-window(stats::lag(NIPAdata,-2),start=c(1949,2),end=c(2020,4))
l3NIPA<-window(stats::lag(NIPAdata,-3),start=c(1949,2),end=c(2020,4))
l4NIPA<-window(stats::lag(NIPAdata,-4),start=c(1949,2),end=c(2020,4))
l5NIPA<-window(stats::lag(NIPAdata,-5),start=c(1949,2),end=c(2020,4))
l6NIPA<-window(stats::lag(NIPAdata,-6),start=c(1949,2),end=c(2020,4))
l7NIPA<-window(stats::lag(NIPAdata,-7),start=c(1949,2),end=c(2020,4))
l8NIPA<-window(stats::lag(NIPAdata,-8),start=c(1949,2),end=c(2020,4))

#convert to matrices
xvarsdf<-data.frame(l1NIPA,l2NIPA,l3NIPA,l4NIPA,l5NIPA,l6NIPA,l7NIPA,l8NIPA)
xvars<-as.matrix(xvarsdf)
NIPAwindow<-matrix(window(NIPAdata,start=c(1949,2),end=c(2020,4)),length(l1NIPA[,1]),4)


#Calculate information criteria
IC<-VARselect(NIPAdata,lag.max=8)

# Fit optimal VARs by AIC and BIC
AICVAR<-VAR(NIPAdata,p=IC$selection[1]) #IC$selection orders criteria as AIC(n), HQ(n), SC(n), FPE(n)
BICVAR<-VAR(NIPAdata,p=IC$selection[3]) #SC(n) is the BIC

#Construct forecasts up to 4 quarters ahead
AICpred<-forecast(AICVAR,h=4)$forecast
BICpred<-forecast(BICVAR,h=4)$forecast
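
Each element of these forecast lists is a standard forecast object, so the normal-based interval bounds plotted below can also be read off directly; a sketch for the GDP growth equation:

#80% and 95% interval bounds for GDP growth from the AIC-selected VAR
gdpbands<-cbind(AICpred$diff.log.gdp.$lower,AICpred$diff.log.gdp.$upper) #Columns: lower 80%, lower 95%, upper 80%, upper 95%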


GDPline<-window(ts(NIPAdata[,1],frequency = 4,start=c(1947,2),names="Gross Domestic Product"),start=c(2009,2))
autoplot(GDPline,showgap=FALSE)+
  autolayer(BICpred$diff.log.gdp.,series="VAR(1)",alpha=0.4)+
  autolayer(AICpred$diff.log.gdp.,series="VAR(5)",alpha=0.4)+
  ggtitle("VAR(1) and VAR(5) GDP Forecasts with 80 and 95% Normal-Distribution Based Intervals")+
  xlab("Date")+ylab("log GDP growth")

Residual Diagnostics for VAR(1) Forecast

checkresiduals(BICVAR$varresult$diff.log.gdp..) #GDP Growth forecast residual diagnostic

## 
##  Breusch-Godfrey test for serial correlation of order up to 10
## 
## data:  Residuals
## LM test = 26.209, df = 10, p-value = 0.003469

Residual Diagnostics for VAR(5) Forecast

checkresiduals(AICVAR$varresult$diff.log.gdp..) #GDP Growth forecast residual diagnostic

## 
##  Breusch-Godfrey test for serial correlation of order up to 24
## 
## data:  Residuals
## LM test = 24.697, df = 24, p-value = 0.4224

Interpretation

jbt<-jarque.bera.test(AICVAR$varresult$diff.log.gdp..$residuals) #Test normality
archt<-arch.test(AICVAR) #Test heteroskedasticity
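
A sketch of summarizing the two tests by their p-values (assuming the component names used by tseries::jarque.bera.test and vars::arch.test, whose multivariate result sits in $arch.mul):

#P-values: Jarque-Bera normality test and multivariate ARCH heteroskedasticity test
c(JarqueBera=jbt$p.value,ARCH=archt$arch.mul$p.value)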

Exercise: Risk management

Conclusions