Outline

Uncertainty

Example: risk-sensitive decisions

Distribution Prediction

Direct Approach to Distribution Prediction

Confidence Intervals by Quantile Regression

CDF Forecasting by Distribution Regression

Cautions for Direct Forecasts

Extremal Forecasts and Catastrophic Risk

Handling Extremal Forecasts

Example: S&P500 Distribution Forecasts

library(fpp2)       #Forecasting
library(quantreg)   #Quantile Regression
library(fredr)      #Access FRED Data
library(zoo)        #Irregular time series tools
library(knitr)      #Tables
library(kableExtra) #Fancy Tables

#Prefer an API key supplied via the environment (FRED_API_KEY); fall back to
#the class key so the script still runs unchanged. Avoid committing personal
#API keys directly into source files.
fredr_set_key(Sys.getenv("FRED_API_KEY", unset = "8782f247febb41f291821950cf9118b6")) #Key I obtained for this class

#Warning: Available FRED S&P Data is on rolling 10 Year Window
# if you rerun this code in the future using FRED, it WILL fail, because earliest data point moves forward
# This has to be some kind of business agreement thing
# Use saved data frame instead of online source to replicate results 

## Obtain and transform SP500 Data
#Original fredr approach
#SPreturns<-fredr(series_id = "SP500",units="pch",observation_end=as.Date("2019-2-20")) #Percent Daily Change in S&P500 Index

#Saved File Obtained From FRED on 3/30/2019
#Load from File: Change to your folder
load("~/Dropbox/Forecasting/Website Version/Forecasting/Data/SPreturns.RData")

#Represent the returns as an irregularly spaced "zoo" series indexed by date:
#spacing is uneven because the S&P500 trades only on non-holiday weekdays
spz <- zoo(x = SPreturns$value, order.by = SPreturns$date)

#Plot the full daily return series
autoplot(spz) +
  labs(
    title = "S&P 500 Daily Percent Returns Since 2009",
    x = "Date",
    y = "Percent Return"
  )

Quantile and Distribution Regression Results

spr <- window(spz, start = "2009-4-13", end = "2019-2-20") #Restrict to shared dates

#Build the lagged regressors programmatically instead of 20 copy-pasted lines:
#spl_k is the k-day-lagged return, qspl_k its square, all cut to shared dates
make_lag <- function(k, squared = FALSE) {
  lagged <- stats::lag(spz, -k)
  if (squared) {
    lagged <- lagged^2
  }
  window(lagged, start = "2009-4-13", end = "2019-2-20")
}
lin_lags  <- setNames(lapply(1:10, make_lag), paste0("spl", 1:10))
quad_lags <- setNames(lapply(1:10, make_lag, squared = TRUE), paste0("qspl", 1:10))

#Keep the individual spl1..spl10 / qspl1..qspl10 objects in scope, since the
#one-step-ahead forecast code below indexes them by name
list2env(c(lin_lags, quad_lags), envir = environment())

#Concatenate into data frame
SPInfo <- do.call(data.frame, c(list(spr = spr), lin_lags, quad_lags))

#Create regression formula containing all linear and quadratic lag terms
lnam  <- paste0("spl", 1:10)
qlnam <- paste0("qspl", 1:10)
fmla  <- as.formula(paste("spr ~", paste(c(lnam, qlnam), collapse = " + ")))

#Quantile regression at a grid of quantile levels: each tau gives a
#conditional quantile forecast of the next day's return
spquantile <- rq(fmla, tau = c(0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99), data = SPInfo)

today <- length(spr) #Index of the most recent observation (2019-2-20)

#Tomorrow's lag-k regressor equals today's lag-(k-1) value, and each quadratic
#regressor is the square of the matching level regressor, so we only need the
#ten most recent level values
level_vals <- vapply(
  c("spr", paste0("spl", 1:9)),
  function(nm) as.numeric(get(nm)[today]),
  numeric(1)
)
todayslags <- as.data.frame(t(c(level_vals, level_vals^2)))
names(todayslags) <- c(paste0("spl", 1:10), paste0("qspl", 1:10))

#Missing lags (market holidays, e.g. President's Day) come through as NA;
#zero out every NA rather than hard-coding which columns happen to be affected
todayslags[is.na(todayslags)] <- 0

qpredictions <- predict(spquantile, newdata = todayslags)
rownames(qpredictions) <- c("2019-2-21") #Forecasts made using data on 20th, for date of 21st

## Following is a failed attempt to convert output to a forecast object
## Issue seems to be with use of irregular time series
# spmedian<-zoo(qpredictions[4],"2019-2-21")
# spupper<-qpredictions[5:7]
# names(spupper)=c("50%","80%","90%")
# splower<-qpredictions[1:3]
# names(splower)=c("90%","80%","50%")
# output<-list(mean=spmedian,x=spr,upper=spupper,
#              lower=splower,level=c(50,80,90),method="Quantile Regression")
# qfcst<-structure(output,class='forecast')

# #Display Forecasts as a table
# kable(qpredictions,
#   caption="Quantile Regression Return Forecasts") %>%
#   kable_styling(bootstrap_options = "striped")



## Distribution Regression

library(dplyr) #Manipulate data frame

#Cutoff values at which to estimate the conditional CDF
spreturnvals <- c(-2, -1, -0.5, -0.2, 0, 0.2, 0.5, 1, 2)

#Adjust formula: same regressors, but the outcome is now the indicator spr1
fmla <- as.formula(paste("spr1 ~", paste(c(lnam, qlnam), collapse = " + ")))

#Distribution regression: for each cutoff c, fit a logit model of the
#indicator 1{spr <= c} on the lag terms, then predict the conditional
#probability P(return <= c) for tomorrow. Predictions must be of type
#"response" to get probabilities rather than log odds.
predvec <- vapply(spreturnvals, function(cutoff) {
  dat <- mutate(SPInfo, spr1 = ifelse(spr <= cutoff, 1, 0))
  fit <- glm(fmla, family = binomial(link = "logit"), data = dat)
  predict(fit, newdata = todayslags, type = "response")
}, numeric(1))

#Make data frame of distribution regression forecasts
distfcst <- data.frame(
  Returns = spreturnvals,
  Probabilities = predvec,
  Type = "DistributionRegression"
)

#Make data frame of quantile regression forecasts
tauvec <- c(0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99)
qregfcst <- data.frame(
  Returns = as.numeric(t(as.matrix(qpredictions))),
  Probabilities = tauvec,
  Type = "QuantileRegression"
)

#Merge quantile and distribution forecasts for plotting
fcsts <- full_join(distfcst, qregfcst)

#Plot both forecasts on the same plot
ggplot(fcsts, aes(x = Returns, y = Probabilities, color = Type)) +
  geom_point() + geom_line() +
  ggtitle("Distribution and Quantile Regression Forecasts of S&P500 Returns for 2-21-2019",
          subtitle="Quantile regression fixes Y values and estimates X, Distribution regression the reverse") +
  xlab("Percent Return") + ylab("Conditional Cumulative Probability")

Forecast Error Based Approach

Prediction Intervals from Forecast Error

Normal Residuals Case

Non-independent residuals

Cautions for Indirect Forecasts

Application: GDP Growth Forecasting, Again

library(vars) #Run Vector Autoregression
library(tseries) #Perform tests, including Jarque Bera Test for Normality

#Fetch a FRED series starting 1947Q1 and return it as a quarterly ts object
#US GDP components from NIPA tables (cf http://www.bea.gov/national/pdf/nipaguid.pdf)
fred_qts <- function(series_id) {
  raw <- fredr(series_id = series_id, observation_start = as.Date("1947-01-01"))
  ts(raw$value, frequency = 4, start = c(1947, 1))
}

gdp    <- fred_qts("GDP")    #Gross Domestic Product
pcec   <- fred_qts("PCEC")   #Personal consumption expenditures
fpi    <- fred_qts("FPI")    #Fixed Private Investment
cbi    <- fred_qts("CBI")    #Change in Private Inventories
netexp <- fred_qts("NETEXP") #Net Exports of Goods and Services
gce    <- fred_qts("GCE")    #Government Consumption Expenditures and Gross Investment

invest <- fpi + cbi #Private Investment = fixed investment + inventory change

#Convert to log differences to ensure stationarity and collect in a frame.
#Differencing drops the first quarter, so the panel starts in 1947Q2.
#Note: the deparsed expressions give the column names (e.g. diff.log.gdp.)
#that the forecasting code below relies on.
NIPAdata <- ts(data.frame(diff(log(gdp)), diff(log(pcec)), diff(log(invest)), diff(log(gce))),
               frequency = 4, start = c(1947, 2))

# Construct lags 1-8 of the NIPA growth series, all cut to a common window
# so they align with each other and with the contemporaneous data
nipa_lags <- setNames(
  lapply(1:8, function(k) window(stats::lag(NIPAdata, -k), start = c(1949, 2), end = c(2018, 3))),
  paste0("l", 1:8, "NIPA")
)
#Keep l1NIPA..l8NIPA available as individual objects
list2env(nipa_lags, envir = environment())

#convert to matrices
xvarsdf <- do.call(data.frame, unname(nipa_lags))
xvars <- as.matrix(xvarsdf)
NIPAwindow <- matrix(window(NIPAdata, start = c(1949, 2), end = c(2018, 3)), length(l1NIPA[, 1]), 4)


#Calculate information criteria over lag orders 1 through 8
IC <- VARselect(NIPAdata, lag.max = 8)

# Fit VARs at the orders chosen by AIC and BIC (the vars package labels BIC "SC")
pAIC <- IC$selection[1] #AIC(n)
pBIC <- IC$selection[3] #SC(n), i.e. BIC
AICVAR <- VAR(NIPAdata, p = pAIC)
BICVAR <- VAR(NIPAdata, p = pBIC)

#Construct forecasts up to 4 quarters ahead
AICpred <- forecast(AICVAR, h = 4)$forecast
BICpred <- forecast(BICVAR, h = 4)$forecast

#Plot recent GDP growth along with both forecasts. Series labels and the
#title are built from the selected lag orders rather than hard-coding
#"VAR(1)"/"VAR(5)", so they stay correct if the data (and hence the
#selected orders) change.
GDPline <- window(ts(NIPAdata[, 1], frequency = 4, start = c(1947, 2), names = "Gross Domestic Product"), start = c(2009, 2))
autoplot(GDPline, showgap = FALSE) +
  autolayer(BICpred$diff.log.gdp., series = paste0("VAR(", pBIC, ")"), alpha = 0.4) +
  autolayer(AICpred$diff.log.gdp., series = paste0("VAR(", pAIC, ")"), alpha = 0.4) +
  ggtitle(paste0("VAR(", pBIC, ") and VAR(", pAIC, ") GDP Forecasts with 80 and 95% Normal-Distribution Based Intervals")) +
  xlab("Date") + ylab("log GDP growth")

Residual Diagnostics for VAR(1) Forecast

#Residual diagnostics for the GDP-growth equation of the BIC-selected VAR:
#plots residuals, their ACF, and a histogram, and runs a Breusch-Godfrey
#test for residual serial correlation (output shown below)
checkresiduals(BICVAR$varresult$diff.log.gdp..) #GDP Growth forecast residual diagnostic

## 
##  Breusch-Godfrey test for serial correlation of order up to 10
## 
## data:  Residuals
## LM test = 36.01, df = 10, p-value = 8.386e-05

Residual Diagnostics for VAR(5) Forecast

#Same residual diagnostics, now for the GDP-growth equation of the
#AIC-selected VAR (output shown below)
checkresiduals(AICVAR$varresult$diff.log.gdp..) #GDP Growth forecast residual diagnostic

## 
##  Breusch-Godfrey test for serial correlation of order up to 24
## 
## data:  Residuals
## LM test = 31.235, df = 24, p-value = 0.1471
#Formal tests on the AIC-selected VAR (results stored for later use, not printed here)
jbt<-jarque.bera.test(AICVAR$varresult$diff.log.gdp..$residuals) #Test normality
archt<-arch.test(AICVAR) #Test heteroskedasticity

Interpretation

Conclusions