Quantile and Distribution Regression Results
- Use linear and quadratic functions of returns over the past 2 trading weeks (10 days) to predict the return distribution
#Restrict to shared dates
sdate <- "2011-03-30" #Start date of shared sample
edate <- "2021-03-15" #End date of sample
spr <- window(spz, start = sdate, end = edate)
#Construct lagged series and nonlinear transforms in a loop instead of 20
#copy-pasted lines. assign() creates the same global variables the original
#code defined: spl1..spl10 (lags) and qspl1..qspl10 (squared lags).
#Squaring commutes with window(), so squaring the windowed lag is identical
#to windowing the squared lag.
for (k in 1:10) {
  lagged <- window(stats::lag(spz, -k), start = sdate, end = edate)
  assign(paste0("spl", k), lagged)
  assign(paste0("qspl", k), lagged^2)
}
#Concatenate outcome, lags, and squared lags into one data frame
SPInfo <- data.frame(spr, spl1, spl2, spl3, spl4, spl5, spl6, spl7, spl8, spl9, spl10,
qspl1, qspl2, qspl3, qspl4, qspl5, qspl6, qspl7, qspl8, qspl9, qspl10)
#Create regression formula containing all linear and quadratic lag terms
lnam <- paste0("spl", 1:10)
qlnam <- paste0("qspl", 1:10)
#reformulate() builds spr ~ spl1 + ... + spl10 + qspl1 + ... + qspl10
#without manual string pasting
fmla <- reformulate(c(lnam, qlnam), response = "spr")
#Quantile regression at nine levels from the 1st to the 99th percentile
spquantile <- rq(fmla, tau = c(0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99), data = SPInfo)
#Build the regressor row used to forecast the NEXT trading day (3-16-2021).
#Each series is shifted one position: today's return becomes tomorrow's lag 1,
#today's lag 1 becomes tomorrow's lag 2, etc. (and likewise for the squares),
#which is why spr/qspl columns appear under spl/qspl names one index higher.
today<-length(spr) #Last observation
todayslags<-data.frame(spr[today],spl1[today],spl2[today],spl3[today],spl4[today],spl5[today],spl6[today],spl7[today],spl8[today],spl9[today],
spr[today]^2,qspl1[today],qspl2[today],qspl3[today],qspl4[today],qspl5[today],qspl6[today],qspl7[today],qspl8[today],qspl9[today])
#Rename everything so the columns match the names used in the rq() formula
names(todayslags)<-c("spl1","spl2","spl3","spl4","spl5","spl6","spl7","spl8","spl9","spl10",
"qspl1","qspl2","qspl3","qspl4","qspl5","qspl6","qspl7","qspl8","qspl9","qspl10")
#If any days are missing in recent sample, eg due to holidays, replace NAs with 0s: eg
#todayslags$spl1<-0
#predict() on the multi-tau rq fit returns a 1 x 9 matrix: one row for the
#single forecast date, one column per quantile level
qpredictions<-predict(spquantile,newdata=todayslags)
rownames(qpredictions)<-c("2021-3-16") #Forecasts made using data on 15th, for date of 16th
## Distribution Regression
library(dplyr) #Manipulate data frame
#Estimate Pr(return <= c) for a grid of cutoffs c by fitting one logit
#("distribution regression") per cutoff, using the same linear and quadratic
#lag predictors as the quantile regressions. The loop replaces nine
#copy-pasted mutate/glm/predict blocks.
#Cutoff values (percent returns): -2,-1,-0.5,-0.2,0,0.2,0.5,1,2
spreturnvals <- c(-2, -1, -0.5, -0.2, 0, 0.2, 0.5, 1, 2)
#Adjust formula: binary outcome spr1 on the same right-hand side
fmla <- as.formula(paste(paste("spr1 ~ ", paste(lnam, collapse= "+")),paste("+",paste(qlnam, collapse= "+"))))
predvec <- numeric(length(spreturnvals))
for (j in seq_along(spreturnvals)) {
  #Binary indicator: did the return fall at or below the cutoff?
  SPInfo <- mutate(SPInfo, spr1 = ifelse(spr <= spreturnvals[j], 1, 0))
  fit <- glm(fmla, family = binomial(link = "logit"), data = SPInfo)
  #Predictions of type "response" give probabilities
  p <- predict(fit, newdata = todayslags, type = "response")
  predvec[j] <- p
  #Keep fitted models and predictions available under the original names
  #(spdr1..spdr9, pred1..pred9) so any downstream code still works
  assign(paste0("spdr", j), fit)
  assign(paste0("pred", j), p)
}
#Collect forecasts into a data frame for plotting; rep() replaces the
#nine-fold repeated label string
distfcst <- data.frame(spreturnvals, predvec, rep("DistributionRegression", length(spreturnvals)))
colnames(distfcst) <- c("Returns", "Probabilities", "Type")
#Make data.frame of quantile regression forecasts
#Quantile levels used in the rq() fit above
tauvec <- c(0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99)
#Label every row identically; rep() replaces the nine-fold repeated string
rtype <- rep("QuantileRegression", length(tauvec))
#qpredictions is a 1 x 9 matrix (one row per forecast date, one column per
#quantile level); transpose so each quantile becomes a row
qregfcst <- data.frame(t(as.matrix(qpredictions)), tauvec, rtype)
colnames(qregfcst) <- c("Returns", "Probabilities", "Type")
#Merge quantile and distribution forecasts for plotting
fcsts <- full_join(distfcst, qregfcst)
#Plot both sets of forecasts on the same axes, colored by method
ggplot(fcsts, aes(x = Returns, y = Probabilities, color = Type)) +
  geom_point() +
  geom_line() +
  labs(
    title = "Distribution and Quantile Regression Forecasts of S&P500 Returns for 3-16-2021",
    subtitle = "Quantile regression fixes Y values and estimates X, Distribution regression the reverse",
    x = "Percent Return",
    y = "Conditional Cumulative Probability"
  )
Application: GDP Growth Forecasting, Again
library(vars) #Run Vector Autoregression
library(tseries) #Perform tests, including Jarque Bera Test for Normality
#Download one FRED series over the shared sample, as seen on the 2021-03-15
#vintage date. Centralizing the call removes six copy-pasted fredr() blocks
#and fixes an inconsistency in the original code, where NETEXP used
#observation_end "2021-03-5" while every other series used "2021-03-15".
fetch_nipa <- function(series_id,
                       start = as.Date("1947-01-01"),
                       end = as.Date("2021-03-15"),
                       vintage = as.Date("2021-03-15")) {
  fredr(series_id = series_id,
        observation_start = start,
        observation_end = end,
        vintage_dates = vintage)
}
#US GDP and components from NIPA tables (cf http://www.bea.gov/national/pdf/nipaguid.pdf)
GDP <- fetch_nipa("GDP") #Gross Domestic Product
PCEC <- fetch_nipa("PCEC") #Personal consumption expenditures
FPI <- fetch_nipa("FPI") #Fixed Private Investment
CBI <- fetch_nipa("CBI") #Change in Private Inventories
NETEXP <- fetch_nipa("NETEXP") #Net Exports of Goods and Services
GCE <- fetch_nipa("GCE") #Government Consumption Expenditures and Gross Investment
#Format the downloaded values as quarterly time series starting 1947Q1
quarterly_ts <- function(values, series_name) {
  ts(values, frequency = 4, start = c(1947, 1), names = series_name)
}
gdp <- quarterly_ts(GDP$value, "Gross Domestic Product")
pcec <- quarterly_ts(PCEC$value, "Consumption")
fpi <- quarterly_ts(FPI$value, "Fixed Investment")
cbi <- quarterly_ts(CBI$value, "Inventory Growth")
invest <- fpi + cbi #Private Investment = fixed investment + inventory change
netexp <- quarterly_ts(NETEXP$value, "Net Exports")
gce <- quarterly_ts(GCE$value, "Government Spending")
#Convert to log differences (approximate growth rates) to ensure
#stationarity and collect in one quarterly multivariate series; differencing
#drops one observation, so the panel starts in 1947Q2
NIPAdata <- ts(data.frame(diff(log(gdp)), diff(log(pcec)), diff(log(invest)), diff(log(gce))), frequency = 4, start = c(1947, 2))
# Construct lags 1 through 8 of the quarterly growth series in a loop,
# all trimmed to a common window so every regressor aligns on the same
# dates; assign() creates the same globals (l1NIPA..l8NIPA) as before
for (k in 1:8) {
  assign(paste0("l", k, "NIPA"),
         window(stats::lag(NIPAdata, -k), start = c(1949, 2), end = c(2020, 4)))
}
#convert to matrices
xvarsdf <- data.frame(l1NIPA, l2NIPA, l3NIPA, l4NIPA, l5NIPA, l6NIPA, l7NIPA, l8NIPA)
xvars <- as.matrix(xvarsdf)
NIPAwindow <- matrix(window(NIPAdata, start = c(1949, 2), end = c(2020, 4)), nrow(l1NIPA), 4)
#Calculate information criteria for VAR lag orders up to 8
IC <- VARselect(NIPAdata, lag.max = 8)
#Fit VARs at the AIC- and BIC-optimal lag orders. In VARselect's output,
#selection[1] is AIC(n) and selection[3] is SC(n), i.e. BIC.
pAIC <- IC$selection[1]
pBIC <- IC$selection[3]
AICVAR <- VAR(NIPAdata, p = pAIC)
BICVAR <- VAR(NIPAdata, p = pBIC)
#Construct forecasts 4 quarters (1 year) ahead
AICpred <- forecast(AICVAR, h = 4)$forecast
BICpred <- forecast(BICVAR, h = 4)$forecast
#Recent GDP growth history for the plot background
GDPline <- window(ts(NIPAdata[,1], frequency = 4, start = c(1947, 2), names = "Gross Domestic Product"), start = c(2009, 2))
#Series labels and title are built from the selected lag orders instead of
#hard-coding "VAR(1)"/"VAR(5)", which would silently mislabel the plot if
#the information criteria selected different orders on updated data
autoplot(GDPline, showgap = FALSE) +
  autolayer(BICpred$diff.log.gdp., series = paste0("VAR(", pBIC, ")"), alpha = 0.4) +
  autolayer(AICpred$diff.log.gdp., series = paste0("VAR(", pAIC, ")"), alpha = 0.4) +
  ggtitle(paste0("VAR(", pBIC, ") and VAR(", pAIC, ") GDP Forecasts with 80 and 95% Normal-Distribution Based Intervals")) +
  xlab("Date") + ylab("log GDP growth")