Outline

High Dimensional Data

Setup

Dimensionality Reduction

Dimensionality Reduction Approaches

Factor Models: Static Case

Standard Estimator: Principal Components Analysis

Choosing the number of components

library(dplyr) #Data Manipulation
library(ggplot2)
library(fbi) #Tools specifically for working with FRED-MD and FRED-QD data

#Load Current FRED-MD Series, as downloaded from https://research.stlouisfed.org/econ/mccracken/fred-databases/
# Described in McCracken and Ng (2014) http://www.columbia.edu/~sn2294/papers/freddata.pdf
# For detrending, use "fbi" library, available by running commands
#library(devtools)
#devtools::install_github("cykbennie/fbi")

#Series of 748 observations of 129 macroeconomic indicators
currentMD<-read.csv("Data/fredmdcurrent.csv")


#PCA without detrending: not recommended, components pick up trends
#Drop date column, so result contains only data
fmd<-dplyr::select(currentMD,-one_of("sasdate"))
#Turn into time series
fredmdts<-ts(fmd,frequency=12,start=c(1959,1))
#Take PCA of Scaled Series
mdPCA<-prcomp(~.,data=fmd,na.action=na.exclude,scale=TRUE)
#Compute statistics
mdPCAinfo<-summary(mdPCA)

#PCA with detrending: uses differencing, log differencing, double differencing etc as needed
fmdtransformed<-fbi::fredmd(file="Data/fredmdcurrent.csv",transform = TRUE)
#Drop date column, so result contains only data
fmddt<-fmdtransformed[,2:129]
#Take PCA of Scaled Series
dtPCA<-prcomp(~.,data=fmddt,na.action=na.exclude,scale=TRUE)
#Compute statistics
dtPCAinfo<-summary(dtPCA)
#Alternate PCA Approaches in R

?prcomp #Default PCA in stats library
?princomp #As above, but with slightly different scaling; former is recommended for numerical accuracy reasons

?screeplot #Use to investigate magnitude of components

# Syntax example:

pc.cr <- princomp(USArrests, cor = TRUE)
screeplot(pc.cr)

#Nonlinear dimension reduction tools
library(Rtsne) #Library for t-SNE: 2d representation, for visualization
library(diffusionMap) #Diffusion Maps: nonlinear low dimensional representation from distance matrices (rather than covariance matrices between points) (cf adapreg for) regression on diffusion map components
library(kernlab) #Contains kernel PCA (and spectral clustering)

#To investigate
#install.packages("bayesdfa") #Bayesian dynamic factor models! Great, but want frequentist ones too
#install.packages("nowcasting") #Dynamic factor models for nowcasting!
#install.packages("tsfa") #Time Series factor models

Example: Principal Components Decomposition

2 Dimensional PCA Approximation of Data Set

#You can use default biplot(dtPCA) to get a version of this, including arrows displaying loadings, but it is too busy to read
# cf https://stackoverflow.com/questions/6578355/plotting-pca-biplot-with-ggplot2

## Plot PCA of series without detrending
# data <- data.frame(obsnames=row.names(mdPCA$x), mdPCA$x)
# plot <- ggplot(data, aes_string(x="PC1", y="PC2")) + geom_text(alpha=.4, size=3, aes(label=obsnames))+
#         ggtitle("First 2 Principal Components of Macro Series",subtitle="Data Labels are Time Period")
# plot

# Plot PCA of detrended series
data2 <- data.frame(obsnames=row.names(dtPCA$x), dtPCA$x)
plot2 <- ggplot(data2, aes_string(x="PC1", y="PC2")) + geom_text(alpha=.4, size=3, aes(label=obsnames))+
        ggtitle("First 2 Principal Components of Detrended Macro Series",subtitle="Data Labels are Time Period")
plot2

Alternatives: Nonlinear Methods and Sparsity

Factor Models: Dynamic Case

Two Step Estimation of Dynamic Factor Model

Factor-Augmented Regression

## Code to Fit Bayesian factor models
# Run this on your own time, as it is too slow to run each time I compile the file

#library(bayesdfa) #Bayesian Factor Models

# library(fredr) #Data Source
# fredr_set_key("8782f247febb41f291821950cf9118b6") #Key I obtained for this class
# 
# serieslist<-c("GS1M","GS3M","GS6M","GS1","GS2","GS3","GS5","GS7","GS10","GS20","GS30")
# 
# treasuryyields<-list()
# for (i in 1:length(serieslist)){
#   
#   val<-fredr(series=serieslist[i])
#   
#   treasuryyields[[i]]<-ts(val$value,frequency=12,names=serieslist[i],names=serieslist[i])
# }

# options(mc.cores = parallel::detectCores()) #Use parallel computing when running MCMC

# #Collect all Group 6 Variables: These are financial series
# finseries<-data.frame(currentMD$FEDFUNDS,currentMD$CP3Mx,currentMD$TB3MS,currentMD$TB6MS, currentMD$GS1,currentMD$GS5,currentMD$GS10, currentMD$AAA,currentMD$BAA,currentMD$COMPAPFFx,currentMD$TB3SMFFM,currentMD$TB6SMFFM,
#                       currentMD$T1YFFM,currentMD$T5YFFM,currentMD$T10YFFM,currentMD$AAAFFM,
#                       currentMD$BAAFFM,currentMD$TWEXAFEGSMTHx,currentMD$EXSZUSx,
#                       currentMD$EXJPUSx,currentMD$EXUSUKx,currentMD$EXCAUSx)
# 
# finpca<-prcomp(~.,data=finseries,na.action=na.exclude,scale=TRUE)

### Warning: With Data Size this large, following command does not appear feasible on a laptop
## Command works fine on small data sets, but not on this one
## #Fit Bayesian Dynamic Factor Model using "bayesdfa"
## dynfactormodel<- fit_dfa(
##   y = t(finseries), num_trends = 3, zscore = TRUE,
##   iter = 2000, chains = 4, thin = 1
## )

# ## Alternate approach: try 1 factor model of just small set of interest series 
# # This is also very slow, but at least feasible. Total estimation takes 76 minutes on a 4-core laptop.
# fins1<-data.frame(currentMD$FEDFUNDS, currentMD$GS1,currentMD$GS5,currentMD$GS10)
# # Bayesian 1 factor model, 4 normalized series, with random walk assumed for factor dynamics 
# dynfactormodel<- fit_dfa(
#   y = t(fins1), num_trends = 1, zscore = TRUE,
#   iter = 2000, chains = 4, thin = 1
# )
# #Select a rotation of factors (irrelevant with 1, but this also computes summary stats)
# rt<-rotate_trends(dynfactormodel) 
# #Plot factor over time
# plot_trends(rt)
# #Plot predictions for each series
# plot_fitted(dynfactormodel)
# #Plot posterior estimates of factor loadings
# plot_loadings(rt)

Exercise: GDP forecasting with a FAVAR

Nowcasting

Nowcasting: One Approach

  1. Estimate parameters of dynamic factor model
      1. Estimate \(d\) factors \(F_t\) from series by PCA on \(X_t\)
      1. Aggregate monthly series up to quarterly data, and estimate loadings of \(y_{t}\) on factors by Least Squares
      • \(\widehat{\beta}=\underset{\beta}{\arg\min}\sum_{k=1}^{T/3}(y_{k}-\sum_{j=1}^{d}\beta_j\widehat{f}_{j,3k})^2\)
      1. Estimate VAR in \(F_t\), \(F_t=AF_{t-1}+e_{t}\) by OLS using PCA factors
  2. Predict current factors using Kalman filter and use in forecast
      1. Run Kalman filter from dynamic factor model to get predicted factors \(\widehat{F}_{t}\) given current observations
      1. Predict \(\widehat{y}_{k}=\widehat{F}_{t}\widehat{\beta}\)
library(nowcasting)
#Example syntax for nowasting based on quarterly factor model as in Giannone, Reichlin, Small (2011)


# Old Syntax: manually aggregate 
# #Use Stock-Watson US GDP and indicators data
# gdp <- month2qtr(x = USGDP$base[,"RGDPGR"])
# # Extract Real GDP Growth from data
# gdp_position <- which(colnames(USGDP$base) == "RGDPGR")
# #Aggregate monthly up to quarterly data
# capture.output(base <- Bpanel(base = USGDP$base[,-gdp_position],trans = USGDP$legend$Transformation[-gdp_position],aggregate = TRUE),file="/dev/null")

#New syntax (automatically aggregates)
data(USGDP)
gdp_position <- which(colnames(USGDP$base) == "RGDPGR")
base <- Bpanel(base = USGDP$base[,-gdp_position],
               trans = USGDP$legend$Transformation[-gdp_position],
               aggregate = TRUE)
## [1] "AVGWKLYCL" "CPOUT"     "M3"
usdata <- cbind(USGDP$base[,"RGDPGR"], base)
colnames(usdata) <- c("RGDPGR", colnames(base))
frequency <- c(4, rep(12, ncol(usdata) -1))

Diagnostics: Find order of factor model by Bai-Ng Criterion

#Diagnostics 1: Select number of Factors by Bai-Ng Information Criterion
factorselect<-ICfactors(base)

#Diagnostics 2: Select number of lags and shocks by another info criterion
shockselect<-ICshocks(base,r=factorselect$r_star)

Results: Nowcast

#Run 2 step nowcasting, with optimal # of factors, lags, and error terms

#Old Syntax
#now2sq <- nowcast(y = gdp, x = base, r = factorselect$r_star, p = shockselect$p, q = shockselect$q_star, method = '2sq')

#New Syntax
now2s <- nowcast(formula = RGDPGR ~ ., data = usdata, r = factorselect$r_star, p = shockselect$p, q = shockselect$q_star,
          method = '2s', frequency = frequency)

#Plot results
nowcast.plot(now2s,type="fcst")

Results: Factor Estimates

#Plot results
nowcast.plot(now2s,type="factors")

#Additional Displays, Not Shown

##Fraction of variance Explained by each factor
#nowcast.plot(now2sq,type="eigenvalues")
## Display factor loadings of each series on Factor 1
#nowcast.plot(now2sq,type="eigenvectors")

Application: Fulcrum Nowcast

US daily growth nowcast

US daily growth nowcast

NY Fed Nowcast

April 9 2021 NYFed US Q2 GDP Growth Nowcast

April 9 2021 NYFed US Q2 GDP Growth Nowcast

Conclusions

References