Outline

High Dimensional Data

Setup

Dimensionality Reduction

Dimensionality Reduction Approaches

Factor Models: Static Case

Standard Estimator: Principal Components Analysis

Choosing the number of components

library(dplyr) #Data Manipulation
library(ggplot2)

#Load Current FRED-MD Series, as downloaded from https://research.stlouisfed.org/econ/mccracken/fred-databases/
# Described in McCracken and Ng (2014) http://www.columbia.edu/~sn2294/papers/freddata.pdf

#Series of 729 observations of 129 macroeconomic indicators
currentMD<-read.csv("Data/current.csv")
#Drop data column, so result contains only data
fmd<-select(currentMD,-one_of("sasdate"))
#Turn into time series
fredmdts<-ts(fmd,frequency=12,start=c(1959,1))

#Take PCA of Scaled Series
mdPCA<-prcomp(~.,data=fmd,na.action=na.exclude,scale=TRUE)

#Compute statistics
mdPCAinfo<-summary(mdPCA)

#Collect all Group 6 Variables: These are financial series
finseries<-data.frame(currentMD$FEDFUNDS,currentMD$CP3Mx,currentMD$TB3MS,currentMD$TB6MS, currentMD$GS1,currentMD$GS5,currentMD$GS10, currentMD$AAA,currentMD$BAA,currentMD$COMPAPFFx,currentMD$TB3SMFFM,currentMD$TB6SMFFM,
                      currentMD$T1YFFM,currentMD$T5YFFM,currentMD$T10YFFM,currentMD$AAAFFM,
                      currentMD$BAAFFM,currentMD$TWEXMMTH,currentMD$EXSZUSx,
                      currentMD$EXJPUSx,currentMD$EXUSUKx,currentMD$EXCAUSx)

finpca<-prcomp(~.,data=finseries,na.action=na.exclude,scale=TRUE)
#Alternate PCA Approaches in R

?prcomp #Default PCA in stats library
?princomp #As above, but with slightly different scaling?

?screeplot #Use to investigate magnitude of components

# Syntax example:

pc.cr <- princomp(USArrests, cor = TRUE)
screeplot(pc.cr)

#Nonlinear dimension reduction tools
library(Rtsne) #Library for t-SNE: 2d representation, for visualization
library(diffusionMap) #Diffusion Maps: nonlinear low dimensional representation from distance matrices (rather than covariance matrices between points) (cf adapreg for) regression on diffusion map components
library(kernlab) #Contains kernel PCA (and spectral clustering)

#To investigate
#install.packages("bayesdfa") #Bayesian dynamic factor models! Great, but want frequentist ones too
#install.packages("nowcasting") #Dynamic factor models for nowcasting!
#install.packages("tsfa") #Time Series factor models

Example: Principal Components Decomposition

2 Dimensional PCA Approximation of Data Set

#You can use default biplot(mdPCA) to get a version of this, including arrows displaying loadings, but it is too busy to read
# cf https://stackoverflow.com/questions/6578355/plotting-pca-biplot-with-ggplot2

data <- data.frame(obsnames=row.names(mdPCA$x), mdPCA$x)
plot <- ggplot(data, aes_string(x="PC1", y="PC2")) + geom_text(alpha=.4, size=3, aes(label=obsnames))+
        ggtitle("First 2 Principal Components of Macro Series",subtitle="Data Labels are Time Period")
plot

Alternatives: Nonlinear Methods and Sparsity

Factor Models: Dynamic Case

Two Step Estimation of Dynamic Factor Model

Factor-Augmented Regression

## Code to Fit Bayesian factor models
# Run this on your own time, as it is too slow to run each time I compile the file

#library(bayesdfa) #Bayesian Factor Models

# library(fredr) #Data Source
# fredr_set_key("8782f247febb41f291821950cf9118b6") #Key I obtained for this class
# 
# serieslist<-c("GS1M","GS3M","GS6M","GS1","GS2","GS3","GS5","GS7","GS10","GS20","GS30")
# 
# treasuryyields<-list()
# for (i in 1:length(serieslist)){
#   
#   val<-fredr(series=serieslist[i])
#   
#   treasuryyields[[i]]<-ts(val$value,frequency=12,names=serieslist[i],names=serieslist[i])
# }

# options(mc.cores = parallel::detectCores()) #Use parallel computing when running MCMC

### Warning: With Data Size this large, following command does not appear feasible on a laptop
## Command works fune on small data sets, but not on this one
## #Fit Bayesian Dynamic Factor Model using "bayesdfa"
## dynfactormodel<- fit_dfa(
##   y = t(finseries), num_trends = 3, zscore = TRUE,
##   iter = 2000, chains = 4, thin = 1
## )

# ## Alternate approach: try 1 factor model of just small set of interest series 
# # This is also very slow, but at least feasible. Total estimation takes 76 minutes on a 4-core laptop.
# fins1<-data.frame(currentMD$FEDFUNDS, currentMD$GS1,currentMD$GS5,currentMD$GS10)
# # Bayesian 1 factor model, 4 normalized series, with random walk assumed for factor dynamics 
# dynfactormodel<- fit_dfa(
#   y = t(fins1), num_trends = 1, zscore = TRUE,
#   iter = 2000, chains = 4, thin = 1
# )
# #Select a rotation of factors (irrelevant with 1, but this also computes summary stats)
# rt<-rotate_trends(dynfactormodel) 
# #Plot factor over time
# plot_trends(rt)
# #Plot predictions for each series
# plot_fitted(dynfactormodel)
# #Plot posterior estimates of factor loadings
# plot_loadings(rt)

Nowcasting

Nowcasting: One Approach

  1. Estimate parameters of dynamic factor model
      1. Estimate \(d\) factors \(F_t\) from series by PCA on \(X_t\)
      1. Aggregate monthly series up to quarterly data, and estimate loadings of \(y_{t}\) on factors by Least Squares
      • \(\widehat{\beta}=\underset{\beta}{\arg\min}\sum_{k=1}^{T/3}(y_{k}-\sum_{j=1}^{d}\beta_j\widehat{f}_{j,3k})^2\)
      1. Estimate VAR in \(F_t\), \(F_t=AF_{t-1}+e_{t}\) by OLS using PCA factors
  2. Predict current factors using Kalman filter and use in forecast
      1. Run Kalman filter from dynamic factor model to get predicted factors \(\widehat{F}_{t}\) given current observations
      1. Predict \(\widehat{y}_{k}=\widehat{F}_{t}\widehat{\beta}\)
library(nowcasting)
#Example syntax for nowasting based on quarterly factor model as in Giannone, Reichlin, Small (2011)


# Old Syntax: manually aggregate 
# #Use Stock-Watson US GDP and indicators data
# gdp <- month2qtr(x = USGDP$base[,"RGDPGR"])
# # Extract Real GDP Growth from data
# gdp_position <- which(colnames(USGDP$base) == "RGDPGR")
# #Aggregate monthly up to quarterly data
# capture.output(base <- Bpanel(base = USGDP$base[,-gdp_position],trans = USGDP$legend$Transformation[-gdp_position],aggregate = TRUE),file="/dev/null")

#New syntax (automatically aggregates)
data(USGDP)
gdp_position <- which(colnames(USGDP$base) == "RGDPGR")
base <- Bpanel(base = USGDP$base[,-gdp_position],
               trans = USGDP$legend$Transformation[-gdp_position],
               aggregate = TRUE)
## [1] "AVGWKLYCL" "CPOUT"     "M3"
usdata <- cbind(USGDP$base[,"RGDPGR"], base)
colnames(usdata) <- c("RGDPGR", colnames(base))
frequency <- c(4, rep(12, ncol(usdata) -1))

Diagnostics: Find order of factor model by Bai-Ng Criterion

#Diagnostics 1: Select number of Factors by Bai-Ng Information Criterion
factorselect<-ICfactors(base)

#Diagnostics 2: Select number of lags and shocks by another info criterion
shockselect<-ICshocks(base,r=factorselect$r_star)

Results: Nowcast

#Run 2 step nowcasting, with optimal # of factors, lags, and error terms

#Old Syntax
#now2sq <- nowcast(y = gdp, x = base, r = factorselect$r_star, p = shockselect$p, q = shockselect$q_star, method = '2sq')

#New Syntax
now2s <- nowcast(formula = RGDPGR ~ ., data = usdata, r = factorselect$r_star, p = shockselect$p, q = shockselect$q_star,
          method = '2s', frequency = frequency)

#Plot results
nowcast.plot(now2s,type="fcst")

Results: Factor Estimates

#Plot results
nowcast.plot(now2s,type="factors")

#Additional Displays, Not Shown

##Fraction of variance Explained by each factor
#nowcast.plot(now2s,type="eigenvalues")
## Display factor loadings of each series on Factor 1
#nowcast.plot(now2s,type="eigenvectors")

Application: Financial Times Nowcast

Conclusions

References