Outline

Review of Bayesian Approach

Applying the Bayesian Approach

Types of posterior calculations

Maximum a Posteriori (MAP) Estimate

Application: Bayesian Regression Model

MAP vs Structural Risk Minimization

Laplace Approximation

Applied Laplace Approximation

Monte Carlo Sampling Algorithms

Probabilistic Programming

Writing a Stan Program

Stan code for model

library(rstan) #Access Stan from R
options(mc.cores = parallel::detectCores()) #Use parallel computing when running MCMC
rstan_options(auto_write = TRUE) #Do not recompile Stan code every time file is saved
// Model is in stan code chunk 
// compiled as output.var="stan_model_bernoulli"
data { 
  int T;                     // The sample size
  int<lower=0,upper=1> y[T]; // The observations (array of length T)
  real<lower=0> prior_a;     // The a parameter of the prior: data because user allowed to enter value
  real<lower=0> prior_b;     // The b parameter of the prior
} 
parameters { 
  real<lower=0,upper=1> theta; // The probability of observing a one
} 
model { 
  theta ~ beta(prior_a,prior_b);        // Prior
  for (t in 1:T) 
    y[t] ~ bernoulli(theta);  // Likelihood: consists of N independent Bernoulli samples
} 
generated quantities { 
  int<lower=0,upper=1> y_tilde; // Samples from posterior predictive distribution
  y_tilde = bernoulli_rng(theta);  //Simulate draw from (Bernoulli) conditional likelihood 
}

Run Code on Simulated Data

# Generate some data to feed to the model
truetheta<-0.99   # Make 0s a rare event
aprior<-0.5       # Expect few 1s
bprior<-0.5       # Expect few 0s
Tsample<-1000     # Data size

set.seed(4567)    # Seed random number generator, so results always same 
ysample<-rbinom(Tsample,1,truetheta)    #Generate T Bernoulli variates
# Format data as list to feed to model
stan_data<-list(T=Tsample, y=ysample, prior_a=aprior, prior_b=bprior)
# Run the code to draw samples from the posterior
fit_bernoulli<-sampling(object = stan_model_bernoulli,data = stan_data, chains = 4, iter = 2000, seed = 4567)
#Run 4 chains of 2000 samples, and discard the first 1000 of each
print(fit_bernoulli) # Print summary statistics for posterior statistics 
stan_hist(fit_bernoulli) # Plot the approximated posterior density

Posterior Statistics and Model Fit

print(fit_bernoulli,digits_summary=3) # Print summary statistics for posterior statistics 
## Inference for Stan model: ee00873fe649a4709da2cb99b1c5ea5c.
## 4 chains, each with iter=2000; warmup=1000; thin=1; 
## post-warmup draws per chain=1000, total post-warmup draws=4000.
## 
##            mean se_mean    sd    2.5%     25%     50%     75%   97.5% n_eff
## theta     0.984   0.000 0.004   0.976   0.982   0.985   0.987   0.991  1501
## y_tilde   0.985   0.002 0.123   1.000   1.000   1.000   1.000   1.000  4135
## lp__    -80.473   0.014 0.666 -82.408 -80.634 -80.217 -80.035 -79.983  2140
##          Rhat
## theta   1.004
## y_tilde 1.001
## lp__    1.000
## 
## Samples were drawn using NUTS(diag_e) at Thu May 13 15:58:54 2021.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).

Approximate Posterior and Posterior Predictive Densities

quietgg(stan_hist(fit_bernoulli)) # Plot the approximated posterior density, suppressing warning messages

#Calculate some statistics
thetamean<-get_posterior_mean(fit_bernoulli,pars="theta") #Get posterior mean of theta

Interpretation and Monte Carlo Diagnostics

Trace Plot of MCMC Chains

traceplot(fit_bernoulli,pars="theta")

Extending the Model

Stan Code

// Stan model "stan_model_logitAR"
data { 
  int T;                     // The sample size
  int<lower=0,upper=1> y[T]; // The observations (array of length T)
  int<lower=0> K;            // Number of lags to use
  real<lower=0> prior_a;     // Prior variance of intercept
  real<lower=0> prior_b;     // The variance of coefficients on lags
} 
parameters { 
  real a;                    // Intercept coefficient
  real b[K];                 // K Lag coefficients
} 
model { 
  a ~ normal(0,prior_a);        // Intercept Prior
  for (k in 1:K)
    b[k] ~ normal(0,prior_b);   // Lag coefficient priors
  for (t in (K+1):T) {
    real mu = a;
    for (k in 1:K)
      mu += b[k] * y[t-k];       // Formula is sum of intercept and lags
    y[t] ~ bernoulli_logit(mu);  // Likelihood 
  }
}
generated quantities { 
  int<lower=0,upper=1> y_tilde;  // Samples from posterior predictive distribution
  for (t in (T-K):T) {           // Use last K observations to predict y[T+1] 
    real mu = a;
    for (k in 1:K)
      mu += b[k] * y[t-k];
    y_tilde = bernoulli_rng(inv_logit(mu));  //Simulate from conditional likelihood
  }
  }

Results: MCMC Fit vs Maximum Likelihood Fit

library(fredr) #Access FRED, the Federal Reserve Economic Data
fredr_set_key("8782f247febb41f291821950cf9118b6") #Key I obtained for this class
#The NBER recession indicator has FRED ID "USREC"
#1 in NBER declared recession, 0 in any other date
USREC<-fredr(series_id = "USREC",vintage_dates = as.Date("2021-03-23"))
usrec<-ts(USREC$value,frequency=12,start=c(1854,12)) #Convert to time series
#Produce lagged values
l1usrec<-window(lag(usrec,-1),start=c(1855,2),end=c(2021,2))
l2usrec<-window(lag(usrec,-2),start=c(1855,2),end=c(2021,2))
recession<-window(usrec,start=c(1855,2),end=c(2021,2))
#Compute Logistic regression by ERM as before, equal to MLE
recessionlogit<-glm(recession~l1usrec+l2usrec,family=binomial(link="logit"))
## Assemble data for Stan model
recessiondata<-as.integer(recession) #Binary recession indicators
Tmonths<-length(recessiondata) #Length of sample
avariance<-10 #Prior variance of intercept
bvariance<-5 #Prior variance of lag
nlags<-2  #Order of autoregression model to use
stan_data2<-list(T=Tmonths, y=recession, K=nlags, prior_a=avariance, prior_b=bvariance)
# Run the code to draw samples from the posterior
fit_logitAR<-sampling(object = stan_model_logitAR,data = stan_data2, chains = 4, iter = 2000, seed = 5678)
summary(recessionlogit)$coefficients #MLE
##               Estimate  Std. Error      z value     Pr(>|z|)
## (Intercept)  -3.672571   0.1736639 -21.14757785 2.904785e-99
## l1usrec      20.855653 473.4168105   0.04405347 9.648618e-01
## l2usrec     -14.425761 473.4168114  -0.03047159 9.756910e-01
print(fit_logitAR) #Bayesian Prediction
## Inference for Stan model: 5a957408e4fb81b8843aa522c190ba77.
## 4 chains, each with iter=2000; warmup=1000; thin=1; 
## post-warmup draws per chain=1000, total post-warmup draws=4000.
## 
##            mean se_mean   sd    2.5%     25%     50%     75%   97.5% n_eff Rhat
## a         -3.68    0.00 0.17   -4.01   -3.79   -3.67   -3.56   -3.35  1562 1.00
## b[1]       9.13    0.06 1.66    6.63    7.90    8.88   10.09   13.01   673 1.01
## b[2]      -2.68    0.06 1.66   -6.58   -3.65   -2.45   -1.47   -0.17   660 1.01
## y_tilde    0.94    0.00 0.24    0.00    1.00    1.00    1.00    1.00  4040 1.00
## lp__    -287.78    0.04 1.20 -290.87 -288.37 -287.50 -286.92 -286.37   893 1.00
## 
## Samples were drawn using NUTS(diag_e) at Thu May 13 16:00:06 2021.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).

Posterior and Posterior Predictive Distribution Samples

#Pairs plot
pairs(fit_logitAR,pars=c("a","b","y_tilde"))

Try it yourself

Interpretation and Conclusion

Summary

References