Outline

Review of Bayesian Approach

Applying the Bayesian Approach

Types of posterior calculations

Maximum a Posteriori (MAP) Estimate

Application: Bayesian Regression Model

MAP vs Structural Risk Minimization

Laplace Approximation

Applied Laplace Approximation

Monte Carlo Sampling Algorithms

Probabilistic Programming

Writing a Stan Program

Stan code for model

library(rstan) #Access Stan from R
stan_code_bernoulli <- " 
data { 
  int T;                     // The sample size
  int<lower=0,upper=1> y[T]; // The observations (array of length T)
  real<lower=0> prior_a;     // The a parameter of the prior: data because user allowed to enter value
  real<lower=0> prior_b;     // The b parameter of the prior
} 
parameters { 
  real<lower=0,upper=1> theta; // The probability of observing a one
} 
model { 
  theta ~ beta(prior_a,prior_b);        // Prior
  for (t in 1:T) 
    y[t] ~ bernoulli(theta);  // Likelihood: consists of T independent Bernoulli samples
} 
generated quantities { 
  int<lower=0,upper=1> y_tilde; // Samples from posterior predictive distribution
  y_tilde = bernoulli_rng(theta);  //Simulate draw from (Bernoulli) conditional likelihood 
}"

Run Code on Simulated Data

options(mc.cores = parallel::detectCores()) #Use parallel computing when running MCMC
rstan_options(auto_write = TRUE) #Cache the compiled model on disk so unchanged Stan code is not recompiled
# Compile this code as a model. Warning: compiling takes a long time
stan_model_bernoulli<-stan_model(model_code = stan_code_bernoulli,model_name = "BernoulliModel") 
# Generate some data to feed to the model
truetheta<-0.99   # Make 0s a rare event
aprior<-0.5       # Expect few 1s
bprior<-0.5       # Expect few 0s
Tsample<-1000     # Data size

set.seed(4567)    # Seed random number generator, so results always same 
ysample<-rbinom(Tsample,1,truetheta)    #Generate T Bernoulli variates
# Format data as list to feed to model
stan_data<-list(T=Tsample, y=ysample, prior_a=aprior, prior_b=bprior)
# Run the code to draw samples from the posterior
fit_bernoulli<-sampling(object = stan_model_bernoulli,data = stan_data, chains = 4, iter = 2000, seed = 4567)
#Run 4 chains of 2000 samples, and discard the first 1000 of each
print(fit_bernoulli) # Print summary statistics for the posterior 
stan_hist(fit_bernoulli) # Plot the approximated posterior density
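
Once sampling has finished, the raw posterior draws can be pulled out of the fit object with rstan's extract() and used to estimate any posterior quantity. A minimal sketch (the 0.95 threshold below is only an illustrative choice, not part of the original example):

theta_draws<-rstan::extract(fit_bernoulli,pars="theta")$theta # Vector of post-warmup draws of theta
mean(theta_draws>0.95)  # Monte Carlo estimate of Pr(theta > 0.95 | y)
ytilde_draws<-rstan::extract(fit_bernoulli,pars="y_tilde")$y_tilde # Posterior predictive draws
mean(ytilde_draws)      # Estimated probability that the next observation is a 1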

Approximate Posterior and Posterior Predictive Densities

print(fit_bernoulli,digits_summary=3) # Print summary statistics for the posterior, to 3 significant digits 
## Inference for Stan model: BernoulliModel.
## 4 chains, each with iter=2000; warmup=1000; thin=1; 
## post-warmup draws per chain=1000, total post-warmup draws=4000.
## 
##            mean se_mean    sd    2.5%     25%     50%     75%   97.5%
## theta     0.985   0.000 0.004   0.976   0.982   0.985   0.987   0.991
## y_tilde   0.981   0.002 0.137   1.000   1.000   1.000   1.000   1.000
## lp__    -80.489   0.020 0.703 -82.572 -80.656 -80.217 -80.034 -79.983
##         n_eff  Rhat
## theta    1333 1.001
## y_tilde  3950 1.001
## lp__     1233 1.002
## 
## Samples were drawn using NUTS(diag_e) at Sun Oct 27 23:59:00 2019.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).
quietgg(stan_hist(fit_bernoulli)) # Plot the approximated posterior density, suppressing warning messages

#Calculate some statistics
thetamean<-get_posterior_mean(fit_bernoulli,pars="theta") #Get posterior mean of theta
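
Because the Beta prior is conjugate to the Bernoulli likelihood, the exact posterior in this example is Beta(a + sum(y), b + T - sum(y)), so the Monte Carlo estimate can be checked against the closed form. A quick sanity check using the simulated data above:

post_a<-aprior+sum(ysample)           # Posterior a parameter
post_b<-bprior+Tsample-sum(ysample)   # Posterior b parameter
post_a/(post_a+post_b)                # Exact posterior mean, to compare with thetamean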

Interpretation and Monte Carlo Diagnostics

Trace Plot of MCMC Chains

traceplot(fit_bernoulli,pars="theta")
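
Beyond the trace plot, rstan provides a few more convergence and sampler diagnostics worth checking before trusting the draws; a brief sketch:

stan_ac(fit_bernoulli,pars="theta")                # Autocorrelation of the chains for theta
check_hmc_diagnostics(fit_bernoulli)               # Report divergences, treedepth, and energy problems
summary(fit_bernoulli)$summary[,c("n_eff","Rhat")] # Effective sample size and Rhat for each quantity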

Extending the Model

Stan Code

stan_code_logitAR <- " 
data { 
  int T;                     // The sample size
  int<lower=0,upper=1> y[T]; // The observations (array of length T)
  int<lower=0> K;            // Number of lags to use
  real<lower=0> prior_a;     // Prior standard deviation of the intercept coefficient
  real<lower=0> prior_b;     // Prior standard deviation of the lag coefficients
} 
parameters { 
  real a;                    // Intercept coefficient
  real b[K];                 // K Lag coefficients
} 
model { 
  a ~ normal(0,prior_a);        // Intercept Prior
  for (k in 1:K)
    b[k] ~ normal(0,prior_b);   // Lag coefficient priors
  for (t in (K+1):T) {
    real mu = a;
    for (k in 1:K)
      mu += b[k] * y[t-k];       // Formula is sum of intercept and lags
    y[t] ~ bernoulli_logit(mu);  // Likelihood 
  }
}
generated quantities { 
  int<lower=0,upper=1> y_tilde;  // Sample from posterior predictive distribution of y[T+1]
  {                              // Local block: mu is a temporary, not a saved quantity
    real mu = a;
    for (k in 1:K)               // Use the last K observations to predict y[T+1]
      mu += b[k] * y[T+1-k];
    y_tilde = bernoulli_rng(inv_logit(mu));  // Simulate from (Bernoulli) conditional likelihood
  }
}"

Results: MCMC Fit vs Maximum Likelihood Fit

summary(recessionlogit)$coefficients #MLE
##              Estimate  Std. Error      z value     Pr(>|z|)
## (Intercept)  -3.69266   0.1762321 -20.95339369 1.747251e-97
## l1usrec      20.85563 476.3834345   0.04377909 9.650805e-01
## l2usrec     -14.42507 476.3834351  -0.03028038 9.758434e-01
print(fit_logitAR) #Bayesian MCMC fit
## Inference for Stan model: LogitARModel.
## 4 chains, each with iter=2000; warmup=1000; thin=1; 
## post-warmup draws per chain=1000, total post-warmup draws=4000.
## 
##            mean se_mean   sd    2.5%     25%     50%     75%   97.5% n_eff
## a         -3.70    0.00 0.18   -4.05   -3.81   -3.69   -3.58   -3.36  1582
## b[1]       9.33    0.07 1.84    6.66    7.98    9.03   10.33   13.75   727
## b[2]      -2.88    0.07 1.84   -7.31   -3.86   -2.55   -1.55   -0.20   715
## y_tilde    0.03    0.00 0.16    0.00    0.00    0.00    0.00    1.00  4064
## lp__    -283.23    0.05 1.33 -286.60 -283.81 -282.89 -282.26 -281.72   795
##         Rhat
## a          1
## b[1]       1
## b[2]       1
## y_tilde    1
## lp__       1
## 
## Samples were drawn using NUTS(diag_e) at Mon Oct 28 00:00:23 2019.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).

Posterior and Posterior Predictive Distribution Samples

#Pairs plot
pairs(fit_logitAR,pars=c("a","b","y_tilde"))
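
The y_tilde draws can also be summarized directly to give the model's one-step-ahead recession probability; a short sketch:

mean(rstan::extract(fit_logitAR,pars="y_tilde")$y_tilde) # Posterior predictive Pr(y[T+1]=1 | data)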

Interpretation and Conclusion

Summary

References