Today

Nonlinear to nonparametric methods

Local Estimation: Discrete case

Example: Education vs. log Wages (Code 1)

# Packages: "np" supplies the nonparametric estimation routines used below.
# If it is not installed yet, uncomment the next two lines to install it.
# install.packages("np",
#   repos = "http://cran.us.r-project.org")
library(np)
# "foreign" supplies read.dta() for reading Stata-format data
library(foreign)
# Load the wage data directly from its URL
wagedat <- read.dta(
  "http://fmwww.bc.edu/ec-p/data/wooldridge/wage1.dta"
)
# Nonparametric regression of log wages on education,
# using the local average estimator: education enters as an
# ordered (discrete) regressor. See the np help files for syntax.
wagebw <- npregbw(
  xdat = ordered(wagedat$educ),
  ydat = wagedat$lwage
)

Example: Education vs. log Wages (Code 2)

# Human-readable axis labels for the fitted object
wagebw$ynames <- "Log Wage"
wagebw$xnames <- "Years of Education"
# Draw the fit together with asymptotic standard error estimates
plot(
  wagebw,
  main = "Wages vs Education",
  plot.errors.method = "asymptotic"
)
# Overlay the raw observations as small blue points
with(wagedat, points(educ, lwage, cex = 0.2, col = "blue"))

Example: Education vs. log Wages again

Local Estimation: Continuous Case

Principle: Bias-Variance Tradeoff

Local Linear Regression

Example: Wages and Education, Smoothed (Code 1)

# Estimate using package "np", one of many R packages.
# Local constant estimator (regtype = "lc").
# Use an arbitrary width h = 2 instead of the computer-supplied one
#   (bws = c(2), bandwidth.compute = FALSE).
# To estimate by the formula above, use ckertype = "uniform".
localconstantwage <- npregbw(ydat = wagedat$lwage,
  xdat = wagedat$educ, regtype = "lc", bws = c(2),
  bandwidth.compute = FALSE, ckertype = "uniform")
# Local linear estimator:
# same settings as above, but local linear (regtype = "ll").
locallinearwage <- npregbw(ydat = wagedat$lwage,
  xdat = wagedat$educ, regtype = "ll", bws = c(2),
  bandwidth.compute = FALSE, ckertype = "uniform")

Example: Wages and Education, Smoothed (Code 2)

# Human-readable axis labels for the local constant fit
localconstantwage$ynames <- "Log Wage"
localconstantwage$xnames <- "Years of Education"
# Draw the fitted curve, then overlay the raw data as small blue points
plot(
  localconstantwage,
  main = "Wage vs Education: Local Constant Estimator, h=2"
)
with(wagedat, points(educ, lwage, cex = 0.2, col = "blue"))

Example: Wages and Education, Smoothed (Code 3)

# Human-readable axis labels for the local linear fit
locallinearwage$ynames <- "Log Wage"
locallinearwage$xnames <- "Years of Education"
# Draw the fitted curve, then overlay the raw data as small blue points
plot(
  locallinearwage,
  main = "Wage vs Education: Local Linear Estimator, h=2"
)
with(wagedat, points(educ, lwage, cex = 0.2, col = "blue"))

Example: Wages and Education, Smoothed

Example: Wages and Education, Smoothed

Choosing the “bandwidth”

Correct Rate

Cross Validation

Aside: Code

Example with cross-validated bandwidth (Code)

# Local linear estimator with cross-validated bandwidth.
# No bws / bandwidth.compute arguments are given: cross-validation is
# the default, so no extra command is needed to select the bandwidth.
cvwagereg <- npregbw(ydat = wagedat$lwage,
  xdat = wagedat$educ, regtype = "ll",
  ckertype = "uniform")
# Display results
summary(cvwagereg)

Example with cross-validated bandwidth

## 
## Regression Data (526 observations, 1 variable(s)):
## 
## Regression Type: Local-Linear
## Bandwidth Selection Method: Least Squares Cross-Validation
## Bandwidth Type: Fixed
## Objective Function Value: 0.2228431 (achieved on multistart 1)
## 
## Exp. Var. Name: wagedat$educ Bandwidth: 1.020967 Scale Factor: 2.410932
## 
## Continuous Kernel Type: Uniform
## No. Continuous Explanatory Vars.: 1
## Estimation Time: 0.349 seconds

Example: Wages and Education, Optimally Smoothed (Code)

# Human-readable axis labels for the cross-validated fit
cvwagereg$ynames <- "Log Wage"
cvwagereg$xnames <- "Years of Education"
# Draw the fitted curve, then overlay the raw data as small blue points
plot(
  cvwagereg,
  main = "Wage vs Education: Local Linear Estimator, optimal h"
)
with(wagedat, points(educ, lwage, cex = 0.2, col = "blue"))

Example: Wages and Education, Optimally Smoothed

Kernels

Kernel Regression

Multivariate Nonparametric Regression

Curse of Dimensionality

Example: Wages vs Education, Experience, and Tenure (Code 1)

# Local linear estimator (regtype = "ll") of log wage on three
# regressors: education, experience, and tenure.
# Cross-validation is the default bandwidth selector, so no extra
# command is needed.
multiplenpreg <- npregbw(formula = wagedat$lwage ~
  wagedat$educ + wagedat$exper +
  wagedat$tenure, regtype = "ll")

Example: Wages vs Education, Experience, and Tenure (Code 2)

# Human-readable labels: response first, then one label per regressor
multiplenpreg$ynames <- "Log Wage"
multiplenpreg$xnames <- c(
  "Years of Education",
  "Years of Experience",
  "Years in Current Job"
)
# Display info; the outer parentheses also print summary()'s return value
(summary(multiplenpreg))

Example: Wages vs Education, Experience, and Tenure

## 
## Regression Data (526 observations, 3 variable(s)):
## 
## Regression Type: Local-Linear
## Bandwidth Selection Method: Least Squares Cross-Validation
## Formula: wagedat$lwage ~ wagedat$educ + wagedat$exper + wagedat$tenure
## Bandwidth Type: Fixed
## Objective Function Value: 0.1773463 (achieved on multistart 1)
## 
## Exp. Var. Name: wagedat$educ   Bandwidth: 4.203802 Scale Factor: 6.939537
## Exp. Var. Name: wagedat$exper  Bandwidth: 7.468948 Scale Factor: 1.346861
## Exp. Var. Name: wagedat$tenure Bandwidth: 6.078937 Scale Factor: 5.017483
## 
## Continuous Kernel Type: Second-Order Gaussian
## No. Continuous Explanatory Vars.: 3
## Estimation Time: 21.427 seconds
## NULL

Wage vs Education, Experience, Tenure at median of others (Code)

# Plot the multivariate fit using the plot method's default settings
plot(
  multiplenpreg,
  main = "Wage vs Covariates"
)

Wage vs Education, Experience, Tenure at median of others

Summary

Next Class

Bonus Slide: Alternative: Series Regression

Bonus Slide: Higher Order Regression