rq()
in library quantreg
with standard R formula syntax eg rq(y~z1+z2,tau=0.2)
npcdens
in R library np
)grf
)quantregForest
) is random forest applied to \(1\{Y_i<u\}:\ u\in\mathcal{U}\)drf
) computes the weights based on a test of identical distribution applicable to high dimensional data
#Libraries
library(quantreg) #quantile regression
library(quantregForest) #quantile random forests
library(ggplot2) #Plots
library(drf) #distributional random forests
library(Counterfactual) #Conditional counterfactual distributions
library(npcausal) #Conditional counterfactual distributions
library(haven) #Stata data
library(dplyr) #data manipulation
#Data set from Breza Kaur and Shamdasani (2021)
#In my case this is saved at
rationing<-read_dta("Data/rationing_worker.dta")
spilloversample<-dplyr::filter(rationing,surv_el==1 & sample_spillover==1 & e_manwage==1)
spilloversample$treated<-factor(spilloversample$treat)
spilloversample$peakseason<-factor(spilloversample$peak, labels=c("Off-Peak Season","Peak Season"))
ggplot(spilloversample)+facet_wrap(~peakseason)+
stat_ecdf(aes(x=dailywagetot,color=treated))+
ggtitle("Wage Distribution Response to Labor Supply Shock: Conditional CDF",
subtitle="Breza, Kaur, and Shamdasani (2021)")+
ylab("Fraction of Workers")+xlab("Daily Total Wage (Rupees)")
- Alternately represent by conditional kernel density, which applies smoothing and may improve interpretation, but is less precisely estimated due to slower rate of density vs CDF estimation
ggplot(spilloversample)+facet_wrap(~peakseason)+
geom_density(aes(x=dailywagetot,fill=treated,color=treated),alpha=0.5)+
ggtitle("Wage Distribution Response to Labor Supply Shock: Conditional Kernel Density",
subtitle="Breza, Kaur, and Shamdasani (2021)")+
ylab("Probability Density of Workers")+xlab("Daily Total Wage (Rupees)")
peaksample<-filter(spilloversample,peak==1) #Just on-peak observations
taus <- c(1:99)/100 #Estimate at 99 quantiles evenly spaced
#Control for baseline characteristics as in Table 3 Col. 3 of Breza et al (2021)
logitres<-counterfactual(ldailywagetot_w~bl_ldwagetot+bl_dwagetot_miss+bl_e_manwage+bl_e_manwage_miss+factor(round_id),
data=peaksample, group=treat, treatment=TRUE, quantiles=taus,
decomposition=TRUE, method="logit", seed=123,
printdeco=FALSE,
sepcore = TRUE,ncore=2)
#Steal plotting code from software vignette
first <- sum(as.double(taus <= .10))
last <- sum(as.double(taus <= .90))
rang <- c(first:last)
duqf_SE<- (logitres$resSE)[,1]
l.duqf_SE<- (logitres$resSE)[,3]
u.duqf_SE<- (logitres$resSE)[,4]
duqf_CE<- (logitres$resCE)[,1]
l.duqf_CE<- (logitres$resCE)[,3]
u.duqf_CE<- (logitres$resCE)[,4]
duqf_TE<- (logitres$resTE)[,1]
l.duqf_TE<- (logitres$resTE)[,3]
u.duqf_TE<- (logitres$resTE)[,4]
range_x <- min(c(min(l.duqf_SE[rang]),min(l.duqf_CE[rang]),min(l.duqf_TE[rang])))
range_y <- max(c(max(u.duqf_SE[rang]),max(u.duqf_CE[rang]),max(u.duqf_TE[rang])))
par(mfrow=c(1,3))
plot(c(0,1), range(c(range_x, range_y)), xlim = c(0,1), type = "n",
xlab = expression(tau), ylab = "Difference in Wages", cex.lab=0.75, main = "Total")
polygon(c(taus[rang],rev(taus[rang])),c(u.duqf_TE[rang], rev(l.duqf_TE[rang])), density = -100, border = F,
col="grey70",lty=1,lwd=1)
lines(taus[rang],duqf_TE[rang])
abline(h=0,lty=2)
plot(c(0,1), range(c(range_x, range_y)), xlim = c(0,1), type = "n",
xlab = expression(tau), ylab = "Difference in Wages", cex.lab=0.75, main = "Structure")
polygon(c(taus[rang],rev(taus[rang])),c(u.duqf_SE[rang], rev(l.duqf_SE[rang])), density = -100, border = F,
col="grey70",lty=1,lwd=1)
lines(taus[rang],duqf_SE[rang])
abline(h=0,lty=2)
plot(c(0,1), range(c(range_x, range_y)), xlim = c(0,1), type = "n",
xlab = expression(tau), ylab = "Difference in Wages", cex.lab=0.75, main = "Composition")
polygon(c(taus[rang],rev(taus[rang])),c(u.duqf_CE[rang], rev(l.duqf_CE[rang])), density = -100, border = F,
col="grey70",lty=1,lwd=1)
lines(taus[rang],duqf_CE[rang])
abline(h=0,lty=2)
xyz<-data.frame(peaksample$ldailywagetot_w,peaksample$treat,peaksample$bl_ldwagetot,peaksample$bl_dwagetot_miss,peaksample$bl_e_manwage,peaksample$bl_e_manwage_miss,peaksample$round_id)
xyzclean<-as.matrix(filter(xyz,is.na(xyz)==FALSE))
xcovariates<-xyzclean[,3:7]
treat<-xyzclean[,2]
lwage<-xyzclean[,1]
set.seed(1234)
cdensity(lwage,treat,xcovariates,kmax=10,progress_updates=FALSE)
## parameter est se ci.ll ci.ul
## 1 L2 distance 0.1637761 0.05830747 0.04949343 0.2780587