Job Search Intervention Study (JOBS II) is a randomized field experiment that investigates the efficacy of a job training intervention on unemployed workers. 1,801 unemployed workers received a pre-screening questionnaire and were then randomly assigned to treatment and control groups. Those in the treatment group participatedin job-skills workshops where respondents learned job-search skills and strategies. Those in the control condition received a booklet describing job-search tips. In follow-up interviews, the two key outcome variables were measured; a continuous measure of depressive symptoms based on the Hopkins Symptom Checklist,and a binary variable, representing whether the respondent had become employed.
Key covariates
treat: Indicator variable for whether participant was randomly selected for the JOBS II training program. 1 = assignment to participation.
comply: Indicator variable for whether participant actually participated in the JOBS II program. 1 = participation.
job-seek: A continuous scale measuring the level of job-search self-efficacy with values from 1 to 5
sex: Indicator variable for sex. 1 = female
age: Age in years.
marital: Factor with five categories for marital status.
nonwhite: Indicator variable for race. 1 = nonwhite.
educ: Factor with five categories for educational attainment.
income: Factor with five categories for level of income.
## data can be downloaded at url("https://github.com/kosukeimai/mediation/blob/master/data/jobs.RData")
load("~/Downloads/jobs.RData")
jobs
Z <- jobs$treat
W <- jobs$comply
Y <- jobs$job_seek
getX <- lm(treat ~ sex + age + marital + nonwhite + educ + income, data = jobs)
X <- model.matrix(getX)[, -1]
dim(X)
## [1] 899 15
## Functions modified from Peng's book 21.2.2
IV_Wald <- function(Z, W, Y) {
ITT_W <- mean(W[Z == 1]) - mean(W[Z == 0])
ITT_Y <- mean(Y[Z == 1]) - mean(Y[Z == 0])
CATE <- ITT_Y/ITT_W
return(c(ITT_W, ITT_Y, CATE))
}
## calculate the standard error using formula
IV_Wald_sd <- function(Z, W, Y){
est <- IV_Wald(Z, W, Y)
var_ITTW <- var(W[Z == 1])/sum(Z) + var(W[Z == 0])/sum(1-Z)
var_ITTY <- var(Y[Z == 1])/sum(Z) + var(Y[Z == 0])/sum(1-Z)
cov_ITTWY <- cov(W[Z == 1], Y[Z == 1])/sum(Z) + cov(W[Z == 0], Y[Z == 0])/sum(1-Z)
var_CATE <- (var_ITTY + est[3]^2 * var_ITTW - 2 * est[3] * cov_ITTWY)/est[1]^2
return(c(est[3], sqrt(var_CATE)))
}
## calculate standard error using bootstrap
IV_Wald_bootstrap <- function(Z, W, Y, n.boot = 200){
est <- IV_Wald(Z, W, Y)
CATEboot <- sapply(1:n.boot, function(i) {
id.boot <- sample(1:length(Z), replace = T)
IV_Wald(Z[id.boot], W[id.boot], Y[id.boot])[3]
})
return(c(est[3], sd(CATEboot)))
}
IV_linear <- function(Z, W, Y, X) {
X <- scale(as.matrix(X))
ITT_W <- lm(W ~ Z + X + Z*X)$coef[2]
ITT_Y <- lm(Y ~ Z + X + Z*X)$coef[2]
CATE <- ITT_Y/ITT_W
return(c(ITT_W, ITT_Y, CATE))
}
IV_linear_bootstrap <- function(Z, W, Y, X, n.boot = 200){
est <- IV_linear(Z, W, Y, X)
CATEboot <- sapply(1:n.boot, function(i) {
id.boot <- sample(1:length(Z), replace = T)
IV_linear(Z[id.boot], W[id.boot], Y[id.boot], X[id.boot, ])[3]
})
return(c(est[3], sd(CATEboot)))
}
##
results <- rbind(IV_Wald_sd(Z, W, Y),
IV_Wald_bootstrap(Z, W, Y, n.boot = 10^3),
IV_linear_bootstrap(Z, W, Y, X, n.boot = 10^3))
results <- cbind(results, results[, 1]-1.96 *results[, 2], results[, 1]+1.96 *results[, 2])
row.names(results) <- c("formula" , "bootstrap" , "covariates adjustment + bootstrap" )
colnames(results) <- c ("est", "se", "lower CI", "upper CI")
round (results, 3)
## est se lower CI upper CI
## formula 0.109 0.081 -0.050 0.268
## bootstrap 0.109 0.081 -0.050 0.268
## covariates adjustment + bootstrap 0.118 0.083 -0.044 0.280
est <- IV_Wald(Z, W, Y)
names(est) <- c("ITT_W", "ITT_Y", "CATE")
est
## ITT_W ITT_Y CATE
## 0.62000000 0.06745002 0.10879036
summary(lm(W ~ Z))
##
## Call:
## lm(formula = W ~ Z)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.62 -0.62 0.00 0.38 0.38
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.740e-16 2.296e-02 0.00 1
## Z 6.200e-01 2.810e-02 22.06 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.397 on 897 degrees of freedom
## Multiple R-squared: 0.3518, Adjusted R-squared: 0.351
## F-statistic: 486.8 on 1 and 897 DF, p-value: < 2.2e-16