Description of the Jobs II data

Job Search Intervention Study (JOBS II) is a randomized field experiment that investigates the efficacy of a job training intervention on unemployed workers. 1,801 unemployed workers received a pre-screening questionnaire and were then randomly assigned to treatment and control groups. Those in the treatment group participatedin job-skills workshops where respondents learned job-search skills and strategies. Those in the control condition received a booklet describing job-search tips. In follow-up interviews, the two key outcome variables were measured; a continuous measure of depressive symptoms based on the Hopkins Symptom Checklist,and a binary variable, representing whether the respondent had become employed.

Obtain data

## data can be downloaded at url("https://github.com/kosukeimai/mediation/blob/master/data/jobs.RData")
load("~/Downloads/jobs.RData")
jobs

Define IV, treatment, outcome and pre-treatment covariates

Z <- jobs$treat
W <- jobs$comply
Y <- jobs$job_seek

getX <- lm(treat ~ sex + age + marital + nonwhite + educ + income, data = jobs)
X <- model.matrix(getX)[, -1]
dim(X)
## [1] 899  15

CATE estimation and inference

functions for CATE estimate without adjusting for covariates

## Functions modified from Peng's book 21.2.2
IV_Wald <- function(Z, W, Y) {
  ITT_W <- mean(W[Z == 1]) - mean(W[Z == 0])
  ITT_Y <- mean(Y[Z == 1]) - mean(Y[Z == 0])
  CATE <- ITT_Y/ITT_W
  return(c(ITT_W, ITT_Y, CATE))
}

## calculate the standard error using formula
IV_Wald_sd <- function(Z, W, Y){
  est <- IV_Wald(Z, W, Y)
  var_ITTW <- var(W[Z == 1])/sum(Z) + var(W[Z == 0])/sum(1-Z)
  var_ITTY <- var(Y[Z == 1])/sum(Z) + var(Y[Z == 0])/sum(1-Z)
  cov_ITTWY <- cov(W[Z == 1], Y[Z == 1])/sum(Z) + cov(W[Z == 0], Y[Z == 0])/sum(1-Z)
  
  var_CATE <- (var_ITTY + est[3]^2 * var_ITTW - 2 * est[3] * cov_ITTWY)/est[1]^2
  return(c(est[3], sqrt(var_CATE)))
}

## calculate standard error using bootstrap
IV_Wald_bootstrap <- function(Z, W, Y, n.boot = 200){
  est <- IV_Wald(Z, W, Y)
  CATEboot <- sapply(1:n.boot, function(i) {
    id.boot <- sample(1:length(Z), replace = T)
    IV_Wald(Z[id.boot], W[id.boot], Y[id.boot])[3]
  })
  return(c(est[3], sd(CATEboot)))
}

functions for CATE estimate after adjusting for covariates

IV_linear <- function(Z, W, Y, X) {
  X <- scale(as.matrix(X))
  ITT_W <- lm(W ~ Z + X + Z*X)$coef[2]
  ITT_Y <- lm(Y ~ Z + X + Z*X)$coef[2]
  CATE <- ITT_Y/ITT_W
  return(c(ITT_W, ITT_Y, CATE))
}

IV_linear_bootstrap <- function(Z, W, Y, X, n.boot = 200){
  est <- IV_linear(Z, W, Y, X)
  CATEboot <- sapply(1:n.boot, function(i) {
    id.boot <- sample(1:length(Z), replace = T)
    IV_linear(Z[id.boot], W[id.boot], Y[id.boot], X[id.boot, ])[3]
  })
  return(c(est[3], sd(CATEboot)))
}

Apply the functions on the jobs data

## 
results <- rbind(IV_Wald_sd(Z, W, Y),
                 IV_Wald_bootstrap(Z, W, Y, n.boot = 10^3),
                 IV_linear_bootstrap(Z, W, Y, X, n.boot = 10^3))

results <- cbind(results, results[, 1]-1.96 *results[, 2], results[, 1]+1.96 *results[, 2])

row.names(results) <- c("formula" , "bootstrap" , "covariates adjustment + bootstrap" )
colnames(results) <- c ("est", "se", "lower CI", "upper CI")
round (results, 3)
##                                     est    se lower CI upper CI
## formula                           0.109 0.081   -0.050    0.268
## bootstrap                         0.109 0.081   -0.050    0.268
## covariates adjustment + bootstrap 0.118 0.083   -0.044    0.280

Evaluate the strength of the IV

est <- IV_Wald(Z, W, Y)
names(est) <- c("ITT_W", "ITT_Y", "CATE")
est
##      ITT_W      ITT_Y       CATE 
## 0.62000000 0.06745002 0.10879036
summary(lm(W ~ Z))
## 
## Call:
## lm(formula = W ~ Z)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
##  -0.62  -0.62   0.00   0.38   0.38 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -4.740e-16  2.296e-02    0.00        1    
## Z            6.200e-01  2.810e-02   22.06   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.397 on 897 degrees of freedom
## Multiple R-squared:  0.3518, Adjusted R-squared:  0.351 
## F-statistic: 486.8 on 1 and 897 DF,  p-value: < 2.2e-16