We still use the lalonde data, perform trimming as we did before. We can also remove units whose eps are outside of the range the control (for a treated unit) or outside of the range of the treated (for a control units)
library("MatchIt")
data("lalonde")
model <- glm(treat ~ re74 + race + married + I(re74^2) + re74:race, data = lalonde, family = "binomial")
eps <- predict(model, type = "response")
rm.idx <- which(eps < 0.1 | eps > 0.9)
idx.treated <- which((lalonde$treat == 1) & (eps > max(eps[lalonde$treat == 0])))
idx.control <- which((lalonde$treat == 0) & (eps < min(eps[lalonde$treat == 1])))
rm.idx <- union(rm.idx, c(idx.treated, idx.control))
lalonde <- lalonde[-rm.idx, ]
eps <- eps[-rm.idx]
lalonde
- Now, we perform sequencing splitting to find the subclasses
## First, create a data frame to store the current grouping information
temp <- data.frame(e = eps, treat = lalonde$treat,
b = 1)
t.max <- 1.96
# check whether t. stat is above t.max for first iteration
t.stat <- t.test(x = temp$e[temp$treat == 1], y = temp$e[temp$treat == 0], var.equal=T)$statistic
condition = t.stat > t.max
# minimum n.t or n.c in each block
size <- 3
size.new <- 20
# continue until all t statistics are below t.max
set.seed(0)
while(condition)
{
# calculate size in each group
# we want to not split a block if it is too small
b <- max(temp$b)
ignore <- sapply(1:b, function(j) {
n.t <- sum(temp$treat == 1 & temp$b == j)
n.c <- sum(temp$treat == 0 & temp$b == j)
return(n.t < size | n.c < size | (n.t + n.c < size.new * 2))
})
# split unbalanced blocks into more blocks
split <- which((abs(t.stat) > t.max) & (!ignore))
if(length(split) == 0)
break
## we need to keep a current copy of the block information and which block to ignore as block assignments are going to change later
b.current <- temp$b
for(j in split)
{
cutoff <- median(temp$e[b.current == j])
## We split units into two new blocks
## extract the index of units belonging to each new stratum
idx.s <- which(b.current == j & temp$e < cutoff)
idx.l <- which(b.current == j & temp$e > cutoff)
## randomly put half of the ties into one category
idx.e <- sample(which(temp$e == cutoff & b.current == j))
n.tie <- length(idx.e)
if (n.tie >= 1) {
if (n.tie > 1)
idx.s <- c(idx.s, idx.e[1:round(n.tie/2)])
idx.l <- c(idx.l, idx.e[(round(n.tie/2)+ 1):n.tie])
}
## we split only when new stratum has at least size number of control/treated units
if (sum(temp$treat[idx.s]==1) > size && sum(temp$treat[idx.s]==0) > size &&
sum(temp$treat[idx.l]==1) > size && sum(temp$treat[idx.l]==0) > size) {
# anything above the current will have to be moved up 1
temp$b[b.current > j] <- temp$b[b.current > j] + 1
## the upper new stratum will also have the block idx added by 1
temp$b[idx.l] <- temp$b[idx.l] + 1
}
## We don't do anything if we do not want to split
}
# calculate t statistic for each block
b <- max(temp$b)
t.stat <- sapply(1:b, function(j) {
t.test(x = temp$e[temp$treat == 1 & temp$b == j],
y = temp$e[temp$treat == 0 & temp$b == j], var.equal=T)$statistic
})
## Update condition
# check whether ANY blocks are above t.max
# AND are not too small
condition <- any(abs(t.stat) > t.max)
}
lalonde$blocks <- temp$b
table(lalonde[, c("treat", "blocks")])
blocks
treat 1 2 3 4 5
0 72 36 18 7 67
1 7 12 8 19 130
## check the range of estimated propensity scores
sapply(1:max(lalonde$blocks), function(j) range(temp$e[temp$b == j]))
[,1] [,2] [,3] [,4] [,5]
[1,] 0.1011357 0.1451386 0.2376577 0.4672575 0.5555612
[2,] 0.1451386 0.2376577 0.4567325 0.5555612 0.7043876
- Next, we treat the data as from a stratified randomized experiment and use Neyman’s estimator
## CI from Neyman
est_per_block <- sapply(1:max(lalonde$blocks), function(idx) {
Yt <- lalonde$re78[lalonde$blocks == idx & lalonde$treat == 1]
Yc <- lalonde$re78[lalonde$blocks == idx & lalonde$treat == 0]
tau_hat <- mean(Yt) - mean(Yc)
st <- sd(Yt)
sc <- sd(Yc)
varhat_tauhat <- st^2/length(Yt) + sc^2/length(Yc)
return(c(tau_hat, varhat_tauhat))
})
colnames(est_per_block)<- 1:max(lalonde$blocks)
rownames(est_per_block) <- c("mean", "var")
est_per_block
1 2 3 4 5
mean 2252.462 -1206.69 3752.993 -980.4199 1318.621
var 7019811.047 2558729.60 11917739.160 12153089.3809 1021087.344
wts <- table(lalonde$blocks)/length(lalonde$blocks)
tau_hat <- sum(est_per_block[1, ] * wts)
sd_tau_hat <- sqrt(sum(est_per_block[2, ] * wts^2))
result <- c(tau_hat, sd_tau_hat, c(tau_hat- 1.96 * sd_tau_hat, tau_hat + 1.96 * sd_tau_hat))
names(result) <- c("est", "sd", "CI_lower", "CI_upper")
result
est sd CI_lower CI_upper
1201.8050 864.2807 -492.1852 2895.7952
This result is very close to what we get from IPW.
LS0tCnRpdGxlOiAiUiBFeGFtcGxlIDc6IFN1YmNsYXNzaWZpY2F0aW9uIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgpXZSBzdGlsbCB1c2UgdGhlIGxhbG9uZGUgZGF0YSwgcGVyZm9ybSB0cmltbWluZyBhcyB3ZSBkaWQgYmVmb3JlLiBXZSBjYW4gYWxzbyByZW1vdmUgdW5pdHMgd2hvc2UgZXBzIGFyZSBvdXRzaWRlIG9mIHRoZSByYW5nZSB0aGUgY29udHJvbCAoZm9yIGEgdHJlYXRlZCB1bml0KSBvciBvdXRzaWRlIG9mIHRoZSByYW5nZSBvZiB0aGUgdHJlYXRlZCAoZm9yIGEgY29udHJvbCB1bml0cykKCmBgYHtyfQpsaWJyYXJ5KCJNYXRjaEl0IikKZGF0YSgibGFsb25kZSIpCgptb2RlbCA8LSBnbG0odHJlYXQgfiByZTc0ICsgcmFjZSArIG1hcnJpZWQgKyBJKHJlNzReMikgKyByZTc0OnJhY2UsIGRhdGEgPSBsYWxvbmRlLCBmYW1pbHkgPSAiYmlub21pYWwiKQplcHMgPC0gcHJlZGljdChtb2RlbCwgdHlwZSA9ICJyZXNwb25zZSIpCgpybS5pZHggPC0gd2hpY2goZXBzIDwgMC4xIHwgZXBzID4gMC45KQppZHgudHJlYXRlZCA8LSB3aGljaCgobGFsb25kZSR0cmVhdCA9PSAxKSAmIChlcHMgPiBtYXgoZXBzW2xhbG9uZGUkdHJlYXQgPT0gMF0pKSkKaWR4LmNvbnRyb2wgPC0gd2hpY2goKGxhbG9uZGUkdHJlYXQgPT0gMCkgJiAoZXBzIDwgbWluKGVwc1tsYWxvbmRlJHRyZWF0ID09IDFdKSkpCnJtLmlkeCA8LSB1bmlvbihybS5pZHgsIGMoaWR4LnRyZWF0ZWQsIGlkeC5jb250cm9sKSkKCmxhbG9uZGUgPC0gbGFsb25kZVstcm0uaWR4LCBdCmVwcyA8LSBlcHNbLXJtLmlkeF0KbGFsb25kZQpgYGAKCgoKLSBOb3csIHdlIHBlcmZvcm0gc2VxdWVuY2luZyBzcGxpdHRpbmcgdG8gZmluZCB0aGUgc3ViY2xhc3NlcwpgYGB7cn0KIyMgRmlyc3QsIGNyZWF0ZSBhIGRhdGEgZnJhbWUgdG8gc3RvcmUgdGhlIGN1cnJlbnQgZ3JvdXBpbmcgaW5mb3JtYXRpb24KdGVtcCA8LSBkYXRhLmZyYW1lKGUgPSBlcHMsIHRyZWF0ID0gbGFsb25kZSR0cmVhdCwKICAgICAgICAgICAgICAgICAgICAgICAgICAgYiA9IDEpCgp0Lm1heCA8LSAgMS45NgojIGNoZWNrIHdoZXRoZXIgdC4gc3RhdCBpcyBhYm92ZSB0Lm1heCBmb3IgZmlyc3QgaXRlcmF0aW9uCnQuc3RhdCA8LSB0LnRlc3QoeCA9IHRlbXAkZVt0ZW1wJHRyZWF0ID09IDFdLCB5ID0gIHRlbXAkZVt0ZW1wJHRyZWF0ID09IDBdLCB2YXIuZXF1YWw9VCkkc3RhdGlzdGljCmNvbmRpdGlvbiA9IHQuc3RhdCA+IHQubWF4CiMgbWluaW11bSBuLnQgb3Igbi5jIGluIGVhY2ggYmxvY2sKc2l6ZSA8LSAzCnNpemUubmV3IDwtIDIwCgojIGNvbnRpbnVlIHVudGlsIGFsbCB0IHN0YXRpc3RpY3MgYXJlIGJlbG93IHQubWF4CnNldC5zZWVkKDApCndoaWxlKGNvbmRpdGlvbikKewogICMgY2FsY3VsYXRlIHNpemUgaW4gZWFjaCBncm91cAogICMgd2Ugd2FudCB0byBub3Qgc3BsaXQgYSBibG9jayBpZiBpdCBpcyB0b28gc21hbGwKICBiIDwtIG1heCh0ZW1wJGIpCiAgaWdub3JlIDwtIHNhcHBseSgxOmIsIGZ1bmN0aW9uKGopIHsKICAgICBuLnQgPC0gc3VtKHRlbXAkdHJlYXQgPT0gMSAmIHRlbXAkYiA9PSBqKQogICAgIG4uYyA8LSBzdW0odGVtcCR0cmVhdCA9PSAwICYgdGVtcCRiID09IGopCiAgICAgcmV0dXJuKG4udCA8IHNpemUgfCBuLmMgPCBzaXplIHwgKG4udCArIG4uYyA8IHNpemUubmV3ICogMikpCiAgfSkKICAKICAjIHNwbGl0IHVuYmFsYW5jZWQgYmxvY2tzIGludG8gbW9yZSBibG9ja3MKICBzcGxpdCA8LSB3aGljaCgoYWJzKHQuc3RhdCkgPiB0Lm1heCkgJiAoIWlnbm9yZSkpCiAgCiAgaWYobGVuZ3RoKHNwbGl0KSA9PSAwKQogICAgYnJlYWsKCiAgCiAgIyMgd2UgbmVlZCB0byBrZWVwIGEgY3VycmVudCBjb3B5IG9mIHRoZSBibG9jayBpbmZvcm1hdGlvbiBhbmQgd2hpY2ggYmxvY2sgdG8gaWdub3JlIGFzIGJsb2NrIGFzc2lnbm1lbnRzIGFyZSBnb2luZyB0byBjaGFuZ2UgbGF0ZXIKICBiLmN1cnJlbnQgPC0gdGVtcCRiCiAgCiAgZm9yKGogaW4gc3BsaXQpCiAgewogICAgCiAgICBjdXRvZmYgPC0gbWVkaWFuKHRlbXAkZVtiLmN1cnJlbnQgPT0gal0pCiAgICAjIyBXZSBzcGxpdCB1bml0cyBpbnRvIHR3byBuZXcgYmxvY2tzCiAgICAjIyBleHRyYWN0IHRoZSBpbmRleCBvZiB1bml0cyBiZWxvbmdpbmcgdG8gZWFjaCBuZXcgc3RyYXR1bQogICAgaWR4LnMgPC0gd2hpY2goYi5jdXJyZW50ID09IGogJiB0ZW1wJGUgPCBjdXRvZmYpCiAgICBpZHgubCA8LSB3aGljaChiLmN1cnJlbnQgPT0gaiAmIHRlbXAkZSA+IGN1dG9mZikKICAgICMjIHJhbmRvbWx5IHB1dCBoYWxmIG9mIHRoZSB0aWVzIGludG8gb25lIGNhdGVnb3J5CiAgICBpZHguZSA8LSBzYW1wbGUod2hpY2godGVtcCRlID09IGN1dG9mZiAmIGIuY3VycmVudCA9PSBqKSkKICAgIG4udGllIDwtIGxlbmd0aChpZHguZSkKICAgIGlmIChuLnRpZSA+PSAxKSB7CiAgICAgIGlmIChuLnRpZSA+IDEpCiAgICAgICAgaWR4LnMgPC0gYyhpZHgucywgaWR4LmVbMTpyb3VuZChuLnRpZS8yKV0pCiAgICAgIGlkeC5sIDwtIGMoaWR4LmwsIGlkeC5lWyhyb3VuZChuLnRpZS8yKSsgMSk6bi50aWVdKQogICAgfQogICAgICAKICAgICMjIHdlIHNwbGl0IG9ubHkgd2hlbiBuZXcgc3RyYXR1bSBoYXMgYXQgbGVhc3Qgc2l6ZSBudW1iZXIgb2YgY29udHJvbC90cmVhdGVkIHVuaXRzCiAgICBpZiAoc3VtKHRlbXAkdHJlYXRbaWR4LnNdPT0xKSA+IHNpemUgJiYgc3VtKHRlbXAkdHJlYXRbaWR4LnNdPT0wKSA+IHNpemUgJiYgCiAgICAgICAgc3VtKHRlbXAkdHJlYXRbaWR4LmxdPT0xKSA+IHNpemUgJiYgc3VtKHRlbXAkdHJlYXRbaWR4LmxdPT0wKSA+IHNpemUpIHsKICAgICAgIyBhbnl0aGluZyBhYm92ZSB0aGUgY3VycmVudCB3aWxsIGhhdmUgdG8gYmUgbW92ZWQgdXAgMQogICAgICB0ZW1wJGJbYi5jdXJyZW50ID4gal0gPC0gdGVtcCRiW2IuY3VycmVudCA+IGpdICsgMQogICAgICAjIyB0aGUgdXBwZXIgbmV3IHN0cmF0dW0gd2lsbCBhbHNvIGhhdmUgdGhlIGJsb2NrIGlkeCBhZGRlZCBieSAxCiAgICAgIHRlbXAkYltpZHgubF0gPC0gdGVtcCRiW2lkeC5sXSArIDEKICAgIH0KICAgICMjIFdlIGRvbid0IGRvIGFueXRoaW5nIGlmIHdlIGRvIG5vdCB3YW50IHRvIHNwbGl0CiAgfQogIAogICAjIGNhbGN1bGF0ZSB0IHN0YXRpc3RpYyBmb3IgZWFjaCBibG9jawogIGIgPC0gbWF4KHRlbXAkYikKICB0LnN0YXQgPC0gc2FwcGx5KDE6YiwgZnVuY3Rpb24oaikgewogICAgdC50ZXN0KHggPSB0ZW1wJGVbdGVtcCR0cmVhdCA9PSAxICYgdGVtcCRiID09IGpdLCAKICAgICAgICAgICAgICAgICAgICAgICAgeSA9IHRlbXAkZVt0ZW1wJHRyZWF0ID09IDAgJiB0ZW1wJGIgPT0gal0sIHZhci5lcXVhbD1UKSRzdGF0aXN0aWMKICB9KQogIAogICMjIFVwZGF0ZSBjb25kaXRpb24KICAjIGNoZWNrIHdoZXRoZXIgQU5ZIGJsb2NrcyBhcmUgYWJvdmUgdC5tYXgKICAjIEFORCBhcmUgbm90IHRvbyBzbWFsbAogIGNvbmRpdGlvbiA8LSBhbnkoYWJzKHQuc3RhdCkgPiB0Lm1heCkKfQoKbGFsb25kZSRibG9ja3MgPC0gdGVtcCRiCnRhYmxlKGxhbG9uZGVbLCBjKCJ0cmVhdCIsICJibG9ja3MiKV0pCgojIyBjaGVjayB0aGUgcmFuZ2Ugb2YgZXN0aW1hdGVkIHByb3BlbnNpdHkgc2NvcmVzCnNhcHBseSgxOm1heChsYWxvbmRlJGJsb2NrcyksIGZ1bmN0aW9uKGopIHJhbmdlKHRlbXAkZVt0ZW1wJGIgPT0gal0pKQpgYGAKCi0gTmV4dCwgd2UgdHJlYXQgdGhlIGRhdGEgYXMgZnJvbSBhIHN0cmF0aWZpZWQgcmFuZG9taXplZCBleHBlcmltZW50IGFuZCB1c2UgTmV5bWFuJ3MgZXN0aW1hdG9yCmBgYHtyfQojIyBDSSBmcm9tIE5leW1hbgplc3RfcGVyX2Jsb2NrIDwtIHNhcHBseSgxOm1heChsYWxvbmRlJGJsb2NrcyksIGZ1bmN0aW9uKGlkeCkgewogIFl0IDwtIGxhbG9uZGUkcmU3OFtsYWxvbmRlJGJsb2NrcyA9PSBpZHggJiBsYWxvbmRlJHRyZWF0ID09IDFdCiAgWWMgPC0gbGFsb25kZSRyZTc4W2xhbG9uZGUkYmxvY2tzID09IGlkeCAmIGxhbG9uZGUkdHJlYXQgPT0gMF0KICAKICB0YXVfaGF0IDwtIG1lYW4oWXQpIC0gbWVhbihZYykKCiAgc3QgPC0gc2QoWXQpCiAgc2MgPC0gc2QoWWMpCiAgdmFyaGF0X3RhdWhhdCA8LSBzdF4yL2xlbmd0aChZdCkgKyBzY14yL2xlbmd0aChZYykKICByZXR1cm4oYyh0YXVfaGF0LCB2YXJoYXRfdGF1aGF0KSkKfSkKY29sbmFtZXMoZXN0X3Blcl9ibG9jayk8LSAxOm1heChsYWxvbmRlJGJsb2NrcykKcm93bmFtZXMoZXN0X3Blcl9ibG9jaykgPC0gYygibWVhbiIsICJ2YXIiKQplc3RfcGVyX2Jsb2NrCnd0cyA8LSB0YWJsZShsYWxvbmRlJGJsb2NrcykvbGVuZ3RoKGxhbG9uZGUkYmxvY2tzKQoKdGF1X2hhdCA8LSBzdW0oZXN0X3Blcl9ibG9ja1sxLCBdICogd3RzKQpzZF90YXVfaGF0IDwtIHNxcnQoc3VtKGVzdF9wZXJfYmxvY2tbMiwgXSAqIHd0c14yKSkKICAKCnJlc3VsdCA8LSBjKHRhdV9oYXQsIHNkX3RhdV9oYXQsIGModGF1X2hhdC0gMS45NiAqIHNkX3RhdV9oYXQsIHRhdV9oYXQgKyAxLjk2ICogc2RfdGF1X2hhdCkpCm5hbWVzKHJlc3VsdCkgPC0gYygiZXN0IiwgInNkIiwgIkNJX2xvd2VyIiwgIkNJX3VwcGVyIikKcmVzdWx0CmBgYAoKVGhpcyByZXN1bHQgaXMgdmVyeSBjbG9zZSB0byB3aGF0IHdlIGdldCBmcm9tIElQVy4K