We still use the lalonde data from the MatchIt package and use the propensity score model that we found out in R example 5
library("MatchIt")
data("lalonde")
model <- glm(treat ~ re74 + race + married + I(re74^2) + re74:race, data = lalonde, family = "binomial")
eps <- predict(model, type = "response")
lalonde
Then we perform a first check of the weights by drawing the histogram or boxplot of the weights. At this moment, the figures are not very informative as there are units with extremely large weights.
## Calculate the raw weights (before normalization)
## Weights to estimate ATE
n.treated <- sum(lalonde$treat == 1)
n.control <- sum(lalonde$treat == 0)
weights <- ifelse(lalonde$treat == 1, 1/eps, 1/(1 - eps))
## Check the weights histogram
library(ggplot2)
temp.data <- data.frame(weights = weights, treated = as.factor(lalonde$treat))
ggplot(temp.data, aes(x = weights, fill = treated, color = treated)) +
geom_histogram(alpha = 0.5, position = "identity") +
xlab("Weights")
## We can also draw boxplots
ggplot(temp.data, aes(x = treated, y = weights, color = treated)) +
geom_boxplot()
## Need to change race (categorical) into indicators (numerical)
lalonde$black <- lalonde$race == "black"
lalonde$hispan <- lalonde$race == "hispan"
lalonde$white <- lalonde$race == "white"
## Draw love plot
love.plot = function(cov, treat, ## cov is the matrix of covariates and treat is a vector of treatment assignment
weights = rep(1, length(treat)),
plot = F)
{
## mean with normalized weights \sum w_i x_i / (\sum w_i)
treat.means <- colSums(cov[treat == 1,] * weights[treat == 1])/sum(weights[treat == 1])
treat.var <- colSums(t(t(cov[treat == 1,]) - treat.means)^2 *
weights[treat == 1])/sum(weights[treat == 1])
control.means <- colSums(cov[treat == 0,] * weights[treat == 0])/sum(weights[treat == 0])
control.var <- colSums(t(t(cov[treat == 0,]) - control.means)^2 *
weights[treat == 0])/sum(weights[treat == 0])
## the standardized mean differences for every covariate
smd <- (treat.means - control.means)/sqrt((treat.var + control.var)/2)
names(smd) <- colnames(cov)
if (plot == T) {
plot.data <- data.frame(smd = smd, covariates = names(smd))
range <- max(abs(smd))
ggplot(plot.data) + geom_point(aes(x = as.numeric(smd), y = covariates)) +
geom_vline(xintercept = 0) + xlim(-range, range) +
labs(x = 'Standardized Difference in Means')
}
return(smd)
}
colnames(lalonde)
[1] "treat" "age" "educ" "race" "married" "nodegree" "re74" "re75"
[9] "re78" "black" "hispan" "white"
raw.smd <- love.plot(lalonde[, c(2:3, 5:9, 10:12)], lalonde$treat)
weighted.smd <- love.plot(lalonde[, c(2:3, 5:9, 10:12)], lalonde$treat, weights = weights)
plot.data <- data.frame(smd = c(raw.smd, weighted.smd),
covariates = c(names(raw.smd), names(weighted.smd)),
category = c(rep("Original", length(raw.smd)), rep("IPW", length(weighted.smd))))
range <- max(abs(plot.data$smd))
ggplot(plot.data) + geom_point(aes(x = as.numeric(smd), y = covariates, color = category)) +
geom_vline(xintercept = c(-0.1, -0.05, 0, 0.05, 0.1),
linetype = c("solid", "dashed", "solid", "dashed", "solid")) +
xlim(-range, range) +
labs(x = 'Standardized Difference in Means')