Suppose we conduct experiment with complete randomization, obtain the assignment \(W = (1,1,0,1,1,0,0,1)\), and the observed outcome for all units are \(Y^{\text{obs}} = (12, 7, 2, 7, 6, 6, 2, 7)\).
W <- c(1,1,0,1,1,0,0,1)
Yobs <- c(12, 7, 2, 7, 6, 6, 2, 7)
n <- length(W)
n1 <- sum(W)
# Under the sharp null of no causal effect
Y0 <- Y1 <- Yobs
data <- data.frame(W = W, Y0 = Y0, Y1 = Y1, Yobs = Yobs)
print(data)
## W Y0 Y1 Yobs
## 1 1 12 12 12
## 2 1 7 7 7
## 3 0 2 2 2
## 4 1 7 7 7
## 5 1 6 6 6
## 6 0 6 6 6
## 7 0 2 2 2
## 8 1 7 7 7
## observed test statistics value, here you can also replace the difference-in-means estimator with other types of test statistics
tau_obs <- mean(Yobs[W==1]) - mean(Yobs[W==0])
### get all possible assignments
treated_all <- combn(n, n1)
permuted_assignments <- apply(treated_all, 2, function(idx) {
permuted_treatment <- rep(0, n)
permuted_treatment[idx] <- 1
return(permuted_treatment)
})
# calculate test statistics for all permutations
tau_rand <- apply(permuted_assignments, 2, function(w) {
mean(data$Y1[w==1]) - mean(data$Y0[w==0])
})
hist(tau_rand, breaks = 10, main="Distribution under sharp Null of no individual causal effect", xlab="Obs Diff in Means")
hist(tau_rand, breaks = 10, main="Distribution under sharp Null of no individual causal effect", xlab="Obs Diff in Means")
abline(v = tau_obs, col = "red")
## calculate p-value
p <- mean(abs(tau_rand) >= abs(tau_obs))
print(p)
## [1] 0.03571429
# Under the sharp null of causal effect is 1
data$Y0[data$W == 1] <- data$Yobs[data$W == 1] - 1
data$Y1[data$W == 0] <- data$Yobs[data$W == 0] + 1
print(data)
## W Y0 Y1 Yobs
## 1 1 11 12 12
## 2 1 6 7 7
## 3 0 2 3 2
## 4 1 6 7 7
## 5 1 5 6 6
## 6 0 6 7 6
## 7 0 2 3 2
## 8 1 6 7 7
## recompute the distribution of Y1[w = 1] - Y0[w = 0] - 1
tau_obs <- mean(Yobs[W==1]) - mean(Yobs[W==0]) - 1
tau_rand <- apply(permuted_assignments, 2, function(w) {
mean(data$Y1[w==1]) - mean(data$Y0[w==0]) - 1
})
hist(tau_rand, breaks = 10, main="Distribution under sharp Null of causal effect = 1", xlab="Test statistics")
abline(v = tau_obs, col = "red")
p = mean(abs(tau_rand) >= abs(tau_obs))
print(p)
## [1] 0.1964286
# need to write a function that compute p-value for any tau
getPvalue <- function(tau, data) {
tau_obs <- mean(data$Yobs[data$W==1]) - mean(data$Yobs[data$W==0]) - tau
data$Y0[data$W == 1] <- data$Yobs[data$W == 1] - tau
data$Y1[data$W == 0] <- data$Yobs[data$W == 0] + tau
## recompute the distribution of Y1[w = 1] - Y0[w = 0] - tau
tau_rand <- apply(permuted_assignments, 2, function(w) {
mean(data$Y1[w==1]) - mean(data$Y0[w==0]) - tau
})
p = mean(abs(tau_rand) >= abs(tau_obs))
return(p)
}
## Compute the p-value for a seqeunce of tau
tau_seq <- seq(-15, 15, by = 1)
pvalue_seq <- sapply(tau_seq, getPvalue, data)
names(pvalue_seq) <- tau_seq
print(pvalue_seq, digits = 3)
## -15 -14 -13 -12 -11 -10 -9 -8 -7 -6 -5
## 0.0179 0.0179 0.0179 0.0179 0.0179 0.0179 0.0179 0.0179 0.0179 0.0179 0.0179
## -4 -3 -2 -1 0 1 2 3 4 5 6
## 0.0179 0.0179 0.0179 0.0179 0.0357 0.1964 0.2143 0.5179 0.8393 0.9107 0.4464
## 7 8 9 10 11 12 13 14 15
## 0.1964 0.1071 0.1071 0.0536 0.0179 0.0179 0.0179 0.0179 0.0179
A rough 95% confidence interval is [0, 11]. You may want a more refined confidence interval by finding the cutoff that is close to 0.05.
## find a better lower bound
tau_seq1 <- seq(0, 1, by = 0.1)
pvalue_seq1 <- sapply(tau_seq1, getPvalue, data)
names(pvalue_seq1) <- tau_seq1
print(pvalue_seq1, digits = 3)
## 0 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1
## 0.0357 0.0357 0.0357 0.0357 0.0357 0.0357 0.0357 0.0357 0.0357 0.0357 0.1964
## find a better upper bound
tau_seq2 <- seq(10, 11, by = 0.1)
pvalue_seq2 <- sapply(tau_seq2, getPvalue, data)
names(pvalue_seq2) <- tau_seq2
print(pvalue_seq2, digits = 3)
## 10 10.1 10.2 10.3 10.4 10.5 10.6 10.7 10.8 10.9 11
## 0.0536 0.0179 0.0179 0.0179 0.0179 0.0179 0.0179 0.0179 0.0179 0.0179 0.0179
A more refined 95% CI is [0.9, 10.1]
## Say we want M = 10000 random samples of the treatment assignments
### get M random samples of possible assignments
getPvalueMC <- function(data, tau, M = 10000) {
tau_obs <- mean(data$Yobs[data$W==1]) - mean(data$Yobs[data$W==0]) - tau
data$Y0[data$W == 1] <- data$Yobs[data$W == 1] - tau
data$Y1[data$W == 0] <- data$Yobs[data$W == 0] + tau
## randomly sample the treatment assignment vector following the complete randomization mechanism
set.seed(0)
tau_rand <- sapply(1:M, function(i) {
w <- sample(data$W)
return(mean(data$Y1[w==1]) - mean(data$Y0[w==0]) - tau)
})
p = mean(abs(tau_rand) >= abs(tau_obs))
return(p)
}
getPvalueMC(data, 0, 10000)
## [1] 0.0332