Read the data and remove the two individuals that have dropped out. Focus on the outcome variable SPPB score (short physical performance battery).
data <- read.table("HOMEFOOD.tab", sep = "\t", header = T)
data <- data[!is.na(data$BMI_1), ]
data <- data[, c("group", "sex", "age", "education_2_cat", "smoker", "alcohol", "nr_of_medicines", "BMI_0", "sppb_0", "ICD10", "MMSE_0", "ISNST_0", "sppb_1")]
data
Now, let’s check balancing for pre-treatment covariates
balance_results <- sapply(2:12, function(i) {
mean_t <- mean(data[data$group == 1, i])
mean_c <- mean(data[data$group == 0, i])
sd_t <- sd(data[data$group == 1, i])
sd_c <- sd(data[data$group == 0, i])
t_stat <- (mean_t - mean_c) / sqrt(sd_t^2/sum(data$group == 1) + sd_c^2/sum(data$group == 0))
return(c(mean_t, sd_t, mean_c, sd_c, t_stat))
})
balance_results <- t(balance_results)
colnames(balance_results) <- c("treat_mean", "treat_sd", "control_mean", "control_sd", "t_stat")
rownames(balance_results) <- colnames(data)[2:12]
signif(balance_results, 2)
treat_mean treat_sd control_mean control_sd t_stat
sex 1.30 0.45 1.50 0.50 -2.100
age 83.00 6.70 82.00 6.00 1.200
education_2_cat 0.69 0.47 0.67 0.47 0.210
smoker 2.00 0.19 1.90 0.30 1.200
alcohol 1.60 0.49 1.50 0.50 0.790
nr_of_medicines 12.00 4.60 12.00 4.20 -0.640
BMI_0 28.00 6.50 27.00 5.30 1.300
sppb_0 2.50 1.80 2.40 2.00 0.310
ICD10 10.00 4.90 10.00 3.80 -0.089
MMSE_0 26.00 2.70 26.00 2.90 0.420
ISNST_0 5.10 1.70 4.50 1.30 1.900
Let’s first perform Neyman’s analysis with post-stratification on sex.
## CI from Neyman
## For the female strata
tau_hat_F <- mean(data$sppb_1[data$group == 1 & data$sex == 1]) - mean(data$sppb_1[data$group == 0 & data$sex == 1])
st_F <- sd(data$sppb_1[data$group == 1 & data$sex == 1])
sc_F <- sd(data$sppb_1[data$group == 0 & data$sex == 1])
varhat_tauhat_F <- st_F^2/sum(data$group == 1 & data$sex == 1) + sc_F^2/sum(data$group == 0 & data$sex == 1)
## For the male strata
tau_hat_M <- mean(data$sppb_1[data$group == 1 & data$sex == 2]) - mean(data$sppb_1[data$group == 0 & data$sex == 2])
st_M <- sd(data$sppb_1[data$group == 1 & data$sex == 2])
sc_M <- sd(data$sppb_1[data$group == 0 & data$sex == 2])
varhat_tauhat_M <- st_M^2/sum(data$group == 1 & data$sex == 2) + sc_M^2/sum(data$group == 0 & data$sex == 2)
nF <- sum(data$sex == 1)
nM <- sum(data$sex == 2)
tau_hat <- nF/(nF+nM) * tau_hat_F + nM/(nF+nM) * tau_hat_M
sd_tau_hat <- sqrt((nF/(nF+nM))^2 * varhat_tauhat_F + (nM/(nF+nM))^2 * varhat_tauhat_M)
## 95% CI
result <- cbind(c(tau_hat_F, sqrt(varhat_tauhat_F)), c(tau_hat_M, sqrt(varhat_tauhat_M)), c(tau_hat, sd_tau_hat))
colnames(result) <- c("Female", "Male", "Combined")
rownames(result) <- c("est", "sd")
signif(result, 2)
Female Male Combined
est 0.65 1.90 1.10
sd 0.53 0.72 0.43
print(c(tau_hat- 1.96 * sd_tau_hat, tau_hat + 1.96 * sd_tau_hat))
[1] 0.263587 1.932082
Neyman’s approach without post-stratification (also a valid inference)
tau_hat <- mean(data$sppb_1[data$group == 1]) - mean(data$sppb_1[data$group == 0])
st <- sd(data$sppb_1[data$group == 1])
sc <- sd(data$sppb_1[data$group == 0])
varhat_tauhat <- st^2/sum(data$group == 1) + sc^2/sum(data$group == 0)
pval <- 2 * pnorm(abs(tau_hat)/sqrt(varhat_tauhat), lower.tail = F)
result <- c(tau_hat, sqrt(varhat_tauhat), tau_hat- 1.96 * sqrt(varhat_tauhat), tau_hat + 1.96 * sqrt(varhat_tauhat), pval)
names(result) <- c("est", "sd", "CI lower", "CI upper", "p-value")
signif(result, 2)
est sd CI lower CI upper p-value
1.000 0.420 0.220 1.900 0.013
Regression analysis that incorporate sex. We should add an interaction between sex and treatment to allow for difference in the treatment effect across sex
data$sex <- data$sex - mean(data$sex)
lm.result <- lm(sppb_1 ~ group + sex + group:sex, data = data)
library(sandwich)
summary(lm.result)
Call:
lm(formula = sppb_1 ~ group + sex + group:sex, data = data)
Residuals:
Min 1Q Median 3Q Max
-3.5789 -1.6250 0.4211 1.4211 5.0714
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2.8177 0.3017 9.340 2.77e-15 ***
group 1.0978 0.4276 2.567 0.0117 *
sex -0.3036 0.5942 -0.511 0.6105
group:sex 1.2246 0.8939 1.370 0.1737
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 2.136 on 100 degrees of freedom
Multiple R-squared: 0.07671, Adjusted R-squared: 0.04902
F-statistic: 2.77 on 3 and 100 DF, p-value: 0.04558
sd <- sqrt(vcovHC(lm.result, type = "HC2")[2, 2])
est <- summary(lm.result)$coefficients["group", "Estimate"]
pval <- 2 * pnorm(abs(est)/sd, lower.tail = F)
result <- c(est, sd, est - 1.96*sd, est + 1.96*sd, pval)
names(result) <- c("est", "sd", "CI lower", "CI upper", "p-value")
signif(result, 2)
est sd CI lower CI upper p-value
1.1000 0.4300 0.2600 1.9000 0.0099
We get the same CI as using Neyman’s approach with post-stratification
If we want to further improve efficiency, we can further incorporate pre-treatment sppb score.
data$sppb_0 <- data$sppb_0 - mean(data$sppb_0)
lm.result <- lm(sppb_1 ~ group + sex + sppb_0 + group:(sex+sppb_0), data = data)
library(sandwich)
summary(lm.result)
Call:
lm(formula = sppb_1 ~ group + sex + sppb_0 + group:(sex + sppb_0),
data = data)
Residuals:
Min 1Q Median 3Q Max
-4.5943 -1.2032 0.0681 1.4106 3.8035
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2.82523 0.26399 10.702 < 2e-16 ***
group 1.05983 0.37437 2.831 0.00563 **
sex 0.01903 0.52398 0.036 0.97110
sppb_0 0.66910 0.13444 4.977 2.76e-06 ***
group:sex 0.82426 0.78539 1.050 0.29653
group:sppb_0 -0.27133 0.19574 -1.386 0.16884
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.869 on 98 degrees of freedom
Multiple R-squared: 0.3071, Adjusted R-squared: 0.2718
F-statistic: 8.687 on 5 and 98 DF, p-value: 7.596e-07
sd <- sqrt(vcovHC(lm.result, type = "HC2")[2, 2])
est <- summary(lm.result)$coefficients["group", "Estimate"]
pval <- 2 * pnorm(abs(est)/sd, lower.tail = F)
result <- c(est, sd, est - 1.96*sd, est + 1.96*sd, pval)
names(result) <- c("est", "sd", "CI lower", "CI upper", "p-value")
signif(result, 2)
est sd CI lower CI upper p-value
1.1000 0.3700 0.3300 1.8000 0.0044