Alligators <- read.table("Alligators.dat", header = T)
Alligators
This is a dataset with grouped multinomial responses.
We can use a Baseline-category logit model. Using the first category as the baseline, we have
library(VGAM)
## Loading required package: stats4
## Loading required package: splines
fit <- vglm(formula = cbind(y2, y3, y4, y5, y1) ~ size + factor(lake), family = multinomial, data = Alligators)
summary(fit)
##
## Call:
## vglm(formula = cbind(y2, y3, y4, y5, y1) ~ size + factor(lake),
## family = multinomial, data = Alligators)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept):1 -3.2074 0.6387 -5.021 5.13e-07 ***
## (Intercept):2 -2.0718 0.7067 -2.931 0.003373 **
## (Intercept):3 -1.3980 0.6085 -2.297 0.021601 *
## (Intercept):4 -1.0781 0.4709 -2.289 0.022061 *
## size:1 1.4582 0.3959 3.683 0.000231 ***
## size:2 -0.3513 0.5800 -0.606 0.544786
## size:3 -0.6307 0.6425 -0.982 0.326296
## size:4 0.3316 0.4482 0.740 0.459506
## factor(lake)2:1 2.5956 0.6597 3.934 8.34e-05 ***
## factor(lake)2:2 1.2161 0.7860 1.547 0.121824
## factor(lake)2:3 -1.3483 1.1635 -1.159 0.246529
## factor(lake)2:4 -0.8205 0.7296 -1.125 0.260713
## factor(lake)3:1 2.7803 0.6712 4.142 3.44e-05 ***
## factor(lake)3:2 1.6925 0.7804 2.169 0.030113 *
## factor(lake)3:3 0.3926 0.7818 0.502 0.615487
## factor(lake)3:4 0.6902 0.5597 1.233 0.217511
## factor(lake)4:1 1.6584 0.6129 2.706 0.006813 **
## factor(lake)4:2 -1.2428 1.1854 -1.048 0.294466
## factor(lake)4:3 -0.6951 0.7813 -0.890 0.373608
## factor(lake)4:4 -0.8262 0.5575 -1.482 0.138378
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Names of linear predictors: log(mu[,1]/mu[,5]), log(mu[,2]/mu[,5]),
## log(mu[,3]/mu[,5]), log(mu[,4]/mu[,5])
##
## Residual deviance: 17.0798 on 12 degrees of freedom
##
## Log-likelihood: -47.5138 on 12 degrees of freedom
##
## Number of Fisher scoring iterations: 5
##
## Warning: Hauck-Donner effect detected in the following estimate(s):
## '(Intercept):1'
##
##
## Reference group is level 5 of the response
We can check whether we need a more complicated model by LRT test of the residual deviance
1 - pchisq(17.0798, df = 12)
## [1] 0.1466201
This indicates that interactions are not needed.
Both the lake and size main effects are needed.
library(VGAM)
fit.nosize <- vglm(formula = cbind(y2, y3, y4, y5, y1) ~ factor(lake), family = multinomial, data = Alligators)
anova(fit.nosize, fit, type = "I")
The fitted probabilities
fitted(fit)
## y2 y3 y4 y5 y1
## 1 0.09309880 0.04745657 0.070401523 0.25373963 0.5353035
## 2 0.02307168 0.07182461 0.140896287 0.19400964 0.5701978
## 3 0.60189675 0.07722761 0.008817482 0.05387208 0.2581861
## 4 0.24864518 0.19483742 0.029416085 0.06866281 0.4584385
## 5 0.51683852 0.08876722 0.035894709 0.17420051 0.1842990
## 6 0.19296122 0.20239954 0.108225068 0.20066164 0.2957525
## 7 0.41285579 0.01156654 0.029671169 0.09380245 0.4521040
## 8 0.13967784 0.02389871 0.081067366 0.09791362 0.6574425
y1: Fish, y2: Invertebrate, y3: Reptile, y4: Bird, y5: Other