Analysis of a Two by Two Table

2002-04-06

Consider the following table:

                   Factor B
                   Y      N
Factor A     Y    23     77
             N    49     51


Two by Two Table - Log Linear Analysis

> twotwoll<-data.frame(count=c(23,49,77,51),
A=factor(c("Y","N","Y","N")),B=factor(c("Y","Y","N","N")))
 
> twotwoll
  count A B
1    23 Y Y
2    49 N Y
3    77 Y N
4    51 N N
 
> fitttll<-glm(count~A*B,data=twotwoll,family=poisson(link=log))
 
> anova(fitttll,test="Chisq")
Analysis of Deviance Table
 
Model: poisson, link: log
 
Response: count
 
Terms added sequentially (first to last)
 
 
     Df Deviance Resid. Df Resid. Dev P(>|Chi|)
NULL                     3    30.8142          
A     1   0.0000         2    30.8142    1.0000
B     1  15.8916         1    14.9226    0.0001
A:B   1  14.9226         0   4.44e-16    0.0001

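Two by Two Table - Logistic Analysis

Here Factor B is treated as the response: count is the number of cases with B = Y out of the n = 100 cases at each level of Factor A.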

> twotwolg <- data.frame(count=c(23,49),n=c(100,100),A=factor(c("Y","N")))
 
> twotwolg
  count   n A
1    23 100 Y
2    49 100 N
 
> fitttlg <- glm(cbind(count,n-count)~A, data=twotwolg, family=binomial(link=logit))
 
> anova(fitttlg, test="Chisq")
Analysis of Deviance Table
 
Model: binomial, link: logit
 
Response: cbind(count, n - count)
 
Terms added sequentially (first to last)
 
 
     Df Deviance Resid. Df Resid. Dev P(>|Chi|)
NULL                     1    14.9226          
A     1  14.9226         0  1.108e-14    0.0001

Two by Two Table - Log Linear Analysis - Fitted Models

> coef(update(fitttll,.~1))
(Intercept) 
   3.912027 
> predict(update(fitttll,.~1))
       1        2        3        4 
3.912027 3.912027 3.912027 3.912027 
> exp(predict(update(fitttll,.~1)))
       1        2        3        4 
50.00021 50.00021 50.00021 50.00021 
> coef(update(fitttll,.~A))
 (Intercept)           AY 
3.912023e+00 1.925451e-09 
> predict(update(fitttll,.~A))
       1        2        3        4 
3.912023 3.912023 3.912023 3.912023 
> exp(predict(update(fitttll,.~A)))
 1  2  3  4 
50 50 50 50 
> coef(update(fitttll,.~A+B))
  (Intercept)            AY            BY 
 4.158883e+00  6.131844e-07 -5.753613e-01 
> predict(update(fitttll,.~A+B))
       1        2        3        4 
3.583522 3.583522 4.158883 4.158883 
> exp(predict(update(fitttll,.~A+B)))
       1        2        3        4 
36.00012 36.00009 64.00002 63.99998 
> coef(fitttll)
(Intercept)          AY          BY       AY:BY 
 3.93182563  0.41197979 -0.04000533 -1.16830587 
> predict(fitttll)
       1        2        3        4 
3.135494 3.891820 4.343805 3.931826 
> exp(predict(fitttll))
 1  2  3  4 
23 49 77 51 

Two by Two Table - Logistic Analysis - Fitted Models

> invlogit <- function(x) exp(x)/(1+exp(x))
> coef(update(fitttlg,.~1))
(Intercept) 
 -0.5753641 
> predict(update(fitttlg,.~1))
[1] -0.5753641 -0.5753641
> invlogit(predict(update(fitttlg,.~1)))
[1] 0.36 0.36
> invlogit(predict(update(fitttlg,.~1)))*twotwolg$n
[1] 36 36
> coef(fitttlg)
(Intercept)          AY 
-0.04000533 -1.16830587 
> predict(fitttlg)
[1] -1.20831120 -0.04000533
> invlogit(predict(fitttlg))
[1] 0.23 0.49
> invlogit(predict(fitttlg))*twotwolg$n
[1] 23 49

Raw Data - Logistic Analysis

> twotworaw<-data.frame(y=c(rep(1,72),rep(0,128)),
A=c(rep("Y",23),rep("N",49),rep("Y",77),rep("N",51)))

Here, I have printed out only the first 30 of 200 lines of data:

> twotworaw[1:30,]
   y A
1  1 Y
2  1 Y
3  1 Y
4  1 Y
5  1 Y
6  1 Y
7  1 Y
8  1 Y
9  1 Y
10 1 Y
11 1 Y
12 1 Y
13 1 Y
14 1 Y
15 1 Y
16 1 Y
17 1 Y
18 1 Y
19 1 Y
20 1 Y
21 1 Y
22 1 Y
23 1 Y
24 1 N
25 1 N
26 1 N
27 1 N
28 1 N
29 1 N
30 1 N
 
> fitttr <- glm(cbind(y,1-y)~A, data=twotworaw, family=binomial(link=logit))
 
> anova(fitttr)
Analysis of Deviance Table
 
Model: binomial, link: logit
 
Response: cbind(y, 1 - y)
 
Terms added sequentially (first to last)
 
 
      Df Deviance Resid. Df Resid. Dev
NULL                    199    261.367
A      1   14.923       198    246.445

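Note that the residual deviance cannot reach 0 when the data are entered in this form: each of the 200 rows is a single 0/1 response, so the model with A alone still leaves a residual deviance of 246.445 on 198 degrees of freedom. The deviance explained by A (14.923) is nevertheless the same as in the grouped logistic analysis above.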


Statistics 4P03/6P03