## Title: logit-simulation-1
## Author: Paul Johnson <pauljohn at ku.edu>
## Date posted: 2006-03-08
## Description: Simulates a dichotomous dependent variable, then
## estimates a logistic regression.
N <- 1000   # sample size
A <- -1     # true intercept
B <- 0.3    # true slope
x <- 1 + 10 * rnorm(N)          # predictor: Normal(mean = 1, sd = 10)
eta <- A + B * x                # linear predictor
pi <- exp(eta)/(1 + exp(eta))   # inverse logit (note: this shadows R's built-in constant pi)
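## Equivalently, plogis() is the logistic CDF, i.e. the inverse logit,
## so the line above can be checked against it:
pi2 <- plogis(eta)
all.equal(pi, pi2)   # should be TRUE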
## Here's one way
myunif <- runif(N)
y <- ifelse(myunif < pi, 1, 0)   # y = 1 with probability pi
## Here's another way that is more like R, less like
## a guy who doesn't know about R
y2 <- rbinom(N, size = 1, prob = pi)
## Here's the way my friends in Economics would do it.
y3 <- as.numeric(rlogis(N) < eta)   # latent-variable view: y = 1 when a logistic error falls below eta
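## As a sanity check, all three schemes should yield about the same
## proportion of 1s, each close to the average of the true probabilities:
c(mean(pi), mean(y), mean(y2), mean(y3))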
plot(x, y, main = bquote(eta[i] == .(A) + .(B) * x[i]))
text(0.5 * max(x), 0.5, expression(Prob(y[i] == 1) == frac(1, 1 + exp(-eta[i]))))
myglm1 <- glm(y ~ x, family = binomial(link = "logit"))
summary(myglm1)
##
## Call:
## glm(formula = y ~ x, family = binomial(link = "logit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.601 -0.469 -0.103 0.356 2.825
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.2236 0.1185 -10.3 <2e-16 ***
## x 0.3384 0.0221 15.3 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1349.97 on 999 degrees of freedom
## Residual deviance: 641.88 on 998 degrees of freedom
## AIC: 645.9
##
## Number of Fisher Scoring iterations: 6
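## The estimates (-1.22 and 0.34) land near the true values A = -1 and
## B = 0.3. A rough side-by-side check, plus the fitted probability
## curve drawn over the scatterplot made above:
cbind(true = c(A, B), estimated = coef(myglm1))
curve(plogis(coef(myglm1)[1] + coef(myglm1)[2] * x), add = TRUE, col = "red")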
termplot(myglm1)
library(effects)
## Error: there is no package called 'effects'
alleff <- all.effects(myglm1)
## Error: could not find function "all.effects"
plot(alleff)
## Error: object 'alleff' not found
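## Those three errors all trace back to one cause: the effects package
## was never installed on this machine. It is on CRAN, so (assuming an
## internet connection, and noting that current versions of the package
## export allEffects() rather than all.effects()) the intended sequence
## would be:
## install.packages("effects")
## library(effects)
## alleff <- allEffects(myglm1)
## plot(alleff)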
## Just for fun....
myglm2 <- glm(y ~ x, family = quasibinomial)
summary(myglm2)
##
## Call:
## glm(formula = y ~ x, family = quasibinomial)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.601 -0.469 -0.103 0.356 2.825
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.2236 0.1059 -11.6 <2e-16 ***
## x 0.3384 0.0198 17.1 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasibinomial family taken to be 0.7998)
##
## Null deviance: 1349.97 on 999 degrees of freedom
## Residual deviance: 641.88 on 998 degrees of freedom
## AIC: NA
##
## Number of Fisher Scoring iterations: 6
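## The dispersion estimate (about 0.80) can be extracted directly from
## the summary; for a correctly specified binomial model it should sit
## near 1:
summary(myglm2)$dispersion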
### Mixed-model flavor: an observation-level random disturbance with large variance
eta <- A + B * x + 5 * rnorm(N)   # add a disturbance u[i] ~ Normal(0, sd = 5)
pi <- exp(eta)/(1+exp(eta))
myunif <- runif(N)
y <- ifelse(myunif < pi, 1, 0)
plot(x, y, main = bquote(eta[i] == .(A) + .(B) * x[i] + u[i]))
text(0.5 * max(x), 0.5, expression(Prob(y[i] == 1) == frac(1, 1 + exp(-eta[i]))))
### Parameter estimates go to hell, as expected: the slope is attenuated toward zero.
myglm3 <- glm(y ~ x, family = binomial(link = "logit"))
summary(myglm3)
##
## Call:
## glm(formula = y ~ x, family = binomial(link = "logit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.971 -0.998 -0.610 1.086 2.066
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.30864 0.06967 -4.43 9.4e-06 ***
## x 0.08634 0.00787 10.98 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1377.4 on 999 degrees of freedom
## Residual deviance: 1229.7 on 998 degrees of freedom
## AIC: 1234
##
## Number of Fisher Scoring iterations: 4
### Why doesn't quasibinomial show more evidence of the random intercept?
myglm4 <- glm(y ~ x, family = quasibinomial)
summary(myglm4)
##
## Call:
## glm(formula = y ~ x, family = quasibinomial)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.971 -0.998 -0.610 1.086 2.066
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.30864 0.06961 -4.43 1e-05 ***
## x 0.08634 0.00786 10.99 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasibinomial family taken to be 0.9983)
##
## Null deviance: 1377.4 on 999 degrees of freedom
## Residual deviance: 1229.7 on 998 degrees of freedom
## AIC: NA
##
## Number of Fisher Scoring iterations: 4
## I am on thin ice here. The extra variance is not a separately
## identifiable effect: with ungrouped 0/1 responses there is no
## replication from which to detect extra-binomial variation, so the
## quasibinomial dispersion stays near 1. The logit likewise fixes the
## scale of the latent error (variance pi^2/3), so the noise is absorbed
## by rescaling eta, and the slope coefficient is "squished" toward
## zero in an understandable way.
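## A rough check on the squishing: with a Normal(0, sigma) disturbance
## on the linear predictor, a standard approximation (Zeger, Liang, and
## Albert, 1988) shrinks the marginal slope to about
## B / sqrt(1 + c^2 * sigma^2), with c = 16 * sqrt(3) / (15 * pi).
## (base::pi, because the constant pi was overwritten above.)
sigma <- 5
cc <- 16 * sqrt(3) / (15 * base::pi)
B / sqrt(1 + cc^2 * sigma^2)   # about 0.097, close to the 0.086 estimated above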
## TODO: I need to introduce "clustering" with the random effect
## so I can fit this with a generalized linear mixed model.
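## A sketch of that TODO, assuming the lme4 package is installed:
## share one disturbance within each of 50 clusters, then fit a
## random-intercept logit with glmer().
grp <- rep(1:50, each = N/50)             # 50 clusters of 20 observations
u <- rnorm(50, sd = 5)[grp]               # one random intercept per cluster
yc <- rbinom(N, size = 1, prob = plogis(A + B * x + u))
library(lme4)
myglmm <- glmer(yc ~ x + (1 | grp), family = binomial)
summary(myglmm)   # should recover B and a large intercept variance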