## Title: logit-simulation-1
## Author: Paul Johnson <pauljohn at ku.edu>
## Date posted: 2006-03-08
## Description: Simulates a dichotomous dependent variable, then
## estimates a logistic regression.
N <- 1000   # sample size
A <- -1     # true intercept
B <- 0.3    # true slope
x <- 1 + 10 * rnorm(N)          # predictor: Normal(mean = 1, sd = 10)
eta <- A + B * x                # linear predictor
pi <- exp(eta)/(1 + exp(eta))   # inverse logit (note: this shadows R's built-in constant pi)
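## Equivalently, plogis() is the logistic CDF, i.e. the inverse logit,
## so the line above can be checked against it:
pi2 <- plogis(eta)
all.equal(pi, pi2)   # should be TRUE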
## Here's one way
myunif <- runif(N)
y <- ifelse(myunif < pi, 1, 0)   # y = 1 with probability pi
## Here's another way that is more like R, less like
## a guy who doesn't know about R
y2 <- rbinom(N, size = 1, prob = pi)
## Here's the way my friends in Economics would do it.
y3 <- as.numeric(rlogis(N) < eta)   # latent-variable view: y = 1 when a logistic error falls below eta
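## As a sanity check, all three schemes should yield about the same
## proportion of 1s, each close to the average of the true probabilities:
c(mean(pi), mean(y), mean(y2), mean(y3))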
plot(x, y, main = bquote(eta[i] == .(A) + .(B) * x[i]))
text(0.5 * max(x), 0.5, expression(Prob(y[i] == 1) == frac(1, 1 + exp(-eta[i]))))
myglm1 <- glm(y ~ x, family = binomial(link = "logit"))
summary(myglm1)
##
## Call:
## glm(formula = y ~ x, family = binomial(link = "logit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.601 -0.469 -0.103 0.356 2.825
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.2236 0.1185 -10.3 <2e-16 ***
## x 0.3384 0.0221 15.3 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1349.97 on 999 degrees of freedom
## Residual deviance: 641.88 on 998 degrees of freedom
## AIC: 645.9
##
## Number of Fisher Scoring iterations: 6
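## The estimates (-1.22 and 0.34) land near the true values A = -1 and
## B = 0.3. A rough side-by-side check, plus the fitted probability
## curve drawn over the scatterplot made above:
cbind(true = c(A, B), estimated = coef(myglm1))
curve(plogis(coef(myglm1)[1] + coef(myglm1)[2] * x), add = TRUE, col = "red")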
termplot(myglm1)
library(effects)
## Error: there is no package called 'effects'
alleff <- all.effects(myglm1)
## Error: could not find function "all.effects"
plot(alleff)
## Error: object 'alleff' not found
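## Those three errors all trace back to one cause: the effects package
## was never installed on this machine. It is on CRAN, so (assuming an
## internet connection, and noting that current versions of the package
## export allEffects() rather than all.effects()) the intended sequence
## would be:
## install.packages("effects")
## library(effects)
## alleff <- allEffects(myglm1)
## plot(alleff)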
## Just for fun....
myglm2 <- glm(y ~ x, family = quasibinomial)
summary(myglm2)
##
## Call:
## glm(formula = y ~ x, family = quasibinomial)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.601 -0.469 -0.103 0.356 2.825
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.2236 0.1059 -11.6 <2e-16 ***
## x 0.3384 0.0198 17.1 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasibinomial family taken to be 0.7998)
##
## Null deviance: 1349.97 on 999 degrees of freedom
## Residual deviance: 641.88 on 998 degrees of freedom
## AIC: NA
##
## Number of Fisher Scoring iterations: 6
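## The dispersion estimate (about 0.80) can be extracted directly from
## the summary; for a correctly specified binomial model it should sit
## near 1:
summary(myglm2)$dispersion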
### Mixed-model flavor: an observation-level random disturbance with large variance
eta <- A + B * x + 5 * rnorm(N)   # add a disturbance u[i] ~ Normal(0, sd = 5)
pi <- exp(eta)/(1+exp(eta))
myunif <- runif(N)
y <- ifelse(myunif < pi, 1, 0)
plot(x, y, main = bquote(eta[i] == .(A) + .(B) * x[i] + u[i]))
text(0.5 * max(x), 0.5, expression(Prob(y[i] == 1) == frac(1, 1 + exp(-eta[i]))))
### Parameter estimates go to hell, as expected: the slope is attenuated toward zero.
myglm3 <- glm(y ~ x, family = binomial(link = "logit"))
summary(myglm3)
##
## Call:
## glm(formula = y ~ x, family = binomial(link = "logit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.971 -0.998 -0.610 1.086 2.066
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.30864 0.06967 -4.43 9.4e-06 ***
## x 0.08634 0.00787 10.98 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1377.4 on 999 degrees of freedom
## Residual deviance: 1229.7 on 998 degrees of freedom
## AIC: 1234
##
## Number of Fisher Scoring iterations: 4
### Why doesn't quasibinomial show more evidence of the random intercept?
myglm4 <- glm(y ~ x, family = quasibinomial)
summary(myglm4)
##
## Call:
## glm(formula = y ~ x, family = quasibinomial)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.971 -0.998 -0.610 1.086 2.066
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.30864 0.06961 -4.43 1e-05 ***
## x 0.08634 0.00786 10.99 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasibinomial family taken to be 0.9983)
##
## Null deviance: 1377.4 on 999 degrees of freedom
## Residual deviance: 1229.7 on 998 degrees of freedom
## AIC: NA
##
## Number of Fisher Scoring iterations: 4
## I am on thin ice here. The extra variance is not a separately
## identifiable effect: with ungrouped 0/1 responses there is no
## replication from which to detect extra-binomial variation, so the
## quasibinomial dispersion stays near 1. The logit likewise fixes the
## scale of the latent error (variance pi^2/3), so the noise is absorbed
## by rescaling eta, and the slope coefficient is "squished" toward
## zero in an understandable way.
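## A rough check on the squishing: with a Normal(0, sigma) disturbance
## on the linear predictor, a standard approximation (Zeger, Liang, and
## Albert, 1988) shrinks the marginal slope to about
## B / sqrt(1 + c^2 * sigma^2), with c = 16 * sqrt(3) / (15 * pi).
## (base::pi, because the constant pi was overwritten above.)
sigma <- 5
cc <- 16 * sqrt(3) / (15 * base::pi)
B / sqrt(1 + cc^2 * sigma^2)   # about 0.097, close to the 0.086 estimated above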
## TODO: I need to introduce "clustering" with the random effect
## so I can fit this with a generalized linear mixed model.
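## A sketch of that TODO, assuming the lme4 package is installed:
## share one disturbance within each of 50 clusters, then fit a
## random-intercept logit with glmer().
grp <- rep(1:50, each = N/50)             # 50 clusters of 20 observations
u <- rnorm(50, sd = 5)[grp]               # one random intercept per cluster
yc <- rbinom(N, size = 1, prob = plogis(A + B * x + u))
library(lme4)
myglmm <- glmer(yc ~ x + (1 | grp), family = binomial)
summary(myglmm)   # should recover B and a large intercept variance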