## Paul Johnson
## 2014-05-02
## Understanding predicted values and creating confidence
## interval for logistic regressions via glm.
## Ever notice that predict for lm has argument "interval"
## but predict for glm does not?
## For a long time, I wondered about what to do about that. How get
## confidence intervals on predicted values from glm regressions? The
## predict.glm method has no argument interval="conf" or such. I've
## been checking into that. Apparently, there are many competing
## methods for creating confidence intervals on predicted values. Some
## programs simply ignore the controversy, asserting that one
## particular method ought to be used. Others provide a laundry list
## of methods. Others, avoid it. I'll make a list, add to the
## end of this code. Will do later.
## Step one is to understand what predict.lm does, then see
## what we can do about predict.glm.
## make up some data suitable for logistic regression.
B0 = -3.9
B1 = 0.08
x <- sort(rnorm(400, m = 50, sd = 10))
eta <- B0 + B1 * x
pr <- 1/(1 + exp(-eta))
y <- rbinom(400, p = pr, size = 1)
dat <- data.frame(x = x, y = y, eta = eta)
rm(x, y, eta) ## clean up!
plot(pr ~ eta, data = dat, main = "Underlying Nonlinear Model", type = "l", ylim = c(0,1))
points(y ~ eta, data = dat)
m1 <- lm(y ~ x, data = dat)
plot(y ~ x, data = dat, main = "Observed Values With Linear Model")
abline(m1, col = "red")
library(rockchalk)
## Loading required package: MASS
## Loading required package: car
## Loading required package: lme4
## Loading required package: Matrix
## Loading required package: methods
## Loading required package: Rcpp
##
## Attaching package: 'rockchalk'
##
## The following object is masked from 'package:MASS':
##
## mvrnorm
nd <- newdata(m1, predVals = list(x = plotSeq(dat$x, 20)))
nd[ , c("m1.fit", "m1.lwr", "m1.upr")] <- predict(m1, newdata = nd, type = "response", interval = "conf")
m2a <- glm(y ~ x, data = dat, family = binomial(link = "logit"))
plotCurves(m2a, plotx = "x", interval = "confidence", main = "Logit")
## rockchalk:::predCI: model's predict method does not return an interval. We will improvize with a Wald type approximation to the confidence interval
m2b <- glm(y ~ x, data = dat, family = binomial(link = "probit"))
plotCurves(m2b, plotx = "x", interval = "confidence", main = "Probit")
## rockchalk:::predCI: model's predict method does not return an interval. We will improvize with a Wald type approximation to the confidence interval
outreg(list("OLS" = m1, "Logit" = m2a, "Probit" = m2b), type = "html", showAIC = TRUE)
## [1] "whew, there is a DF"
## [1] "whew, there is a DF"
## [1] "whew, there is a DF"
##
## We are launching a browser to view that html file, which we have temporarily
## saved in /tmp/RtmpDXr19W/file5a98fa67f95.html
## You can copy that temp file, or create
## one of your own with R's cat function. Like this:
## myreg <-
## outreg(modelList = list(OLS = m1, Logit = m2a, Probit = m2b),
## type = "html", showAIC = TRUE)
##
## cat(myreg, file = "reg.html")
## Then open reg.html in a word processor or web browser.
m2alink <- predict(m2a, newdata = nd, type = "link", se.fit = TRUE)
m2alink <- as.data.frame(m2alink)
colnames(m2alink) <- paste("m2alink.", colnames(m2alink), sep ="")
nd <- cbind( nd, m2alink)
plot(m2alink.fit ~ x, data = nd, type = "l", main = "The Linear Predictor")
curve(B0 + B1 * x, from = min(nd$x), to = max(nd$x), lty = 2, add = TRUE)
legend("bottomright", legend = c("True", "Estimated"), lty = 1:2)
m2aresp <- predict(m2a, newdata = nd, type = "response", se.fit = TRUE)
m2aresp <- as.data.frame(m2aresp)
colnames(m2aresp) <- paste("m2aresp.", colnames(m2aresp), sep ="")
nd <- cbind(nd, m2aresp)
plot(y ~ x, data = dat, col = gray(.50), main = "The Observed Data and Predicted Values")
points(pr ~ x, data = dat, col = gray(.70), cex = 0.8, pch = 3)
lines(m2aresp.fit ~ x, data = nd)
legend("right", legend = c("Observed Values", "True Probabilities", "Predicted Prob. (solid line)"), col = c(gray(.70), gray(.50), 0), pch = c(1,3,0), bg = "white")
##May help to look at that:
View(nd)
plot(m2aresp.fit ~ m2alink.fit, data = nd)