## Paul Johnson
## 2013-04-01
## regression-doublelog-1.R
## Question. What do you get when you "log both sides" of a
## regression model?
## In economics and biology, the double-log model is very
## common. It describes an interactive, multiplicative process.
##
## With just one predictor, the theory is
## y = b0 * x^b1 * exp(e)
## where I wish I could add subscripts on y, x, and e.
## Using the rules of logs, that simplifies to the linear
## model. If there were predictors x1,x2, and so forth,
## they would be added as additional multiplicative terms
## with exponents.
## Recall the log laws
## 1. log(x^b1) = b1 * log(x)
## 2. log(c*d) = log(c) + log(d)
set.seed(123123)
dat <- data.frame( x = runif(1000))
## The 3 parameters are b0, b1, and stde
b0 <- 0.1
b1 <- 1.2
stde <- 2
dat$y <- b0 * dat$x^(b1) * exp(rnorm(1000, m = 0, s = stde))
plot(y ~ x, data = dat, main="No Apparent Relationship?")
## Interesting. Looks like nothing.
m0 <- lm(y ~ x, data = dat)
summary(m0)
##
## Call:
## lm(formula = y ~ x, data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.73 -0.40 -0.16 0.01 48.36
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.102 0.113 -0.90 0.37
## x 0.840 0.197 4.26 2.2e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.8 on 998 degrees of freedom
## Multiple R-squared: 0.0179, Adjusted R-squared: 0.0169
## F-statistic: 18.2 on 1 and 998 DF, p-value: 2.21e-05
## One of the usual transformations we try is the log, which
## is justified either by idea of changing a "skewed" variable to
## a more normal one, or by the desire to fit the interactive
## model above.
## Test out plot's built-in antilogger
plot(y ~ x, data = dat, log = "xy")
## Better fix the labels, urgently
plot(y ~ x, data = dat, log = "xy", xlab = "log(x)", ylab = "log(y)")
## Previous same as this, this is more usual to the way I would do this:
plot(log(y) ~ log(x), data = dat)
m1 <- lm(log(y) ~ log(x), data = dat)
summary(m1)
##
## Call:
## lm(formula = log(y) ~ log(x), data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.157 -1.393 0.071 1.349 6.308
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.3610 0.0905 -26.1 <2e-16 ***
## log(x) 1.1979 0.0634 18.9 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.01 on 998 degrees of freedom
## Multiple R-squared: 0.263, Adjusted R-squared: 0.263
## F-statistic: 357 on 1 and 998 DF, p-value: <2e-16
abline(m1, col = "red")
## The challenge of interpretation is to get predicted
## values in a scale that interests us.
## I notice termplot does the same thing that rockchalk::plotCurves
## will do
termplot(m1, partial = TRUE, se = TRUE)
library(rockchalk)
## Loading required package: MASS
## Loading required package: car
## Loading required package: lme4
## Loading required package: Matrix
## Loading required package: methods
## Loading required package: Rcpp
##
## Attaching package: 'rockchalk'
##
## The following object is masked from 'package:MASS':
##
## mvrnorm
## needs version 1.7.2 or newer
m1pc <- plotCurves(m1, plotx = "x", interval = "conf")
## if your rockchalk is not on the development path, can still
## construct that manually. Here's how
nd <- data.frame(x = plotSeq(dat$x, 40))
pdat <- predict(m1, newdata = nd, interval = "conf")
head(pdat)
## fit lwr upr
## 1 -10.895 -11.666 -10.125
## 2 -6.716 -7.065 -6.367
## 3 -5.904 -6.176 -5.632
## 4 -5.424 -5.653 -5.196
## 5 -5.083 -5.283 -4.883
## 6 -4.817 -4.997 -4.638
nd <- cbind(nd, pdat)
nd
## x fit lwr upr
## 1 0.0008053 -10.895 -11.666 -10.125
## 2 0.0263761 -6.716 -7.065 -6.367
## 3 0.0519469 -5.904 -6.176 -5.632
## 4 0.0775177 -5.424 -5.653 -5.196
## 5 0.1030885 -5.083 -5.283 -4.883
## 6 0.1286593 -4.817 -4.997 -4.638
## 7 0.1542301 -4.600 -4.764 -4.437
## 8 0.1798009 -4.417 -4.569 -4.264
## 9 0.2053717 -4.257 -4.401 -4.114
## 10 0.2309425 -4.117 -4.253 -3.980
## 11 0.2565133 -3.991 -4.123 -3.859
## 12 0.2820841 -3.877 -4.006 -3.749
## 13 0.3076549 -3.773 -3.899 -3.647
## 14 0.3332257 -3.677 -3.803 -3.552
## 15 0.3587965 -3.589 -3.714 -3.464
## 16 0.3843673 -3.506 -3.631 -3.382
## 17 0.4099381 -3.429 -3.555 -3.304
## 18 0.4355089 -3.357 -3.483 -3.230
## 19 0.4610797 -3.288 -3.417 -3.160
## 20 0.4866505 -3.224 -3.354 -3.094
## 21 0.5122213 -3.162 -3.294 -3.031
## 22 0.5377921 -3.104 -3.238 -2.970
## 23 0.5633629 -3.048 -3.185 -2.912
## 24 0.5889337 -2.995 -3.134 -2.857
## 25 0.6145045 -2.944 -3.085 -2.803
## 26 0.6400753 -2.896 -3.039 -2.752
## 27 0.6656461 -2.849 -2.994 -2.703
## 28 0.6912168 -2.803 -2.952 -2.655
## 29 0.7167876 -2.760 -2.911 -2.609
## 30 0.7423584 -2.718 -2.871 -2.565
## 31 0.7679292 -2.677 -2.833 -2.522
## 32 0.7935000 -2.638 -2.796 -2.480
## 33 0.8190708 -2.600 -2.761 -2.439
## 34 0.8446416 -2.563 -2.727 -2.400
## 35 0.8702124 -2.528 -2.693 -2.362
## 36 0.8957832 -2.493 -2.661 -2.325
## 37 0.9213540 -2.459 -2.630 -2.289
## 38 0.9469248 -2.426 -2.599 -2.254
## 39 0.9724956 -2.394 -2.569 -2.219
## 40 0.9980664 -2.363 -2.541 -2.186
plot(log(y) ~ x, data = dat, col = gray(.7))
lines(fit ~ x, data = nd, col = "red")
lines(lwr ~ x, data = nd, col = "red", lty = 2)
lines(upr ~ x, data = nd, col = "red", lty = 2)
## I really want to plot this in the (x,y) plane
## not the log(x) or log(y). Here's an easy way
## using the results from before.
nd$ypred <- exp(nd$fit)
head(nd)
## x fit lwr upr ypred
## 1 0.0008053 -10.895 -11.666 -10.125 1.854e-05
## 2 0.0263761 -6.716 -7.065 -6.367 1.212e-03
## 3 0.0519469 -5.904 -6.176 -5.632 2.729e-03
## 4 0.0775177 -5.424 -5.653 -5.196 4.408e-03
## 5 0.1030885 -5.083 -5.283 -4.883 6.202e-03
## 6 0.1286593 -4.817 -4.997 -4.638 8.087e-03
titl <- "double-log regression in x,y space"
plot(y ~ x, data = dat, col = gray(.8), main = titl)
lines(ypred ~ x, data = nd, col = "red", lwd = 2)
## The student can go back to the beginning and
## change the coefficients to see how these plots
## might change.