## Paul Johnson
## 2013-04-01
## regression-doublelog-1.R

## Question. What do you get when you "log both sides" of a
## regression model?

## In economics and biology, the double-log model is very
## common. It describes an interactive, multiplicative process.
##
## With just one predictor, the theory is
##      y = b0 * x^b1 * exp(e)
## where I wish I could add subscripts on y, x, and e.
## Using the rules of logs, that simplifies to the linear
## model. If there were predictors x1,x2, and so forth,
## with exponents.

## Recall the log laws
## 1. log(x^b1) = b1 * log(x)
## 2. log(c*d) = log(c) + log(d)

set.seed(123123)
dat <- data.frame( x = runif(1000))
## The 3 parameters are b0, b1, and stde
b0 <- 0.1
b1 <- 1.2
stde <- 2
dat$y <- b0 * dat$x^(b1) * exp(rnorm(1000, m = 0, s = stde))

plot(y ~ x, data = dat, main="No Apparent Relationship?")

## Interesting. Looks like nothing.

m0 <- lm(y ~ x, data = dat)
summary(m0)
##
## Call:
## lm(formula = y ~ x, data = dat)
##
## Residuals:
##    Min     1Q Median     3Q    Max
##  -0.73  -0.40  -0.16   0.01  48.36
##
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)   -0.102      0.113   -0.90     0.37
## x              0.840      0.197    4.26  2.2e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.8 on 998 degrees of freedom
## Multiple R-squared:  0.0179, Adjusted R-squared:  0.0169
## F-statistic: 18.2 on 1 and 998 DF,  p-value: 2.21e-05
## One of the usual transformations we try is the log, which
## is justified either by idea of changing a "skewed" variable to
## a more normal one, or by the desire to fit the interactive
## model above.

## Test out plot's built-in antilogger
plot(y ~ x, data = dat, log = "xy")

## Better fix the labels, urgently
plot(y ~ x, data = dat, log = "xy", xlab = "log(x)", ylab = "log(y)")

## Previous same as this, this is more usual to the way I would do this:
plot(log(y) ~ log(x), data = dat)

m1 <- lm(log(y) ~ log(x), data = dat)
summary(m1)
##
## Call:
## lm(formula = log(y) ~ log(x), data = dat)
##
## Residuals:
##    Min     1Q Median     3Q    Max
## -6.157 -1.393  0.071  1.349  6.308
##
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  -2.3610     0.0905   -26.1   <2e-16 ***
## log(x)        1.1979     0.0634    18.9   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.01 on 998 degrees of freedom
## Multiple R-squared:  0.263,  Adjusted R-squared:  0.263
## F-statistic:  357 on 1 and 998 DF,  p-value: <2e-16
abline(m1, col = "red")

## The challenge of interpretation is to get predicted
## values in a scale that interests us.

## I notice termplot does the same thing that rockchalk::plotCurves
## will do
termplot(m1, partial = TRUE, se = TRUE)

library(rockchalk)
##
## Attaching package: 'rockchalk'
##
## The following object is masked from 'package:MASS':
##
##     mvrnorm

## needs version 1.7.2 or newer
m1pc <- plotCurves(m1, plotx = "x", interval = "conf")

## if your rockchalk is not on the development path, can still
## construct that manually. Here's how
nd <- data.frame(x = plotSeq(dat$x, 40)) pdat <- predict(m1, newdata = nd, interval = "conf") head(pdat) ## fit lwr upr ## 1 -10.895 -11.666 -10.125 ## 2 -6.716 -7.065 -6.367 ## 3 -5.904 -6.176 -5.632 ## 4 -5.424 -5.653 -5.196 ## 5 -5.083 -5.283 -4.883 ## 6 -4.817 -4.997 -4.638 nd <- cbind(nd, pdat) nd ## x fit lwr upr ## 1 0.0008053 -10.895 -11.666 -10.125 ## 2 0.0263761 -6.716 -7.065 -6.367 ## 3 0.0519469 -5.904 -6.176 -5.632 ## 4 0.0775177 -5.424 -5.653 -5.196 ## 5 0.1030885 -5.083 -5.283 -4.883 ## 6 0.1286593 -4.817 -4.997 -4.638 ## 7 0.1542301 -4.600 -4.764 -4.437 ## 8 0.1798009 -4.417 -4.569 -4.264 ## 9 0.2053717 -4.257 -4.401 -4.114 ## 10 0.2309425 -4.117 -4.253 -3.980 ## 11 0.2565133 -3.991 -4.123 -3.859 ## 12 0.2820841 -3.877 -4.006 -3.749 ## 13 0.3076549 -3.773 -3.899 -3.647 ## 14 0.3332257 -3.677 -3.803 -3.552 ## 15 0.3587965 -3.589 -3.714 -3.464 ## 16 0.3843673 -3.506 -3.631 -3.382 ## 17 0.4099381 -3.429 -3.555 -3.304 ## 18 0.4355089 -3.357 -3.483 -3.230 ## 19 0.4610797 -3.288 -3.417 -3.160 ## 20 0.4866505 -3.224 -3.354 -3.094 ## 21 0.5122213 -3.162 -3.294 -3.031 ## 22 0.5377921 -3.104 -3.238 -2.970 ## 23 0.5633629 -3.048 -3.185 -2.912 ## 24 0.5889337 -2.995 -3.134 -2.857 ## 25 0.6145045 -2.944 -3.085 -2.803 ## 26 0.6400753 -2.896 -3.039 -2.752 ## 27 0.6656461 -2.849 -2.994 -2.703 ## 28 0.6912168 -2.803 -2.952 -2.655 ## 29 0.7167876 -2.760 -2.911 -2.609 ## 30 0.7423584 -2.718 -2.871 -2.565 ## 31 0.7679292 -2.677 -2.833 -2.522 ## 32 0.7935000 -2.638 -2.796 -2.480 ## 33 0.8190708 -2.600 -2.761 -2.439 ## 34 0.8446416 -2.563 -2.727 -2.400 ## 35 0.8702124 -2.528 -2.693 -2.362 ## 36 0.8957832 -2.493 -2.661 -2.325 ## 37 0.9213540 -2.459 -2.630 -2.289 ## 38 0.9469248 -2.426 -2.599 -2.254 ## 39 0.9724956 -2.394 -2.569 -2.219 ## 40 0.9980664 -2.363 -2.541 -2.186 plot(log(y) ~ x, data = dat, col = gray(.7)) lines(fit ~ x, data = nd, col = "red") lines(lwr ~ x, data = nd, col = "red", lty = 2) lines(upr ~ x, data = nd, col = "red", lty = 2) ## I really want to plot this in the (x,y) plane ## not the log(x) or log(y). Here's an easy way ## using the results from before. nd$ypred <- exp(nd\$fit)
##           x     fit     lwr     upr     ypred
## 1 0.0008053 -10.895 -11.666 -10.125 1.854e-05
## 2 0.0263761  -6.716  -7.065  -6.367 1.212e-03
## 3 0.0519469  -5.904  -6.176  -5.632 2.729e-03
## 4 0.0775177  -5.424  -5.653  -5.196 4.408e-03
## 5 0.1030885  -5.083  -5.283  -4.883 6.202e-03
## 6 0.1286593  -4.817  -4.997  -4.638 8.087e-03
titl <- "double-log regression in x,y space"
plot(y ~ x, data = dat, col = gray(.8), main = titl)
lines(ypred ~ x, data = nd, col = "red", lwd = 2)

## The student can go back to the beginning and
## change the coefficients to see how these plots
## might change.