## Paul Johnson
## 2013-04-01
## regression-doublelog-1.R

## Question. What do you get when you "log both sides" of a
## regression model?

## In economics and biology, the double-log model is very
## common. It describes an interactive, multiplicative process.
##
## With just one predictor, the theory is
##      y = b0 * x^b1 * exp(e)
## where I wish I could add subscripts on y, x, and e.
## Using the rules of logs, that simplifies to the linear
## model. If there were predictors x1,x2, and so forth,
## they would be added as additional multiplicative terms
## with exponents.

## Recall the log laws
## 1. log(x^b1) = b1 * log(x)
## 2. log(c*d) = log(c) + log(d)

set.seed(123123)
dat <- data.frame( x = runif(1000))
## The 3 parameters are b0, b1, and stde
b0 <- 0.1
b1 <- 1.2
stde <- 2
dat$y <- b0 * dat$x^(b1) * exp(rnorm(1000, m = 0, s = stde))

plot(y ~ x, data = dat, main="No Apparent Relationship?")

plot of chunk unnamed-chunk-1

## Interesting. Looks like nothing.

m0 <- lm(y ~ x, data = dat)
summary(m0)

## 
## Call:
## lm(formula = y ~ x, data = dat)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
##  -0.73  -0.40  -0.16   0.01  48.36 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -0.102      0.113   -0.90     0.37    
## x              0.840      0.197    4.26  2.2e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.8 on 998 degrees of freedom
## Multiple R-squared:  0.0179, Adjusted R-squared:  0.0169 
## F-statistic: 18.2 on 1 and 998 DF,  p-value: 2.21e-05

## One of the usual transformations we try is the log, which
## is justified either by idea of changing a "skewed" variable to
## a more normal one, or by the desire to fit the interactive
## model above.

## Test out plot's built-in antilogger
plot(y ~ x, data = dat, log = "xy")

plot of chunk unnamed-chunk-1

## Better fix the labels, urgently
plot(y ~ x, data = dat, log = "xy", xlab = "log(x)", ylab = "log(y)")

plot of chunk unnamed-chunk-1

## Previous same as this, this is more usual to the way I would do this:
plot(log(y) ~ log(x), data = dat)


m1 <- lm(log(y) ~ log(x), data = dat)
summary(m1)

## 
## Call:
## lm(formula = log(y) ~ log(x), data = dat)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -6.157 -1.393  0.071  1.349  6.308 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -2.3610     0.0905   -26.1   <2e-16 ***
## log(x)        1.1979     0.0634    18.9   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.01 on 998 degrees of freedom
## Multiple R-squared:  0.263,  Adjusted R-squared:  0.263 
## F-statistic:  357 on 1 and 998 DF,  p-value: <2e-16

abline(m1, col = "red")

plot of chunk unnamed-chunk-1

## The challenge of interpretation is to get predicted
## values in a scale that interests us.


## I notice termplot does the same thing that rockchalk::plotCurves
## will do
termplot(m1, partial = TRUE, se = TRUE)

library(rockchalk)

## Loading required package: MASS
## Loading required package: car
## Loading required package: lme4
## Loading required package: Matrix
## Loading required package: methods
## Loading required package: Rcpp
## 
## Attaching package: 'rockchalk'
## 
## The following object is masked from 'package:MASS':
## 
##     mvrnorm

plot of chunk unnamed-chunk-1

## needs version 1.7.2 or newer
m1pc <- plotCurves(m1, plotx = "x", interval = "conf")

plot of chunk unnamed-chunk-1

## if your rockchalk is not on the development path, can still
## construct that manually. Here's how
nd <- data.frame(x = plotSeq(dat$x, 40))
pdat <- predict(m1, newdata = nd, interval = "conf")
head(pdat)

##       fit     lwr     upr
## 1 -10.895 -11.666 -10.125
## 2  -6.716  -7.065  -6.367
## 3  -5.904  -6.176  -5.632
## 4  -5.424  -5.653  -5.196
## 5  -5.083  -5.283  -4.883
## 6  -4.817  -4.997  -4.638

nd <- cbind(nd, pdat)
nd

##            x     fit     lwr     upr
## 1  0.0008053 -10.895 -11.666 -10.125
## 2  0.0263761  -6.716  -7.065  -6.367
## 3  0.0519469  -5.904  -6.176  -5.632
## 4  0.0775177  -5.424  -5.653  -5.196
## 5  0.1030885  -5.083  -5.283  -4.883
## 6  0.1286593  -4.817  -4.997  -4.638
## 7  0.1542301  -4.600  -4.764  -4.437
## 8  0.1798009  -4.417  -4.569  -4.264
## 9  0.2053717  -4.257  -4.401  -4.114
## 10 0.2309425  -4.117  -4.253  -3.980
## 11 0.2565133  -3.991  -4.123  -3.859
## 12 0.2820841  -3.877  -4.006  -3.749
## 13 0.3076549  -3.773  -3.899  -3.647
## 14 0.3332257  -3.677  -3.803  -3.552
## 15 0.3587965  -3.589  -3.714  -3.464
## 16 0.3843673  -3.506  -3.631  -3.382
## 17 0.4099381  -3.429  -3.555  -3.304
## 18 0.4355089  -3.357  -3.483  -3.230
## 19 0.4610797  -3.288  -3.417  -3.160
## 20 0.4866505  -3.224  -3.354  -3.094
## 21 0.5122213  -3.162  -3.294  -3.031
## 22 0.5377921  -3.104  -3.238  -2.970
## 23 0.5633629  -3.048  -3.185  -2.912
## 24 0.5889337  -2.995  -3.134  -2.857
## 25 0.6145045  -2.944  -3.085  -2.803
## 26 0.6400753  -2.896  -3.039  -2.752
## 27 0.6656461  -2.849  -2.994  -2.703
## 28 0.6912168  -2.803  -2.952  -2.655
## 29 0.7167876  -2.760  -2.911  -2.609
## 30 0.7423584  -2.718  -2.871  -2.565
## 31 0.7679292  -2.677  -2.833  -2.522
## 32 0.7935000  -2.638  -2.796  -2.480
## 33 0.8190708  -2.600  -2.761  -2.439
## 34 0.8446416  -2.563  -2.727  -2.400
## 35 0.8702124  -2.528  -2.693  -2.362
## 36 0.8957832  -2.493  -2.661  -2.325
## 37 0.9213540  -2.459  -2.630  -2.289
## 38 0.9469248  -2.426  -2.599  -2.254
## 39 0.9724956  -2.394  -2.569  -2.219
## 40 0.9980664  -2.363  -2.541  -2.186

plot(log(y) ~ x, data = dat, col = gray(.7))
lines(fit ~ x, data = nd, col = "red")
lines(lwr ~ x, data = nd, col = "red", lty = 2)
lines(upr ~ x, data = nd, col = "red", lty = 2)

plot of chunk unnamed-chunk-1

## I really want to plot this in the (x,y) plane
## not the log(x) or log(y). Here's an easy way
## using the results from before.


nd$ypred <- exp(nd$fit)
head(nd)

##           x     fit     lwr     upr     ypred
## 1 0.0008053 -10.895 -11.666 -10.125 1.854e-05
## 2 0.0263761  -6.716  -7.065  -6.367 1.212e-03
## 3 0.0519469  -5.904  -6.176  -5.632 2.729e-03
## 4 0.0775177  -5.424  -5.653  -5.196 4.408e-03
## 5 0.1030885  -5.083  -5.283  -4.883 6.202e-03
## 6 0.1286593  -4.817  -4.997  -4.638 8.087e-03

titl <- "double-log regression in x,y space"
plot(y ~ x, data = dat, col = gray(.8), main = titl)
lines(ypred ~ x, data = nd, col = "red", lwd = 2)

plot of chunk unnamed-chunk-1

## The student can go back to the beginning and
## change the coefficients to see how these plots
## might change.