## Paul Johnson
## 2013-04-11
## regression-quadratic-1.R
##
## Explore quadratic regression. Explain ways to fit that in R.
## Demonstrates use of predict, newdata, and R's poly() function.
##
## From now on, anybody who wants help with quadratic regressions,
## well, has to run this through R. And read a lot of pithy comments.


## Manufacture some data.

set.seed(444532)
x3sd <- 0.5
dat <- data.frame(x1 = rnorm(200),
                  x2 = rnorm(200, sd = 1),
                  x3 = rnorm(200, mean = 1, sd = x3sd))
b0 <- 0.4
b1 <- 0.7
b2 <- 1.5
b3 <- -0.9
b4 <- 1.4
d <- 2
stde <- 1.5
dat$y <- with(dat,
              b0 + b1 * x1 + b2 * x2 + b3 * x3 + b4 * x3^2 + rnorm(200, sd = stde))

## That has a very minor curvature in it. The key coefficient
## here is b4 = 1.4, meaning we have a U-shaped curve in x3.

## Plot just the quadratic part, to get the feeling. Note the plotmath.
## Only try that on a closed course with a trained driver :)
quad <- function(x) b0 + b3 * x + b4 * x^2
curve(quad, from = -2, to = 2, xlab = "x3",
      ylab = substitute(y == A + B * x3 + C * x3^2, list(A = b0, B = b3, C = b4)))


## Suppose you estimate a linear model
m0 <- lm(y ~ x1 + x2 + x3, data = dat)
summary(m0)
## 
## Call:
## lm(formula = y ~ x1 + x2 + x3, data = dat)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -5.328 -1.182 -0.056  1.095  4.855 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -0.646      0.269   -2.40    0.017 *  
## x1             0.787      0.121    6.51  6.3e-10 ***
## x2             1.408      0.114   12.37  < 2e-16 ***
## x3             2.035      0.248    8.21  2.8e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.63 on 196 degrees of freedom
## Multiple R-squared:  0.563,  Adjusted R-squared:  0.557 
## F-statistic: 84.3 on 3 and 196 DF,  p-value: <2e-16
## Please note: just because your "p value" is "really great", it does
## not mean you have "the right" model.

## There's a little bit of nonlinearity, as seen in the residual plot:
plot(m0, which = 1)


## It's tough to see, though.
termplot(m0, partial.resid = TRUE, terms = "x3")


## The "truth" is quadratic. We'd like to estimate
## b4, and there are many ways that this can be done.
##
## I'm going to show 4 ways. One of them is more strongly
## recommended, but, as we will see, they are all really
## interchangeable.

## Method 1. The old fashioned way.
## Create a new variable x3square and insert that.
dat$x3square <- dat$x3 * dat$x3
m1 <- lm(y ~ x1 + x2 + x3 + x3square, data = dat)
summary(m1)
## 
## Call:
## lm(formula = y ~ x1 + x2 + x3 + x3square, data = dat)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -5.704 -1.078  0.022  1.057  4.672 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   0.1115     0.3945    0.28     0.78    
## x1            0.7556     0.1199    6.30  1.9e-09 ***
## x2            1.4197     0.1123   12.64  < 2e-16 ***
## x3            0.0678     0.7970    0.09     0.93    
## x3square      0.9924     0.3828    2.59     0.01 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.61 on 195 degrees of freedom
## Multiple R-squared:  0.578,  Adjusted R-squared:  0.569 
## F-statistic: 66.8 on 4 and 195 DF,  p-value: <2e-16
## I'll ask for variables one at a time so you see the problem.
## Interactively, I'd run this:
## termplot(m1, partial.resid = TRUE)
## But since this is a script, I have to ask for one term at a time:
termplot(m1, partial.resid = TRUE, terms = "x1")


termplot(m1, partial.resid = TRUE, terms = "x2")


termplot(m1, partial.resid = TRUE, terms = "x3")


termplot(m1, partial.resid = TRUE, terms = "x3square")


## You don't see much curve because the parts of
## x3 are separated into 2 columns, each of which
## is seen as a linear element in the regression.

## While we are here, look at the predictor "design matrix":
head(model.matrix(m1))
##   (Intercept)       x1      x2     x3 x3square
## 1           1 -0.96188  1.2770 0.5939   0.3527
## 2           1  1.30351  0.3905 0.6015   0.3618
## 3           1  1.65021  0.2225 0.9298   0.8645
## 4           1  0.72121 -1.0704 0.4411   0.1945
## 5           1  0.05278  0.4767 0.7713   0.5950
## 6           1  0.39589 -0.3344 0.7803   0.6088
## Method 2. Technically equivalent to Method 1. We can introduce
## the squared term and R will construct the equivalent of
## x3square for us, automatically.

## Method 2a. This is the most elementary way; it is the way new
## users always want to do it.

m2a <- lm(y ~ x1 + x2 + x3 + I(x3*x3), data = dat)
## same as
## m2a <- lm(y ~ x1 + x2 + x3 + I(x3^2), data = dat)
summary(m2a)
## 
## Call:
## lm(formula = y ~ x1 + x2 + x3 + I(x3 * x3), data = dat)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -5.704 -1.078  0.022  1.057  4.672 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   0.1115     0.3945    0.28     0.78    
## x1            0.7556     0.1199    6.30  1.9e-09 ***
## x2            1.4197     0.1123   12.64  < 2e-16 ***
## x3            0.0678     0.7970    0.09     0.93    
## I(x3 * x3)    0.9924     0.3828    2.59     0.01 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.61 on 195 degrees of freedom
## Multiple R-squared:  0.578,  Adjusted R-squared:  0.569 
## F-statistic: 66.8 on 4 and 195 DF,  p-value: <2e-16
## Look at the predictor matrix:
head(model.matrix(m2a))
##   (Intercept)       x1      x2     x3 I(x3 * x3)
## 1           1 -0.96188  1.2770 0.5939     0.3527
## 2           1  1.30351  0.3905 0.6015     0.3618
## 3           1  1.65021  0.2225 0.9298     0.8645
## 4           1  0.72121 -1.0704 0.4411     0.1945
## 5           1  0.05278  0.4767 0.7713     0.5950
## 6           1  0.39589 -0.3344 0.7803     0.6088
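
## Why is the I() wrapper needed at all? In R's formula language the
## ^ operator means factor crossing, not arithmetic, so a formula like
## y ~ x3 + x3^2 silently collapses to y ~ x3. Wrapping the term in
## I() protects its arithmetic meaning. A quick check (my own
## illustration, not part of the original script):
coef(lm(y ~ x3 + x3^2, data = dat))  ## note: no squared term appears
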
## Method 2b. Introduced only to show the poly() function.
## The poly() function can be "forced" to create the same x3 and
## x3square columns with the raw = TRUE argument.
m2b <- lm(y ~ x1 + x2 + poly(x3, degree = d, raw = TRUE), data = dat)
summary(m2b)
## 
## Call:
## lm(formula = y ~ x1 + x2 + poly(x3, degree = d, raw = TRUE), 
##     data = dat)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -5.704 -1.078  0.022  1.057  4.672 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                         0.1115     0.3945    0.28     0.78    
## x1                                  0.7556     0.1199    6.30  1.9e-09 ***
## x2                                  1.4197     0.1123   12.64  < 2e-16 ***
## poly(x3, degree = d, raw = TRUE)1   0.0678     0.7970    0.09     0.93    
## poly(x3, degree = d, raw = TRUE)2   0.9924     0.3828    2.59     0.01 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.61 on 195 degrees of freedom
## Multiple R-squared:  0.578,  Adjusted R-squared:  0.569 
## F-statistic: 66.8 on 4 and 195 DF,  p-value: <2e-16
## See? Same!
cbind(coef(summary(m2a))[, 1:2], coef(summary(m2b))[, 1:2])
##             Estimate Std. Error Estimate Std. Error
## (Intercept)  0.11151     0.3945  0.11151     0.3945
## x1           0.75564     0.1199  0.75564     0.1199
## x2           1.41971     0.1123  1.41971     0.1123
## x3           0.06782     0.7970  0.06782     0.7970
## I(x3 * x3)   0.99240     0.3828  0.99240     0.3828
## Look at the predictor matrix:
head(model.matrix(m2b))
##   (Intercept)       x1      x2 poly(x3, degree = d, raw = TRUE)1
## 1           1 -0.96188  1.2770                            0.5939
## 2           1  1.30351  0.3905                            0.6015
## 3           1  1.65021  0.2225                            0.9298
## 4           1  0.72121 -1.0704                            0.4411
## 5           1  0.05278  0.4767                            0.7713
## 6           1  0.39589 -0.3344                            0.7803
##   poly(x3, degree = d, raw = TRUE)2
## 1                            0.3527
## 2                            0.3618
## 3                            0.8645
## 4                            0.1945
## 5                            0.5950
## 6                            0.6088
## Method 3. The method recommended in R: poly().
## We are advised to use "orthogonal polynomials" instead,
## because they are more numerically stable.
m3 <- lm(y ~ x1 + x2 + poly(x3, degree = d), data = dat)
summary(m3)
## 
## Call:
## lm(formula = y ~ x1 + x2 + poly(x3, degree = d), data = dat)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -5.704 -1.078  0.022  1.057  4.672 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              1.341      0.114   11.73  < 2e-16 ***
## x1                       0.756      0.120    6.30  1.9e-09 ***
## x2                       1.420      0.112   12.64  < 2e-16 ***
## poly(x3, degree = d)1   13.461      1.622    8.30  1.7e-14 ***
## poly(x3, degree = d)2    4.201      1.620    2.59     0.01 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.61 on 195 degrees of freedom
## Multiple R-squared:  0.578,  Adjusted R-squared:  0.569 
## F-statistic: 66.8 on 4 and 195 DF,  p-value: <2e-16
termplot(m3, partial.resid = TRUE, terms = "x1")


termplot(m3, partial.resid = TRUE, terms = "x2")


termplot(m3, partial.resid = TRUE, terms = "poly(x3, degree = d)")


## Look at the predictor matrix; you see that poly() created
## 2 columns:
head(model.matrix(m3))
##   (Intercept)       x1      x2 poly(x3, degree = d)1 poly(x3, degree = d)2
## 1           1 -0.96188  1.2770              -0.05747              -0.01570
## 2           1  1.30351  0.3905              -0.05632              -0.01710
## 3           1  1.65021  0.2225              -0.00691              -0.05140
## 4           1  0.72121 -1.0704              -0.08047               0.01819
## 5           1  0.05278  0.4767              -0.03076              -0.04120
## 6           1  0.39589 -0.3344              -0.02941              -0.04209
## A hypothesis test rejects m0, incidentally:
anova(m3, m0, test = "F")
## Analysis of Variance Table
## 
## Model 1: y ~ x1 + x2 + poly(x3, degree = d)
## Model 2: y ~ x1 + x2 + x3
##   Res.Df RSS Df Sum of Sq    F Pr(>F)  
## 1    195 506                           
## 2    196 523 -1     -17.4 6.72   0.01 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Coefficients look different, but that's an illusion.
## From a given combination of (x1, x2, x3), the predictions
## are exactly the same. See?
## Predicted values are identical.
head(cbind(m1$fitted, m2b$fitted, m3$fitted))
##      [,1]    [,2]    [,3]
## 1  1.5879  1.5879  1.5879
## 2  2.0507  2.0507  2.0507
## 3  2.5953  2.5953  2.5953
## 4 -0.6402 -0.6402 -0.6402
## 5  1.4709  1.4709  1.4709
## 6  0.5930  0.5930  0.5930
## That's not all. If you check the 95% CI's on the predicted values,
## or R^2, or anything logically comparable, they'll be the same.
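
## A quick check of that claim (my own addition): the confidence
## intervals for the fitted values agree across the parameterizations,
## up to rounding error.
all.equal(predict(m1, interval = "confidence"),
          predict(m3, interval = "confidence"))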

## Some people say it is very important to "mean center"
## the quadratic term. Let's do that and show it makes no
## difference whatsoever. x3c is mean-centered:
dat$x3c <- scale(dat$x3, scale = FALSE) ## mean centered
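## An aside of mine: scale(x, scale = FALSE) returns a one-column
## matrix; the plain-vector equivalent would be
##   dat$x3c <- dat$x3 - mean(dat$x3)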
m4 <- lm(y ~ x1 + x2 + x3c + I(x3c * x3c), data = dat)
summary(m4)
## 
## Call:
## lm(formula = y ~ x1 + x2 + x3c + I(x3c * x3c), data = dat)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -5.704 -1.078  0.022  1.057  4.672 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     1.122      0.142    7.92  1.8e-13 ***
## x1              0.756      0.120    6.30  1.9e-09 ***
## x2              1.420      0.112   12.64  < 2e-16 ***
## x3c             2.004      0.244    8.20  3.2e-14 ***
## I(x3c * x3c)    0.992      0.383    2.59     0.01 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.61 on 195 degrees of freedom
## Multiple R-squared:  0.578,  Adjusted R-squared:  0.569 
## F-statistic: 66.8 on 4 and 195 DF,  p-value: <2e-16
## Superficially better...

## Coefficients look different, but that's an illusion.
## Predicted values are identical:
head(cbind(m1$fitted, m2b$fitted, m3$fitted, m4$fitted))
##      [,1]    [,2]    [,3]    [,4]
## 1  1.5879  1.5879  1.5879  1.5879
## 2  2.0507  2.0507  2.0507  2.0507
## 3  2.5953  2.5953  2.5953  2.5953
## 4 -0.6402 -0.6402 -0.6402 -0.6402
## 5  1.4709  1.4709  1.4709  1.4709
## 6  0.5930  0.5930  0.5930  0.5930
## Can you explain?


## It is a known fact that the estimate of the squared term
## is the same with centered data, but the estimate of the
## linear term changes. The following table gives an overview.

library(memisc)
## Loading required package: lattice
## Loading required package: methods
## Loading required package: MASS
## 
## Attaching package: 'memisc'
## 
## The following objects are masked from 'package:stats':
## 
##     contrasts, contr.sum, contr.treatment
## 
## The following object is masked from 'package:base':
## 
##     as.array
mtable(m1, m2b, m3, m4)
## 
## Calls:
## m1: lm(formula = y ~ x1 + x2 + x3 + x3square, data = dat)
## m2b: lm(formula = y ~ x1 + x2 + poly(x3, degree = d, raw = TRUE), 
##     data = dat)
## m3: lm(formula = y ~ x1 + x2 + poly(x3, degree = d), data = dat)
## m4: lm(formula = y ~ x1 + x2 + x3c + I(x3c * x3c), data = dat)
## 
## ==========================================================================
##                                       m1        m2b       m3        m4    
## --------------------------------------------------------------------------
## (Intercept)                         0.112     0.112     1.341***  1.122***
##                                    (0.395)   (0.395)   (0.114)   (0.142)  
## x1                                  0.756***  0.756***  0.756***  0.756***
##                                    (0.120)   (0.120)   (0.120)   (0.120)  
## x2                                  1.420***  1.420***  1.420***  1.420***
##                                    (0.112)   (0.112)   (0.112)   (0.112)  
## x3                                  0.068                                 
##                                    (0.797)                                
## x3square                            0.992*                                
##                                    (0.383)                                
## poly(x3, degree = d, raw = TRUE)1             0.068                       
##                                              (0.797)                      
## poly(x3, degree = d, raw = TRUE)2             0.992*                      
##                                              (0.383)                      
## poly(x3, degree = d)1                                  13.461***          
##                                                        (1.622)            
## poly(x3, degree = d)2                                   4.201*            
##                                                        (1.620)            
## x3c                                                               2.004***
##                                                                  (0.244)  
## I(x3c * x3c)                                                      0.992*  
##                                                                  (0.383)  
## --------------------------------------------------------------------------
## R-squared                             0.578     0.578     0.578     0.578 
## adj. R-squared                        0.569     0.569     0.569     0.569 
## sigma                                 1.610     1.610     1.610     1.610 
## F                                    66.779    66.779    66.779    66.779 
## p                                     0.000     0.000     0.000     0.000 
## Log-likelihood                     -376.540  -376.540  -376.540  -376.540 
## Deviance                            505.649   505.649   505.649   505.649 
## AIC                                 765.081   765.081   765.081   765.081 
## BIC                                 784.871   784.871   784.871   784.871 
## N                                   200       200       200       200     
## ==========================================================================
## Once you understand why that change happens, you will begin to see
## why poly() is so helpful.

## And, in case you wonder, this is one of the cases where x3 and x3^2
## are strongly correlated with one another.

cor(model.matrix(m1)[ ,-1])
##                x1       x2       x3 x3square
## x1        1.00000 -0.03823 -0.11675 -0.07957
## x2       -0.03823  1.00000  0.03466  0.02005
## x3       -0.11675  0.03466  1.00000  0.95163
## x3square -0.07957  0.02005  0.95163  1.00000
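
## For contrast (my own check, not in the original output): in m3 the
## two orthogonal poly() columns are uncorrelated with each other.
cor(model.matrix(m3)[ , -1])
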
## Note that termplot will cooperate if we type in the full
## name of the predictor as it appears in the model:
termplot(m3, partial.resid = TRUE, terms = "poly(x3, degree = d)")


## Whereas the thing that seems "obvious" to me fails
termplot(m3, partial.resid = TRUE, terms = "x3")
## Error: subscript out of bounds
## You should see the same error I do:
## Error in predict.lm(model, type = "terms", se.fit = se, terms = terms) :

## That's one of the things I've tried to fix in rockchalk's plotCurves
library(rockchalk)
## Loading required package: car
## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:memisc':
## 
##     recode
## 
## Loading required package: lme4
## Loading required package: Matrix
## Loading required package: Rcpp
## 
## Attaching package: 'rockchalk'
## 
## The following object is masked from 'package:MASS':
## 
##     mvrnorm
plotCurves(m3, plotx = "x3")


plotCurves(m3, plotx = "x3", interval = "conf")


## It is important to get used to R's predict() family of functions.
## A regression tool is not complete unless it has one.
## And using the newdata argument in predict() is really important!
## Look what goes wrong if you accidentally omit a predictor
## from a newdata object:
ndf1 <- data.frame(x2 = c(1), x3 = c(2))
predict(m1, newdata = ndf1)
## Error: object 'x1' not found
## It fails because ndf1 lacks an x1 column.

## Now get it right
ndf2 <- data.frame(x1 = c(1), x2 = c(1), x3 = c(2))
predict(m2a, newdata = ndf2)
##     1 
## 6.392
## succeeded

## Now, we'd like to calculate predicted values for, say, 20 values of x3
## spread across its range, leaving x1 and x2 at their averages.
## That's not a difficult calculation, but why not use the handy functions
## I've prepared for it?
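
## For comparison, here is a base-R sketch of that calculation (my
## own illustration, not part of the original script): hold x1 and x2
## at their means, let x3 run across its observed range, and feed the
## result to predict().
nd <- data.frame(x1 = mean(dat$x1), x2 = mean(dat$x2),
                 x3 = seq(min(dat$x3), max(dat$x3), length.out = 20))
nd$fit <- predict(m3, newdata = nd)
head(nd)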

predictOMatic(m3, interval = "confidence")
## $x1
##        x1      x2     x3     fit     lwr     upr
## 1 -3.2550 0.04564 0.9757 -1.2725 -2.0931 -0.4518
## 2 -0.4605 0.04564 0.9757  0.8392  0.5389  1.1394
## 3  0.1471 0.04564 0.9757  1.2983  1.0173  1.5793
## 4  0.6096 0.04564 0.9757  1.6478  1.3346  1.9611
## 5  2.7511 0.04564 0.9757  3.2660  2.5598  3.9722
## 
## $x2
##        x1       x2     x3     fit      lwr     upr
## 1 0.07998 -2.68799 0.9757 -2.6334 -3.30626 -1.9605
## 2 0.07998 -0.59325 0.9757  0.3405  0.02425  0.6568
## 3 0.07998  0.04662 0.9757  1.2490  0.96937  1.5286
## 4 0.07998  0.77808 0.9757  2.2874  1.96742  2.6075
## 5 0.07998  2.52345 0.9757  4.7654  4.15512  5.3756
## 
## $x3
##        x1      x2      x3    fit     lwr    upr
## 1 0.07998 0.04564 -0.4099 0.3757 -1.1157 1.8671
## 2 0.07998 0.04564  0.6432 0.6910  0.4050 0.9769
## 3 0.07998 0.04564  0.9450 1.1870  0.9078 1.4661
## 4 0.07998 0.04564  1.3039 2.0125  1.7226 2.3024
## 5 0.07998 0.04564  2.0443 4.5229  3.6481 5.3977
m1pr1 <- predictOMatic(m3, interval = "confidence",
    predVals = list(x3 = quantile(dat$x3, probs = seq(0, 1, length.out = 20))))
m1pr1
##         x1      x2      x3    fit     lwr    upr
## 1  0.07998 0.04564 -0.4099 0.3757 -1.1157 1.8671
## 2  0.07998 0.04564  0.2631 0.3233 -0.1484 0.7950
## 3  0.07998 0.04564  0.4411 0.4597  0.1126 0.8068
## 4  0.07998 0.04564  0.5534 0.5781  0.2744 0.8819
## 5  0.07998 0.04564  0.5984 0.6326  0.3394 0.9259
## 6  0.07998 0.04564  0.6753 0.7351  0.4527 1.0174
## 7  0.07998 0.04564  0.7458 0.8392  0.5610 1.1175
## 8  0.07998 0.04564  0.8169 0.9543  0.6769 1.2318
## 9  0.07998 0.04564  0.8695 1.0460  0.7680 1.3239
## 10 0.07998 0.04564  0.9238 1.1463  0.8674 1.4251
## 11 0.07998 0.04564  0.9549 1.2064  0.9271 1.4857
## 12 0.07998 0.04564  1.0336 1.3670  1.0867 1.6473
## 13 0.07998 0.04564  1.0991 1.5100  1.2291 1.7909
## 14 0.07998 0.04564  1.1682 1.6702  1.3883 1.9522
## 15 0.07998 0.04564  1.2732 1.9317  1.6448 2.2186
## 16 0.07998 0.04564  1.3442 2.1211  1.8258 2.4164
## 17 0.07998 0.04564  1.4565 2.4409  2.1183 2.7635
## 18 0.07998 0.04564  1.6343 2.9981  2.5831 3.4131
## 19 0.07998 0.04564  1.7877 3.5295  2.9809 4.0781
## 20 0.07998 0.04564  2.0443 4.5229  3.6481 5.3977
## Wonder why that works? Here's a middle step that gives you a hint:
newdata(m3, fl = list(x3 = quantile(dat$x3, probs = seq(0,1, length.out = 20))))
##        x1      x2     x3
## 1 0.07998 0.04564 0.9757
plot(fit ~ x3, data = m1pr1, type = "l")
lines(upr ~ x3, data = m1pr1, col = "purple", lty = 4)
lines(lwr ~ x3, data = m1pr1, col = "purple", lty = 4)
legend("topleft", legend = c("predicted","95% CI","95% CI"), lty = 4, col = "purple")


## Notice that the curves look bad, a little jagged. Can you figure
## out why?  For a hint, run the version below, which uses 1000
## quantile points instead of 20.

## I also suggest users go back to the beginning and fiddle the
## parameters. Make x3sd bigger, or change b3 and b4. See what happens.

m1pr2 <- predictOMatic(m3, interval = "confidence",
    predVals = list(x3 = quantile(dat$x3, probs = seq(0, 1, length.out = 1000))))
plot(fit ~ x3, data = m1pr2, type = "l")
lines(upr ~ x3, data = m1pr2, col = "purple", lty = 4)
lines(lwr ~ x3, data = m1pr2, col = "purple", lty = 4)
legend("topleft", legend = c("predicted","95% CI","95% CI"), lty = 4, col = "purple")


## Ooh la la, as we say in France :)


## Major points.
## 1. If the true relationship is quadratic, we may not find it unless
## we know to look for it. I have fiddled with the parameters for this
## model quite a bit.  In many cases, the anova() test is unequivocal,
## rejecting m0. But quite often, it is not powerful.
## Still, a theory that hints that we should look for curvature in the
## effect of x3 would be most welcome.
##
## 2. In any nonlinear model, the emphasis in interpretation is on
## analysis of predicted values. These are "marginal effects."  If x
## changes, how do we predict y will change? That requires us to use
## predict() methods in R.
##
## termplot does pretty well with these; the rockchalk::plotCurves
## function handles interactions and some nonlinearities that termplot
## does not manage.

## 3. Does it matter how we estimate these models?

## Some experts are emphatic about the need to mean-center x to
## ameliorate multicollinearity due to the similarity of x and x^2,
## but I believe that is completely wrong. Centering results in exactly
## the same predicted values; the fitted model is, literally, identical.

## Other R experts I trust strongly urge poly(x, 2), not x + I(x*x) or such.
## I think there are several different reasons that people have in mind
## to justify that. I think some of the worries about digital round-off
## error have been addressed by improvements in the algorithms that are
## used to fit regressions. That's why we don't see a difference in the
## predicted values of the 4 methods.

## So students and I want to know: "why do the smart people like
## poly() so much?" I have finally grasped the answer, thanks to
## advice on the r-help email list in 2013.

## Gabor Grothendieck listed advantages of poly().  I'm pasting in
## his explanations and adding example code based on the examples above.

##GG 1. One benefit is if you fit a higher degree polynomial using
##GG poly(x, n) the lower degree coefficients will agree with the fit using
##GG a lower n.

##PJ I can verify that. It is strictly literally true if there
## are no other predictors than the poly terms. Example:

m3 <- lm(y ~ poly(x3, degree = 2), data = dat)
m5 <- lm(y ~ poly(x3, degree = 3), data = dat)
m6 <- lm(y ~ poly(x3, degree = 4), data = dat)

mtable(m3, m5, m6)
## 
## Calls:
## m3: lm(formula = y ~ poly(x3, degree = 2), data = dat)
## m5: lm(formula = y ~ poly(x3, degree = 3), data = dat)
## m6: lm(formula = y ~ poly(x3, degree = 4), data = dat)
## 
## ====================================================
##                           m3        m5        m6    
## ----------------------------------------------------
## (Intercept)             1.467***  1.467***  1.467***
##                        (0.160)   (0.160)   (0.160)  
## poly(x3, degree = 2)1  12.968***                    
##                        (2.265)                      
## poly(x3, degree = 2)2   4.397                       
##                        (2.265)                      
## poly(x3, degree = 3)1            12.968***          
##                                  (2.264)            
## poly(x3, degree = 3)2             4.397             
##                                  (2.264)            
## poly(x3, degree = 3)3            -2.433             
##                                  (2.264)            
## poly(x3, degree = 4)1                      12.968***
##                                            (2.269)  
## poly(x3, degree = 4)2                       4.397   
##                                            (2.269)  
## poly(x3, degree = 4)3                      -2.433   
##                                            (2.269)  
## poly(x3, degree = 4)4                      -0.855   
##                                            (2.269)  
## ----------------------------------------------------
## R-squared                 0.156     0.161     0.162 
## adj. R-squared            0.148     0.149     0.145 
## sigma                     2.265     2.264     2.269 
## F                        18.272    12.576     9.426 
## p                         0.000     0.000     0.000 
## Log-likelihood         -445.805  -445.218  -445.145 
## Deviance               1010.794  1004.876  1004.145 
## AIC                     899.610   900.436   902.290 
## BIC                     912.803   916.927   922.080 
## N                       200       200       200     
## ====================================================
##PJ However, if there are other predictors, they are not orthogonal
##PJ to the poly() terms, so the argument breaks down a little bit:

m3c <- lm(y ~ x1 + x2 + poly(x3, degree = 2), data = dat)
m5c <- lm(y ~ x1 + x2 + poly(x3, degree = 3), data = dat)
m6c <- lm(y ~ x1 + x2 + poly(x3, degree = 4), data = dat)

mtable(m3c, m5c, m6c)
## 
## Calls:
## m3c: lm(formula = y ~ x1 + x2 + poly(x3, degree = 2), data = dat)
## m5c: lm(formula = y ~ x1 + x2 + poly(x3, degree = 3), data = dat)
## m6c: lm(formula = y ~ x1 + x2 + poly(x3, degree = 4), data = dat)
## 
## ====================================================
##                           m3c       m5c       m6c   
## ----------------------------------------------------
## (Intercept)             1.341***  1.341***  1.340***
##                        (0.114)   (0.114)   (0.115)  
## x1                      0.756***  0.768***  0.774***
##                        (0.120)   (0.120)   (0.121)  
## x2                      1.420***  1.410***  1.408***
##                        (0.112)   (0.113)   (0.113)  
## poly(x3, degree = 2)1  13.461***                    
##                        (1.622)                      
## poly(x3, degree = 2)2   4.201*                      
##                        (1.620)                      
## poly(x3, degree = 3)1            13.486***          
##                                  (1.622)            
## poly(x3, degree = 3)2             4.177*            
##                                  (1.620)            
## poly(x3, degree = 3)3            -1.714             
##                                  (1.624)            
## poly(x3, degree = 4)1                      13.496***
##                                            (1.624)  
## poly(x3, degree = 4)2                       4.168*  
##                                            (1.622)  
## poly(x3, degree = 4)3                      -1.724   
##                                            (1.627)  
## poly(x3, degree = 4)4                      -1.075   
##                                            (1.616)  
## ----------------------------------------------------
## R-squared                 0.578     0.580     0.581 
## adj. R-squared            0.569     0.570     0.568 
## sigma                     1.610     1.610     1.612 
## F                        66.779    53.677    44.676 
## p                         0.000     0.000     0.000 
## Log-likelihood         -376.540  -375.968  -375.739 
## Deviance                505.649   502.762   501.613 
## AIC                     765.081   765.935   767.478 
## BIC                     784.871   789.024   793.864 
## N                       200       200       200     
## ====================================================
## Compare to raw.
m3d <- lm(y ~ x1 + x2 + poly(x3, degree = 2, raw = TRUE), data = dat)
m5d <- lm(y ~ x1 + x2 + poly(x3, degree = 3, raw = TRUE), data = dat)
m6d <- lm(y ~ x1 + x2 + poly(x3, degree = 4, raw = TRUE), data = dat)
mtable(m3d, m5d, m6d)
## 
## Calls:
## m3d: lm(formula = y ~ x1 + x2 + poly(x3, degree = 2, raw = TRUE), 
##     data = dat)
## m5d: lm(formula = y ~ x1 + x2 + poly(x3, degree = 3, raw = TRUE), 
##     data = dat)
## m6d: lm(formula = y ~ x1 + x2 + poly(x3, degree = 4, raw = TRUE), 
##     data = dat)
## 
## ================================================================
##                                       m3d       m5d       m6d   
## ----------------------------------------------------------------
## (Intercept)                         0.112     0.199     0.326   
##                                    (0.395)   (0.403)   (0.446)  
## x1                                  0.756***  0.768***  0.774***
##                                    (0.120)   (0.120)   (0.121)  
## x2                                  1.420***  1.410***  1.408***
##                                    (0.112)   (0.113)   (0.113)  
## poly(x3, degree = 2, raw = TRUE)1   0.068                       
##                                    (0.797)                      
## poly(x3, degree = 2, raw = TRUE)2   0.992*                      
##                                    (0.383)                      
## poly(x3, degree = 3, raw = TRUE)1            -1.002             
##                                              (1.289)            
## poly(x3, degree = 3, raw = TRUE)2             2.618             
##                                              (1.587)            
## poly(x3, degree = 3, raw = TRUE)3            -0.601             
##                                              (0.570)            
## poly(x3, degree = 4, raw = TRUE)1                      -0.687   
##                                                        (1.375)  
## poly(x3, degree = 4, raw = TRUE)2                       0.751   
##                                                        (3.226)  
## poly(x3, degree = 4, raw = TRUE)3                       1.349   
##                                                        (2.987)  
## poly(x3, degree = 4, raw = TRUE)4                      -0.570   
##                                                        (0.857)  
## ----------------------------------------------------------------
## R-squared                             0.578     0.580     0.581 
## adj. R-squared                        0.569     0.570     0.568 
## sigma                                 1.610     1.610     1.612 
## F                                    66.779    53.677    44.676 
## p                                     0.000     0.000     0.000 
## Log-likelihood                     -376.540  -375.968  -375.739 
## Deviance                            505.649   502.762   501.613 
## AIC                                 765.081   765.935   767.478 
## BIC                                 784.871   789.024   793.864 
## N                                   200       200       200     
## ================================================================
##GG 2. A second advantage is that the t statistics are invariant under
##GG shift if you use orthogonal polynomials.
##GG compare these two:
m3a <- lm(y ~ x1 + x2 + poly(x3, degree = 2), data = dat)
m3b <- lm(y ~ x1 + x2 + poly(x3 + 1, degree = 2), data = dat)
summary(m3a)
## 
## Call:
## lm(formula = y ~ x1 + x2 + poly(x3, degree = 2), data = dat)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -5.704 -1.078  0.022  1.057  4.672 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              1.341      0.114   11.73  < 2e-16 ***
## x1                       0.756      0.120    6.30  1.9e-09 ***
## x2                       1.420      0.112   12.64  < 2e-16 ***
## poly(x3, degree = 2)1   13.461      1.622    8.30  1.7e-14 ***
## poly(x3, degree = 2)2    4.201      1.620    2.59     0.01 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.61 on 195 degrees of freedom
## Multiple R-squared:  0.578,  Adjusted R-squared:  0.569 
## F-statistic: 66.8 on 4 and 195 DF,  p-value: <2e-16
summary(m3b)
## 
## Call:
## lm(formula = y ~ x1 + x2 + poly(x3 + 1, degree = 2), data = dat)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -5.704 -1.078  0.022  1.057  4.672 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  1.341      0.114   11.73  < 2e-16 ***
## x1                           0.756      0.120    6.30  1.9e-09 ***
## x2                           1.420      0.112   12.64  < 2e-16 ***
## poly(x3 + 1, degree = 2)1   13.461      1.622    8.30  1.7e-14 ***
## poly(x3 + 1, degree = 2)2    4.201      1.620    2.59     0.01 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.61 on 195 degrees of freedom
## Multiple R-squared:  0.578,  Adjusted R-squared:  0.569 
## F-statistic: 66.8 on 4 and 195 DF,  p-value: <2e-16
##PJ Great argument. It underscores the fact that mean-centering
## is an illusion. Observe:

m3c <- lm(y ~ x1 + x2 + poly((x3 - mean(x3)), degree = 2), data = dat)

mtable(m3a, m3b, m3c)
## 
## Calls:
## m3a: lm(formula = y ~ x1 + x2 + poly(x3, degree = 2), data = dat)
## m3b: lm(formula = y ~ x1 + x2 + poly(x3 + 1, degree = 2), data = dat)
## m3c: lm(formula = y ~ x1 + x2 + poly((x3 - mean(x3)), degree = 2), 
##     data = dat)
## 
## =================================================================
##                                        m3a       m3b       m3c   
## -----------------------------------------------------------------
## (Intercept)                          1.341***  1.341***  1.341***
##                                     (0.114)   (0.114)   (0.114)  
## x1                                   0.756***  0.756***  0.756***
##                                     (0.120)   (0.120)   (0.120)  
## x2                                   1.420***  1.420***  1.420***
##                                     (0.112)   (0.112)   (0.112)  
## poly(x3, degree = 2)1               13.461***                    
##                                     (1.622)                      
## poly(x3, degree = 2)2                4.201*                      
##                                     (1.620)                      
## poly(x3 + 1, degree = 2)1                     13.461***          
##                                               (1.622)            
## poly(x3 + 1, degree = 2)2                      4.201*            
##                                               (1.620)            
## poly((x3 - mean(x3)), degree = 2)1                      13.461***
##                                                         (1.622)  
## poly((x3 - mean(x3)), degree = 2)2                       4.201*  
##                                                         (1.620)  
## -----------------------------------------------------------------
## R-squared                              0.578     0.578     0.578 
## adj. R-squared                         0.569     0.569     0.569 
## sigma                                  1.610     1.610     1.610 
## F                                     66.779    66.779    66.779 
## p                                      0.000     0.000     0.000 
## Log-likelihood                      -376.540  -376.540  -376.540 
## Deviance                             505.649   505.649   505.649 
## AIC                                  765.081   765.081   765.081 
## BIC                                  784.871   784.871   784.871 
## N                                    200       200       200     
## =================================================================
## Mean-centering has no effect; poly() makes that plain.

## That is not true if you don't use orthogonal polynomials.
## Compare these two:
m3d <- lm(y ~ x1 + x2 + poly(x3, degree = 2, raw = TRUE), data = dat)
m3e <- lm(y ~ x1 + x2 + poly(x3 + 1, degree = 2, raw = TRUE), data = dat)

mtable(m3d, m3e)
## 
## Calls:
## m3d: lm(formula = y ~ x1 + x2 + poly(x3, degree = 2, raw = TRUE), 
##     data = dat)
## m3e: lm(formula = y ~ x1 + x2 + poly(x3 + 1, degree = 2, raw = TRUE), 
##     data = dat)
## 
## ==========================================================
##                                           m3d       m3e   
## ----------------------------------------------------------
## (Intercept)                             0.112     1.036   
##                                        (0.395)   (1.517)  
## x1                                      0.756***  0.756***
##                                        (0.120)   (0.120)  
## x2                                      1.420***  1.420***
##                                        (0.112)   (0.112)  
## poly(x3, degree = 2, raw = TRUE)1       0.068             
##                                        (0.797)            
## poly(x3, degree = 2, raw = TRUE)2       0.992*            
##                                        (0.383)            
## poly(x3 + 1, degree = 2, raw = TRUE)1            -1.917   
##                                                  (1.544)  
## poly(x3 + 1, degree = 2, raw = TRUE)2             0.992*  
##                                                  (0.383)  
## ----------------------------------------------------------
## R-squared                                 0.578     0.578 
## adj. R-squared                            0.569     0.569 
## sigma                                     1.610     1.610 
## F                                        66.779    66.779 
## p                                         0.000     0.000 
## Log-likelihood                         -376.540  -376.540 
## Deviance                                505.649   505.649 
## AIC                                     765.081   765.081 
## BIC                                     784.871   784.871 
## N                                       200       200     
## ==========================================================
## Now, the answer we've all been waiting for.
## What is an orthogonal polynomial and why does it have this special
## benefit?

## I finally understood on 2013-04-01.

## Consider the quadratic equation
## y = b0 + b1 x + b2 x^2

## The slope at point x is
##
##   dy/dx = b1 + 2*b2*x

## And, if you re-arrange that, you see that the coefficient
## b1 is the slope minus something else:

##   b1 = dy/dx - 2*b2*x

## So, when you fit the quadratic regression and estimate
## b1, you don't just get "the slope" you want, you also get
## -2*b2*x in there, and, as a result, your estimate of b1 changes
## when you re-position your point on the x axis.

## On the other hand, the second derivative is a constant:

##   d^2y/dx^2 = 2*b2

## We get the same estimate of b2 whether we center x or
## shift it; it is the same.
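
## A quick numerical illustration (my own check): with a raw
## quadratic, shifting x3 changes the linear coefficient but leaves
## the squared coefficient untouched.
coef(lm(y ~ x3 + I(x3^2), data = dat))
coef(lm(y ~ I(x3 + 5) + I((x3 + 5)^2), data = dat))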

## How does the poly() function fix that?  Instead of giving
## the regression 2 columns, x and x-squared, the poly function
## effectively "takes x out of x-squared" in the second column.
## When poly(x, 2) creates 2 columns, the first one is always
## proportional to x--it is just the linear part. But the second
## column has x removed, and so the 2 columns from poly() are
## completely separate. They are "orthogonal". So you could run a
## regression with the first poly() column, and then add the second
## poly() column, and the estimate from the first column would not
## change.

x3poly <- poly(dat$x3, 2)
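
## A quick check (my own addition): the columns poly() returns are
## orthonormal, so their cross-product is the identity matrix, up to
## rounding error.
round(crossprod(x3poly), digits = 10)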

m4a <- lm(y ~ x3poly[ ,1], data = dat)
summary(m4a)
## 
## Call:
## lm(formula = y ~ x3poly[, 1], data = dat)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -5.149 -1.645  0.046  1.709  5.807 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    1.467      0.161    9.09  < 2e-16 ***
## x3poly[, 1]   12.968      2.281    5.69  4.6e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.28 on 198 degrees of freedom
## Multiple R-squared:  0.14,   Adjusted R-squared:  0.136 
## F-statistic: 32.3 on 1 and 198 DF,  p-value: 4.63e-08
m4b <- lm(y ~ x3poly[ ,1] + x3poly[ ,2], data = dat)
summary(m4b)
## 
## Call:
## lm(formula = y ~ x3poly[, 1] + x3poly[, 2], data = dat)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -5.531 -1.776  0.102  1.713  4.955 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     1.47       0.16    9.16  < 2e-16 ***
## x3poly[, 1]    12.97       2.27    5.73  3.8e-08 ***
## x3poly[, 2]     4.40       2.27    1.94    0.054 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.27 on 197 degrees of freedom
## Multiple R-squared:  0.156,  Adjusted R-squared:  0.148 
## F-statistic: 18.3 on 2 and 197 DF,  p-value: 5.25e-08
## Note, the coefficient on the first term does NOT change.
## Are you starting to see?  If you are uncertain whether to
## add a quadratic term in a regression, this is a good way,
## because the estimate of the linear part does not change
## while you are testing the additional terms.
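
## One convenient way to test the added quadratic term (a sketch of
## mine, not in the original script) is the incremental F test:
anova(m4a, m4b)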

## When there are more predictors in the equation, the
## previous statement is not EXACTLY true, but it is ALMOST true. It is
## not exactly true because there may be a slight correlation
## between x1 and x2 and the poly terms.



## Residual Centering: How to make your own orthogonal polynomial

## If you did not have the poly() function, how could you
## achieve the same benefit?  My colleagues call this
## "residual centering" of a squared term.

## Regress x-squared on x

m5 <- lm(x3square ~ x3, data = dat)

## Take the residuals from that model. Call them x3squarerc.

dat$x3squarerc <- resid(m5)

## x3squarerc is the part of x3square that has x3 removed.

## Note that the correlation of x3squarerc and x3 is 0, up to minor
## rounding error:
cor(dat$x3squarerc, dat$x3)
## [1] -7.592e-18
## But if you plot x3squarerc on x3, you see it is still
## quadratic.
plot(x3squarerc ~ x3, data = dat)


## x3squarerc is just the quadratic part of x3square, no x left.

## Now run the regression with x3 and add x3squarerc

m6 <- lm(y ~ x1 + x2 + x3, data = dat)
m7 <- lm(y ~ x1 + x2 + x3 + x3squarerc, data = dat)
mtable(m6, m7)
## 
## Calls:
## m6: lm(formula = y ~ x1 + x2 + x3, data = dat)
## m7: lm(formula = y ~ x1 + x2 + x3 + x3squarerc, data = dat)
## 
## ===================================
##                    m6        m7    
## -----------------------------------
## (Intercept)     -0.646*   -0.636*  
##                 (0.269)   (0.265)  
## x1               0.787***  0.756***
##                 (0.121)   (0.120)  
## x2               1.408***  1.420***
##                 (0.114)   (0.112)  
## x3               2.035***  2.026***
##                 (0.248)   (0.244)  
## x3squarerc                 0.992*  
##                           (0.383)  
## -----------------------------------
## R-squared          0.563     0.578 
## adj. R-squared     0.557     0.569 
## sigma              1.634     1.610 
## F                 84.336    66.779 
## p                  0.000     0.000 
## Log-likelihood  -379.929  -376.540 
## Deviance         523.079   505.649 
## AIC              769.858   765.081 
## BIC              786.350   784.871 
## N                200       200     
## ===================================
## The linear part is just about the same. If you fit this
## again without x1 and x2, you'll see they are exactly the same.
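
## A direct check of that claim (my own addition): m7 reproduces the
## predictions of the raw quadratic fit m1, up to rounding error.
all.equal(fitted(m1), fitted(m7))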

## The do-it-yourself residual centering leaves x3 unchanged; it only
## fiddles with x3square.  Thus, in some sense, it is easier to
## understand than poly(), which
## re-scales x3 so it is roughly on the same range as the quadratic part.
## This is beneficial in numerical linear algebra (computer math).
## Observe for yourself: the first poly term is just
## x3, shrunken proportionally.

plot(x3poly[, 1] ~ x3, data = dat)


## You notice that x3poly[ ,1] is mean centered:

mean(x3poly[  ,1])
## [1] 2.459e-18
summary(lm(x3poly[ ,1] ~ x3, data = dat))
## Warning: essentially perfect fit: summary may be unreliable
## 
## Call:
## lm(formula = x3poly[, 1] ~ x3, data = dat)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -1.89e-17 -6.90e-18 -2.80e-18  9.00e-19  3.51e-16 
## 
## Coefficients:
##              Estimate Std. Error   t value Pr(>|t|)    
## (Intercept) -1.47e-01   4.45e-18 -3.30e+16   <2e-16 ***
## x3           1.51e-01   4.11e-18  3.66e+16   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.73e-17 on 198 degrees of freedom
## Multiple R-squared:     1,   Adjusted R-squared:     1 
## F-statistic: 1.34e+33 on 1 and 198 DF,  p-value: <2e-16
## The warning message is inconsequential.


## Let's create another variable, just for comparison.
newX <- data.frame(z = rnorm(1000, m = 50, sd = 10))
newX$zsquare <- newX$z^2

## Here are the summary stats.
summarize(newX)
## $numerics
##             z  zsquare
## 0%     18.990    360.6
## 25%    42.950   1844.7
## 50%    49.581   2458.3
## 75%    55.882   3122.8
## 100%   78.451   6154.6
## mean   49.526   2546.4
## sd      9.683    966.3
## var    93.756 933805.4
## NA's    0.000      0.0
## N    1000.000   1000.0
## 
## $factors
## NULL
xpol <- poly(newX$z, 2)
head(xpol, 20)
##               1         2
##  [1,] -0.036815  0.008153
##  [2,] -0.003082 -0.023479
##  [3,]  0.004198 -0.023235
##  [4,]  0.003102 -0.023433
##  [5,]  0.013109 -0.019519
##  [6,] -0.003836 -0.023361
##  [7,]  0.032236  0.001157
##  [8,]  0.017242 -0.016519
##  [9,]  0.006665 -0.022583
## [10,] -0.034777  0.004713
## [11,] -0.036372  0.007389
## [12,] -0.034082  0.003584
## [13,]  0.039273  0.013124
## [14,]  0.011481 -0.020479
## [15,]  0.037785  0.010398
## [16,]  0.010668 -0.020912
## [17,] -0.019891 -0.014454
## [18,]  0.058659  0.058223
## [19,] -0.030596 -0.001731
## [20,]  0.005715 -0.022868
summary(xpol)
##        1                  2           
##  Min.   :-0.09978   Min.   :-0.02368  
##  1st Qu.:-0.02149   1st Qu.:-0.02155  
##  Median : 0.00018   Median :-0.01328  
##  Mean   : 0.00000   Mean   : 0.00000  
##  3rd Qu.: 0.02077   3rd Qu.: 0.00917  
##  Max.   : 0.09452   Max.   : 0.21137
summarize(as.data.frame(xpol))
## $numerics
##             1        2
## 0%     -0.100   -0.024
## 25%    -0.021   -0.022
## 50%     0.000   -0.013
## 75%     0.021    0.009
## 100%    0.095    0.211
## mean    0.000    0.000
## sd      0.032    0.032
## var     0.001    0.001
## NA's    0.000    0.000
## N    1000.000 1000.000
## 
## $factors
## NULL
## What do I see there?  The means of both columns are
## almost exactly 0.
##
## And the variances are identical (hence standard deviations are
## identical too).
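
## To confirm the observation directly (my own check):
apply(xpol, 2, function(col) c(mean = mean(col), var = var(col)))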