## Paul Johnson
## 2013-04-11
##
## Explore quadratic regression. Explain ways to fit that in R.
## Demonstrates use of predict, newdata, and R's poly() function.
##
## From now on, anybody who wants help with quadratic regressions,
## well, has to run this through R. And read a lot of pithy comments.

## Manufacture some data.

set.seed(444532)
x3sd <- 0.5
dat <- data.frame(x1 = rnorm(200),
                  x2 = rnorm(200, s = 1),
                  x3 = rnorm(200, m = 1, s = x3sd))
b0 <- 0.4
b1 <- 0.7
b2 <- 1.5
b3 <- -0.9
b4 <- 1.4
d <- 2
stde <- 1.5
dat$y <- with(dat, b0 + b1 * x1 + b2 * x2 + b3 * x3 + b4 * x3^2 +
              rnorm(200, s = stde))

## That has a very minor curvature in it. The key coefficient
## here is b4 = 1.4, meaning we have a U-shaped curve in x3.

## Plot just the quadratic part, to get the feeling. Note the plotmath.
## Only try that on a closed course with a trained driver :)
quad <- function(x) b0 + b3 * x + b4 * x^2
curve(quad, from = -2, to = 2, xlab = "x3",
      ylab = substitute(y == A + B * x3 + C * x3^2,
                        list(A = b0, B = b3, C = b4)))

## Suppose you estimate a linear model.
m0 <- lm(y ~ x1 + x2 + x3, data = dat)
summary(m0)

##
## Call:
## lm(formula = y ~ x1 + x2 + x3, data = dat)
##
## Residuals:
##    Min     1Q Median     3Q    Max
## -5.328 -1.182 -0.056  1.095  4.855
##
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)   -0.646      0.269   -2.40    0.017 *
## x1             0.787      0.121    6.51  6.3e-10 ***
## x2             1.408      0.114   12.37  < 2e-16 ***
## x3             2.035      0.248    8.21  2.8e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.63 on 196 degrees of freedom
## Multiple R-squared:  0.563,  Adjusted R-squared:  0.557
## F-statistic: 84.3 on 3 and 196 DF,  p-value: <2e-16

## Please note: just because your "p value" is "really great", it does
## not mean you have "the right" model.
## There's a little bit of nonlinearity, as in the residual plot:
plot(m0, which = 1)

## It's tough to see, though.
termplot(m0, partial.resid = TRUE, terms = "x3")

## The "truth" is quadratic. We'd like to estimate
## b4, and there are many ways that this can be done.
##
## I'm going to show 4 ways, one of which is more strongly
## recommended, but, as we see, they are all really
## interchangeable.

## Method 1. The old-fashioned way.
## Create a new variable x3square and insert that.
dat$x3square <- dat$x3 * dat$x3
m1 <- lm(y ~ x1 + x2 + x3 + x3square, data = dat)
summary(m1)

##
## Call:
## lm(formula = y ~ x1 + x2 + x3 + x3square, data = dat)
##
## Residuals:
##    Min     1Q Median     3Q    Max
## -5.704 -1.078  0.022  1.057  4.672
##
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)   0.1115     0.3945    0.28     0.78
## x1            0.7556     0.1199    6.30  1.9e-09 ***
## x2            1.4197     0.1123   12.64  < 2e-16 ***
## x3            0.0678     0.7970    0.09     0.93
## x3square      0.9924     0.3828    2.59     0.01 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.61 on 195 degrees of freedom
## Multiple R-squared:  0.578,  Adjusted R-squared:  0.569
## F-statistic: 66.8 on 4 and 195 DF,  p-value: <2e-16
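
## As a side check (my addition, not in the original script), compare
## the estimates against the true parameters set at the top. The x3 and
## x3square estimates are each within about 1.2 standard errors of the
## true b3 = -0.9 and b4 = 1.4:
rbind(estimate = coef(m1), truth = c(b0, b1, b2, b3, b4))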

## I'll ask for variables one at a time so you see the problem.
## Interactively, I'd run this
## termplot(m1, partial.resid = TRUE)
## Now, I'm in a script, and I have to do:
termplot(m1, partial.resid = TRUE, terms = "x1")


termplot(m1, partial.resid = TRUE, terms = "x2")


termplot(m1, partial.resid = TRUE, terms = "x3")


termplot(m1, partial.resid = TRUE, terms = "x3square")


## You don't see much curve because the parts of
## x3 are separated into 2 columns, each of which
## is seen as a linear element in the regression.
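
## If you want to see the curvature anyway, you can assemble the
## combined partial residual for the two x3 columns by hand. A sketch,
## my addition (the object names here are invented for illustration):
cf <- coef(m1)
pres <- resid(m1) + cf["x3"] * dat$x3 + cf["x3square"] * dat$x3square
plot(dat$x3, pres, xlab = "x3",
     ylab = "partial residual: x3 + x3square")
## overlay the fitted quadratic piece for reference
x3s <- sort(dat$x3)
lines(x3s, cf["x3"] * x3s + cf["x3square"] * x3s^2, col = "red")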

## While we are here, look at the predictor "design matrix":
head(model.matrix(m1))

##   (Intercept)       x1      x2     x3 x3square
## 1           1 -0.96188  1.2770 0.5939   0.3527
## 2           1  1.30351  0.3905 0.6015   0.3618
## 3           1  1.65021  0.2225 0.9298   0.8645
## 4           1  0.72121 -1.0704 0.4411   0.1945
## 5           1  0.05278  0.4767 0.7713   0.5950
## 6           1  0.39589 -0.3344 0.7803   0.6088

## Method 2. Technically equivalent to Method 1. We can introduce
## the squared term and R will construct the equivalent of
## x3square for us, automatically.

## Method 2a. This is the most elementary way; it is the way new
## users always want to do it.

m2a <- lm(y ~ x1 + x2 + x3 + I(x3*x3), data = dat)
## same as
## m2a <- lm(y ~ x1 + x2 + x3 + I(x3^2), data = dat)
summary(m2a)

##
## Call:
## lm(formula = y ~ x1 + x2 + x3 + I(x3 * x3), data = dat)
##
## Residuals:
##    Min     1Q Median     3Q    Max
## -5.704 -1.078  0.022  1.057  4.672
##
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)   0.1115     0.3945    0.28     0.78
## x1            0.7556     0.1199    6.30  1.9e-09 ***
## x2            1.4197     0.1123   12.64  < 2e-16 ***
## x3            0.0678     0.7970    0.09     0.93
## I(x3 * x3)    0.9924     0.3828    2.59     0.01 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.61 on 195 degrees of freedom
## Multiple R-squared:  0.578,  Adjusted R-squared:  0.569
## F-statistic: 66.8 on 4 and 195 DF,  p-value: <2e-16
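
## Why the I() wrapper? Inside a formula, "*" and "^" are formula
## operators, not arithmetic, so x3*x3 without I() silently collapses
## to plain x3. A quick demonstration (my addition; "mbad" is just an
## illustrative name):
mbad <- lm(y ~ x1 + x2 + x3 + x3*x3, data = dat)
coef(mbad)  ## no squared term appears; this is m0 all over again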

## Look at the predictor matrix:
head(model.matrix(m2a))

##   (Intercept)       x1      x2     x3 I(x3 * x3)
## 1           1 -0.96188  1.2770 0.5939     0.3527
## 2           1  1.30351  0.3905 0.6015     0.3618
## 3           1  1.65021  0.2225 0.9298     0.8645
## 4           1  0.72121 -1.0704 0.4411     0.1945
## 5           1  0.05278  0.4767 0.7713     0.5950
## 6           1  0.39589 -0.3344 0.7803     0.6088

## Method 2b. Introduced only to show the poly() function. The poly()
## function can be "forced" to create the same x3 and x3square columns
## by the raw = TRUE argument.
m2b <- lm(y ~  x1 + x2 + poly(x3, degree = d, raw = TRUE), data=dat)
summary(m2b)

##
## Call:
## lm(formula = y ~ x1 + x2 + poly(x3, degree = d, raw = TRUE),
##     data = dat)
##
## Residuals:
##    Min     1Q Median     3Q    Max
## -5.704 -1.078  0.022  1.057  4.672
##
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)
## (Intercept)                         0.1115     0.3945    0.28     0.78
## x1                                  0.7556     0.1199    6.30  1.9e-09 ***
## x2                                  1.4197     0.1123   12.64  < 2e-16 ***
## poly(x3, degree = d, raw = TRUE)1   0.0678     0.7970    0.09     0.93
## poly(x3, degree = d, raw = TRUE)2   0.9924     0.3828    2.59     0.01 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.61 on 195 degrees of freedom
## Multiple R-squared:  0.578,  Adjusted R-squared:  0.569
## F-statistic: 66.8 on 4 and 195 DF,  p-value: <2e-16

## See? Same!
cbind(coef(summary(m2a))[, 1:2], coef(summary(m2b))[, 1:2])

##             Estimate Std. Error Estimate Std. Error
## (Intercept)  0.11151     0.3945  0.11151     0.3945
## x1           0.75564     0.1199  0.75564     0.1199
## x2           1.41971     0.1123  1.41971     0.1123
## x3           0.06782     0.7970  0.06782     0.7970
## I(x3 * x3)   0.99240     0.3828  0.99240     0.3828

## Look at the predictor matrix:
head(model.matrix(m2b))

##   (Intercept)       x1      x2 poly(x3, degree = d, raw = TRUE)1
## 1           1 -0.96188  1.2770                            0.5939
## 2           1  1.30351  0.3905                            0.6015
## 3           1  1.65021  0.2225                            0.9298
## 4           1  0.72121 -1.0704                            0.4411
## 5           1  0.05278  0.4767                            0.7713
## 6           1  0.39589 -0.3344                            0.7803
##   poly(x3, degree = d, raw = TRUE)2
## 1                            0.3527
## 2                            0.3618
## 3                            0.8645
## 4                            0.1945
## 5                            0.5950
## 6                            0.6088

## Method 3. The R-recommended method, poly().
## We are advised to use "orthogonal polynomials" instead, because
## they are more numerically stable (see the condition-number check
## after the design matrix below).
m3 <- lm(y ~  x1 + x2 +  poly(x3, degree = d), data=dat)
summary(m3)

##
## Call:
## lm(formula = y ~ x1 + x2 + poly(x3, degree = d), data = dat)
##
## Residuals:
##    Min     1Q Median     3Q    Max
## -5.704 -1.078  0.022  1.057  4.672
##
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)
## (Intercept)              1.341      0.114   11.73  < 2e-16 ***
## x1                       0.756      0.120    6.30  1.9e-09 ***
## x2                       1.420      0.112   12.64  < 2e-16 ***
## poly(x3, degree = d)1   13.461      1.622    8.30  1.7e-14 ***
## poly(x3, degree = d)2    4.201      1.620    2.59     0.01 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.61 on 195 degrees of freedom
## Multiple R-squared:  0.578,  Adjusted R-squared:  0.569
## F-statistic: 66.8 on 4 and 195 DF,  p-value: <2e-16

termplot(m3, partial.resid = TRUE, terms = "x1")


termplot(m3, partial.resid = TRUE, terms = "x2")


termplot(m3, partial.resid = TRUE, terms = "poly(x3, degree = d)")

## Look at the predictor matrix; you see that poly() created
## 2 columns:
head(model.matrix(m3))

##   (Intercept)       x1      x2 poly(x3, degree = d)1 poly(x3, degree = d)2
## 1           1 -0.96188  1.2770              -0.05747              -0.01570
## 2           1  1.30351  0.3905              -0.05632              -0.01710
## 3           1  1.65021  0.2225              -0.00691              -0.05140
## 4           1  0.72121 -1.0704              -0.08047               0.01819
## 5           1  0.05278  0.4767              -0.03076              -0.04120
## 6           1  0.39589 -0.3344              -0.02941              -0.04209
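
## To put a number on the "numerically stable" claim (my addition),
## compare the condition numbers of the two design matrices. The raw
## version should be far more ill-conditioned than the orthogonal one:
kappa(model.matrix(m2b))  ## raw powers
kappa(model.matrix(m3))   ## orthogonal polynomial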

## Hypo test rejects m0, incidentally:
anova(m3, m0, test = "F")

## Analysis of Variance Table
##
## Model 1: y ~ x1 + x2 + poly(x3, degree = d)
## Model 2: y ~ x1 + x2 + x3
##   Res.Df RSS Df Sum of Sq    F Pr(>F)
## 1    195 506
## 2    196 523 -1     -17.4 6.72   0.01 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## Coefficients look different, but that's an illusion.
## From a given combination of (x1, x2, x3), the predictions
## are exactly the same. See?
## Predicted values are identical.
head(cbind(m1$fitted, m2b$fitted, m3$fitted))

##        [,1]      [,2]      [,3]
## 1  1.587901  1.587901  1.587901
## 2  2.050746  2.050746  2.050746
## 3  2.595258  2.595258  2.595258
## 4 -0.640246 -0.640246 -0.640246
## 5  1.470894  1.470894  1.470894
## 6  0.592977  0.592977  0.592977

## (The full 200-row printout is identical across all three columns.)
## That's not all. If you check the 95% CIs on the predicted values,
## or R^2, or anything logically comparable, they'll be the same.
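
## A more compact confirmation than eyeballing rows (my addition):
all.equal(fitted(m1), fitted(m2b))  ## TRUE, up to numerical tolerance
all.equal(fitted(m1), fitted(m3))   ## TRUE
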
## Some people say it is very important to "mean center" the
## predictor before creating the quadratic term. Let's do that and
## show it makes no difference whatsoever. x3c is mean centered:
dat$x3c <- scale(dat$x3, scale = FALSE)  ## mean centered
m4 <- lm(y ~ x1 + x2 + x3c + I(x3c*x3c), data = dat)
summary(m4)

##
## Call:
## lm(formula = y ~ x1 + x2 + x3c + I(x3c * x3c), data = dat)
##
## Residuals:
##    Min     1Q Median     3Q    Max
## -5.704 -1.078  0.022  1.057  4.672
##
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept)     1.122      0.142    7.92  1.8e-13 ***
## x1              0.756      0.120    6.30  1.9e-09 ***
## x2              1.420      0.112   12.64  < 2e-16 ***
## x3c             2.004      0.244    8.20  3.2e-14 ***
## I(x3c * x3c)    0.992      0.383    2.59     0.01 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.61 on 195 degrees of freedom
## Multiple R-squared:  0.578,  Adjusted R-squared:  0.569
## F-statistic: 66.8 on 4 and 195 DF,  p-value: <2e-16

## Superficially better...
## Coefficients look different, but that's an illusion.
## Predicted values are identical:
head(cbind(m1$fitted, m2b$fitted, m3$fitted, m4$fitted))

##      [,1]    [,2]    [,3]    [,4]
## 1  1.5879  1.5879  1.5879  1.5879
## 2  2.0507  2.0507  2.0507  2.0507
## 3  2.5953  2.5953  2.5953  2.5953
## 4 -0.6402 -0.6402 -0.6402 -0.6402
## 5  1.4709  1.4709  1.4709  1.4709
## 6  0.5930  0.5930  0.5930  0.5930

## Can you explain?
## It is a known fact that the estimate of the squared term is the same
## with centered data, but the estimate of the linear term changes. A
## side-by-side table makes that easy to see:
library(memisc)

## Loading required package: lattice
## Loading required package: methods
## Loading required package: MASS
##
## Attaching package: 'memisc'
##
## The following objects are masked from 'package:stats':
##
##     contrasts, contr.sum, contr.treatment
##
## The following object is masked from 'package:base':
##
##     as.array

mtable(m1, m2b, m3, m4)

##
## Calls:
## m1: lm(formula = y ~ x1 + x2 + x3 + x3square, data = dat)
## m2b: lm(formula = y ~ x1 + x2 + poly(x3, degree = d, raw = TRUE),
##     data = dat)
## m3: lm(formula = y ~ x1 + x2 + poly(x3, degree = d), data = dat)
## m4: lm(formula = y ~ x1 + x2 + x3c + I(x3c * x3c), data = dat)
##
## ==========================================================================
##                                        m1       m2b      m3       m4
## --------------------------------------------------------------------------
##   (Intercept)                         0.112    0.112    1.341*** 1.122***
##                                      (0.395)  (0.395)  (0.114)  (0.142)
##   x1                                  0.756*** 0.756*** 0.756*** 0.756***
##                                      (0.120)  (0.120)  (0.120)  (0.120)
##   x2                                  1.420*** 1.420*** 1.420*** 1.420***
##                                      (0.112)  (0.112)  (0.112)  (0.112)
##   x3                                  0.068
##                                      (0.797)
##   x3square                            0.992*
##                                      (0.383)
##   poly(x3, degree = d, raw = TRUE)1            0.068
##                                               (0.797)
##   poly(x3, degree = d, raw = TRUE)2            0.992*
##                                               (0.383)
##   poly(x3, degree = d)1                                13.461***
##                                                        (1.622)
##   poly(x3, degree = d)2                                 4.201*
##                                                        (1.620)
##   x3c                                                            2.004***
##                                                                 (0.244)
##   I(x3c * x3c)                                                   0.992*
##                                                                 (0.383)
## --------------------------------------------------------------------------
##   R-squared                           0.578    0.578    0.578    0.578
##   adj. R-squared                      0.569    0.569    0.569    0.569
##   sigma                               1.610    1.610    1.610    1.610
##   F                                  66.779   66.779   66.779   66.779
##   p                                   0.000    0.000    0.000    0.000
##   Log-likelihood                   -376.540 -376.540 -376.540 -376.540
##   Deviance                          505.649  505.649  505.649  505.649
##   AIC                               765.081  765.081  765.081  765.081
##   BIC                               784.871  784.871  784.871  784.871
##   N                                 200      200      200      200
## ==========================================================================

## Once you understand why that change happens, you will begin to see
## why poly() is so helpful.
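
## Here is the algebra behind that (my addition). Substitute
## x = xc + m, with m the mean, into b0 + b1*x + b2*x^2 and collect
## terms:
##
##    y = (b0 + b1*m + b2*m^2) + (b1 + 2*b2*m)*xc + b2*xc^2
##
## So the squared coefficient is unchanged, while the centered linear
## coefficient should equal b1 + 2*b2*mean(x3). Check it numerically:
coef(m1)["x3"] + 2 * coef(m1)["x3square"] * mean(dat$x3)
## compare with
coef(m4)["x3c"]
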
## And, in case you wonder, this is one of those cases where x3 and
## x3^2 are strongly correlated with one another.
cor(model.matrix(m1)[ , -1])

##                x1       x2       x3 x3square
## x1        1.00000 -0.03823 -0.11675 -0.07957
## x2       -0.03823  1.00000  0.03466  0.02005
## x3       -0.11675  0.03466  1.00000  0.95163
## x3square -0.07957  0.02005  0.95163  1.00000

## Note that termplot will cooperate if we type in the full name of
## the predictor from the model. Just using "x3" fails.
termplot(m3, partial.resid = TRUE, terms = "poly(x3, degree = d)")

## Whereas the thing that seems "obvious" to me fails:
termplot(m3, partial.resid = TRUE, terms = "x3")

## Error: subscript out of bounds

## You should see the same error I do:
## Error in predict.lm(model, type = "terms", se.fit = se, terms = terms) :
## That's one of the things I've tried to fix in rockchalk's plotCurves.
library(rockchalk)

## Loading required package: car
##
## Attaching package: 'car'
##
## The following object is masked from 'package:memisc':
##
##     recode
##
## Loading required package: lme4
## Loading required package: Matrix
## Loading required package: Rcpp
##
## Attaching package: 'rockchalk'
##
## The following object is masked from 'package:MASS':
##
##     mvrnorm

plotCurves(m3, plotx = "x3")

plotCurves(m3, plotx = "x3", interval = "conf")

## It is important to get used to the R predict() family of functions.
## A regression tool is not complete unless it has one.
## And using the newdata argument in predict is really important!
## Look what goes wrong if you accidentally omit a predictor from a
## newdata object:
ndf1 <- data.frame(x2 = c(1), x3 = c(2))
predict(m1, newdata = ndf1)

## Error: object 'x1' not found

## That fails:
## Error in eval(expr, envir, enclos) : object 'x1' not found

## Now get it right:
ndf2 <- data.frame(x1 = c(1), x2 = c(1), x3 = c(2))
predict(m2a, newdata = ndf2)

##     1
## 6.392

## That succeeded.

## Now we'd like to calculate predicted values for, say, 20 values of
## x3 spread across its range (20 evenly spaced quantiles, actually).
## We'll leave x1 and x2 at their averages. That's not a difficult
## calculation, but why not use the handy functions I've prepared
## for it.
predictOMatic(m3, interval = "confidence")

## $x1
##        x1      x2     x3     fit     lwr     upr
## 1 -3.2550 0.04564 0.9757 -1.2725 -2.0931 -0.4518
## 2 -0.4605 0.04564 0.9757  0.8392  0.5389  1.1394
## 3  0.1471 0.04564 0.9757  1.2983  1.0173  1.5793
## 4  0.6096 0.04564 0.9757  1.6478  1.3346  1.9611
## 5  2.7511 0.04564 0.9757  3.2660  2.5598  3.9722
##
## $x2
##        x1       x2     x3     fit      lwr     upr
## 1 0.07998 -2.68799 0.9757 -2.6334 -3.30626 -1.9605
## 2 0.07998 -0.59325 0.9757  0.3405  0.02425  0.6568
## 3 0.07998  0.04662 0.9757  1.2490  0.96937  1.5286
## 4 0.07998  0.77808 0.9757  2.2874  1.96742  2.6075
## 5 0.07998  2.52345 0.9757  4.7654  4.15512  5.3756
##
## $x3
##        x1      x2      x3    fit     lwr    upr
## 1 0.07998 0.04564 -0.4099 0.3757 -1.1157 1.8671
## 2 0.07998 0.04564  0.6432 0.6910  0.4050 0.9769
## 3 0.07998 0.04564  0.9450 1.1870  0.9078 1.4661
## 4 0.07998 0.04564  1.3039 2.0125  1.7226 2.3024
## 5 0.07998 0.04564  2.0443 4.5229  3.6481 5.3977
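
## predictOMatic is a convenience; essentially the same numbers come
## from plain predict() with a hand-built newdata frame. A sketch, my
## addition ("nd" is an invented name):
nd <- data.frame(x1 = mean(dat$x1), x2 = mean(dat$x2),
                 x3 = quantile(dat$x3, probs = c(0, 0.25, 0.5, 0.75, 1)))
predict(m3, newdata = nd, interval = "confidence")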

m1pr1 <- predictOMatic(m3,
                       predVals = list(x3 = quantile(dat$x3,
                                       probs = seq(0, 1, length.out = 20))),
                       interval = "confidence")
m1pr1

##         x1      x2      x3    fit     lwr    upr
## 1  0.07998 0.04564 -0.4099 0.3757 -1.1157 1.8671
## 2  0.07998 0.04564  0.2631 0.3233 -0.1484 0.7950
## 3  0.07998 0.04564  0.4411 0.4597  0.1126 0.8068
## 4  0.07998 0.04564  0.5534 0.5781  0.2744 0.8819
## 5  0.07998 0.04564  0.5984 0.6326  0.3394 0.9259
## 6  0.07998 0.04564  0.6753 0.7351  0.4527 1.0174
## 7  0.07998 0.04564  0.7458 0.8392  0.5610 1.1175
## 8  0.07998 0.04564  0.8169 0.9543  0.6769 1.2318
## 9  0.07998 0.04564  0.8695 1.0460  0.7680 1.3239
## 10 0.07998 0.04564  0.9238 1.1463  0.8674 1.4251
## 11 0.07998 0.04564  0.9549 1.2064  0.9271 1.4857
## 12 0.07998 0.04564  1.0336 1.3670  1.0867 1.6473
## 13 0.07998 0.04564  1.0991 1.5100  1.2291 1.7909
## 14 0.07998 0.04564  1.1682 1.6702  1.3883 1.9522
## 15 0.07998 0.04564  1.2732 1.9317  1.6448 2.2186
## 16 0.07998 0.04564  1.3442 2.1211  1.8258 2.4164
## 17 0.07998 0.04564  1.4565 2.4409  2.1183 2.7635
## 18 0.07998 0.04564  1.6343 2.9981  2.5831 3.4131
## 19 0.07998 0.04564  1.7877 3.5295  2.9809 4.0781
## 20 0.07998 0.04564  2.0443 4.5229  3.6481 5.3977

## Wonder why that works? Here's a middle step that gives you the hint:
newdata(m3, fl = list(x3 = quantile(dat$x3, probs = seq(0, 1, length.out = 20))))

##        x1      x2     x3
## 1 0.07998 0.04564 0.9757

plot(fit ~ x3, data = m1pr1, type = "l")
lines(upr ~ x3, data = m1pr1, col = "purple", lty = 4)
lines(lwr ~ x3, data = m1pr1, col = "purple", lty = 4)
legend("topleft", legend = c("predicted","95% CI","95% CI"), lty = 4, col = "purple")


## I suggest users go back to the beginning and fiddle the parameters.
## Make x3sd bigger, or change b3 and b4, and see what happens.

## Notice that the curves look bad, a little jagged. Can you figure
## out why?  For a hint, run this:

m1pr2 <- predictOMatic(m3,
                       predVals = list(x3 = quantile(dat$x3,
                                       probs = seq(0, 1, length.out = 1000))),
                       interval = "confidence")
plot(fit ~ x3, data = m1pr2, type = "l")
lines(upr ~ x3, data = m1pr2, col = "purple", lty = 4)
lines(lwr ~ x3, data = m1pr2, col = "purple", lty = 4)
legend("topleft", legend = c("predicted", "95% CI"),
       lty = c(1, 4), col = c("black", "purple"))

## Ooh la la, as we say in France :)

## Major points.
## 1. If the true relationship is quadratic, we may not find it unless
## we know to look for it. I have fiddled with the parameters for this
## model quite a bit. In many cases, the anova() test is unequivocal,
## rejecting m0. But quite often, it is not powerful. Still, a theory
## that hints that we should look for curvature in the effect of x3
## would be most welcome.
##
## 2. In any nonlinear model, an emphasis in the interpretation is the
## analysis of predicted values. These are "marginal effects": if x
## changes, how do we predict y changes? That requires us to use
## predict() methods in R. termplot does pretty well with these; the
## rockchalk::plotCurves function handles interactions and some
## nonlinearities that termplot does not manage.
##
## 3. Does it matter how we estimate these models?
## Some experts are emphatic about the need to mean-center x to
## ameliorate multicollinearity due to the similarity of x and x^2,
## but I believe that is completely wrong. Centering results in exactly
## the same predicted values; it is, literally, the identical model.
## Other R experts I trust strongly urge poly(x, 2), not x + I(x*x) or
## such. I think there are several different reasons that people have
## in mind to justify that. Some of the worries about digital round-off
## error have been addressed by improvements in the algorithms that are
## used to fit regressions. That's why we don't see a difference in the
## predicted values of the 4 methods.
## So students and I want to know: why do the smart people like poly()
## so much? I have finally grasped the answer, thanks to advice on the
## r-help email list in 2013.
## Gabor Grothendieck listed advantages of poly(). I'm pasting in his
## explanations and putting in example code from the examples above.

##GG 1. One benefit is if you fit a higher degree polynomial using
##GG poly(x, n) the lower degree coefficients will agree with the fit
##GG using a lower n.

##PJ I can verify that. It is strictly, literally true if there are no
## other predictors than the poly terms. Example:
m3 <- lm(y ~ poly(x3, degree = 2), data = dat)
m5 <- lm(y ~ poly(x3, degree = 3), data = dat)
m6 <- lm(y ~ poly(x3, degree = 4), data = dat)
mtable(m3, m5, m6)

##
## Calls:
## m3: lm(formula = y ~ poly(x3, degree = 2), data = dat)
## m5: lm(formula = y ~ poly(x3, degree = 3), data = dat)
## m6: lm(formula = y ~ poly(x3, degree = 4), data = dat)
##
## ====================================================
##                           m3        m5        m6
## ----------------------------------------------------
##   (Intercept)             1.467***  1.467***  1.467***
##                          (0.160)   (0.160)   (0.160)
##   poly(x3, degree = 2)1  12.968***
##                          (2.265)
##   poly(x3, degree = 2)2   4.397
##                          (2.265)
##   poly(x3, degree = 3)1            12.968***
##                                    (2.264)
##   poly(x3, degree = 3)2             4.397
##                                    (2.264)
##   poly(x3, degree = 3)3            -2.433
##                                    (2.264)
##   poly(x3, degree = 4)1                      12.968***
##                                              (2.269)
##   poly(x3, degree = 4)2                       4.397
##                                              (2.269)
##   poly(x3, degree = 4)3                      -2.433
##                                              (2.269)
##   poly(x3, degree = 4)4                      -0.855
##                                              (2.269)
## ----------------------------------------------------
##   R-squared               0.156     0.161     0.162
##   adj. R-squared          0.148     0.149     0.145
##   sigma                   2.265     2.264     2.269
##   F                      18.272    12.576     9.426
##   p                       0.000     0.000     0.000
##   Log-likelihood       -445.805  -445.218  -445.145
##   Deviance             1010.794  1004.876  1004.145
##   AIC                   899.610   900.436   902.290
##   BIC                   912.803   916.927   922.080
##   N                     200       200       200
## ====================================================

##PJ However, if there are other predictors, they are not orthogonal
##PJ to the poly() terms, so the argument breaks down a little bit:
m3c <- lm(y ~ x1 + x2 + poly(x3, degree = 2), data = dat)
m5c <- lm(y ~ x1 + x2 + poly(x3, degree = 3), data = dat)
m6c <- lm(y ~ x1 + x2 + poly(x3, degree = 4), data = dat)
mtable(m3c, m5c, m6c)

##
## Calls:
## m3c: lm(formula = y ~ x1 + x2 + poly(x3, degree = 2), data = dat)
## m5c: lm(formula = y ~ x1 + x2 + poly(x3, degree = 3), data = dat)
## m6c: lm(formula = y ~ x1 + x2 + poly(x3, degree = 4), data = dat)
##
## ====================================================
##                           m3c       m5c       m6c
## ----------------------------------------------------
##   (Intercept)             1.341***  1.341***  1.340***
##                          (0.114)   (0.114)   (0.115)
##   x1                      0.756***  0.768***  0.774***
##                          (0.120)   (0.120)   (0.121)
##   x2                      1.420***  1.410***  1.408***
##                          (0.112)   (0.113)   (0.113)
##   poly(x3, degree = 2)1  13.461***
##                          (1.622)
##   poly(x3, degree = 2)2   4.201*
##                          (1.620)
##   poly(x3, degree = 3)1            13.486***
##                                    (1.622)
##   poly(x3, degree = 3)2             4.177*
##                                    (1.620)
##   poly(x3, degree = 3)3            -1.714
##                                    (1.624)
##   poly(x3, degree = 4)1                      13.496***
##                                              (1.624)
##   poly(x3, degree = 4)2                       4.168*
##                                              (1.622)
##   poly(x3, degree = 4)3                      -1.724
##                                              (1.627)
##   poly(x3, degree = 4)4                      -1.075
##                                              (1.616)
## ----------------------------------------------------
##   R-squared               0.578     0.580     0.581
##   adj. R-squared          0.569     0.570     0.568
##   sigma                   1.610     1.610     1.612
##   F                      66.779    53.677    44.676
##   p                       0.000     0.000     0.000
##   Log-likelihood       -376.540  -375.968  -375.739
##   Deviance              505.649   502.762   501.613
##   AIC                   765.081   765.935   767.478
##   BIC                   784.871   789.024   793.864
##   N                     200       200       200
## ====================================================

## Compare to raw:
m3d <- lm(y ~ x1 + x2 + poly(x3, degree = 2, raw = TRUE), data = dat)
m5d <- lm(y ~ x1 + x2 + poly(x3, degree = 3, raw = TRUE), data = dat)
m6d <- lm(y ~ x1 + x2 + poly(x3, degree = 4, raw = TRUE), data = dat)
mtable(m3d, m5d, m6d)

##
## Calls:
## m3d: lm(formula = y ~ x1 + x2 + poly(x3, degree = 2, raw = TRUE),
##     data = dat)
## m5d: lm(formula = y ~ x1 + x2 + poly(x3, degree = 3, raw = TRUE),
##     data = dat)
## m6d: lm(formula = y ~ x1 + x2 + poly(x3, degree = 4, raw = TRUE),
##     data = dat)
##
## ================================================================
##                                       m3d       m5d       m6d
## ----------------------------------------------------------------
##   (Intercept)                         0.112     0.199     0.326
##                                      (0.395)   (0.403)   (0.446)
##   x1                                  0.756***  0.768***  0.774***
##                                      (0.120)   (0.120)   (0.121)
##   x2                                  1.420***  1.410***  1.408***
##                                      (0.112)   (0.113)   (0.113)
##   poly(x3, degree = 2, raw = TRUE)1   0.068
##                                      (0.797)
##   poly(x3, degree = 2, raw = TRUE)2   0.992*
##                                      (0.383)
##   poly(x3, degree = 3, raw = TRUE)1            -1.002
##                                                (1.289)
##   poly(x3, degree = 3, raw = TRUE)2             2.618
##                                                (1.587)
##   poly(x3, degree = 3, raw = TRUE)3            -0.601
##                                                (0.570)
##   poly(x3, degree = 4, raw = TRUE)1                      -0.687
##                                                          (1.375)
##   poly(x3, degree = 4, raw = TRUE)2                       0.751
##                                                          (3.226)
##   poly(x3, degree = 4, raw = TRUE)3                       1.349
##                                                          (2.987)
##   poly(x3, degree = 4, raw = TRUE)4                      -0.570
##                                                          (0.857)
## ----------------------------------------------------------------
##   R-squared               0.578     0.580     0.581
##   adj. R-squared          0.569     0.570     0.568
##   sigma                   1.610     1.610     1.612
##   F                      66.779    53.677    44.676
##   p                       0.000     0.000     0.000
##   Log-likelihood       -376.540  -375.968  -375.739
##   Deviance              505.649   502.762   501.613
##   AIC                   765.081   765.935   767.478
##   BIC                   784.871   789.024   793.864
##   N                     200       200       200
## ================================================================

##GG 2. A second advantage is that the t statistics are invariant under
##GG shift if you use orthogonal polynomials.
##GG Compare these two:
m3a <- lm(y ~ x1 + x2 + poly(x3, degree = 2), data = dat)
m3b <- lm(y ~ x1 + x2 + poly(x3 + 1, degree = 2), data = dat)
summary(m3a)

##
## Call:
## lm(formula = y ~ x1 + x2 + poly(x3, degree = 2), data = dat)
##
## Residuals:
##    Min     1Q Median     3Q    Max
## -5.704 -1.078  0.022  1.057  4.672
##
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)
## (Intercept)              1.341      0.114   11.73  < 2e-16 ***
## x1                       0.756      0.120    6.30  1.9e-09 ***
## x2                       1.420      0.112   12.64  < 2e-16 ***
## poly(x3, degree = 2)1   13.461      1.622    8.30  1.7e-14 ***
## poly(x3, degree = 2)2    4.201      1.620    2.59     0.01 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.61 on 195 degrees of freedom
## Multiple R-squared:  0.578,  Adjusted R-squared:  0.569
## F-statistic: 66.8 on 4 and 195 DF,  p-value: <2e-16

summary(m3b)

##
## Call:
## lm(formula = y ~ x1 + x2 + poly(x3 + 1, degree = 2), data = dat)
##
## Residuals:
##    Min     1Q Median     3Q    Max
## -5.704 -1.078  0.022  1.057  4.672
##
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)
## (Intercept)                  1.341      0.114   11.73  < 2e-16 ***
## x1                           0.756      0.120    6.30  1.9e-09 ***
## x2                           1.420      0.112   12.64  < 2e-16 ***
## poly(x3 + 1, degree = 2)1   13.461      1.622    8.30  1.7e-14 ***
## poly(x3 + 1, degree = 2)2    4.201      1.620    2.59     0.01 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.61 on 195 degrees of freedom
## Multiple R-squared:  0.578,  Adjusted R-squared:  0.569
## F-statistic: 66.8 on 4 and 195 DF,  p-value: <2e-16

##PJ Great argument. It underscores the fact that mean-centering is an
## illusion. Observe:
m3c <- lm(y ~ x1 + x2 + poly((x3 - mean(x3)), degree = 2), data = dat)
mtable(m3a, m3b, m3c)

##
## Calls:
## m3a: lm(formula = y ~ x1 + x2 + poly(x3, degree = 2), data = dat)
## m3b: lm(formula = y ~ x1 + x2 + poly(x3 + 1, degree = 2), data = dat)
## m3c: lm(formula = y ~ x1 + x2 + poly((x3 - mean(x3)), degree = 2),
##     data = dat)
##
## =================================================================
##                                        m3a       m3b       m3c
## -----------------------------------------------------------------
##   (Intercept)                          1.341***  1.341***  1.341***
##                                       (0.114)   (0.114)   (0.114)
##   x1                                   0.756***  0.756***  0.756***
##                                       (0.120)   (0.120)   (0.120)
##   x2                                   1.420***  1.420***  1.420***
##                                       (0.112)   (0.112)   (0.112)
##   poly(x3, degree = 2)1               13.461***
##                                       (1.622)
##   poly(x3, degree = 2)2                4.201*
##                                       (1.620)
##   poly(x3 + 1, degree = 2)1                     13.461***
##                                                 (1.622)
##   poly(x3 + 1, degree = 2)2                      4.201*
##                                                 (1.620)
##   poly((x3 - mean(x3)), degree = 2)1                      13.461***
##                                                           (1.622)
##   poly((x3 - mean(x3)), degree = 2)2                       4.201*
##                                                           (1.620)
## -----------------------------------------------------------------
##   R-squared               0.578     0.578     0.578
##   adj. R-squared          0.569     0.569     0.569
##   sigma                   1.610     1.610     1.610
##   F                      66.779    66.779    66.779
##   p                       0.000     0.000     0.000
##   Log-likelihood       -376.540  -376.540  -376.540
##   Deviance              505.649   505.649   505.649
##   AIC                   765.081   765.081   765.081
##   BIC                   784.871   784.871   784.871
##   N                     200       200       200
## =================================================================

## Mean-centering has no effect; poly() makes that plain.

## That is not true if you don't use orthogonal polynomials.
## Compare these two:
m3d <- lm(y ~ x1 + x2 + poly(x3, degree = 2, raw = TRUE), data = dat)
m3e <- lm(y ~ x1 + x2 + poly(x3 + 1, degree = 2, raw = TRUE), data = dat)
mtable(m3d, m3e)

##
## Calls:
## m3d: lm(formula = y ~ x1 + x2 + poly(x3, degree = 2, raw = TRUE),
##     data = dat)
## m3e: lm(formula = y ~ x1 + x2 + poly(x3 + 1, degree = 2, raw = TRUE),
##     data = dat)
##
## ==========================================================
##                                           m3d       m3e
## ----------------------------------------------------------
##   (Intercept)                             0.112     1.036
##                                          (0.395)   (1.517)
##   x1                                      0.756***  0.756***
##                                          (0.120)   (0.120)
##   x2                                      1.420***  1.420***
##                                          (0.112)   (0.112)
##   poly(x3, degree = 2, raw = TRUE)1       0.068
##                                          (0.797)
##   poly(x3, degree = 2, raw = TRUE)2       0.992*
##                                          (0.383)
##   poly(x3 + 1, degree = 2, raw = TRUE)1            -1.917
##                                                    (1.544)
##   poly(x3 + 1, degree = 2, raw = TRUE)2             0.992*
##                                                    (0.383)
## ----------------------------------------------------------
##   R-squared               0.578     0.578
##   adj. R-squared          0.569     0.569
##   sigma                   1.610     1.610
##   F                      66.779    66.779
##   p                       0.000     0.000
##   Log-likelihood       -376.540  -376.540
##   Deviance              505.649   505.649
##   AIC                   765.081   765.081
##   BIC                   784.871   784.871
##   N                     200       200
## ==========================================================

## Now, the answer we've all been waiting for.
## What is an orthogonal polynomial, and why does it have this special
## benefit? I finally understood on 2013-04-01.
## Consider the quadratic equation
##
##    y = b0 + b1*x + b2*x^2
##
## The slope at point x is
##
##    dy
##    -- = b1 + 2*b2*x
##    dx
##
## And, if you rearrange that, you see that the coefficient b1 is the
## slope minus something that depends on x:
##
##         dy
##    b1 = -- - 2*b2*x
##         dx
##
## So when you fit the quadratic regression and estimate b1, you don't
## just get "the slope" you want; there is a -2*b2*x in there too, and,
## as a result, your estimate of b1 changes when you re-position your
## point on the x axis. On the other hand, the second derivative is a
## constant:
##
##    d^2 y
##    ----- = 2*b2
##    dx^2
##
## We get the same estimate of the squared term whether we center x or
## shift it; it is the same.
## How does the poly() function fix that? Instead of giving the
## regression 2 columns, x and x-squared, the poly() function
## effectively "takes x out of x-squared" in the second column. When
## poly(x, 2) creates 2 columns, the first one is a centered, rescaled
## copy of x--it is just the linear part. But the second column has the
## linear part removed, and so the 2 columns from poly() are completely
## separate. They are "orthogonal". So you could run a regression with
## the first poly() column, and then add the second poly() column, and
## the estimate from the first column would not change.
x3poly <- poly(dat$x3, 2)
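
## Two quick numerical checks of the claims above (my additions).
## First, substituting x+1 for x in b0 + b1*x + b2*x^2 gives a new
## linear coefficient b1 - 2*b2, which is exactly the m3d/m3e shift:
coef(m3d)[4] - 2 * coef(m3d)[5]  ## 0.068 - 2(0.992) = -1.917, as in m3e

## Second, the poly() columns really are orthogonal, both to the
## intercept and to each other:
round(crossprod(cbind(1, x3poly)), 10)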

m4a <- lm(y ~ x3poly[ ,1], data = dat)
summary(m4a)

##
## Call:
## lm(formula = y ~ x3poly[, 1], data = dat)
##
## Residuals:
##    Min     1Q Median     3Q    Max
## -5.149 -1.645  0.046  1.709  5.807
##
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)    1.467      0.161    9.09  < 2e-16 ***
## x3poly[, 1]   12.968      2.281    5.69  4.6e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.28 on 198 degrees of freedom
## Multiple R-squared:  0.14,   Adjusted R-squared:  0.136
## F-statistic: 32.3 on 1 and 198 DF,  p-value: 4.63e-08

m4b <- lm(y ~ x3poly[ ,1] + x3poly[ ,2], data = dat)
summary(m4b)

##
## Call:
## lm(formula = y ~ x3poly[, 1] + x3poly[, 2], data = dat)
##
## Residuals:
##    Min     1Q Median     3Q    Max
## -5.531 -1.776  0.102  1.713  4.955
##
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)     1.47       0.16    9.16  < 2e-16 ***
## x3poly[, 1]    12.97       2.27    5.73  3.8e-08 ***
## x3poly[, 2]     4.40       2.27    1.94    0.054 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.27 on 197 degrees of freedom
## Multiple R-squared:  0.156,  Adjusted R-squared:  0.148
## F-statistic: 18.3 on 2 and 197 DF,  p-value: 5.25e-08

## Note: the coefficient on the first term does NOT change.
## Are you starting to see?  If you are uncertain whether to
## add a quadratic term in a regression, this is a good way
## because the estimate of the linear part does not change
## while you are testing the additional terms.

## When there are more predictors in the equation, the
## previous is not EXACTLY true, but it is ALMOST true. It is
## not exactly true because there may be a slight correlation
## between x1 and x2 and the poly terms.
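
## A quick look at how slight that correlation is (my addition):
round(cor(cbind(x1 = dat$x1, x2 = dat$x2, x3poly)), 3)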

## Residual Centering: How to make your own orthogonal polynomial

## If you did not have the poly() function, how could you
## achieve the same benefit?  My colleagues call this
## "residual centering" of a squared term.

## Regress x-squared on x

m5 <- lm(x3square ~ x3, data = dat)

## Take the residuals from that model. Call them x3squarerc.

dat$x3squarerc <- resid(m5)

## x3squarerc is the part of x3square that has x3 removed.
## Note that the correlation of x3squarerc and x3 is 0, up to minor
## rounding error:
cor(dat$x3squarerc, dat$x3)

## [1] -7.592e-18

## But if you plot x3squarerc on x3, you see it is still quadratic.
plot(x3squarerc ~ x3, data = dat)

## x3squarerc is just the quadratic part of x3square, with no x3 left.
## Now run the regression with x3 and add x3squarerc:
m6 <- lm(y ~ x1 + x2 + x3, data = dat)
m7 <- lm(y ~ x1 + x2 + x3 + x3squarerc, data = dat)
mtable(m6, m7)

##
## Calls:
## m6: lm(formula = y ~ x1 + x2 + x3, data = dat)
## m7: lm(formula = y ~ x1 + x2 + x3 + x3squarerc, data = dat)
##
## ===================================
##                     m6        m7
## -----------------------------------
##   (Intercept)      -0.646*   -0.636*
##                    (0.269)   (0.265)
##   x1                0.787***  0.756***
##                    (0.121)   (0.120)
##   x2                1.408***  1.420***
##                    (0.114)   (0.112)
##   x3                2.035***  2.026***
##                    (0.248)   (0.244)
##   x3squarerc                  0.992*
##                              (0.383)
## -----------------------------------
##   R-squared         0.563     0.578
##   adj. R-squared    0.557     0.569
##   sigma             1.634     1.610
##   F                84.336    66.779
##   p                 0.000     0.000
##   Log-likelihood -379.929  -376.540
##   Deviance        523.079   505.649
##   AIC             769.858   765.081
##   BIC             786.350   784.871
##   N               200       200
## ===================================

## The linear part is just about the same. If you fit this again
## without x1 and x2, you'll see the x3 estimates are exactly the same.
## The do-it-yourself residual centering leaves x3 unchanged and only
## fiddles with x3square. Thus, in some sense, it is easier to
## understand than poly(). poly() also re-scales x3 so it is roughly
## on the same range as the quadratic part. This is beneficial in
## numerical linear algebra (computer math).
## Observe for yourself: the first poly term is just x3, centered and
## shrunken proportionally.
plot(x3poly[ , 1] ~ x3, data = dat)

## You notice that x3poly[ ,1] is mean centered:
mean(x3poly[ , 1])

## [1] 2.459e-18

summary(lm(x3poly[ , 1] ~ x3, data = dat))

## Warning: essentially perfect fit: summary may be unreliable

##
## Call:
## lm(formula = x3poly[, 1] ~ x3, data = dat)
##
## Residuals:
##       Min        1Q    Median        3Q       Max
## -1.89e-17 -6.90e-18 -2.80e-18  9.00e-19  3.51e-16
##
## Coefficients:
##              Estimate Std. Error   t value Pr(>|t|)
## (Intercept) -1.47e-01   4.45e-18 -3.30e+16   <2e-16 ***
## x3           1.51e-01   4.11e-18  3.66e+16   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.73e-17 on 198 degrees of freedom
## Multiple R-squared:  1,    Adjusted R-squared:  1
## F-statistic: 1.34e+33 on 1 and 198 DF,  p-value: <2e-16

## The warning message is inconsequential.

## Let's create another variable, just for comparison.
newX <- data.frame(z = rnorm(1000, m = 50, sd = 10))
newX$zsquare <- newX$z^2
## Here are the summary stats.
summarize(newX)

## $numerics
##             z  zsquare
## 0%     18.990    360.6
## 25%    42.950   1844.7
## 50%    49.581   2458.3
## 75%    55.882   3122.8
## 100%   78.451   6154.6
## mean   49.526   2546.4
## sd      9.683    966.3
## var    93.756 933805.4
## NA's    0.000      0.0
## N    1000.000   1000.0
##
## $factors
## NULL

xpol <- poly(newX$z, 2)
xpol[1:20, ]

##               1         2
##  [1,] -0.036815  0.008153
##  [2,] -0.003082 -0.023479
##  [3,]  0.004198 -0.023235
##  [4,]  0.003102 -0.023433
##  [5,]  0.013109 -0.019519
##  [6,] -0.003836 -0.023361
##  [7,]  0.032236  0.001157
##  [8,]  0.017242 -0.016519
##  [9,]  0.006665 -0.022583
## [10,] -0.034777  0.004713
## [11,] -0.036372  0.007389
## [12,] -0.034082  0.003584
## [13,]  0.039273  0.013124
## [14,]  0.011481 -0.020479
## [15,]  0.037785  0.010398
## [16,]  0.010668 -0.020912
## [17,] -0.019891 -0.014454
## [18,]  0.058659  0.058223
## [19,] -0.030596 -0.001731
## [20,]  0.005715 -0.022868

summary(xpol)

##        1                  2
##  Min.   :-0.09978   Min.   :-0.02368
##  1st Qu.:-0.02149   1st Qu.:-0.02155
##  Median : 0.00018   Median :-0.01328
##  Mean   : 0.00000   Mean   : 0.00000
##  3rd Qu.: 0.02077   3rd Qu.: 0.00917
##  Max.   : 0.09452   Max.   : 0.21137

summarize(as.data.frame(xpol))

## $numerics
##             1        2
## 0%     -0.100   -0.024
## 25%    -0.021   -0.022
## 50%     0.000   -0.013
## 75%     0.021    0.009
## 100%    0.095    0.211
## mean    0.000    0.000
## sd      0.032    0.032
## var     0.001    0.001
## NA's    0.000    0.000
## N    1000.000 1000.000
##
## $factors
## NULL

## What do I see there?  The means of both columns are
## almost exactly 0.
##
## And the variances are identical (hence standard deviations are
## identical too).
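
## A direct check of those two claims (my addition):
round(colMeans(xpol), 12)  ## both means essentially zero
apply(xpol, 2, var)        ## the two variances are identical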