## Paul Johnson
## 2013-04-11
## regression-quadratic-1.R
##
## Explore quadratic regression. Explain ways to fit that in R.
## Demonstrates use of predict, newdata, and R's poly() function.
##
## From now on, anybody who wants help with quadratic regressions,
## well, has to run this through R. And read a lot of pithy comments.
## Manufacture some data.
set.seed(444532)
x3sd <- 0.5
dat <- data.frame(x1 = rnorm(200),
                  x2 = rnorm(200, s = 1),
                  x3 = rnorm(200, m = 1, s = x3sd))
b0 <- 0.4
b1 <- 0.7
b2 <- 1.5
b3 <- -0.9
b4 <- 1.4
d <- 2
stde <- 1.5
dat$y <- with(dat,
              b0 + b1 * x1 + b2 * x2 + b3 * x3 + b4 * x3^2 + rnorm(200, s = stde))
## That has a very minor curvature in it. The key coefficient
## here is b4 = 1.4, meaning we have a U-shaped curve in x3.
## Plot just the quadratic part, to get a feel for it. Note the plotmath.
## Only try that on a closed course with a trained driver :)
quad <- function(x) b0 + b3 * x + b4 * x^2
curve(quad, from = -2, to = 2, xlab = "x3",
      ylab = substitute(y == A + B * x3 + C * x3^2, list(A = b0, B = b3, C = b4)))
## Suppose you estimate a linear model
m0 <- lm(y ~ x1 + x2 + x3, data = dat)
summary(m0)
##
## Call:
## lm(formula = y ~ x1 + x2 + x3, data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.328 -1.182 -0.056 1.095 4.855
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.646 0.269 -2.40 0.017 *
## x1 0.787 0.121 6.51 6.3e-10 ***
## x2 1.408 0.114 12.37 < 2e-16 ***
## x3 2.035 0.248 8.21 2.8e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.63 on 196 degrees of freedom
## Multiple R-squared: 0.563, Adjusted R-squared: 0.557
## F-statistic: 84.3 on 3 and 196 DF, p-value: <2e-16
## Please note: just because your "p value" is "really great", it does
## not mean you have "the right" model.
## There's a little bit of nonlinearity, as seen in the residual plot.
plot(m0, which = 1)
## It's tough to see, though.
termplot(m0, partial.resid = TRUE, terms = "x3")
## The "truth" is quadratic. We'd like to estimate
## b4, and there are many ways that this can be done.
##
## I'm going to show 4 ways, one of which is more strongly
## recommended, but, as we see, they are all really
## interchangeable.
## Method 1. The old fashioned way.
## Create a new variable x3square and insert that.
dat$x3square <- dat$x3 * dat$x3
m1 <- lm(y ~ x1 + x2 + x3 + x3square, data = dat)
summary(m1)
##
## Call:
## lm(formula = y ~ x1 + x2 + x3 + x3square, data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.704 -1.078 0.022 1.057 4.672
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.1115 0.3945 0.28 0.78
## x1 0.7556 0.1199 6.30 1.9e-09 ***
## x2 1.4197 0.1123 12.64 < 2e-16 ***
## x3 0.0678 0.7970 0.09 0.93
## x3square 0.9924 0.3828 2.59 0.01 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.61 on 195 degrees of freedom
## Multiple R-squared: 0.578, Adjusted R-squared: 0.569
## F-statistic: 66.8 on 4 and 195 DF, p-value: <2e-16
## I'll ask for the terms one at a time so you can see the problem.
## Interactively, I'd run this:
## termplot(m1, partial.resid = TRUE)
## But since this is a script, I have to do:
termplot(m1, partial.resid = TRUE, terms = "x1")
termplot(m1, partial.resid = TRUE, terms = "x2")
termplot(m1, partial.resid = TRUE, terms = "x3")
termplot(m1, partial.resid = TRUE, terms = "x3square")
## You don't see much curve because the parts of
## x3 are separated into 2 columns, each of which
## is seen as a linear element in the regression.
## While we are here, look at the predictor "design matrix":
head(model.matrix(m1))
## (Intercept) x1 x2 x3 x3square
## 1 1 -0.96188 1.2770 0.5939 0.3527
## 2 1 1.30351 0.3905 0.6015 0.3618
## 3 1 1.65021 0.2225 0.9298 0.8645
## 4 1 0.72121 -1.0704 0.4411 0.1945
## 5 1 0.05278 0.4767 0.7713 0.5950
## 6 1 0.39589 -0.3344 0.7803 0.6088
## Method 2. Technically equivalent to Method 1. We can introduce
## the squared term and R will construct the equivalent of
## x3square for us, automatically.
## Method 2a. This is the most elementary way; it is the way new
## users always want to do it.
m2a <- lm(y ~ x1 + x2 + x3 + I(x3*x3), data = dat)
## same as
## m2a <- lm(y ~ x1 + x2 + x3 + I(x3^2), data = dat)
summary(m2a)
##
## Call:
## lm(formula = y ~ x1 + x2 + x3 + I(x3 * x3), data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.704 -1.078 0.022 1.057 4.672
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.1115 0.3945 0.28 0.78
## x1 0.7556 0.1199 6.30 1.9e-09 ***
## x2 1.4197 0.1123 12.64 < 2e-16 ***
## x3 0.0678 0.7970 0.09 0.93
## I(x3 * x3) 0.9924 0.3828 2.59 0.01 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.61 on 195 degrees of freedom
## Multiple R-squared: 0.578, Adjusted R-squared: 0.569
## F-statistic: 66.8 on 4 and 195 DF, p-value: <2e-16
## Look at the predictor matrix:
head(model.matrix(m2a))
## (Intercept) x1 x2 x3 I(x3 * x3)
## 1 1 -0.96188 1.2770 0.5939 0.3527
## 2 1 1.30351 0.3905 0.6015 0.3618
## 3 1 1.65021 0.2225 0.9298 0.8645
## 4 1 0.72121 -1.0704 0.4411 0.1945
## 5 1 0.05278 0.4767 0.7713 0.5950
## 6 1 0.39589 -0.3344 0.7803 0.6088
## Method 2b. Introduced only to show the poly() function.
## The poly() function can be "forced" to create the same x3 and
## x3square columns by the raw = TRUE argument.
m2b <- lm(y ~ x1 + x2 + poly(x3, degree = d, raw = TRUE), data=dat)
summary(m2b)
##
## Call:
## lm(formula = y ~ x1 + x2 + poly(x3, degree = d, raw = TRUE),
## data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.704 -1.078 0.022 1.057 4.672
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.1115 0.3945 0.28 0.78
## x1 0.7556 0.1199 6.30 1.9e-09 ***
## x2 1.4197 0.1123 12.64 < 2e-16 ***
## poly(x3, degree = d, raw = TRUE)1 0.0678 0.7970 0.09 0.93
## poly(x3, degree = d, raw = TRUE)2 0.9924 0.3828 2.59 0.01 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.61 on 195 degrees of freedom
## Multiple R-squared: 0.578, Adjusted R-squared: 0.569
## F-statistic: 66.8 on 4 and 195 DF, p-value: <2e-16
## See? Same!
cbind(coef(summary(m2a))[, 1:2], coef(summary(m2b))[, 1:2])
## Estimate Std. Error Estimate Std. Error
## (Intercept) 0.11151 0.3945 0.11151 0.3945
## x1 0.75564 0.1199 0.75564 0.1199
## x2 1.41971 0.1123 1.41971 0.1123
## x3 0.06782 0.7970 0.06782 0.7970
## I(x3 * x3) 0.99240 0.3828 0.99240 0.3828
## Look at the predictor matrix:
head(model.matrix(m2b))
## (Intercept) x1 x2 poly(x3, degree = d, raw = TRUE)1
## 1 1 -0.96188 1.2770 0.5939
## 2 1 1.30351 0.3905 0.6015
## 3 1 1.65021 0.2225 0.9298
## 4 1 0.72121 -1.0704 0.4411
## 5 1 0.05278 0.4767 0.7713
## 6 1 0.39589 -0.3344 0.7803
## poly(x3, degree = d, raw = TRUE)2
## 1 0.3527
## 2 0.3618
## 3 0.8645
## 4 0.1945
## 5 0.5950
## 6 0.6088
## Method 3. The R-recommended method: poly().
## The usual advice is to use "orthogonal polynomials" instead,
## because they are more numerically stable.
m3 <- lm(y ~ x1 + x2 + poly(x3, degree = d), data=dat)
summary(m3)
##
## Call:
## lm(formula = y ~ x1 + x2 + poly(x3, degree = d), data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.704 -1.078 0.022 1.057 4.672
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.341 0.114 11.73 < 2e-16 ***
## x1 0.756 0.120 6.30 1.9e-09 ***
## x2 1.420 0.112 12.64 < 2e-16 ***
## poly(x3, degree = d)1 13.461 1.622 8.30 1.7e-14 ***
## poly(x3, degree = d)2 4.201 1.620 2.59 0.01 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.61 on 195 degrees of freedom
## Multiple R-squared: 0.578, Adjusted R-squared: 0.569
## F-statistic: 66.8 on 4 and 195 DF, p-value: <2e-16
termplot(m3, partial.resid = TRUE, terms = "x1")
termplot(m3, partial.resid = TRUE, terms = "x2")
termplot(m3, partial.resid = TRUE, terms = "poly(x3, degree = d)")
## Look at the predictor matrix; you see that poly() created
## 2 columns:
head(model.matrix(m3))
## (Intercept) x1 x2 poly(x3, degree = d)1 poly(x3, degree = d)2
## 1 1 -0.96188 1.2770 -0.05747 -0.01570
## 2 1 1.30351 0.3905 -0.05632 -0.01710
## 3 1 1.65021 0.2225 -0.00691 -0.05140
## 4 1 0.72121 -1.0704 -0.08047 0.01819
## 5 1 0.05278 0.4767 -0.03076 -0.04120
## 6 1 0.39589 -0.3344 -0.02941 -0.04209
## Incidentally, a hypothesis test rejects m0:
anova(m3, m0, test = "F")
## Analysis of Variance Table
##
## Model 1: y ~ x1 + x2 + poly(x3, degree = d)
## Model 2: y ~ x1 + x2 + x3
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 195 506
## 2 196 523 -1 -17.4 6.72 0.01 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Coefficients look different, but that's an illusion.
## From a given combination of (x1, x2, x3), the predictions
## are exactly the same. See?
## Predicted values are identical.
head(cbind(m1$fitted, m2b$fitted, m3$fitted), 10)
##         [,1]      [,2]      [,3]
## 1   1.587901  1.587901  1.587901
## 2   2.050746  2.050746  2.050746
## 3   2.595258  2.595258  2.595258
## 4  -0.640246 -0.640246 -0.640246
## 5   1.470894  1.470894  1.470894
## 6   0.592977  0.592977  0.592977
## 7   3.512096  3.512096  3.512096
## 8   -1.888601 -1.888601 -1.888601
## 9   2.970413  2.970413  2.970413
## 10  1.500665  1.500665  1.500665
## The remaining 190 rows look the same: all three columns agree exactly.
## That's not all. If you check the 95% CIs on the predicted values,
## or R^2, or anything logically comparable, they'll be the same.
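## A quick check of that claim (just a sketch): compare the confidence
## intervals for the fitted values from m1 and m3.
all.equal(predict(m1, interval = "confidence"),
          predict(m3, interval = "confidence"))
## This should report TRUE (up to numerical rounding).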
## Some people say it is very important to "mean center" the variable
## in the quadratic term. Let's do that and show it makes no
## difference whatsoever. x3c is mean centered:
dat$x3c <- scale(dat$x3, scale = FALSE) ## mean centered
m4 <- lm(y ~ x1 + x2 + x3c + I(x3c*x3c), data=dat)
summary(m4)
##
## Call:
## lm(formula = y ~ x1 + x2 + x3c + I(x3c * x3c), data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.704 -1.078 0.022 1.057 4.672
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.122 0.142 7.92 1.8e-13 ***
## x1 0.756 0.120 6.30 1.9e-09 ***
## x2 1.420 0.112 12.64 < 2e-16 ***
## x3c 2.004 0.244 8.20 3.2e-14 ***
## I(x3c * x3c) 0.992 0.383 2.59 0.01 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.61 on 195 degrees of freedom
## Multiple R-squared: 0.578, Adjusted R-squared: 0.569
## F-statistic: 66.8 on 4 and 195 DF, p-value: <2e-16
## Superficially, that looks better...
## The coefficients look different, but that's an illusion.
## The predicted values are identical:
head(cbind(m1$fitted, m2b$fitted, m3$fitted, m4$fitted))
## [,1] [,2] [,3] [,4]
## 1 1.5879 1.5879 1.5879 1.5879
## 2 2.0507 2.0507 2.0507 2.0507
## 3 2.5953 2.5953 2.5953 2.5953
## 4 -0.6402 -0.6402 -0.6402 -0.6402
## 5 1.4709 1.4709 1.4709 1.4709
## 6 0.5930 0.5930 0.5930 0.5930
## Can you explain?
## It is a known fact that the estimate of the squared term
## is the same with centered data, but the estimate of the
## linear term changes. The following table gives an overview of that.
library(memisc)
## Loading required package: lattice
## Loading required package: methods
## Loading required package: MASS
##
## Attaching package: 'memisc'
##
## The following objects are masked from 'package:stats':
##
## contrasts, contr.sum, contr.treatment
##
## The following object is masked from 'package:base':
##
## as.array
mtable(m1, m2b, m3, m4)
##
## Calls:
## m1: lm(formula = y ~ x1 + x2 + x3 + x3square, data = dat)
## m2b: lm(formula = y ~ x1 + x2 + poly(x3, degree = d, raw = TRUE),
## data = dat)
## m3: lm(formula = y ~ x1 + x2 + poly(x3, degree = d), data = dat)
## m4: lm(formula = y ~ x1 + x2 + x3c + I(x3c * x3c), data = dat)
##
## ==========================================================================
## m1 m2b m3 m4
## --------------------------------------------------------------------------
## (Intercept) 0.112 0.112 1.341*** 1.122***
## (0.395) (0.395) (0.114) (0.142)
## x1 0.756*** 0.756*** 0.756*** 0.756***
## (0.120) (0.120) (0.120) (0.120)
## x2 1.420*** 1.420*** 1.420*** 1.420***
## (0.112) (0.112) (0.112) (0.112)
## x3 0.068
## (0.797)
## x3square 0.992*
## (0.383)
## poly(x3, degree = d, raw = TRUE)1 0.068
## (0.797)
## poly(x3, degree = d, raw = TRUE)2 0.992*
## (0.383)
## poly(x3, degree = d)1 13.461***
## (1.622)
## poly(x3, degree = d)2 4.201*
## (1.620)
## x3c 2.004***
## (0.244)
## I(x3c * x3c) 0.992*
## (0.383)
## --------------------------------------------------------------------------
## R-squared 0.578 0.578 0.578 0.578
## adj. R-squared 0.569 0.569 0.569 0.569
## sigma 1.610 1.610 1.610 1.610
## F 66.779 66.779 66.779 66.779
## p 0.000 0.000 0.000 0.000
## Log-likelihood -376.540 -376.540 -376.540 -376.540
## Deviance 505.649 505.649 505.649 505.649
## AIC 765.081 765.081 765.081 765.081
## BIC 784.871 784.871 784.871 784.871
## N 200 200 200 200
## ==========================================================================
## Once you understand why that change happens, you will begin to see
## why poly() is so helpful.
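## Here is the algebra behind that change (a sketch). If x3c = x3 - mean(x3),
## then b1*x3 + b2*x3^2 = (b1 + 2*b2*mean(x3))*x3c + b2*x3c^2 + a constant.
## So the centered linear coefficient should equal this combination of the
## uncentered estimates from m1:
coef(m1)["x3"] + 2 * coef(m1)["x3square"] * mean(dat$x3)
coef(m4)["x3c"]
## Both should come out near 2.004, matching the m4 summary above.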
## And, in case you wonder, this is one of the cases where x3 and x3^2
## are strongly correlated with one another.
cor(model.matrix(m1)[ ,-1])
## x1 x2 x3 x3square
## x1 1.00000 -0.03823 -0.11675 -0.07957
## x2 -0.03823 1.00000 0.03466 0.02005
## x3 -0.11675 0.03466 1.00000 0.95163
## x3square -0.07957 0.02005 0.95163 1.00000
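## For comparison (a sketch): after mean-centering, the correlation between
## the linear and squared terms should be much closer to zero. That is the
## usual "multicollinearity" motivation for centering.
cor(dat$x3c, dat$x3c^2)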
## Note that termplot will cooperate if we type in the full
## name of the predictor from the model. Just using "x3" fails
termplot(m3, partial.resid = TRUE, terms = "poly(x3, degree = d)")
## Whereas the thing that seems "obvious" to me fails.
## You should see the same error I do:
try(termplot(m3, partial.resid = TRUE, terms = "x3"))
## Error in predict.lm(model, type = "terms", se.fit = se, terms = terms) :
##   subscript out of bounds
## That's one of the things I've tried to fix in rockchalk's plotCurves
library(rockchalk)
## Loading required package: car
##
## Attaching package: 'car'
##
## The following object is masked from 'package:memisc':
##
## recode
##
## Loading required package: lme4
## Loading required package: Matrix
## Loading required package: Rcpp
##
## Attaching package: 'rockchalk'
##
## The following object is masked from 'package:MASS':
##
## mvrnorm
plotCurves(m3, plotx = "x3")
plotCurves(m3, plotx = "x3", interval = "conf")
## It is important to get used to the R predict() family of functions.
## A regression tool is not complete unless it has one.
## And using the newdata argument in predict is really important!
## Look what goes wrong if you accidentally omit a predictor
## from a newdata object
ndf1 <- data.frame(x2 = c(1), x3 = c(2))
try(predict(m1, newdata = ndf1))
## fails with:
## Error in eval(expr, envir, enclos) : object 'x1' not found
## Now get it right
ndf2 <- data.frame(x1 = c(1), x2 = c(1), x3 = c(2))
predict(m2a, newdata = ndf2)
## 1
## 6.392
## succeeded
## Now, we'd like to calculate predicted values for, say, 20 values of x3
## spread across its range (I'll use its quantiles). We'll leave x1 and x2
## at their averages. That's not a difficult calculation, but why not use
## the handy functions I've prepared for it?
predictOMatic(m3, interval = "confidence")
## $x1
## x1 x2 x3 fit lwr upr
## 1 -3.2550 0.04564 0.9757 -1.2725 -2.0931 -0.4518
## 2 -0.4605 0.04564 0.9757 0.8392 0.5389 1.1394
## 3 0.1471 0.04564 0.9757 1.2983 1.0173 1.5793
## 4 0.6096 0.04564 0.9757 1.6478 1.3346 1.9611
## 5 2.7511 0.04564 0.9757 3.2660 2.5598 3.9722
##
## $x2
## x1 x2 x3 fit lwr upr
## 1 0.07998 -2.68799 0.9757 -2.6334 -3.30626 -1.9605
## 2 0.07998 -0.59325 0.9757 0.3405 0.02425 0.6568
## 3 0.07998 0.04662 0.9757 1.2490 0.96937 1.5286
## 4 0.07998 0.77808 0.9757 2.2874 1.96742 2.6075
## 5 0.07998 2.52345 0.9757 4.7654 4.15512 5.3756
##
## $x3
## x1 x2 x3 fit lwr upr
## 1 0.07998 0.04564 -0.4099 0.3757 -1.1157 1.8671
## 2 0.07998 0.04564 0.6432 0.6910 0.4050 0.9769
## 3 0.07998 0.04564 0.9450 1.1870 0.9078 1.4661
## 4 0.07998 0.04564 1.3039 2.0125 1.7226 2.3024
## 5 0.07998 0.04564 2.0443 4.5229 3.6481 5.3977
m1pr1 <- predictOMatic(m3, predVals = list(x3 = quantile(dat$x3, probs = seq(0,1, length.out = 20))), interval = "confidence")
m1pr1
## x1 x2 x3 fit lwr upr
## 1 0.07998 0.04564 -0.4099 0.3757 -1.1157 1.8671
## 2 0.07998 0.04564 0.2631 0.3233 -0.1484 0.7950
## 3 0.07998 0.04564 0.4411 0.4597 0.1126 0.8068
## 4 0.07998 0.04564 0.5534 0.5781 0.2744 0.8819
## 5 0.07998 0.04564 0.5984 0.6326 0.3394 0.9259
## 6 0.07998 0.04564 0.6753 0.7351 0.4527 1.0174
## 7 0.07998 0.04564 0.7458 0.8392 0.5610 1.1175
## 8 0.07998 0.04564 0.8169 0.9543 0.6769 1.2318
## 9 0.07998 0.04564 0.8695 1.0460 0.7680 1.3239
## 10 0.07998 0.04564 0.9238 1.1463 0.8674 1.4251
## 11 0.07998 0.04564 0.9549 1.2064 0.9271 1.4857
## 12 0.07998 0.04564 1.0336 1.3670 1.0867 1.6473
## 13 0.07998 0.04564 1.0991 1.5100 1.2291 1.7909
## 14 0.07998 0.04564 1.1682 1.6702 1.3883 1.9522
## 15 0.07998 0.04564 1.2732 1.9317 1.6448 2.2186
## 16 0.07998 0.04564 1.3442 2.1211 1.8258 2.4164
## 17 0.07998 0.04564 1.4565 2.4409 2.1183 2.7635
## 18 0.07998 0.04564 1.6343 2.9981 2.5831 3.4131
## 19 0.07998 0.04564 1.7877 3.5295 2.9809 4.0781
## 20 0.07998 0.04564 2.0443 4.5229 3.6481 5.3977
## Wonder why that works? Here's a middle step that gives you the hint
newdata(m3, fl = list(x3 = quantile(dat$x3, probs = seq(0,1, length.out = 20))))
## x1 x2 x3
## 1 0.07998 0.04564 0.9757
plot(fit ~ x3, data = m1pr1, type = "l")
lines(upr ~ x3, data = m1pr1, col = "purple", lty = 4)
lines(lwr ~ x3, data = m1pr1, col = "purple", lty = 4)
legend("topleft", legend = c("predicted","95% CI","95% CI"), lty = 4, col = "purple")
## Notice that the curves look bad, a little jagged. Can you figure
## out why? For a hint, run this version, which uses many more points.
## (Separately, I suggest users go back to the beginning and fiddle the
## parameters. Make x3sd bigger, or change b3 and b4, and see what happens.)
m1pr2 <- predictOMatic(m3, predVals = list(x3 = quantile(dat$x3, probs = seq(0,1, length.out = 1000))), interval = "confidence")
plot(fit ~ x3, data = m1pr2, type = "l")
lines(upr ~ x3, data = m1pr2, col = "purple", lty = 4)
lines(lwr ~ x3, data = m1pr2, col = "purple", lty = 4)
legend("topleft", legend = c("predicted","95% CI","95% CI"), lty = 4, col = "purple")
## Ooh la la, as we say in France :)
## Major points.
## 1. If the true relationship is quadratic, we may not find it unless
## we know to look for it. I have fiddled with the parameters for this
## model quite a bit. In many cases, the anova() test is unequivocal,
## rejecting m0. But quite often, it is not powerful.
## Still, a theory that hints that we should look for curvature in the
## effect of x3 would be most welcome.
##
## 2. In any nonlinear model, interpretation should emphasize the
## analysis of predicted values. These are "marginal effects": if x
## changes, how much do we predict y will change? Answering that
## requires us to use R's predict() methods.
##
## termplot does pretty well with these; the rockchalk::plotCurves
## function handles interactions and some nonlinearities that termplot
## does not manage.
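## For example, here is a small sketch of a "first difference" in x3,
## holding x1 and x2 at zero (values chosen only for illustration):
nd <- data.frame(x1 = 0, x2 = 0, x3 = c(1, 2))
diff(predict(m3, newdata = nd))
## the predicted change in y as x3 moves from 1 to 2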
## 3. Does it matter how we estimate these models?
## Some experts are emphatic about the need to mean-center x to
## ameliorate multicollinearity due to the similarity of x and x^2,
## but I believe that is completely wrong. Centering produces exactly
## the same predicted values; the fitted model is, literally, identical.
## Other R experts I trust strongly urge poly(x, 2), not x + I(x*x) or such.
## I think there are several different reasons that people have in mind
## to justify that. I think some of the concerns about round-off error in
## digital computation have been addressed by improvements in the
## algorithms that are used to fit regressions. That's why we don't
## see a difference in the predicted values of the 4 methods.
## So students and I want to know: "Why do the smart people like
## poly() so much?" I finally grasped the answer, thanks to
## advice on the r-help email list in 2013.
## Gabor Grothendieck listed advantages of poly(). I'm pasting in
## his explanations and adding example code drawn from the examples above.
##GG 1. One benefit is if you fit a higher degree polynomial using
##GG poly(x, n) the lower degree coefficients will agree with the fit using
##GG a lower n.
##PJ I can verify that. It is strictly literally true if there
## are no other predictors than the poly terms. Example:
m3 <- lm(y ~ poly(x3, degree = 2), data=dat)
m5 <- lm(y ~ poly(x3, degree = 3), data=dat)
m6 <- lm(y ~ poly(x3, degree = 4), data=dat)
mtable(m3, m5, m6)
##
## Calls:
## m3: lm(formula = y ~ poly(x3, degree = 2), data = dat)
## m5: lm(formula = y ~ poly(x3, degree = 3), data = dat)
## m6: lm(formula = y ~ poly(x3, degree = 4), data = dat)
##
## ====================================================
## m3 m5 m6
## ----------------------------------------------------
## (Intercept) 1.467*** 1.467*** 1.467***
## (0.160) (0.160) (0.160)
## poly(x3, degree = 2)1 12.968***
## (2.265)
## poly(x3, degree = 2)2 4.397
## (2.265)
## poly(x3, degree = 3)1 12.968***
## (2.264)
## poly(x3, degree = 3)2 4.397
## (2.264)
## poly(x3, degree = 3)3 -2.433
## (2.264)
## poly(x3, degree = 4)1 12.968***
## (2.269)
## poly(x3, degree = 4)2 4.397
## (2.269)
## poly(x3, degree = 4)3 -2.433
## (2.269)
## poly(x3, degree = 4)4 -0.855
## (2.269)
## ----------------------------------------------------
## R-squared 0.156 0.161 0.162
## adj. R-squared 0.148 0.149 0.145
## sigma 2.265 2.264 2.269
## F 18.272 12.576 9.426
## p 0.000 0.000 0.000
## Log-likelihood -445.805 -445.218 -445.145
## Deviance 1010.794 1004.876 1004.145
## AIC 899.610 900.436 902.290
## BIC 912.803 916.927 922.080
## N 200 200 200
## ====================================================
##PJ However, if there are other predictors, they are not orthogonal to the
##PJ poly() terms, so the argument breaks down a little bit:
m3c <- lm(y ~ x1 + x2 + poly(x3, degree = 2), data=dat)
m5c <- lm(y ~ x1 + x2 + poly(x3, degree = 3), data=dat)
m6c <- lm(y ~ x1 + x2 + poly(x3, degree = 4), data=dat)
mtable(m3c, m5c, m6c)
##
## Calls:
## m3c: lm(formula = y ~ x1 + x2 + poly(x3, degree = 2), data = dat)
## m5c: lm(formula = y ~ x1 + x2 + poly(x3, degree = 3), data = dat)
## m6c: lm(formula = y ~ x1 + x2 + poly(x3, degree = 4), data = dat)
##
## ====================================================
## m3c m5c m6c
## ----------------------------------------------------
## (Intercept) 1.341*** 1.341*** 1.340***
## (0.114) (0.114) (0.115)
## x1 0.756*** 0.768*** 0.774***
## (0.120) (0.120) (0.121)
## x2 1.420*** 1.410*** 1.408***
## (0.112) (0.113) (0.113)
## poly(x3, degree = 2)1 13.461***
## (1.622)
## poly(x3, degree = 2)2 4.201*
## (1.620)
## poly(x3, degree = 3)1 13.486***
## (1.622)
## poly(x3, degree = 3)2 4.177*
## (1.620)
## poly(x3, degree = 3)3 -1.714
## (1.624)
## poly(x3, degree = 4)1 13.496***
## (1.624)
## poly(x3, degree = 4)2 4.168*
## (1.622)
## poly(x3, degree = 4)3 -1.724
## (1.627)
## poly(x3, degree = 4)4 -1.075
## (1.616)
## ----------------------------------------------------
## R-squared 0.578 0.580 0.581
## adj. R-squared 0.569 0.570 0.568
## sigma 1.610 1.610 1.612
## F 66.779 53.677 44.676
## p 0.000 0.000 0.000
## Log-likelihood -376.540 -375.968 -375.739
## Deviance 505.649 502.762 501.613
## AIC 765.081 765.935 767.478
## BIC 784.871 789.024 793.864
## N 200 200 200
## ====================================================
## Compare to raw.
m3d <- lm(y ~ x1 + x2 + poly(x3, degree = 2, raw = TRUE), data=dat)
m5d <- lm(y ~ x1 + x2 + poly(x3, degree = 3, raw = TRUE), data=dat)
m6d <- lm(y ~ x1 + x2 + poly(x3, degree = 4, raw = TRUE), data=dat)
mtable(m3d, m5d, m6d)
##
## Calls:
## m3d: lm(formula = y ~ x1 + x2 + poly(x3, degree = 2, raw = TRUE),
## data = dat)
## m5d: lm(formula = y ~ x1 + x2 + poly(x3, degree = 3, raw = TRUE),
## data = dat)
## m6d: lm(formula = y ~ x1 + x2 + poly(x3, degree = 4, raw = TRUE),
## data = dat)
##
## ================================================================
## m3d m5d m6d
## ----------------------------------------------------------------
## (Intercept) 0.112 0.199 0.326
## (0.395) (0.403) (0.446)
## x1 0.756*** 0.768*** 0.774***
## (0.120) (0.120) (0.121)
## x2 1.420*** 1.410*** 1.408***
## (0.112) (0.113) (0.113)
## poly(x3, degree = 2, raw = TRUE)1 0.068
## (0.797)
## poly(x3, degree = 2, raw = TRUE)2 0.992*
## (0.383)
## poly(x3, degree = 3, raw = TRUE)1 -1.002
## (1.289)
## poly(x3, degree = 3, raw = TRUE)2 2.618
## (1.587)
## poly(x3, degree = 3, raw = TRUE)3 -0.601
## (0.570)
## poly(x3, degree = 4, raw = TRUE)1 -0.687
## (1.375)
## poly(x3, degree = 4, raw = TRUE)2 0.751
## (3.226)
## poly(x3, degree = 4, raw = TRUE)3 1.349
## (2.987)
## poly(x3, degree = 4, raw = TRUE)4 -0.570
## (0.857)
## ----------------------------------------------------------------
## R-squared 0.578 0.580 0.581
## adj. R-squared 0.569 0.570 0.568
## sigma 1.610 1.610 1.612
## F 66.779 53.677 44.676
## p 0.000 0.000 0.000
## Log-likelihood -376.540 -375.968 -375.739
## Deviance 505.649 502.762 501.613
## AIC 765.081 765.935 767.478
## BIC 784.871 789.024 793.864
## N 200 200 200
## ================================================================
##GG 2. A second advantage is that the t statistics are invariant under
##GG shift if you use orthogonal polynomials.
##GG compare these two:
m3a <- lm(y ~ x1 + x2 + poly(x3, degree = 2), data=dat)
m3b <- lm(y ~ x1 + x2 + poly(x3+1, degree = 2), data=dat)
summary(m3a)
##
## Call:
## lm(formula = y ~ x1 + x2 + poly(x3, degree = 2), data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.704 -1.078 0.022 1.057 4.672
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.341 0.114 11.73 < 2e-16 ***
## x1 0.756 0.120 6.30 1.9e-09 ***
## x2 1.420 0.112 12.64 < 2e-16 ***
## poly(x3, degree = 2)1 13.461 1.622 8.30 1.7e-14 ***
## poly(x3, degree = 2)2 4.201 1.620 2.59 0.01 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.61 on 195 degrees of freedom
## Multiple R-squared: 0.578, Adjusted R-squared: 0.569
## F-statistic: 66.8 on 4 and 195 DF, p-value: <2e-16
summary(m3b)
##
## Call:
## lm(formula = y ~ x1 + x2 + poly(x3 + 1, degree = 2), data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.704 -1.078 0.022 1.057 4.672
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.341 0.114 11.73 < 2e-16 ***
## x1 0.756 0.120 6.30 1.9e-09 ***
## x2 1.420 0.112 12.64 < 2e-16 ***
## poly(x3 + 1, degree = 2)1 13.461 1.622 8.30 1.7e-14 ***
## poly(x3 + 1, degree = 2)2 4.201 1.620 2.59 0.01 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.61 on 195 degrees of freedom
## Multiple R-squared: 0.578, Adjusted R-squared: 0.569
## F-statistic: 66.8 on 4 and 195 DF, p-value: <2e-16
##PJ Great argument. It underscores the fact that mean-centering
## is an illusion. Observe:
m3c <- lm(y ~ x1 + x2 + poly((x3 - mean(x3)), degree = 2), data=dat)
mtable(m3a, m3b, m3c)
##
## Calls:
## m3a: lm(formula = y ~ x1 + x2 + poly(x3, degree = 2), data = dat)
## m3b: lm(formula = y ~ x1 + x2 + poly(x3 + 1, degree = 2), data = dat)
## m3c: lm(formula = y ~ x1 + x2 + poly((x3 - mean(x3)), degree = 2),
## data = dat)
##
## =================================================================
## m3a m3b m3c
## -----------------------------------------------------------------
## (Intercept) 1.341*** 1.341*** 1.341***
## (0.114) (0.114) (0.114)
## x1 0.756*** 0.756*** 0.756***
## (0.120) (0.120) (0.120)
## x2 1.420*** 1.420*** 1.420***
## (0.112) (0.112) (0.112)
## poly(x3, degree = 2)1 13.461***
## (1.622)
## poly(x3, degree = 2)2 4.201*
## (1.620)
## poly(x3 + 1, degree = 2)1 13.461***
## (1.622)
## poly(x3 + 1, degree = 2)2 4.201*
## (1.620)
## poly((x3 - mean(x3)), degree = 2)1 13.461***
## (1.622)
## poly((x3 - mean(x3)), degree = 2)2 4.201*
## (1.620)
## -----------------------------------------------------------------
## R-squared 0.578 0.578 0.578
## adj. R-squared 0.569 0.569 0.569
## sigma 1.610 1.610 1.610
## F 66.779 66.779 66.779
## p 0.000 0.000 0.000
## Log-likelihood -376.540 -376.540 -376.540
## Deviance 505.649 505.649 505.649
## AIC 765.081 765.081 765.081
## BIC 784.871 784.871 784.871
## N 200 200 200
## =================================================================
## Mean-centering has no effect; poly() makes that plain.
## That is not true if you don't use orthogonal polynomials.
## Compare these two:
m3d <- lm(y ~ x1 + x2 + poly(x3, degree = 2, raw = TRUE), data=dat)
m3e <- lm(y ~ x1 + x2 + poly(x3+1, degree = 2, raw = TRUE), data=dat)
mtable(m3d, m3e)
##
## Calls:
## m3d: lm(formula = y ~ x1 + x2 + poly(x3, degree = 2, raw = TRUE),
## data = dat)
## m3e: lm(formula = y ~ x1 + x2 + poly(x3 + 1, degree = 2, raw = TRUE),
## data = dat)
##
## ==========================================================
## m3d m3e
## ----------------------------------------------------------
## (Intercept) 0.112 1.036
## (0.395) (1.517)
## x1 0.756*** 0.756***
## (0.120) (0.120)
## x2 1.420*** 1.420***
## (0.112) (0.112)
## poly(x3, degree = 2, raw = TRUE)1 0.068
## (0.797)
## poly(x3, degree = 2, raw = TRUE)2 0.992*
## (0.383)
## poly(x3 + 1, degree = 2, raw = TRUE)1 -1.917
## (1.544)
## poly(x3 + 1, degree = 2, raw = TRUE)2 0.992*
## (0.383)
## ----------------------------------------------------------
## R-squared 0.578 0.578
## adj. R-squared 0.569 0.569
## sigma 1.610 1.610
## F 66.779 66.779
## p 0.000 0.000
## Log-likelihood -376.540 -376.540
## Deviance 505.649 505.649
## AIC 765.081 765.081
## BIC 784.871 784.871
## N 200 200
## ==========================================================
## Now, the answer we've all been waiting for.
## What is an orthogonal polynomial and why does it have this special
## benefit?
## I finally understood on 2013-04-01.
## Consider the quadratic equation
## y = b0 + b1 x + b2 x^2
## The slope at point x is
##
##    dy
##    -- = b1 + 2 b2 x
##    dx
##
## And, if you re-arrange that, you see that the coefficient
## b1 is the slope minus something else:
##
##         dy
##    b1 = -- - 2 b2 x
##         dx
##
## So, when you fit the quadratic regression and estimate
## b1, you don't just get "the slope" you want: b1 is the slope
## at a chosen point minus 2 b2 x, and, as a result, your estimate
## of b1 changes when you re-position your point on the x axis.
## On the other hand, the second derivative is a constant:
##
##    d^2 y
##    ----- = 2 b2
##    dx^2
##
## We get the same estimate of b2 whether we center x or
## shift it; it is the same.
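## A quick check of that calculus with R's symbolic derivative, D()
## (just a sketch):
D(expression(b0 + b1 * x + b2 * x^2), "x")
## should print  b1 + b2 * (2 * x),  i.e. b1 + 2*b2*x
D(D(expression(b0 + b1 * x + b2 * x^2), "x"), "x")
## should print  b2 * 2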
## How does the poly() function fix that? Instead of giving
## the regression 2 columns, x and x-squared, the poly function
## effectively "takes x out of x-squared" in the second column.
## When poly(x, 2) creates 2 columns, the first one is always
## proportional to x--it is just the linear part. But the second
## column has x removed, and so the 2 columns from poly() are
## completely separate. They are "orthogonal". So you could run a
## regression with the first poly() column, and then add the second
## poly() column, and the estimate from the first column would not
## change.
x3poly <- poly(dat$x3, 2)
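## A direct orthogonality check (a sketch): the cross-product of the two
## poly() columns should be (numerically) the identity matrix, so the
## columns are orthogonal and each has unit length.
round(crossprod(x3poly), 10)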
m4a <- lm(y ~ x3poly[ ,1], data = dat)
summary(m4a)
##
## Call:
## lm(formula = y ~ x3poly[, 1], data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.149 -1.645 0.046 1.709 5.807
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.467 0.161 9.09 < 2e-16 ***
## x3poly[, 1] 12.968 2.281 5.69 4.6e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.28 on 198 degrees of freedom
## Multiple R-squared: 0.14, Adjusted R-squared: 0.136
## F-statistic: 32.3 on 1 and 198 DF, p-value: 4.63e-08
m4b <- lm(y ~ x3poly[ ,1] + x3poly[ ,2], data = dat)
summary(m4b)
##
## Call:
## lm(formula = y ~ x3poly[, 1] + x3poly[, 2], data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.531 -1.776 0.102 1.713 4.955
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.47 0.16 9.16 < 2e-16 ***
## x3poly[, 1] 12.97 2.27 5.73 3.8e-08 ***
## x3poly[, 2] 4.40 2.27 1.94 0.054 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.27 on 197 degrees of freedom
## Multiple R-squared: 0.156, Adjusted R-squared: 0.148
## F-statistic: 18.3 on 2 and 197 DF, p-value: 5.25e-08
## Note: the coefficient on the first term does NOT change.
## Are you starting to see? If you are uncertain whether to
## add a quadratic term in a regression, this is a good way
## because the estimate of the linear part does not change
## while you are testing the additional terms.
## When there are more predictors in the equation, the
## previous statement is not EXACTLY true, but it is ALMOST true. It is
## not exactly true because there may be a slight correlation
## between x1, x2, and the poly terms.
## Residual Centering: How to make your own orthogonal polynomial
## If you did not have the poly() function, how could you
## achieve the same benefit? My colleagues call this
## "residual centering" of a squared term.
## Regress x-squared on x
m5 <- lm(x3square ~ x3, data = dat)
## Take the residuals from that model. Call them x3squarerc.
dat$x3squarerc <- resid(m5)
## x3squarerc is the part of x3squared that has x3 removed.
## Note that the correlation of x3squarerc and x3 is 0, with minor
## rounding error
cor(dat$x3squarerc, dat$x3)
## [1] -7.592e-18
## But if you plot x3squarerc on x3, you see it is still
## quadratic.
plot(x3squarerc ~ x3, data = dat)
## x3squarerc is just the quadratic part of x3square, no x left.
## Now run the regression with x3 and add x3squarerc
m6 <- lm(y ~ x1 + x2 + x3, data=dat)
m7 <- lm(y ~ x1 + x2 + x3 + x3squarerc, data=dat)
mtable(m6, m7)
##
## Calls:
## m6: lm(formula = y ~ x1 + x2 + x3, data = dat)
## m7: lm(formula = y ~ x1 + x2 + x3 + x3squarerc, data = dat)
##
## ===================================
## m6 m7
## -----------------------------------
## (Intercept) -0.646* -0.636*
## (0.269) (0.265)
## x1 0.787*** 0.756***
## (0.121) (0.120)
## x2 1.408*** 1.420***
## (0.114) (0.112)
## x3 2.035*** 2.026***
## (0.248) (0.244)
## x3squarerc 0.992*
## (0.383)
## -----------------------------------
## R-squared 0.563 0.578
## adj. R-squared 0.557 0.569
## sigma 1.634 1.610
## F 84.336 66.779
## p 0.000 0.000
## Log-likelihood -379.929 -376.540
## Deviance 523.079 505.649
## AIC 769.858 765.081
## BIC 786.350 784.871
## N 200 200
## ===================================
## The linear part is just about the same. If you fit this
## again without x1 and x2, you'll see they are exactly the same.
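## Here is a quick sketch of that check (m6b and m7b are names used only
## for this illustration):
m6b <- lm(y ~ x3, data = dat)
m7b <- lm(y ~ x3 + x3squarerc, data = dat)
cbind(coef(m6b), coef(m7b)[1:2])
## The intercept and the x3 slope should match exactly across the two fits.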
## The do-it-yourself residual-centering leaves x3 unchanged and only
## fiddles with x3square. Thus, in some sense, it is easier to
## understand than poly(). poly()
## re-scales x3 so it is roughly on the same range as the quadratic part.
## This is beneficial in numerical linear algebra (computer math).
## Observe for yourself, the first poly term is just
## x3, shrunken proportionally.
plot(x3poly[, 1] ~ x3, data = dat)
## You notice that x3poly[ ,1] is mean centered:
mean(x3poly[ ,1])
## [1] 2.459e-18
summary(lm(x3poly[ ,1] ~ x3, data = dat))
## Warning: essentially perfect fit: summary may be unreliable
##
## Call:
## lm(formula = x3poly[, 1] ~ x3, data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.89e-17 -6.90e-18 -2.80e-18 9.00e-19 3.51e-16
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.47e-01 4.45e-18 -3.30e+16 <2e-16 ***
## x3 1.51e-01 4.11e-18 3.66e+16 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.73e-17 on 198 degrees of freedom
## Multiple R-squared: 1, Adjusted R-squared: 1
## F-statistic: 1.34e+33 on 1 and 198 DF, p-value: <2e-16
## The warning message is inconsequential.
## Let's create another variable, just for comparison.
newX <- data.frame(z = rnorm(1000, m = 50, sd = 10))
newX$zsquare <- newX$z^2
## Here are the summary stats.
summarize(newX)
## $numerics
## z zsquare
## 0% 18.990 360.6
## 25% 42.950 1844.7
## 50% 49.581 2458.3
## 75% 55.882 3122.8
## 100% 78.451 6154.6
## mean 49.526 2546.4
## sd 9.683 966.3
## var 93.756 933805.4
## NA's 0.000 0.0
## N 1000.000 1000.0
##
## $factors
## NULL
xpol <- poly(newX$z, 2)
head(xpol, 20)
## 1 2
## [1,] -0.036815 0.008153
## [2,] -0.003082 -0.023479
## [3,] 0.004198 -0.023235
## [4,] 0.003102 -0.023433
## [5,] 0.013109 -0.019519
## [6,] -0.003836 -0.023361
## [7,] 0.032236 0.001157
## [8,] 0.017242 -0.016519
## [9,] 0.006665 -0.022583
## [10,] -0.034777 0.004713
## [11,] -0.036372 0.007389
## [12,] -0.034082 0.003584
## [13,] 0.039273 0.013124
## [14,] 0.011481 -0.020479
## [15,] 0.037785 0.010398
## [16,] 0.010668 -0.020912
## [17,] -0.019891 -0.014454
## [18,] 0.058659 0.058223
## [19,] -0.030596 -0.001731
## [20,] 0.005715 -0.022868
summary(xpol)
## 1 2
## Min. :-0.09978 Min. :-0.02368
## 1st Qu.:-0.02149 1st Qu.:-0.02155
## Median : 0.00018 Median :-0.01328
## Mean : 0.00000 Mean : 0.00000
## 3rd Qu.: 0.02077 3rd Qu.: 0.00917
## Max. : 0.09452 Max. : 0.21137
summarize(as.data.frame(xpol))
## $numerics
## 1 2
## 0% -0.100 -0.024
## 25% -0.021 -0.022
## 50% 0.000 -0.013
## 75% 0.021 0.009
## 100% 0.095 0.211
## mean 0.000 0.000
## sd 0.032 0.032
## var 0.001 0.001
## NA's 0.000 0.000
## N 1000.000 1000.000
##
## $factors
## NULL
## What do I see there? The means of both columns are
## almost exactly 0.
##
## And the variances are identical (hence standard deviations are
## identical too).
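## A direct check of that (a sketch): each poly() column has mean 0 and
## unit length, so each sample variance is 1/(n - 1).
apply(xpol, 2, var)
1 / (nrow(newX) - 1)
## Both should be about 0.001, matching the summarize() output above.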