## Paul Johnson
## Some miscellaneous regression plots: poking and soaking
x1 <- runif(100, min = 50, max = 150)
x2numeric <- rnorm(100)
x2dummy <- ifelse(x2numeric < 0, 0, 1)
x2f1 <- cut(x2numeric, breaks = c(-100, 0, 100), labels = c("low", "high"))
## now an error term
err1 <- rnorm(100, mean = 0, sd = 5000 )
## Manufacture an output variable from the numeric predictor and the dummy
y <- 10 + 100 * x1 - 0.1 * x2dummy + err1
dat <- data.frame(x1, x2dummy, x2f1, y)
rm(x1, x2dummy, x2f1, y)
mytitle <- "Phony data for 10 + 100 * x1"
plot(y ~ x1, data = dat, main = mytitle, type = "n")
points(y ~ x1, data = dat, col = as.numeric(dat$x2f1), pch = 16, cex = 0.6)
legend(min(dat$x1), max(dat$y), legend = levels(dat$x2f1), col = c(1, 2), pch = 16, pt.cex = 0.6)
mymod1 <- lm(y ~ x1 + x2f1, data = dat)
summary(mymod1)
##
## Call:
## lm(formula = y ~ x1 + x2f1, data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10217 -3154 -272 3210 14138
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1906.9 1796.2 -1.06 0.29
## x1 119.8 17.7 6.77 1e-09 ***
## x2f1high -869.6 994.5 -0.87 0.38
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4940 on 97 degrees of freedom
## Multiple R-squared: 0.321, Adjusted R-squared: 0.307
## F-statistic: 22.9 on 2 and 97 DF, p-value: 7.09e-09
library(car)
## Here's the variance-covariance matrix of b-hat. It uses "vcov",
## which is now part of the base R distribution (the stats package)
VarCovMat <- vcov(mymod1)
## Get square roots of the diagonal to compare against the standard
## errors in the lm output
sqrt(diag(VarCovMat))
## (Intercept) x1 x2f1high
## 1796.18 17.71 994.55
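## As a cross-check, the same matrix can be rebuilt from the textbook
## formula s^2 * (X'X)^-1. A sketch; "VarCovByHand" is my own name:
X <- model.matrix(mymod1)
s2 <- sum(residuals(mymod1)^2) / df.residual(mymod1)
VarCovByHand <- s2 * solve(t(X) %*% X)
all.equal(VarCovMat, VarCovByHand)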
## Check for multicollinearity
vif(mymod1)
## x1 x2f1
## 1.012 1.012
library(rockchalk)
## Loading required package: MASS
## Loading required package: lme4
## Loading required package: Matrix
## Loading required package: methods
## Loading required package: Rcpp
##
## Attaching package: 'rockchalk'
##
## The following object is masked from 'package:MASS':
##
## mvrnorm
mcDiagnose(mymod1)
## The following auxiliary models are being estimated and returned in a list:
## x1 ~ x2f1high
## <environment: 0x58b4378>
## x2f1high ~ x1
## <environment: 0x58b4378>
## Drum roll please!
##
## And your R_j Squareds are (auxiliary Rsq)
## x1 x2f1high
## 0.01139 0.01139
## The Corresponding VIF, 1/(1-R_j^2)
## x1 x2f1high
## 1.012 1.012
## Bivariate Correlations for design matrix
## x1 x2f1high
## x1 1.00 0.11
## x2f1high 0.11 1.00
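## The auxiliary R_j^2 reported above can be reproduced directly by
## regressing one column of the design matrix on the others (a sketch):
summary(lm(x1 ~ x2f1, data = dat))$r.squared  ## should match 0.01139 above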
## There are 5 variants of the "heteroskedasticity-consistent"
## covariance matrix, hc0 through hc4
hccm(mymod1, type = "hc0")
## (Intercept) x1 x2f1high
## (Intercept) 3169337 -27080.4 -505672.8
## x1 -27080 273.2 232.7
## x2f1high -505673 232.7 923282.2
hccm(mymod1, type = "hc1")
## (Intercept) x1 x2f1high
## (Intercept) 3267358 -27918.0 -521312.1
## x1 -27918 281.6 239.9
## x2f1high -521312 239.9 951837.3
hccm(mymod1, type = "hc2")
## (Intercept) x1 x2f1high
## (Intercept) 3284869 -28125.8 -517884.8
## x1 -28126 284.0 212.6
## x2f1high -517885 212.6 951126.9
hccm(mymod1, type = "hc3")
## (Intercept) x1 x2f1high
## (Intercept) 3404886 -29213.8 -530338.0
## x1 -29214 295.2 190.2
## x2f1high -530338 190.2 979908.4
hccm(mymod1, type = "hc4")
## (Intercept) x1 x2f1high
## (Intercept) 3314082 -28458.1 -513491.9
## x1 -28458 287.8 156.9
## x2f1high -513492 156.9 953561.0
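## To use one of these matrices for inference, take square roots of the
## diagonal as robust standard errors and form the usual t ratios.
## A sketch; "robustSE" is my own name:
robustSE <- sqrt(diag(hccm(mymod1, type = "hc3")))
cbind(estimate = coef(mymod1), robustSE, t = coef(mymod1) / robustSE)
## lmtest::coeftest(mymod1, vcov. = hccm(mymod1, type = "hc3")) wraps
## the same calculation and adds p-values (lmtest is loaded below)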
## This will introduce heteroskedasticity: the error sd grows with x1^2
e <- rnorm(100, mean = 0, sd = 1 + 10 * (dat$x1)^2)
dat$y2 <- 10 + 0.015 * dat$x1 - 4 * dat$x2dummy + e
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
mymod3 <- lm( y2 ~ x1 + x2f1, data = dat)
termplot(mymod3, partial.resid = TRUE, terms = "x1")
termplot(mymod3, partial.resid = TRUE, terms = "x2f1")
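## A direct visual check (a sketch): since the error sd was built from
## x1^2, the residual spread should fan out as x1 grows
plot(abs(residuals(mymod3)) ~ dat$x1, xlab = "x1", ylab = "|residual|")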
## Breusch-Pagan test
bptest(mymod1)
##
## studentized Breusch-Pagan test
##
## data: mymod1
## BP = 0.8871, df = 2, p-value = 0.6418
bptest(mymod3)
##
## studentized Breusch-Pagan test
##
## data: mymod3
## BP = 25.2, df = 2, p-value = 3.373e-06
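## bptest can also be aimed at an explicit variance model. Since the
## error sd here was built from x1^2, that form is the natural one to
## test against (a sketch):
bptest(mymod3, varformula = ~ x1 + I(x1^2), data = dat)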
plotSlopes(mymod3, plotx = "x1", modx = "x2f1")
## Functional form test
## Ramsey's RESET test is a general-purpose specification test.
## It checks whether adding squares or cubes of the fitted values or
## regressors improves the fit; if it does, the model is likely
## misspecified.
resettest(mymod1 ) # default
##
## RESET test
##
## data: mymod1
## RESET = 0.6507, df1 = 2, df2 = 95, p-value = 0.524
resettest(mymod1, type = "fitted")
##
## RESET test
##
## data: mymod1
## RESET = 0.6507, df1 = 2, df2 = 95, p-value = 0.524
resettest(mymod1, type = "regressor")
##
## RESET test
##
## data: mymod1
## RESET = 0.3792, df1 = 2, df2 = 95, p-value = 0.6855
resettest(mymod1, type = "princomp")
##
## RESET test
##
## data: mymod1
## RESET = 0.3792, df1 = 2, df2 = 95, p-value = 0.6855
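## What resettest does can be sketched by hand: augment the model with
## powers of its fitted values and F-test the addition ("resetByHand"
## is my own name):
resetByHand <- update(mymod1, . ~ . + I(fitted(mymod1)^2) + I(fitted(mymod1)^3))
anova(mymod1, resetByHand)  ## F should match the "fitted" RESET above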
## TESTS for NON-NESTED MODELS
## Note: mymod2 is given the same formula as mymod1, so the tests below
## are degenerate; they only illustrate the calls (see the encompassing
## sketch at the end)
mymod2 <- lm(y ~ x1 + x2f1, data = dat)
## tests for specification: fitted model tests
## Cox's test
coxtest(mymod1, mymod2)
## Cox test
##
## Model 1: y ~ x1 + x2f1
## Model 2: y ~ x1 + x2f1
## Estimate Std. Error z value Pr(>|z|)
## fitted(M1) ~ M2 0 1.96e-14 0 1
## fitted(M2) ~ M1 0 1.96e-14 0 1
## J test a la Davidson & MacKinnon: tests model 1's adequacy by
## checking whether the fitted values from model 2 have explanatory
## power when added to model 1
jtest(mymod1, mymod2)
## J test
##
## Model 1: y ~ x1 + x2f1
## Model 2: y ~ x1 + x2f1
## Estimate Std. Error t value Pr(>|t|)
## M1 + fitted(M2) -870 995 -0.87 0.38
## M2 + fitted(M1) -870 995 -0.87 0.38
## Davidson & MacKinnon's "encompassing test". Puts the regressors of
## both models into a bigger model, then uses Wald tests to see whether
## either original model can be rejected against it.
## encomptest(mymod1, mymod2)
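## encomptest needs two genuinely different formulas; with identical
## regressor sets there is nothing extra to test. A sketch with an
## illustrative rival ("mymod4" is my own model, not from the original):
mymod4 <- lm(y ~ log(x1) + x2f1, data = dat)
encomptest(mymod1, mymod4)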