## Paul Johnson
## some miscellaneous regression plots  poking and soaking

x1 <- runif(100, min = 50, max = 150)

x2numeric <- rnorm(100)
x2dummy <- ifelse( x2numeric < 0, 0, 1)

x2f1 <-cut(x2numeric, breaks = c(-100, 0, 100), labels = c("low","high"))

## now an error term
err1 <- rnorm(100, mean = 0, sd = 5000 )

## Manufacture an output variable, using the numeric
y <- 10 + 100 * x1 - 0.1 * x2dummy +   err1

dat <- data.frame(x1, x2dummy, x2f1, y)
rm(x1, x2dummy, x2f1, y)

mytitle <- paste("Phony data for", expression(10 + 100 * x1))
plot(y ~ x1, data = dat, main = mytitle , type="n")
points(y ~ x1, data = dat, col = as.numeric(dat$x2f1), pch=16, cex=.6)
legend( 1*min(dat$x1), max(dat$y), levels(dat$x2f1), col = c(1,2), pch = 16, pt.cex = 0.6)

mymod1 <- lm(y ~ x1 + x2f1, data = dat)

summary(mymod1)

## 
## Call:
## lm(formula = y ~ x1 + x2f1, data = dat)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -10217  -3154   -272   3210  14138 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -1906.9     1796.2   -1.06     0.29    
## x1             119.8       17.7    6.77    1e-09 ***
## x2f1high      -869.6      994.5   -0.87     0.38    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4940 on 97 degrees of freedom
## Multiple R-squared:  0.321,  Adjusted R-squared:  0.307 
## F-statistic: 22.9 on 2 and 97 DF,  p-value: 7.09e-09

library(car)

## Here's the Var-Covar matrix of b-hat. It uses
## "vcov" which is now in the R  base distribution
VarCovMat <- vcov(mymod1)

#get square root of diagonals to compare against lm output
sqrt(diag(VarCovMat))

## (Intercept)          x1    x2f1high 
##     1796.18       17.71      994.55

#check for multi-collinearity
vif(mymod1)

##    x1  x2f1 
## 1.012 1.012

library(rockchalk)

## Loading required package: MASS
## Loading required package: lme4
## Loading required package: Matrix
## Loading required package: methods
## Loading required package: Rcpp
## 
## Attaching package: 'rockchalk'
## 
## The following object is masked from 'package:MASS':
## 
##     mvrnorm

plot of chunk unnamed-chunk-1

mcDiagnose(mymod1)

## The following auxiliary models are being estimated and returned in a list:
## x1 ~ x2f1high
## <environment: 0x58b4378>
## x2f1high ~ x1
## <environment: 0x58b4378>
## Drum roll please! 
## 
## And your R_j Squareds are (auxiliary Rsq)
##       x1 x2f1high 
##  0.01139  0.01139 
## The Corresponding VIF, 1/(1-R_j^2)
##       x1 x2f1high 
##    1.012    1.012 
## Bivariate Correlations for design matrix 
##            x1 x2f1high
## x1       1.00     0.11
## x2f1high 0.11     1.00

#There are 5 variants of the "heteroskedasticity consistent
#covariance matrix
hccm(mymod1, type = "hc0")

##             (Intercept)       x1  x2f1high
## (Intercept)     3169337 -27080.4 -505672.8
## x1               -27080    273.2     232.7
## x2f1high        -505673    232.7  923282.2

hccm(mymod1, type = "hc1")

##             (Intercept)       x1  x2f1high
## (Intercept)     3267358 -27918.0 -521312.1
## x1               -27918    281.6     239.9
## x2f1high        -521312    239.9  951837.3

hccm(mymod1, type = "hc2")

##             (Intercept)       x1  x2f1high
## (Intercept)     3284869 -28125.8 -517884.8
## x1               -28126    284.0     212.6
## x2f1high        -517885    212.6  951126.9

hccm(mymod1, type = "hc3")

##             (Intercept)       x1  x2f1high
## (Intercept)     3404886 -29213.8 -530338.0
## x1               -29214    295.2     190.2
## x2f1high        -530338    190.2  979908.4

hccm(mymod1, type = "hc4")

##             (Intercept)       x1  x2f1high
## (Intercept)     3314082 -28458.1 -513491.9
## x1               -28458    287.8     156.9
## x2f1high        -513492    156.9  953561.0

#This will introduce heteroskedasticity
e <- rnorm(100, mean = 0, sd = (1+ 10*(dat$x1)^2))

dat$y2 <- 10 + 0.015 * dat$x1 - 4 * dat$x2dummy +   e


library(lmtest)

## Loading required package: zoo
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

mymod3 <- lm( y2 ~ x1 + x2f1, data = dat)

termplot(mymod3, partial.resid = TRUE, terms = "x1")

plot of chunk unnamed-chunk-1

termplot(mymod3, partial.resid = TRUE, terms = "x2f1")

plot of chunk unnamed-chunk-1

##Breusch-Pagan test
bptest(mymod1)

## 
##  studentized Breusch-Pagan test
## 
## data:  mymod1
## BP = 0.8871, df = 2, p-value = 0.6418

bptest(mymod3)

## 
##  studentized Breusch-Pagan test
## 
## data:  mymod3
## BP = 25.2, df = 2, p-value = 3.373e-06

plotSlopes(mymod3, plotx = "x1", modx = "x2f1")

plot of chunk unnamed-chunk-1

## Functional Form test

## Ramsey's "reset test" is a general purpose specification test.
## Checks to see if including squares or cubes of predictors will help.
## If it does, then you likely don't have the correct model

resettest(mymod1 ) # default

## 
##  RESET test
## 
## data:  mymod1
## RESET = 0.6507, df1 = 2, df2 = 95, p-value = 0.524

resettest(mymod1, type = "fitted")

## 
##  RESET test
## 
## data:  mymod1
## RESET = 0.6507, df1 = 2, df2 = 95, p-value = 0.524

resettest(mymod1, type = "regressor")

## 
##  RESET test
## 
## data:  mymod1
## RESET = 0.3792, df1 = 2, df2 = 95, p-value = 0.6855

resettest(mymod1, type = "princomp")

## 
##  RESET test
## 
## data:  mymod1
## RESET = 0.3792, df1 = 2, df2 = 95, p-value = 0.6855

#TESTS for NON NESTED MODELS

mymod2 <- lm(y ~ x1 + x2f1, data = dat)

## tests for specification: fitted model tests
## Cox's test
coxtest(mymod1, mymod2)

## Cox test
## 
## Model 1: y ~ x1 + x2f1
## Model 2: y ~ x1 + x2f1
##                 Estimate Std. Error z value Pr(>|z|)
## fitted(M1) ~ M2        0   1.96e-14       0        1
## fitted(M2) ~ M1        0   1.96e-14       0        1

## j-test ala Davidson & MacKinnon: tests model 1's adequacy
## by finding if predictor values from model 2 have explanatory
## power in model 1
jtest(mymod1, mymod2)

## J test
## 
## Model 1: y ~ x1 + x2f1
## Model 2: y ~ x1 + x2f1
##                 Estimate Std. Error t value Pr(>|t|)
## M1 + fitted(M2)     -870        995   -0.87     0.38
## M2 + fitted(M1)     -870        995   -0.87     0.38

## Davidson & MacKinnon's "encompassing test". Puts both models
## into a bigger model, then uses a Wald test to see if either should
## be removed.
## encomptest(mymod1, mymod2)