#PJ
# 2003-08-30
# This is a program I wrote in 2002 to show what would
# happen if you really could calculate regression estimates
# for many repeated samples. IT makes a histogram showing
# the variation in slope estimates.
# The later program, ols2, makes a little fancier output.
# The number of samples to collect is:
nOfRuns <- 100
a <- 2 #intercept parameter
b <- 5 #slope parameter
stderror <- 3 #standard deviation of error term
# This function creates some data to use in a regression.
# Note we are treating the x as a fixed thing in the exercise.
# Because the last element here creates y, y gets returned by
# this function.
getPhonyData <- function (x){
e <-rnorm(1000,mean=0,sd=stderror)
y <- a + b*x + e
}
# This is the column for the input variable.
# We use the same inputs for every sample of error terms/ dependant
# variables.
x <- 10+5*rnorm(1000)
conductSim <- function(i){
y<-getPhonyData(x) #x is grabbed from environment
try(lm(y~x)) #output of last command is returned
}
result <- lapply(1:nOfRuns, conductSim)
#result is a list of model results. You can get the i'th one by
#result[[i]].
#collect up the estimates of "b"
getB <- function(i){result[[i]]$coefficients[2]}
#collect up estimated standard errors
bvector <- sapply(1:nOfRuns,getB)
meanb <- mean(bvector)
sdb <- sd(bvector)
hist(bvector,main="Sampling distribution of b")
text(x=5.02,y=20,paste("The true value of b is ", b))
text(x=5.02,y=15,paste("The average of the estimates \n of b is ", meanb))
text (x=5.02,y=10,paste("The standard deviation of estimates is \n of b is ", sdb))