### Filename: Normal2_2008.R
### Paul Johnson March 28, 2008
### This code should be available somewhere in http://pj.freefaculty.org. If it is not
### email me <pauljohn@ku.edu>
set.seed(234234)
sampleSize <- 500
mymean <- 0
mystd <- 1.5
myx <- seq( mymean - 3*mystd, mymean+ 3*mystd, length.out=sampleSize)
## Creates an empty container to hold samples:
mySamples <- list()
## for loop creates 100 samples and stores them
for (i in 1:100) {
mySamples[[i]] <- rnorm(sampleSize, mean = mymean, sd = mystd)
}
par(mfcol=c(5,2))
for (i in 1:10) {
hist(mySamples[[i]],main=paste("Sample", i) )
}
### force ranges to match
for (i in 1:10) {
mytitle <- paste("Sample", i, "mean", round(mean(mySamples[[i]]),2),"std.dev.", round(sd(mySamples[[i]]),2) )
myhist <- hist(mySamples[[i]],main= mytitle, freq=F, xlim=c(-6,6), xlab="Normally Distributed Variable" )
}
par(mfcol=c(1,1))
### sapply applies the mean function to each sample. The "s" in
### sapply stands for "simplify".
myMeans <- sapply(mySamples, mean)
h1 <- hist(myMeans, freq=FALSE, plot=TRUE, main=paste("Sampling Distribution of ",100," Samples"))
mtext(text=paste("True Mean = ", mymean, "True Standard Deviation = ", mystd) , side=3)
### Note the range of the means:
summary(myMeans)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.14600 -0.05190 -0.00112 -0.00535 0.04040 0.12100
### Compare against the range in one sample, say the 5th
summary(mySamples[[5]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -4.640 -1.060 0.014 -0.067 0.874 5.460
text(h1$breaks[1], 5, pos = 4, paste("Mean of Means =", round(mean(myMeans),3),"\n Std. of Means ", round(sd(myMeans),3)), xpd = TRUE)
par(mfcol=c(2,1))
myhist <- hist(mySamples[[1]],main= mytitle, freq=F, xlim=c(-6,6), xlab="Normally Distributed Variable" )
hist(myMeans,freq=F, main=paste("Sampling Distribution of ",100," Samples"), xlim=c(-6,6))
### Another way. write a function to create the data and draw it.
### Use lapply to call it over and over
sampleSize <- 500
par(mfrow=c(5,2))
createDist <-function(i){
z <- rnorm(sampleSize, mean = mymean, sd = mystd)
title <- paste("Run", i, "Normal mean=",mymean," std.dev=",mystd)
hist(z,breaks=20,main=title)
}
createDist(1)
createDist(2)
createDist(3)
createDist(4)
createDist(5)
createDist(6)
createDist(7)
createDist(8)
createDist(9)
createDist(10)
lapply(1:10,createDist);
## [[1]]
## $breaks
## [1] -5.0 -4.5 -4.0 -3.5 -3.0 -2.5 -2.0 -1.5 -1.0 -0.5 0.0 0.5 1.0 1.5
## [15] 2.0 2.5 3.0 3.5 4.0
##
## $counts
## [1] 2 2 4 5 14 20 28 54 70 53 70 58 37 24 27 14 10 8
##
## $density
## [1] 0.008 0.008 0.016 0.020 0.056 0.080 0.112 0.216 0.280 0.212 0.280
## [12] 0.232 0.148 0.096 0.108 0.056 0.040 0.032
##
## $mids
## [1] -4.75 -4.25 -3.75 -3.25 -2.75 -2.25 -1.75 -1.25 -0.75 -0.25 0.25
## [12] 0.75 1.25 1.75 2.25 2.75 3.25 3.75
##
## $xname
## [1] "z"
##
## $equidist
## [1] TRUE
##
## attr(,"class")
## [1] "histogram"
##
## [[2]]
## $breaks
## [1] -4.5 -4.0 -3.5 -3.0 -2.5 -2.0 -1.5 -1.0 -0.5 0.0 0.5 1.0 1.5 2.0
## [15] 2.5 3.0 3.5 4.0 4.5 5.0
##
## $counts
## [1] 1 5 4 13 25 30 49 68 76 74 45 45 30 12 11 5 4 2 1
##
## $density
## [1] 0.004 0.020 0.016 0.052 0.100 0.120 0.196 0.272 0.304 0.296 0.180
## [12] 0.180 0.120 0.048 0.044 0.020 0.016 0.008 0.004
##
## $mids
## [1] -4.25 -3.75 -3.25 -2.75 -2.25 -1.75 -1.25 -0.75 -0.25 0.25 0.75
## [12] 1.25 1.75 2.25 2.75 3.25 3.75 4.25 4.75
##
## $xname
## [1] "z"
##
## $equidist
## [1] TRUE
##
## attr(,"class")
## [1] "histogram"
##
## [[3]]
## $breaks
## [1] -4.5 -4.0 -3.5 -3.0 -2.5 -2.0 -1.5 -1.0 -0.5 0.0 0.5 1.0 1.5 2.0
## [15] 2.5 3.0 3.5 4.0 4.5
##
## $counts
## [1] 1 3 8 19 13 44 34 47 71 81 61 49 33 13 11 10 1 1
##
## $density
## [1] 0.004 0.012 0.032 0.076 0.052 0.176 0.136 0.188 0.284 0.324 0.244
## [12] 0.196 0.132 0.052 0.044 0.040 0.004 0.004
##
## $mids
## [1] -4.25 -3.75 -3.25 -2.75 -2.25 -1.75 -1.25 -0.75 -0.25 0.25 0.75
## [12] 1.25 1.75 2.25 2.75 3.25 3.75 4.25
##
## $xname
## [1] "z"
##
## $equidist
## [1] TRUE
##
## attr(,"class")
## [1] "histogram"
##
## [[4]]
## $breaks
## [1] -4.5 -4.0 -3.5 -3.0 -2.5 -2.0 -1.5 -1.0 -0.5 0.0 0.5 1.0 1.5 2.0
## [15] 2.5 3.0 3.5 4.0 4.5
##
## $counts
## [1] 1 3 4 11 21 37 44 65 78 61 59 44 28 26 12 2 2 2
##
## $density
## [1] 0.004 0.012 0.016 0.044 0.084 0.148 0.176 0.260 0.312 0.244 0.236
## [12] 0.176 0.112 0.104 0.048 0.008 0.008 0.008
##
## $mids
## [1] -4.25 -3.75 -3.25 -2.75 -2.25 -1.75 -1.25 -0.75 -0.25 0.25 0.75
## [12] 1.25 1.75 2.25 2.75 3.25 3.75 4.25
##
## $xname
## [1] "z"
##
## $equidist
## [1] TRUE
##
## attr(,"class")
## [1] "histogram"
##
## [[5]]
## $breaks
## [1] -4.0 -3.5 -3.0 -2.5 -2.0 -1.5 -1.0 -0.5 0.0 0.5 1.0 1.5 2.0 2.5
## [15] 3.0 3.5 4.0 4.5 5.0 5.5
##
## $counts
## [1] 4 5 15 21 31 48 47 71 67 56 62 31 16 14 7 4 0 0 1
##
## $density
## [1] 0.016 0.020 0.060 0.084 0.124 0.192 0.188 0.284 0.268 0.224 0.248
## [12] 0.124 0.064 0.056 0.028 0.016 0.000 0.000 0.004
##
## $mids
## [1] -3.75 -3.25 -2.75 -2.25 -1.75 -1.25 -0.75 -0.25 0.25 0.75 1.25
## [12] 1.75 2.25 2.75 3.25 3.75 4.25 4.75 5.25
##
## $xname
## [1] "z"
##
## $equidist
## [1] TRUE
##
## attr(,"class")
## [1] "histogram"
##
## [[6]]
## $breaks
## [1] -5.0 -4.5 -4.0 -3.5 -3.0 -2.5 -2.0 -1.5 -1.0 -0.5 0.0 0.5 1.0 1.5
## [15] 2.0 2.5 3.0 3.5 4.0 4.5
##
## $counts
## [1] 1 2 2 5 17 20 29 37 60 71 62 59 53 38 19 13 8 3 1
##
## $density
## [1] 0.004 0.008 0.008 0.020 0.068 0.080 0.116 0.148 0.240 0.284 0.248
## [12] 0.236 0.212 0.152 0.076 0.052 0.032 0.012 0.004
##
## $mids
## [1] -4.75 -4.25 -3.75 -3.25 -2.75 -2.25 -1.75 -1.25 -0.75 -0.25 0.25
## [12] 0.75 1.25 1.75 2.25 2.75 3.25 3.75 4.25
##
## $xname
## [1] "z"
##
## $equidist
## [1] TRUE
##
## attr(,"class")
## [1] "histogram"
##
## [[7]]
## $breaks
## [1] -5.0 -4.5 -4.0 -3.5 -3.0 -2.5 -2.0 -1.5 -1.0 -0.5 0.0 0.5 1.0 1.5
## [15] 2.0 2.5 3.0 3.5 4.0 4.5 5.0 5.5
##
## $counts
## [1] 1 2 1 5 10 19 36 38 51 79 67 53 47 38 32 13 5 2 0 0 1
##
## $density
## [1] 0.004 0.008 0.004 0.020 0.040 0.076 0.144 0.152 0.204 0.316 0.268
## [12] 0.212 0.188 0.152 0.128 0.052 0.020 0.008 0.000 0.000 0.004
##
## $mids
## [1] -4.75 -4.25 -3.75 -3.25 -2.75 -2.25 -1.75 -1.25 -0.75 -0.25 0.25
## [12] 0.75 1.25 1.75 2.25 2.75 3.25 3.75 4.25 4.75 5.25
##
## $xname
## [1] "z"
##
## $equidist
## [1] TRUE
##
## attr(,"class")
## [1] "histogram"
##
## [[8]]
## $breaks
## [1] -6.0 -5.5 -5.0 -4.5 -4.0 -3.5 -3.0 -2.5 -2.0 -1.5 -1.0 -0.5 0.0 0.5
## [15] 1.0 1.5 2.0 2.5 3.0 3.5 4.0 4.5
##
## $counts
## [1] 1 0 0 1 7 8 12 18 37 43 57 72 72 60 41 27 24 9 7 1 3
##
## $density
## [1] 0.004 0.000 0.000 0.004 0.028 0.032 0.048 0.072 0.148 0.172 0.228
## [12] 0.288 0.288 0.240 0.164 0.108 0.096 0.036 0.028 0.004 0.012
##
## $mids
## [1] -5.75 -5.25 -4.75 -4.25 -3.75 -3.25 -2.75 -2.25 -1.75 -1.25 -0.75
## [12] -0.25 0.25 0.75 1.25 1.75 2.25 2.75 3.25 3.75 4.25
##
## $xname
## [1] "z"
##
## $equidist
## [1] TRUE
##
## attr(,"class")
## [1] "histogram"
##
## [[9]]
## $breaks
## [1] -5.0 -4.5 -4.0 -3.5 -3.0 -2.5 -2.0 -1.5 -1.0 -0.5 0.0 0.5 1.0 1.5
## [15] 2.0 2.5 3.0 3.5 4.0 4.5 5.0
##
## $counts
## [1] 2 2 3 1 14 29 30 45 57 71 67 56 57 26 19 8 8 3 1 1
##
## $density
## [1] 0.008 0.008 0.012 0.004 0.056 0.116 0.120 0.180 0.228 0.284 0.268
## [12] 0.224 0.228 0.104 0.076 0.032 0.032 0.012 0.004 0.004
##
## $mids
## [1] -4.75 -4.25 -3.75 -3.25 -2.75 -2.25 -1.75 -1.25 -0.75 -0.25 0.25
## [12] 0.75 1.25 1.75 2.25 2.75 3.25 3.75 4.25 4.75
##
## $xname
## [1] "z"
##
## $equidist
## [1] TRUE
##
## attr(,"class")
## [1] "histogram"
##
## [[10]]
## $breaks
## [1] -5.5 -5.0 -4.5 -4.0 -3.5 -3.0 -2.5 -2.0 -1.5 -1.0 -0.5 0.0 0.5 1.0
## [15] 1.5 2.0 2.5 3.0 3.5 4.0 4.5 5.0
##
## $counts
## [1] 2 0 1 5 3 13 29 35 53 52 58 56 63 40 47 24 12 4 2 0 1
##
## $density
## [1] 0.008 0.000 0.004 0.020 0.012 0.052 0.116 0.140 0.212 0.208 0.232
## [12] 0.224 0.252 0.160 0.188 0.096 0.048 0.016 0.008 0.000 0.004
##
## $mids
## [1] -5.25 -4.75 -4.25 -3.75 -3.25 -2.75 -2.25 -1.75 -1.25 -0.75 -0.25
## [12] 0.25 0.75 1.25 1.75 2.25 2.75 3.25 3.75 4.25 4.75
##
## $xname
## [1] "z"
##
## $equidist
## [1] TRUE
##
## attr(,"class")
## [1] "histogram"
par(mfrow=c(1,1))
### Another way to show a sampling distribution. This uses R's built in "Replicate"
sampleSize <- 10000
repls <- 500
mytitle <- paste("Replications=",repls,"SampleSize=",sampleSize,"Mean=",mymean,"Std.Dev=",mystd)
means <- replicate(repls,mean(rnorm(sampleSize,mean=mymean,sd=mystd)))
myhist <- hist(means, main=mytitle, breaks=15)
# Display some results in text inside the histo
# I know there should be a better way, but don't know it
# Note that in text, \n has a "next line" effect
mylabel <- paste("E(x)=",mymean,"\nobserved
mean=",round(mean(means),2),"\nobserved std.dev \nof means=",round(sd(means),2))
# This puts text into the histogram, guessing
# the positions from the 6'th bin's position
text(myhist$breaks[6],.8*max(myhist$counts),mylabel ,pos=2)
sampleSize <- 10000
repls <- 500
mytitle <- paste("uniforms")
means <- replicate(repls,mean(runif(sampleSize,min=0,max=222)))
par(mfcol=c(5,2))
for (i in 1:10)
hist(runif(sampleSize, min=0, max=222))
##dev.new()
myhist <- hist(means, main=mytitle, breaks=15)
# Display some results in text inside the histo
# I know there should be a better way, but don't know it
# Note that in text, \n has a "next line" effect
mylabel <- paste("E(x)=",mymean,"\nobserved mean=",round(mean(means),2),"\nobserved std.dev \nof means=",round(sd(means),2))
# This puts text into the histogram, guessing
# the positions from the 6'th bin's position
text(myhist$breaks[4], .8*max(myhist$counts), mylabel ,pos=2)
## TODO: superimpose Normal() on that histogram