### Filename: Normal2_2008.R
### Paul Johnson  March 28, 2008
### This code should be available somewhere in http://pj.freefaculty.org.  If it is not
### email me <pauljohn@ku.edu>


set.seed(234234)
sampleSize <- 500


mymean <- 0

mystd <- 1.5


myx <- seq( mymean - 3*mystd, mymean+ 3*mystd, length.out=sampleSize)


## Creates an empty container to hold samples:
mySamples <- list()


## for loop creates 100 samples and stores them
for (i in 1:100) {
  mySamples[[i]] <- rnorm(sampleSize,  mean = mymean, sd = mystd)
}


par(mfcol=c(5,2))

for (i in 1:10) {
  hist(mySamples[[i]],main=paste("Sample", i) )
}

plot of chunk unnamed-chunk-1

### force ranges to match
for (i in 1:10) {
  mytitle <- paste("Sample", i, "mean", round(mean(mySamples[[i]]),2),"std.dev.", round(sd(mySamples[[i]]),2) )
  myhist <- hist(mySamples[[i]],main= mytitle, freq=F, xlim=c(-6,6), xlab="Normally Distributed Variable" )
}

plot of chunk unnamed-chunk-1

par(mfcol=c(1,1))



### sapply applies the mean function to each sample. The "s" in
### sapply stands for "simplify".
myMeans <- sapply(mySamples, mean)

h1 <- hist(myMeans, freq=FALSE, plot=TRUE, main=paste("Sampling Distribution of ",100," Samples"))

mtext(text=paste("True Mean = ", mymean,  "True Standard Deviation = ", mystd) , side=3)

### Note the range of the means:
summary(myMeans)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -0.14600 -0.05190 -0.00112 -0.00535  0.04040  0.12100
### Compare against the range in one sample, say the 5th
summary(mySamples[[5]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  -4.640  -1.060   0.014  -0.067   0.874   5.460
text(h1$breaks[1], 5, pos = 4, paste("Mean of Means =", round(mean(myMeans),3),"\n Std. of Means ", round(sd(myMeans),3)), xpd = TRUE)

plot of chunk unnamed-chunk-1

par(mfcol=c(2,1))

myhist <- hist(mySamples[[1]],main= mytitle, freq=F, xlim=c(-6,6), xlab="Normally Distributed Variable" )


hist(myMeans,freq=F, main=paste("Sampling Distribution of ",100," Samples"), xlim=c(-6,6))

plot of chunk unnamed-chunk-1

### Another way. write a function to create the data and draw it.
### Use lapply to call it over and over




sampleSize <- 500

par(mfrow=c(5,2))


createDist <-function(i){
  z <- rnorm(sampleSize,  mean = mymean, sd = mystd)
  title <- paste("Run", i, "Normal mean=",mymean," std.dev=",mystd)
  hist(z,breaks=20,main=title)
}

createDist(1)
createDist(2)
createDist(3)
createDist(4)
createDist(5)
createDist(6)
createDist(7)
createDist(8)
createDist(9)
createDist(10)

plot of chunk unnamed-chunk-1

lapply(1:10,createDist);

plot of chunk unnamed-chunk-1

## [[1]]
## $breaks
##  [1] -5.0 -4.5 -4.0 -3.5 -3.0 -2.5 -2.0 -1.5 -1.0 -0.5  0.0  0.5  1.0  1.5
## [15]  2.0  2.5  3.0  3.5  4.0
## 
## $counts
##  [1]  2  2  4  5 14 20 28 54 70 53 70 58 37 24 27 14 10  8
## 
## $density
##  [1] 0.008 0.008 0.016 0.020 0.056 0.080 0.112 0.216 0.280 0.212 0.280
## [12] 0.232 0.148 0.096 0.108 0.056 0.040 0.032
## 
## $mids
##  [1] -4.75 -4.25 -3.75 -3.25 -2.75 -2.25 -1.75 -1.25 -0.75 -0.25  0.25
## [12]  0.75  1.25  1.75  2.25  2.75  3.25  3.75
## 
## $xname
## [1] "z"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
## 
## [[2]]
## $breaks
##  [1] -4.5 -4.0 -3.5 -3.0 -2.5 -2.0 -1.5 -1.0 -0.5  0.0  0.5  1.0  1.5  2.0
## [15]  2.5  3.0  3.5  4.0  4.5  5.0
## 
## $counts
##  [1]  1  5  4 13 25 30 49 68 76 74 45 45 30 12 11  5  4  2  1
## 
## $density
##  [1] 0.004 0.020 0.016 0.052 0.100 0.120 0.196 0.272 0.304 0.296 0.180
## [12] 0.180 0.120 0.048 0.044 0.020 0.016 0.008 0.004
## 
## $mids
##  [1] -4.25 -3.75 -3.25 -2.75 -2.25 -1.75 -1.25 -0.75 -0.25  0.25  0.75
## [12]  1.25  1.75  2.25  2.75  3.25  3.75  4.25  4.75
## 
## $xname
## [1] "z"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
## 
## [[3]]
## $breaks
##  [1] -4.5 -4.0 -3.5 -3.0 -2.5 -2.0 -1.5 -1.0 -0.5  0.0  0.5  1.0  1.5  2.0
## [15]  2.5  3.0  3.5  4.0  4.5
## 
## $counts
##  [1]  1  3  8 19 13 44 34 47 71 81 61 49 33 13 11 10  1  1
## 
## $density
##  [1] 0.004 0.012 0.032 0.076 0.052 0.176 0.136 0.188 0.284 0.324 0.244
## [12] 0.196 0.132 0.052 0.044 0.040 0.004 0.004
## 
## $mids
##  [1] -4.25 -3.75 -3.25 -2.75 -2.25 -1.75 -1.25 -0.75 -0.25  0.25  0.75
## [12]  1.25  1.75  2.25  2.75  3.25  3.75  4.25
## 
## $xname
## [1] "z"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
## 
## [[4]]
## $breaks
##  [1] -4.5 -4.0 -3.5 -3.0 -2.5 -2.0 -1.5 -1.0 -0.5  0.0  0.5  1.0  1.5  2.0
## [15]  2.5  3.0  3.5  4.0  4.5
## 
## $counts
##  [1]  1  3  4 11 21 37 44 65 78 61 59 44 28 26 12  2  2  2
## 
## $density
##  [1] 0.004 0.012 0.016 0.044 0.084 0.148 0.176 0.260 0.312 0.244 0.236
## [12] 0.176 0.112 0.104 0.048 0.008 0.008 0.008
## 
## $mids
##  [1] -4.25 -3.75 -3.25 -2.75 -2.25 -1.75 -1.25 -0.75 -0.25  0.25  0.75
## [12]  1.25  1.75  2.25  2.75  3.25  3.75  4.25
## 
## $xname
## [1] "z"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
## 
## [[5]]
## $breaks
##  [1] -4.0 -3.5 -3.0 -2.5 -2.0 -1.5 -1.0 -0.5  0.0  0.5  1.0  1.5  2.0  2.5
## [15]  3.0  3.5  4.0  4.5  5.0  5.5
## 
## $counts
##  [1]  4  5 15 21 31 48 47 71 67 56 62 31 16 14  7  4  0  0  1
## 
## $density
##  [1] 0.016 0.020 0.060 0.084 0.124 0.192 0.188 0.284 0.268 0.224 0.248
## [12] 0.124 0.064 0.056 0.028 0.016 0.000 0.000 0.004
## 
## $mids
##  [1] -3.75 -3.25 -2.75 -2.25 -1.75 -1.25 -0.75 -0.25  0.25  0.75  1.25
## [12]  1.75  2.25  2.75  3.25  3.75  4.25  4.75  5.25
## 
## $xname
## [1] "z"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
## 
## [[6]]
## $breaks
##  [1] -5.0 -4.5 -4.0 -3.5 -3.0 -2.5 -2.0 -1.5 -1.0 -0.5  0.0  0.5  1.0  1.5
## [15]  2.0  2.5  3.0  3.5  4.0  4.5
## 
## $counts
##  [1]  1  2  2  5 17 20 29 37 60 71 62 59 53 38 19 13  8  3  1
## 
## $density
##  [1] 0.004 0.008 0.008 0.020 0.068 0.080 0.116 0.148 0.240 0.284 0.248
## [12] 0.236 0.212 0.152 0.076 0.052 0.032 0.012 0.004
## 
## $mids
##  [1] -4.75 -4.25 -3.75 -3.25 -2.75 -2.25 -1.75 -1.25 -0.75 -0.25  0.25
## [12]  0.75  1.25  1.75  2.25  2.75  3.25  3.75  4.25
## 
## $xname
## [1] "z"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
## 
## [[7]]
## $breaks
##  [1] -5.0 -4.5 -4.0 -3.5 -3.0 -2.5 -2.0 -1.5 -1.0 -0.5  0.0  0.5  1.0  1.5
## [15]  2.0  2.5  3.0  3.5  4.0  4.5  5.0  5.5
## 
## $counts
##  [1]  1  2  1  5 10 19 36 38 51 79 67 53 47 38 32 13  5  2  0  0  1
## 
## $density
##  [1] 0.004 0.008 0.004 0.020 0.040 0.076 0.144 0.152 0.204 0.316 0.268
## [12] 0.212 0.188 0.152 0.128 0.052 0.020 0.008 0.000 0.000 0.004
## 
## $mids
##  [1] -4.75 -4.25 -3.75 -3.25 -2.75 -2.25 -1.75 -1.25 -0.75 -0.25  0.25
## [12]  0.75  1.25  1.75  2.25  2.75  3.25  3.75  4.25  4.75  5.25
## 
## $xname
## [1] "z"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
## 
## [[8]]
## $breaks
##  [1] -6.0 -5.5 -5.0 -4.5 -4.0 -3.5 -3.0 -2.5 -2.0 -1.5 -1.0 -0.5  0.0  0.5
## [15]  1.0  1.5  2.0  2.5  3.0  3.5  4.0  4.5
## 
## $counts
##  [1]  1  0  0  1  7  8 12 18 37 43 57 72 72 60 41 27 24  9  7  1  3
## 
## $density
##  [1] 0.004 0.000 0.000 0.004 0.028 0.032 0.048 0.072 0.148 0.172 0.228
## [12] 0.288 0.288 0.240 0.164 0.108 0.096 0.036 0.028 0.004 0.012
## 
## $mids
##  [1] -5.75 -5.25 -4.75 -4.25 -3.75 -3.25 -2.75 -2.25 -1.75 -1.25 -0.75
## [12] -0.25  0.25  0.75  1.25  1.75  2.25  2.75  3.25  3.75  4.25
## 
## $xname
## [1] "z"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
## 
## [[9]]
## $breaks
##  [1] -5.0 -4.5 -4.0 -3.5 -3.0 -2.5 -2.0 -1.5 -1.0 -0.5  0.0  0.5  1.0  1.5
## [15]  2.0  2.5  3.0  3.5  4.0  4.5  5.0
## 
## $counts
##  [1]  2  2  3  1 14 29 30 45 57 71 67 56 57 26 19  8  8  3  1  1
## 
## $density
##  [1] 0.008 0.008 0.012 0.004 0.056 0.116 0.120 0.180 0.228 0.284 0.268
## [12] 0.224 0.228 0.104 0.076 0.032 0.032 0.012 0.004 0.004
## 
## $mids
##  [1] -4.75 -4.25 -3.75 -3.25 -2.75 -2.25 -1.75 -1.25 -0.75 -0.25  0.25
## [12]  0.75  1.25  1.75  2.25  2.75  3.25  3.75  4.25  4.75
## 
## $xname
## [1] "z"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
## 
## [[10]]
## $breaks
##  [1] -5.5 -5.0 -4.5 -4.0 -3.5 -3.0 -2.5 -2.0 -1.5 -1.0 -0.5  0.0  0.5  1.0
## [15]  1.5  2.0  2.5  3.0  3.5  4.0  4.5  5.0
## 
## $counts
##  [1]  2  0  1  5  3 13 29 35 53 52 58 56 63 40 47 24 12  4  2  0  1
## 
## $density
##  [1] 0.008 0.000 0.004 0.020 0.012 0.052 0.116 0.140 0.212 0.208 0.232
## [12] 0.224 0.252 0.160 0.188 0.096 0.048 0.016 0.008 0.000 0.004
## 
## $mids
##  [1] -5.25 -4.75 -4.25 -3.75 -3.25 -2.75 -2.25 -1.75 -1.25 -0.75 -0.25
## [12]  0.25  0.75  1.25  1.75  2.25  2.75  3.25  3.75  4.25  4.75
## 
## $xname
## [1] "z"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
par(mfrow=c(1,1))









### Another way to show a sampling distribution. This uses R's built in "Replicate"


sampleSize <- 10000

repls <- 500

mytitle <- paste("Replications=",repls,"SampleSize=",sampleSize,"Mean=",mymean,"Std.Dev=",mystd)

means <- replicate(repls,mean(rnorm(sampleSize,mean=mymean,sd=mystd)))

myhist <- hist(means, main=mytitle, breaks=15)

# Display some results in text inside the histo

# I know there should be a better way, but don't know it

# Note that in text, \n has a "next line" effect

mylabel <- paste("E(x)=",mymean,"\nobserved

mean=",round(mean(means),2),"\nobserved std.dev \nof means=",round(sd(means),2))

# This puts text into the histogram, guessing

# the positions from the 6'th bin's position

text(myhist$breaks[6],.8*max(myhist$counts),mylabel ,pos=2)

plot of chunk unnamed-chunk-1

sampleSize <- 10000

repls <- 500

mytitle <- paste("uniforms")

means <- replicate(repls,mean(runif(sampleSize,min=0,max=222)))

par(mfcol=c(5,2))

for (i in 1:10)
hist(runif(sampleSize, min=0, max=222))

plot of chunk unnamed-chunk-1

##dev.new()

myhist <- hist(means, main=mytitle, breaks=15)

# Display some results in text inside the histo

# I know there should be a better way, but don't know it

# Note that in text, \n has a "next line" effect

mylabel <- paste("E(x)=",mymean,"\nobserved mean=",round(mean(means),2),"\nobserved std.dev \nof means=",round(sd(means),2))

# This puts text into the histogram, guessing

# the positions from the 6'th bin's position

text(myhist$breaks[4], .8*max(myhist$counts), mylabel ,pos=2)


## TODO: superimpose Normal() on that histogram

plot of chunk unnamed-chunk-1