## Title: plot-drawHist-01.R
## Author: Paul Johnson
## Date: 2013-05-17
## Description: A function to draw histograms and superimpose the normal
## probabilities. Leaves open all customizations that might be applied to
## histograms.

## 2013-05-17. Corrections needed b/c R-3.0 changed return from hist

## Things that should be at the top of any worthwhile R program
pdf.options(onefile=F, family="Times", paper="special", height=4, width=6, pointsize=6)
ps.options(horizontal=F, onefile=F, family="Times", paper="special", height=4, width=6)
options(papersize="special")


## We like a certain type of histogram to demonstrate the Central Limit Theorem.

## Here's the basic idea, you get te general idea of what we are aiming at.
library(rockchalk)
## Loading required package: MASS
## Loading required package: car
## Loading required package: lme4
## Loading required package: Matrix
## Loading required package: methods
## Loading required package: Rcpp
## 
## Attaching package: 'rockchalk'
## 
## The following object is masked from 'package:MASS':
## 
##     mvrnorm
meansPoisson <- sapply(rep(1, 1000), function(param){ mean(rpois(10, lambda = param)) })
hist(meansPoisson, prob = T, main="The Simulated Sampling Distribution")
lines(density(meansPoisson), col = "red", lty= 2, lwd = 3)
m1 <- round(mean(meansPoisson),3)
sd1 <-  round(sd(meansPoisson),3)
curve(dnorm(x, m = m1 , sd = sd1), from = min(meansPoisson), to = max(meansPoisson), add = TRUE)
legend("topleft", c("Normal", "Kernel Density"), lty=1:2, col = 1:2)
legend("topright", c(paste("Obs mean=", m1), paste("Obs s.d.=", sd1)))

plot of chunk unnamed-chunk-1

## Its difficult to explain all those steps, and even more difficult to customize
## the output to have all of the information we might like. So I offere
## this fancied up function.

## There's a bunch of detail inside here because I want the user to still
## be able to use the arguments of a histogram. Those go into the "..." argument,
## and I have to sort those and apply them when relevant.

## So, in the following, the legal arguments are ALL of the ones that are valid
## for hist (see ?hist). And, as usual, I'm imposed my tastes on color and space.


drawHist <- function(x, ...){
    dots <- list(...)

    ## These are legal argument names we need to take from ...
    ## if the user supplied them.
    histNames <- c("breaks", "nclass", "include.lowest", "right")
    argsForHist1 <- dots[ names(dots) [names(dots) %in% histNames]]
    assign(deparse(substitute(x)), x)
    argsForHist1[["x"]] <- as.name(deparse(substitute(x)))
    argsForHist1$plot <- FALSE

    if(is.null(dots$breaks)){
        dots$breaks <- 100
    }
    if ((length(dots$breaks) == 1) & is.numeric(dots$breaks)){
        argsForHist1$breaks <- min(dots$breaks, length(unique(x))+1)
    }

    h1 <- do.call("hist", argsForHist1)
    cr <- range(x)

    ## Unless the ... or "h1" arguments say otherwise, use these:
    defaults <- list(prob = TRUE,
                     main = "", border = gray(.80), xlim = extendrange(cr,f=0.10),
                     ylim = rockchalk::magRange(h1$density, c(1,1.2)),
                     ylab = "density")
    defaults$xlab <- gsub(".*\\$", "", deparse(substitute(x)))
    defaults[["x"]] <- as.name(deparse(substitute(x)))

    ## Battle through the ... arguments. Find if any are legal hist arguments
    ## and insert them.
    namesOverlap <- names(dots) %in% names(formals(hist.default))
    dotsForHist <- dots[ names(dots)[namesOverlap] ]
    defaults <- modifyList(defaults, dotsForHist)

    ## Problem: Must take notice of breaks calculated by h1.
    defaults <- modifyList(defaults, list("breaks" = h1$breaks,
                                            "include.lowest" = h1$include.lowest,
                                            "right" = h1$right,
                                            "nclass" = h1$nclass))

    callhist <- do.call("hist", defaults)

    lines(density(x), col="red", lty=2, lwd=2)

    ind <- seq(cr[1], cr[2], length.out=100)
    cm1 <- round(mean(x), 3)
    cs1 <- round(sd(x), 3)
    nprob <- dnorm(ind, m = cm1, s = cs1)
    lines(ind, nprob, lty = 1, col = "black")
    nlab <-   bquote(paste("Normal(", .(round(cm1,3)),",", .(round(cs1,3))^2,")"))
    legend("topleft",legend=c("Kernel Density", as.expression(nlab)), lty=c(2,1), col=c("red","black"), lwd=c(1.5,1))
    legend("left", legend=c(paste("Obs. mean=", cm1), paste("Obs. s.d.=",cs1)))
    invisible(callhist)
}



meansNormal <- sapply(rep(0, 1000), function(mu){ mean(rnorm(1500, mu, sd = 1.3)) })

drawHist(meansNormal)

plot of chunk unnamed-chunk-1

drawHist(meansNormal, xlab = "someXvar", border = "red", breaks = 10)

plot of chunk unnamed-chunk-1

drawHist(meansNormal, xlab = "Awsomer Data", ylab = "Secret Meaning",  border = "purple", breaks = 34)

plot of chunk unnamed-chunk-1

gammaSample <- rgamma(1000, scale = 2, shape = 3)
drawHist(gammaSample)

plot of chunk unnamed-chunk-1

poisSample <- rpois(1000, l = 1)


drawHist(poisSample)
## It took some work to re-design this to it ignores silly
## requests for too many breaks. It now (rightly) refuses to
## draw more bars than there are unique observed values
drawHist(poisSample, breaks = 6)

plot of chunk unnamed-chunk-1

drawHist(poisSample, breaks = 100)

poisSample <- rpois(1000, l = 7)
drawHist(poisSample)

plot of chunk unnamed-chunk-1

drawHist(poisSample, breaks = 6)

plot of chunk unnamed-chunk-1

drawHist(poisSample, breaks = 100)

plot of chunk unnamed-chunk-1

meansPoisson <- sapply(rep(1, 1000), function(param){ mean(rpois(10, lambda = param)) })

hist(meansPoisson, prob = T, main="The Usual Histogram People Would Make")
lines(density(meansPoisson), col = "red", lty= 2, lwd = 3)
curve(dnorm(x, m = mean(meansPoisson), sd = sd(meansPoisson)), from = 0, to = 2.5, add = TRUE)

plot of chunk unnamed-chunk-1

hist(meansPoisson, prob = T, main="Slightly More Elaborate Version")
lines(density(meansPoisson), col = "red", lty= 2, lwd = 3)
m1 <- round(mean(meansPoisson),3)
sd1 <-  round(sd(meansPoisson),3)
curve(dnorm(x, m = m1 , sd = sd1), from = min(meansPoisson), to = max(meansPoisson), add = TRUE)
legend("topleft", c("Normal", "Kernel Density"), lty=1:2, col = 1:2)
legend("topright", c(paste("Obs mean=", m1), paste("Obs s.d.=", sd1)))

plot of chunk unnamed-chunk-1

## Now compare to my super drawHist
## To create device for comparison,
## dev.new()

drawHist(meansPoisson, main = "My drawHist Proposal")

plot of chunk unnamed-chunk-1

drawHist(meansPoisson, breaks = 20)

plot of chunk unnamed-chunk-1

drawHist(meansPoisson, breaks = 10)

plot of chunk unnamed-chunk-1

meansPoisson <- sapply(rep(1, 1000), function(param){ mean(rpois(1500, lambda = param)) })
drawHist(meansPoisson)

plot of chunk unnamed-chunk-1

meansPoisson <- sapply(rep(1, 1000), function(param){ mean(rpois(1500, lambda = param)) })
drawHist(meansPoisson)

plot of chunk unnamed-chunk-1

## ## Previous is complicated because we have to run histogrma
## ## two times, and the user parameters have to be respected, but (in one case)
## ## corrected. Its easy to get it wrong.

## ## The first hist run gets the description of the data. I view that,
## ## and adjust the final  plot in light of it.

## ## It seems there should be a more direct route, something like
## ##
## ## h1 <- hist(x, breaks = 10, plot = FALSE)
## ## plot(h1)

## ## That ends up calling graphics:::plot.histogram(h1). I have tried
## ## lots of ways to customize the output of that plot, but they are all
## ## rejected. It just produces whatever plot it wants to.

## ## This runs, but not well
## drawHist2 <- function(x, ...){
##     dots <- list(...)

##     ## These are legal argument names we need to take from ...
##     ## if the user supplied them.
##     histNames <- c("breaks", "nclass", "include.lowest", "right")
##     argsForHist1 <- dots[ names(dots) [names(dots) %in% histNames]]
##     assign(deparse(substitute(x)), x)
##     argsForHist1[["x"]] <- as.name(deparse(substitute(x)))
##     argsForHist1$plot <- FALSE

##     if(!is.null(dots$breaks)){
##         if (is.numeric(dots$breaks)){
##              if (length(dots$breaks) == 1){
##                  if (dots$breaks > 20){
##                      argsForHist1$breaks <- length(unique(x))+1
##                  }
##              }
##          }
##     }


##     ##h1 <- hist(x, plot = FALSE)
##     h1 <- do.call("hist", argsForHist1)
##     cr <- range(x)

##     ## Unless the ... or "h1" arguments say otherwise, use these:
##     defaults <- list(main = "", prob = T, border = gray(.80), xlim = extendrange(cr,f=0.10),
##                      ylim = rockchalk::magRange(h1$densities, c(1,1.2)),
##                      ylab = "densityddd")
##     defaults$xlab <- gsub(".*\\$", "", deparse(substitute(x)))

##     ## Battle through the ... arguments. Find if any are legal hist arguments
##     ## and insert them.
##     namesOverlap <- names(dots) %in% names(formals(graphics:::plot.histogram))
##     dotsForHist <- dots[ names(dots)[namesOverlap] ]
##     defaults <- modifyList(defaults, dotsForHist)

##    ## do.call("plot", modifyList(h1, defaults))

##     mycall <- call("plot", modifyList(h1, defaults))
##     eval(mycall)

##     lines(density(x), col="red", lty=2, lwd=2)

##     ind <- seq(cr[1], cr[2], length.out=100)
##     cm1 <- round(mean(x), 3)
##     cs1 <- round(sd(x), 3)
##     nprob <- dnorm(ind, m = cm1, s = cs1)
##     lines(ind, nprob, lty = 1, col = "black")
##     nlab <-   bquote(paste("Normal(", .(round(cm1,3)),",", .(round(cs1,3))^2,")"))
##     legend("topleft",legend=c("Kernel Density", as.expression(nlab)), lty=c(2,1), col=c("red","black"), lwd=c(1.5,1))
##     legend("left", legend=c(paste("Obs. mean=", cm1), paste("Obs. s.d.=",cs1)))
## }


## drawHist2(meansPoisson)
## drawHist2(meansPoisson, breaks=100)
## drawHist2(meansPoisson, breaks=10)




## h1 <- hist(meansPoisson, plot = F)

## plot(h1, prob = T)