Figure 7-1: Histogram with Overlay

Portfolio Categories: All Graphics and SGR Book Graphics.

fig-7-1-custom function-BW


# A normal curve function ------------------------------------------------------

# A function to overlay a normal curve and shaded probability regions 
# (2-tailed) on a histogram.  The function has two inputs: a variable 
# and the desired critical probability level. Note that this function
# does not do any error checking and requires that there be no missing
# values. 
#    
hist.norm = function(v, pr){           # Set up function ----------------------+                
                                       #                                       |
  vname =                              # Get the variable name from the 2nd    |
    as.character(match.call())[2]      #   element in the match.call function  |
                                       #                                       |
  f.hist = hist(v,                     # Set up a histogram                    |
    xlab = vname,                      # Add var name as x axis label          |
    main = NA,                         # Turn off title                        |
    col = "light gray")                # Shade bars light gray                 |
                                       #                                       |
  p = pr/2                             # For 2-tailed divide critical prob     |
  ztop =                               # Identify the z score at the top       |
    mean(v) + qnorm(1 - p) * sd(v)     #                                       |
  zbot =                               # Identify the bottom z score           |
    mean(v) - qnorm(1 - p) * sd(v)     #                                       |
                                       #                                       |
  k = seq(min(f.hist$breaks),          # Set up sequence from bottom           |
    max(f.hist$breaks),                #  to top of histogram range            |
    length = 1000)                     #  with 1000 observations               |                        
  y1 = dnorm(k, mean(v), sd(v))        # Y from norm dist w/v mean & sd        |
  y2 = y1 *                            # Y2 - scale Y1 to proportions of       |
  (max(f.hist$count) / max(y1))        #   the histogram.                      |
                                       #                                       |
  lines(k, y2, lty = 2)                # Add dashed normal curve line          |
                                       #                                       |
  abline(v = mean(v),                  # Add vertical line at mean             |
    col = "darkgray", lwd = 3)         # Make the line gray and thicker        |
                                       #                                       |
  polygon(                             # Shade in the bottom prob region       |
    c(min(f.hist$breaks),              # x values for shaded region            |
      k[k < zbot], zbot, zbot),        #                                       |
	  c(0, y2[k < zbot],           # y values for shaded region            |
      max(y2[k < zbot]), 0),           #                                       |
	  col = "darkgray")            # Color of shaded region                |
                                       #                                       |
  polygon(                             # Shade in the top prob region          |
    c(ztop, ztop, k[k > ztop],         # x values for shaded region            |
      max(f.hist$breaks)),             #                                       |
    c(0, max(y2[k > ztop]),            # y values for shaded region            |
      y2[k > ztop], 0),                #                                       |
    col = "darkgray")                  # Color of shaded region                |
}                                      # End of hist.normal function ----------+ 

# Show the function at work

myVar = c(1,7,5,5,6,3,8,9,2,           # Create a variable with
  6,3,4,5,5,6,8,4,3,                   # a bunch of values.
  5,5,6,9,0,11,4,5,6,
  3,7,11,5,7,4,3,4,5)
pr = .05                               # Set probability interval of interest

png(filename = "illustrations/fig-7-1-custom function-BW.png",
  units = "in",                        # Set measurements in inches
  res = 1200,                          # Set resolution at 1200dpi
  width = 6,                           # Width at 6 inches
  height = 4)                          # Height at 4 inches

par(mai = c(1, 1, .25, .25))           # Set margins - no Title
                                   
hist.norm(myVar, .05)                  # Call function to overlay normal curve
                                       
dev.off()                              # Output png file