Figure 8-1: A Line Plot with Legend

Portfolio Categories: All Graphics and SGR Book Graphics.

fig-8-1-ngramplot



library(RCurl)                         # Use the RCurl package to enable
                                       #   reading https: web pages

# Search parameters: two phrases to compare and the range of years.

p1 = "R package"                       # Phrase 1: enter search words
p2 = "SAS output"                      # Phrase 2: enter search words
y1 = "1970"                            # Set start and end years for search
y2 = "2008"                            # Must be between 1800 and 2008

# Each search term occurs twice in the URL, encoded in two different ways.
# In the first instance (the "content" parameter) spaces are replaced
# with "+" signs; this is done in pA and pB.
# In the second instance (the "direct_url" parameter) spaces are replaced
# with "%20", the percent-encoded form of a space; this is done in pC
# and pD.
# fixed = TRUE makes gsub treat the pattern as a literal string.

pA = gsub(pattern = " ", replacement = "+", x = p1, fixed = TRUE)
pB = gsub(pattern = " ", replacement = "+", x = p2, fixed = TRUE)
pC = gsub(pattern = " ", replacement = "%20", x = p1, fixed = TRUE)
pD = gsub(pattern = " ", replacement = "%20", x = p2, fixed = TRUE)

# Paste the years and the encoded search terms into the standard, if
# somewhat ungainly, ngram URL. The query string must not contain spaces:
# "&year_end=" and "&direct_url=" are written with no blanks around "="
# so that the parameter names and values are passed exactly as intended.

searchURL = paste0("https://books.google.com/ngrams/graph?content=",
  pA, "%2C", pB, "&year_start=", y1, "&year_end=", y2,
  "&corpus=15&smoothing=3&direct_url=t1%3B%2C",
  pC, "%3B%2Cc0%3B.t1%3B%2C", pD, "%3B%2Cc0")

# Download the HTML of the ngram results page as one character string.
# SSL peer verification is switched off for this request.
# NOTE(review): disabling verification weakens HTTPS; confirm it is
# still required in the environment where this script runs.
ngramdata = getURL(url = searchURL, ssl.verifyPeer = FALSE)

# Split the page text wherever a '"timeseries": [' label occurs and
# flatten the result into a plain character vector. The first element
# holds whatever precedes the first label, so it is dropped.
data1 = unlist(strsplit(ngramdata, split = '"timeseries": \\['))
data2 = data1[-1]

# Cut each remaining piece at every "]" and flatten again, so the text
# ahead of a closing bracket is separated from the text that follows it.
data3 = unlist(strsplit(data2, split = "\\]"))

substr(data3, 1, 50)                   # Preview 50 chars of each piece

# Keep pieces 1 and 3 only (presumably the number lists of the two
# series; confirm against the preview printed above).
data4 = data3[c(1, 3)]

# Break each retained text piece into its comma-separated values.
data5 = strsplit(data4, split = ",")

# Convert the values of each series to numbers and scale by 100 so
# that they are expressed as percentages.
v1 = 100 * as.numeric(data5[[1]])
v2 = 100 * as.numeric(data5[[2]])

v1[1:3]                                # Peek at first 3 values of v1
v2[1:3]                                # Peek at first 3 values of v2

# Build the sequence of years, one per step from start year to end year.
year1 = as.numeric(y1)
year2 = as.numeric(y2)
years = seq(from = year1, to = year2, by = 1)

# Assemble the data frame and label its columns: "Year" followed by the
# two search phrases.
ngram = setNames(data.frame(years, v1, v2), c("Year", p1, p2))

head(ngram)                            # Print the first few rows

# Plot the ngram data: both series as lines on one set of axes,
# written to a PNG file.

png(filename = "illustrations/fig-8-1-ngramplot.png",
  units = "in",                        # Set measurements in inches
  res = 1200,                          # Set resolution at 1200dpi
  width = 6,                           # Width at 6 inches
  height = 4)                          # Height at 4 inches

# Column 3 of ngram holds the phrase 2 (p2) series and column 2 holds
# the phrase 1 (p1) series. The y limits are set from both columns;
# otherwise plot() scales the axis to the column 3 series alone and
# the overlaid series is clipped wherever it exceeds that range.

plot(ngram$Year, ngram[,3],            # Plot the phrase 2 series
  type = "l",                          #   using a line plot
  lwd = 2,                             #   with line width = 2
  col = "gray",                        #   and a gray line
  ylim = range(ngram[,2], ngram[,3],   # Y range covers both series
    na.rm = TRUE),                     #   ignoring missing values
  xlab = "Year",                       # Set X axis label
  ylab = NA,                           # No Y axis label
  yaxt = "n",                          # Turn off automatic y axis values
  xaxt = "n")                          # Turn off automatic x axis values

points(ngram$Year, ngram[,2],          # Overlay the phrase 1 series
  type = "l",                          #   using a line plot
  lwd = 2,                             #   with line width = 2
  col = "black")                       #   and a black line

axis(side = 1,                         # Set the labels for the X axis
  at = seq(min(ngram$Year),            #   from minimum year
    max(ngram$Year), by = 5))          #   to max year by 5
legend("topleft",                      # Add a legend at top left corner
  inset = .025,                        #    inset by .025
  legend = c(p2, p1),                  # Labels: phrase 2 first, then 1
  fill = c("gray", "black"))           # Same order: p2 gray, p1 black

dev.off()                              # Close device and write png file