library(RCurl) # Use the RCurl package to enable
# reading https: web pages
# Search settings for the Google Books Ngram Viewer query.
p1 <- "R package"   # Phrase 1: enter words
p2 <- "SAS output"  # Phrase 2: enter words
y1 <- "1970"        # Start year of the search
y2 <- "2008"        # End year; both must be between 1800 and 2008

# The search terms occur twice in the URL. In the "content" parameter a
# space is written as "+" (pA, pB); in the "direct_url" parameter it is
# written as "%20", the percent-encoded form of a space (pC, pD).
pA <- gsub(pattern = " ", replacement = "+", x = p1, fixed = TRUE)
pB <- gsub(pattern = " ", replacement = "+", x = p2, fixed = TRUE)
pC <- gsub(pattern = " ", replacement = "%20", x = p1, fixed = TRUE)
pD <- gsub(pattern = " ", replacement = "%20", x = p2, fixed = TRUE)

# Paste the dates and modified search terms into the standard, if somewhat
# ungainly, ngram URL. The parameter names must be joined to their values
# with a bare "=": a space on either side would put a raw space into the
# URL and garble the year_end and direct_url parameters.
searchURL <- paste0(
  "https://books.google.com/ngrams/graph?content=",
  pA, "%2C", pB,
  "&year_start=", y1,
  "&year_end=", y2,
  "&corpus=15&smoothing=3&direct_url=t1%3B%2C",
  pC, "%3B%2Cc0%3B.t1%3B%2C", pD, "%3B%2Cc0"
)
# Download the Ngram Viewer page for searchURL as one HTML string.
# NOTE(review): SSL verification is turned off for this request; confirm
# that this is still required before reusing the code elsewhere.
ngramdata <- getURL(searchURL, ssl.verifyPeer = FALSE)

# Each data series in the page follows a '"timeseries": [' label, so
# splitting on that label leaves the page preamble in element 1 and one
# chunk per series after it.
data1 <- unlist(strsplit(ngramdata, split = '"timeseries": \\['))
data2 <- data1[-1]                        # Drop material before the data

# Within a chunk the numbers run up to the first "]". Taking the first
# piece of every chunk avoids relying on fixed positions in a flattened
# vector, which shift whenever the text between the series contains "]".
data3 <- strsplit(data2, split = "\\]")
data4 <- vapply(data3, function(piece) piece[1], character(1))
if (length(data4) < 2) {
  stop("Expected two timeseries in the ngram page but found ",
       length(data4), "; the page layout may have changed.",
       call. = FALSE)
}
data4 <- data4[1:2]                       # One series per search phrase
substr(data4, 1, 50)                      # Show 50 chars of each series

data5 <- strsplit(data4, split = ",")     # Split up data at commas
v1 <- as.numeric(data5[[1]]) * 100        # Convert to numeric and make %
v2 <- as.numeric(data5[[2]]) * 100        # Convert to numeric and make %
v1[1:3]                                   # Display first 3 obs in v1
v2[1:3]                                   # Display first 3 obs in v2

# Create the series of years covered by the search.
year1 <- as.numeric(y1)
year2 <- as.numeric(y2)
years <- seq(from = year1, to = year2, by = 1)

# Every year needs exactly one observation per phrase; fail with a clear
# message instead of letting data.frame() report a row-count mismatch.
if (length(v1) != length(years) || length(v2) != length(years)) {
  stop("Downloaded series have ", length(v1), " and ", length(v2),
       " observations but ", length(years), " years were requested.",
       call. = FALSE)
}

ngram <- data.frame(years, v1, v2)        # Combine in data frame
colnames(ngram) <- c("Year", p1, p2)      # Add variable labels
head(ngram)                               # Show the first few rows
# Plot the ngram data to a PNG file.
# NOTE(review): the "illustrations" directory is not created here, so it is
# presumably expected to exist already -- confirm.
png(filename = "illustrations/fig-8-1-ngramplot.png",
    units = "in",   # Set measurements in inches
    res = 1200,     # Set resolution at 1200dpi
    width = 6,      # Width at 6 inches
    height = 4)     # Height at 4 inches

# Both series share one set of axes, so the y range has to cover both;
# otherwise the overlaid series is clipped wherever it falls outside the
# range of the series passed to plot().
y_range <- range(ngram[, 2], ngram[, 3], na.rm = TRUE)

plot(ngram$Year, ngram[, 3],  # Plot the phrase 2 series (column 3)
     type = "l",              # using a line plot
     lwd = 2,                 # with line width = 2
     col = "gray",            # and a gray line
     xlab = "Year",           # Set X axis label
     ylab = NA,               # No Y axis label
     ylim = y_range,          # Leave room for both series
     yaxt = "n",              # Turn off automatic y axis values
     xaxt = "n")              # Turn off automatic x axis values
lines(ngram$Year, ngram[, 2], # Overlay the phrase 1 series (column 2)
      lwd = 2,                # with line width = 2
      col = "black")          # and a black line
axis(side = 1,                          # Set the labels for the X axis
     at = seq(min(ngram$Year),          # from minimum year
              max(ngram$Year), by = 5)) # to max year by 5
legend("topleft",                  # Add a legend at top left corner
       inset = .025,               # inset by .025
       legend = c(p2, p1),         # Phrase 2 is gray, phrase 1 is black
       fill = c("gray", "black"))  # Fill colors to match plot
dev.off()                          # Output png file
# Figure 8-1: A Line Plot with Legend
# Portfolio Categories: All Graphics and SGR Book Graphics.