Read the preprocessed ESTC data table and load tools:

# Load libraries
library(ggplot2, quietly = TRUE)
library(tidyr, quietly = TRUE)
library(dplyr, quietly = TRUE)
library(reshape)
library(estc)
library(bibliographica)
# Restrict the preprocessed table to the documents of one country (USA)
sel.country <- "USA"
df <- df.preprocessed %>%
  filter(country == sel.country)
# Constant column of ones, one per document; it is summed later on to
# obtain document counts per group
df$unity <- rep_len(1, nrow(df))

We have `r nrow(df)` documents from `r sel.country`.

Most common authors from `r sel.country`

# Plot for the "author" field of df, with a country-specific title.
# NOTE(review): top_plot() is not defined in this file; it presumably comes
# from estc/bibliographica and shows the 20 most frequent entries -- confirm.
p <- top_plot(df, "author", 20) +
  ggtitle(paste("Most common authors from", sel.country)) +
  xlab("") +
  ylab("Documents")
print(p)

Top titles

# Plot for the "title" field of df, with a country-specific title.
# NOTE(review): top_plot() is not defined in this file; it presumably comes
# from estc/bibliographica and shows the 20 most frequent entries -- confirm.
p <- top_plot(df, "title", 20) +
  ggtitle(paste("Most common titles from", sel.country)) +
  xlab("") +
  ylab("Documents")
print(p)

Historical publication volumes for top-5 publication places

The average annual output for each decade is shown by lines; the actual annual document counts are shown by points.

# Document counts per (decade, place): summing the constant `unity` column
# within each cell counts the documents that fall into it.
publications <- tapply(
  df$unity,
  list(df$publication_decade, df$publication_place),
  sum
)
publications[is.na(publications)] <- 0 # Empty cells mean zero documents
publications <- publications / 10 # Decadal sum -> average annual output

# Document counts per (year, place)
publications.annual <- tapply(
  df$unity,
  list(df$publication_year, df$publication_place),
  sum
)
publications.annual[is.na(publications.annual)] <- 0 # Set NAs to 0

# Keep only the top-5 publication places (w.r.t. total volume).
# head() returns fewer names when fewer than five places exist (indexing
# with 1:5 would produce NA names and an out-of-bounds column subscript),
# and drop = FALSE keeps the matrix shape even when one place remains, so
# melt() below still yields the three expected columns.
top_places <- head(names(rev(sort(colSums(publications)))), 5)
publications <- publications[, top_places, drop = FALSE]
publications.annual <- publications.annual[, top_places, drop = FALSE]

# Long format: one row per (time, place) combination
dfm <- melt(publications)
names(dfm) <- c("Time", "Place", "Documents")

dfm.annual <- melt(publications.annual)
names(dfm.annual) <- c("Time", "Place", "Documents")

# Decade averages as lines with points; annual counts overlaid as points
theme_set(theme_bw(20))
p <- ggplot(dfm, aes(x = Time, y = Documents, color = Place))
p <- p + geom_line() + geom_point()
p <- p + xlab("Year") + ylab("Publications per year")
p <- p + ggtitle(
  paste0("Publication activity ", min(dfm$Time), "-", max(dfm$Time))
)
p <- p + scale_color_manual(
  values = c("red", "blue", "darkgreen", "black", "pink")
)
p <- p + geom_point(
  data = dfm.annual,
  aes(x = Time, y = Documents, color = Place)
)
print(p)


COMHIS/estc documentation built on April 7, 2022, 4:53 p.m.