# Scotland publishing summaries
# Read the preprocessed ESTC data table and load tools:
# Load libraries
library(ggplot2)
library(tidyr)
library(dplyr)
library(reshape2)
library(estc)
library(bibliographica)
library(dplyr)

# Pick Scotland documents only
selected_place <- "Edinburgh"
sel.country <- "Scotland"
timeinterval <- 5 # Use 5 year intervals

# Documents from the selected country, published in 1470 or later
dfo <- filter(df.preprocessed, country == sel.country & publication_year >= 1470)
dfo$unity <- rep(1, nrow(dfo)) # One unit per document, so sums are counts

# Average annual output of a single publication place per time interval.
#
# docs:     data frame with the columns publication_year, publication_place
#           and unity (one unit per document).
# place:    name of the publication place to extract.
# interval: interval length in years; publication years are rounded to the
#           nearest multiple of this value.
#
# Returns a data frame with one row per time interval observed in docs (for
# any publication place) and the columns Time, date (both the interval
# midpoint), a column named after place, and varname (both the number of
# documents per year). Intervals in which the place has no documents get 0.
# Stops with an error if the place does not occur in docs at all.
place_annual_output <- function(docs, place, interval) {
  timeunit <- round(docs$publication_year / interval) * interval

  # Document counts in a (time interval x publication place) table
  counts <- tapply(docs$unity, list(timeunit, docs$publication_place), sum)
  if (!place %in% colnames(counts)) {
    stop("No documents found for publication place: ", place, call. = FALSE)
  }
  counts[is.na(counts)] <- 0 # Set NAs to 0

  # Instead of the sum over the interval, use the average annual output
  per_year <- unname(counts[, place]) / interval
  time <- as.numeric(rownames(counts))

  out <- data.frame(Time = time, date = time)
  out[[place]] <- per_year
  out$varname <- per_year
  out
}

# All documents
dfs1 <- place_annual_output(dfo, selected_place, timeinterval)

# Documents whose title mentions "histor" (case-insensitive);
# documents with a missing title never match
is_history <- grepl("histor", tolower(dfo$title))
dfs2 <- place_annual_output(dfo[is_history, ], selected_place, timeinterval)


# Background shading: alternate plain and highlighted bands along the time
# axis so that the selected historical periods stand out in the figure
dfs <- dfs1
observed_dates <- dfs$date[!is.na(dfs$date)]

# First and last year of each highlighted period; every highlighted band
# runs from the first year to the year after the last one
period_first <- c(1642, 1660, 1688, 1706, 1776)
period_last <- c(1651, 1660, 1689, 1707, 1776)

# Band boundaries: start of the data, the interleaved period limits
# (rbind() + c() flattens them pairwise), and the end of the data
rect_left <- c(min(observed_dates),
               rbind(period_first, period_last + 1),
               max(observed_dates))

# One rectangle between each pair of consecutive boundaries, spanning the
# full range of the plotted values
rectangles <- data.frame(
  xmin = head(rect_left, -1),
  xmax = tail(rect_left, -1),
  ymin = min(dfs$varname),
  ymax = max(dfs$varname))

# The first band is plain; after that the bands alternate
rectangles$shade <- rep_len(c("White", "Highlight"), nrow(rectangles))

# Draw Figure: average annual output of the selected place over time, for
# all documents (black) and for the title subset in dfs2 (dark gray), on
# top of the shaded background bands
theme_set(theme_bw(20))

p <- ggplot() +
  # Background bands; the fill colours are named so that the mapping does
  # not depend on the alphabetical order of the shade labels
  geom_rect(data = rectangles,
            aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax,
                fill = shade),
            alpha = 0.8) +
  scale_fill_manual(values = c(Highlight = "gray", White = "white")) +
  guides(fill = "none") +
  # All documents
  geom_line(data = dfs1, aes(x = date, y = varname), colour = "black") +
  geom_point(data = dfs1, aes(x = date, y = varname), colour = "black") +
  # Title subset
  geom_line(data = dfs2, aes(x = date, y = varname), colour = "darkgray") +
  geom_point(data = dfs2, aes(x = date, y = varname), colour = "darkgray") +
  # Axis break every 20 years over the range covered by the first series
  scale_x_continuous(breaks = seq(min(dfs1$date), max(dfs1$date), 20)) +
  # paste0() avoids the doubled space that paste() would add after the colon
  ggtitle(paste0("Julkaisutoiminta: ", selected_place)) +
  ylab("Dokumenttia / Vuosi") +
  # NOTE(review): intervals with zero output have no finite position on a
  # log axis, so those values are not drawn as points
  scale_y_log10()

print(p)


# COMHIS/estc documentation built on April 7, 2022, 4:53 p.m.