# Report-wide chunk default: echo the source code of every chunk.
knitr::opts_chunk$set(echo = TRUE)
# Switch all locale categories to "English"
# (NOTE(review): this locale name looks Windows-specific -- confirm).
Sys.setlocale("LC_ALL", "English")

\pagebreak{}

library(pander)
panderOptions("digits", 2)
library(memisc)
library(idep)
library(stringr)
library(dplyr)
library(ggplot2)
library(tidyr)
library(gridExtra)
# ---- Load and clean the aggregated data ----

# Load the aggregated data; the .Rdata file is expected to provide the data
# frame `isor` used by everything below. The umlaut is written as a Unicode
# escape so the path does not depend on the session's native encoding.
load("Z:/Gesch\u00e4ftsordnungen/Database/aggregats/isor.Rdata")

# Convert t_date to POSIXct and add one hour per unit of the trailing digit
# of t_id.
isor$t_date <-
  as.POSIXct(isor$t_date) +
  3600 * as.integer(str_extract(isor$t_id, "\\d$"))

# Keep rows dated 1945-01-01 or later (filter() also drops rows whose
# comparison result is NA).
isor <-
  isor %>%
  filter(t_date >= as.POSIXct("1945-01-01"))

# Drop the numbered corpus columns. A logical mask is used instead of
# -grep(): if no name matched, -integer(0) would select zero columns and
# silently remove every column.
isor <- isor[
  ,
  !grepl("_corp_\\d|_corp_mdf_\\d|_corp_del_\\d|_corp_ins_\\d", names(isor))
]
# ---- Shared plot styling ----

# Make the black-and-white theme the default for all following plots.
theme_set(theme_bw())

# Theme add-on: no axis titles, y tick labels rotated by 90 degrees and
# centred.
theme_stripped <- theme(
  axis.title.x = element_blank(),
  axis.title.y = element_blank(),
  axis.text.y  = element_text(angle = 90, vjust = 0.5, hjust = 0.5)
)

# Common x-axis window applied to every country plot.
xlim_time <- coord_cartesian(
  xlim = c(as.POSIXct("1944-12-31"), as.POSIXct("2011-08-01"))
)

# Single manual fill scale shared by the country plots and the legend panel.
# The names are the fill keys that the plotted data must use.
mega_scale <- scale_fill_manual(
  values = c(
    # change columns
    wds_ins = "#498000A0",
    wds_mdf = "#E7A350",
    wds_del = "#A81606A0",
    # corpus columns
    wds_corp_top_66 = "#F7FBFF",
    wds_corp_top_1  = "#6baed6",
    wds_corp_top_2  = "#9ecae1",
    wds_corp_top_3  = "#deebf7",
    wds_corp_top_4  = "#08519c",
    wds_corp_top_5  = "#4292c6",
    # keys used by the corpus part of the legend panel
    "misc."   = "#F7FBFF00",
    laws      = "#6baed6",
    special   = "#9ecae1",
    elections = "#deebf7",
    control   = "#08519c",
    pulbic    = "#4292c6",
    # keys used by the change part of the legend panel
    ins = "#498000A0",
    mdf = "#E7A350",
    del = "#A81606A0"
  )
)
# ---- Per-country summary table ----

# Number of distinct three-character prefixes among the `ctr` codes.
n_countries <- length(unique(substring(isor$ctr, 1, 3)))
ctr         <- unique(isor$ctr)
countries   <- unique(isor$country)
# Earliest and latest date in the data, formatted as "Month day, year".
min_date    <- format(min(isor$t_date, na.rm = TRUE), "%B %d, %Y")
max_date    <- format(max(isor$t_date, na.rm = TRUE), "%B %d, %Y")

# One row per country. Every column filled by the loop is created up front
# instead of appearing implicitly on the first `df$col[i] <- value`, which
# only worked because the first value was recycled over all rows.
df <- data.frame("country" = countries)
df$`N SO`  <- 0
df$`min y` <- "1000-01-01"
df$`max y` <- "1000-01-01"
df$chg     <- NA
df$`min w` <- NA
df$`max w` <- NA
df$wdns    <- NA

for (i in seq_along(countries)) {
  # Row mask for this country, computed once per iteration.
  in_country    <- isor$country == countries[i]
  country_dates <- isor$t_date[in_country]
  country_rel   <- isor$wds_clean_rel[in_country]

  df$`min y`[i] <- format(min(country_dates), "%Y")  # first year
  df$`max y`[i] <- format(max(country_dates), "%Y")  # last year
  df$`N SO`[i]  <- sum(in_country)                   # number of rows
  df$chg[i]     <- sum(isor$wds_chg[in_country], na.rm = TRUE)
  df$`min w`[i] <- min(country_rel, na.rm = TRUE)
  df$`max w`[i] <- max(country_rel, na.rm = TRUE)
  df$wdns[i]    <- max(isor$wdns[in_country], na.rm = TRUE)
}

# Span between first and last year per country.
years_covered <- as.numeric(df$`max y`) - as.numeric(df$`min y`)

# NOTE(review): the value is the reciprocal of (years / N SO), rounded before
# inverting, so it is Inf when that ratio rounds to 0 -- confirm that this
# matches what the column name is meant to express.
df$`ys per SO` <-
  1 / round(years_covered / df$`N SO`, 2)
# `chg` is already numeric, so no string round trip is needed here.
df$`chg per y` <-
  round(df$chg / years_covered)

# df <- df[order(-df$`max w`),]
rownames(df) <- NULL
#knitr::kable(df[-length(df[1,])])
  # ---- Legend panel shown to the right of every country plot ----
  # Two columns of labelled rectangles: corpus categories on the left and
  # change types on the right, coloured through `mega_scale`.

  t_size <- 3  # text size of the legend labels

  # Corpus categories: six stacked rectangles of equal height. The `fill`
  # values must be keys of `mega_scale`; "misc." carries its trailing dot and
  # "pulbic" keeps the scale's spelling, otherwise the rectangle would fall
  # back to the scale's NA colour. `label` holds the text that is displayed.
  legend_data <-
    data.frame(
      xmin  = 0.25,
      xmax  = 0.5,
      ymin  = seq(0, 1, 1/6)[1:6],
      ymax  = seq(0, 1, 1/6)[2:7],
      xmean = 0.375,
      ymean = 1/12 + seq(0, 1, 1/6)[1:6],
      label = as.factor(
        c("misc", "laws", "special", "elections", "control", "public")
      ),
      fill  = c("misc.", "laws", "special", "elections", "control", "pulbic")
    )

  # Change types: three stacked rectangles of equal height. `text` is the
  # caption drawn inside each rectangle.
  legend_data2 <-
    data.frame(
      xmin  = 0.55,
      xmax  = 0.8,
      ymin  = seq(0, 1, 1/3)[1:3],
      ymax  = seq(0, 1, 1/3)[2:4],
      xmean = 0.675,
      ymean = 1/6 + seq(0, 1, 1/3)[1:3],
      label = as.factor(c("ins", "mdf", "del")),
      fill  = c("ins", "mdf", "del"),
      text  = c("insertion", "modification", "deletion")
    )

  # Aesthetics refer to columns of the layer data instead of
  # `legend_data$...`, which ggplot2 discourages inside aes().
  p_legend <-
    ggplot() +
    # corpus legend
    geom_rect(
      data = legend_data,
      aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax, fill = fill)
    ) +
    geom_text(
      data = legend_data,
      aes(x = xmean, y = ymean, label = label),
      size = t_size, hjust = 0.5, vjust = 0.5, angle = 90
    ) +
    # change legend
    geom_rect(
      data = legend_data2,
      aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax, fill = fill)
    ) +
    geom_text(
      data = legend_data2,
      aes(x = xmean, y = ymean, label = text),
      size = t_size, hjust = 0.5, vjust = 0.5, angle = 90
    ) +
    mega_scale +
    # Strip everything except the rectangles and their captions.
    theme(
      legend.position = "none",
      axis.title      = element_blank(),
      axis.text       = element_blank(),
      axis.ticks      = element_blank(),
      axis.line       = element_blank(),
      panel.border    = element_blank(),
      panel.spacing   = unit(0, "lines"),  # replaces deprecated panel.margin
      plot.margin     = unit(c(0, 0, 0, 0), "cm")
    )
# ---- One plot per country code ----
# For every value of `ctr`: corpus word counts are drawn as stacked areas,
# yearly change totals as bars on top, and the shared legend panel is placed
# to the right.
CTR     <- unique(isor$ctr)
COUNTRY <- unique(isor$country)

for (i in seq_along(CTR)) {
  # Country name for this code, looked up in the data instead of taken by
  # position from COUNTRY: the positional pairing is only right when the
  # unique codes and the unique names line up one-to-one in the same order.
  country_name <- isor$country[match(CTR[i], isor$ctr)]

  # Change columns in long format, summed per year and change type.
  changes <-
    isor %>%
    select(t_id, ctr, t_date, wds_mdf, wds_ins, wds_del) %>%
    filter(ctr == CTR[i]) %>%
    gather(change, length, wds_ins, wds_mdf, wds_del)
  # Year = first run of digits in t_id; every year is placed on June 1st.
  changes$year   <- as.numeric(str_extract(changes$t_id, "\\d+"))
  changes$t_date <- as.POSIXct(paste0(changes$year, "-06-01"))
  changes <- aggregate(
    changes$length,
    by = list(changes$t_date, changes$change),
    sum
  )
  names(changes) <- c("t_date", "change", "length")
  # NOTE(review): sum() runs without na.rm, so a year/type group containing
  # any NA becomes NA and is then set to 0 here -- confirm this is intended.
  changes$length[is.na(changes$length)] <- 0

  # Corpus columns in long format, ordered by date and corpus name.
  corpus <-
    isor %>%
    select(
      t_id, ctr, t_date,
      wds_corp_top_66, wds_corp_top_1, wds_corp_top_2,
      wds_corp_top_3, wds_corp_top_4, wds_corp_top_5
    ) %>%
    filter(ctr == CTR[i]) %>%
    gather(
      corpus, length,
      wds_corp_top_66, wds_corp_top_1, wds_corp_top_2,
      wds_corp_top_3, wds_corp_top_4, wds_corp_top_5
    ) %>%
    arrange(t_date, corpus)

  # Label positions for the corpus categories at the latest date.
  # drop = FALSE keeps a one-column data frame even when `corpus` is a plain
  # data.frame, so the `$` assignments below work.
  # NOTE(review): corpus_label is computed but never added to the plot, and
  # its labels assume a particular row order of `corpus` -- confirm before
  # using it.
  corpus_label <-
    corpus[max(corpus$t_date) == corpus$t_date, "length", drop = FALSE]
  corpus_label$y <-
    c(0, cumsum(corpus_label$length)[-dim(corpus_label)[1]]) +
    corpus_label$length / 2
  corpus_label$x <- as.POSIXct("2012-01-01")
  corpus_label$label <-
    c("misc.", "laws", "spec.", "elec.", "control", "public")
  corpus_label$label[corpus_label$length < 100] <- ""

  p <-
    ggplot() +
    geom_area(
      data = corpus,
      aes(x = t_date, y = length, group = corpus, fill = corpus)
    ) +
    geom_bar(
      data = changes,
      aes(x = t_date, y = length, group = change, fill = change),
      stat = "identity"
    ) +
    # Country name in the upper left corner of the panel.
    annotate(
      "text",
      x = as.POSIXct("1947-01-01"), y = Inf,
      label = country_name,
      hjust = 0, vjust = 2, size = 6, color = "#3180AE"
    ) +
    mega_scale +
    guides(fill = "none") +  # `fill = FALSE` is deprecated in ggplot2
    theme_stripped +
    xlim_time + #ylim(0,45000) +
    geom_hline(yintercept = 0, color = "grey") +
    theme(
      #axis.text.x= element_text(size=5),
      axis.text.y = element_text(angle = 60)
    )

  grid.arrange(p, p_legend, ncol = 2, widths = c(13, 1))
  # Emits the literal text `\newline{} \n ` (a backslash followed by "n",
  # not a line break).
  cat("\\newline{} \\n ")
}

\pagebreak{}

dutiestodo

# Copy the report PDF to the shared project folder. The copy runs first and
# its result decides what is reported, so "file copied" is no longer printed
# when nothing was copied.
cat("==========================================================================")
pdf_copied <- file.copy(
  "C:/Dropbox/RPackages/idep/inst/reports/country_graphs.pdf",
  "Z:/Geschäftsordnungen/Papiere und Konferenzen/2015_corpus_coding/in_progress/country_graphs.pdf",
  overwrite = TRUE
)
if (isTRUE(pdf_copied)) {
  cat("file copied to: \nZ:/Geschäftsordnungen/Papiere und Konferenzen/2015_corpus_coding/in_progress/")
} else {
  warning(
    "country_graphs.pdf could not be copied to ",
    "Z:/Geschäftsordnungen/Papiere und Konferenzen/2015_corpus_coding/in_progress/",
    call. = FALSE
  )
}


petermeissner/idep documentation built on May 25, 2019, 1:53 a.m.