# Packages
library(sfi)
library(webshot)
library(ggplot2)
library(dplyr)
library(plotly)
library(ggiraph)
library(scales)
library(tidyverse)
library(directlabels)
library(knitr)
library(Hmisc)
library(gridExtra)
library(RColorBrewer)
library(extrafont)
library(kableExtra)
library(grid)

# webshot::install_phantomjs()

loadfonts()
## Global options
# max.print is an integer option, so pass a number rather than the string "75".
options(max.print = 75)
# Default chunk options for every chunk of this document: code is not echoed,
# messages and warnings are suppressed, and figures are written to figures/
# as both PNG and cairo PDF at 300 dpi.
opts_chunk$set(
  echo = FALSE,
  cache = FALSE,
  prompt = FALSE,
  tidy = TRUE,
  comment = NA,
  message = FALSE,
  warning = FALSE,
  dpi = 300,
  dev = c("png", "cairo_pdf"),
  fig.pos = "!h",
  fig.path = "figures/"
)
opts_knit$set(width = 75)
options(xtable.comment = FALSE)

This R Markdown document generates the Frankenreiter figures.

\newpage

Frankenreiter 2 (version 1)

 # Figure 2 (version 1): scatter of every observation plus a smoothed band,
  # one facet per style measure.

  # Reshape f2 to long format: the columns from number_tokens through
  # mean_sent_length become key/value pairs.
  data <- tidyr::gather(all_data$frankenreiter$f2,
                        key = key,
                        value = value,
                        number_tokens:mean_sent_length)

  # Lookup table mapping raw column names to the facet titles shown in the plot.
  measure_labels <- c("ave_syllabels" = 'Mean syllables per word',
                      "mean_sent_length" = 'Mean sentence length',
                      "number_tokens" = 'Tokens per opinion',
                      "perc_differentwords2" = 'Type-token ratio')
  key_dict <- data.frame(key = names(measure_labels),
                         new_key = unname(measure_labels))
  data <- left_join(data, key_dict, by = 'key')

  # One axis break (and one vertical guide line) per decade, 1950 through 2010.
  decade_years <- seq(1950, 2010, by = 10)
  date_breaks <- as.Date(paste0(decade_years, '-01-01'))
  date_labels <- as.character(decade_years)

  g1 <- ggplot(data = data, mapping = aes(x = docdate, y = value)) +
    geom_point(pch = '.', color = 'black', size = 1, alpha = 1) +
    # linetype = 0 hides the fitted line, so only the filled band is drawn.
    geom_smooth(se = TRUE, linetype = 0, fill = 'darkgrey', alpha = 1) +
    geom_vline(xintercept = date_breaks, alpha = 0.3) +
    facet_wrap(~new_key, ncol = 2, scales = 'free_y') +
    scale_x_date(name = 'Date',
                 breaks = date_breaks,
                 labels = date_labels) +
    theme_sfi(lp = 'none',
              y_axis_title_style = 'bold',
              x_axis_title_style = 'bold',
              title_style = 'bold') +
    labs(x = '',
         y = '',
         title = 'Figure 1',
         subtitle = paste0('Development over time of four measures of style over time.',
                           '\n', 'Each opinion is represented by one point per panel'),
         caption = '*Lines smoothed using local regression') +
    theme(axis.text = element_text(size = 10, hjust = 1))

  g1

Frankenreiter 2 (version 2)

 # Figure 2 (version 2): yearly mean of each style measure with a ribbon
  # spanning the 25th to 75th percentile of that year.

  # Reshape f2 to long format: the columns from number_tokens through
  # mean_sent_length become key/value pairs.
  data <- all_data$frankenreiter$f2
  data <- data %>%
    tidyr::gather(key = key,
                  value = value,
                  number_tokens:mean_sent_length)

  # Relabel: map raw column names to the facet titles shown in the plot.
  key_dict <-
    data.frame(key = c("ave_syllabels",
                       "mean_sent_length",
                       "number_tokens",
                       "perc_differentwords2"),
               new_key = c('Mean syllables per word',
                           'Mean sentence length',
                           'Tokens per opinion',
                           'Type-token ratio'))
  data <- left_join(data, key_dict, by = 'key')

  # One axis break (and one vertical guide line) per decade, 1950 through 2010.
  date_breaks <- as.Date(paste0(seq(1950,
                                    2010,
                                    by = 10),
                                '-01-01'))
  date_labels <- as.character(seq(1950,
                                  2010,
                                  by = 10))

  # Aggregate to one row per year and measure. The quantiles are computed
  # before `value` is overwritten by its mean, because later summarise()
  # expressions see the columns created by earlier ones.
  x <- data %>%
    mutate(year = as.numeric(format(docdate, '%Y'))) %>%
    group_by(year, new_key) %>%
    summarise(p25 = quantile(value, 0.25, na.rm = TRUE),
              p75 = quantile(value, 0.75, na.rm = TRUE),
              value = mean(value, na.rm = TRUE)) %>%
    ungroup() %>%
    # Back to Date (1 January of each year) so scale_x_date() can be used.
    mutate(year = as.Date(paste0(year, '-01-01')))

  g2 <-
    ggplot(data = x,
           aes(x = year,
               y = value)) +
    geom_ribbon(aes(x = year,
                    ymin = p25,
                    ymax = p75),
                alpha = 0.6) +
    geom_point(size = 2,
               alpha = 1) +
    geom_vline(xintercept = date_breaks,
               alpha = 0.3) +
    facet_wrap(~new_key,
               ncol = 2,
               scales = 'free_y') +
    scale_x_date(name = 'Date',
                 breaks = date_breaks,
                 labels = date_labels) +
    theme_sfi(lp = 'none',
              y_axis_title_style = 'bold',
              x_axis_title_style = 'bold',
              title_style = 'bold') +
    # Subtitle and caption describe what is actually drawn: yearly means and
    # an interquartile ribbon (no regression is fitted in this chunk).
    labs(x = '',
         y = '',
         title = 'Figure 1',
         subtitle = paste0('Development over time of four measures of style over time',
                           '\n', 'Each point is the mean of all opinions in one year'),
         caption = '*Shaded band spans the 25th to 75th percentile of each year. Showing only mean points.') +
    theme(axis.text = element_text(size = 10, hjust = 1))

  g2

Frankenreiter 3 (version 1)

# Figure 3 (version 1): similarity against diff, every observation drawn as a
# point, with the loess band underneath the title/caption styling.
data <- all_data$frankenreiter$f3

g1 <- ggplot(data = data, mapping = aes(x = diff, y = similarity)) +
  geom_point(color = 'black', size = 1, alpha = 0.9) +
  # linetype = 0 hides the fitted curve; only the shaded band is shown.
  geom_smooth(method = 'loess', fill = 'black', alpha = 0.3, linetype = 0) +
  theme_sfi(lp = 'none',
            y_axis_title_style = 'bold',
            x_axis_title_style = 'bold',
            title_style = 'bold') +
  labs(title = 'Figure 3 Similarity and temporal distance',
       caption = '*Std error from local regression.',
       x = '',
       y = '') +
  theme(axis.text = element_text(size = 10, hjust = 1))

g1

Frankenreiter 3 (version 2)

 # Figure 3 (version 2): violin plus jittered points of similarity, with diff
  # rounded to the nearest 10.

  # FILTERING OUT SOME DATA TO MATCH THEIRS
  data <- all_data$frankenreiter$f3 %>%
    filter(!(similarity < 0.3 & diff < 25),
           diff <= 57)

  # Same rows as `data`, with diff rounded to the nearest multiple of 10.
  binned <- data %>%
    mutate(diff = round(diff, digits = -1))

  # Mean similarity at each exact diff value.
  liney <- data %>%
    group_by(diff) %>%
    summarise(similarity = mean(similarity, na.rm = TRUE)) %>%
    ungroup()

  # Mean similarity within each rounded diff bin.
  spiney <- binned %>%
    group_by(diff) %>%
    summarise(similarity = mean(similarity, na.rm = TRUE)) %>%
    ungroup()

  # Mean, median and quartiles of similarity within each rounded diff bin.
  quanty <- binned %>%
    group_by(diff) %>%
    summarise(avg = mean(similarity, na.rm = TRUE),
              med = median(similarity, na.rm = TRUE),
              q75 = quantile(similarity, 0.75, na.rm = TRUE),
              q25 = quantile(similarity, 0.25, na.rm = TRUE)) %>%
    ungroup()

  g2 <- ggplot(data = binned, mapping = aes(x = diff, y = similarity)) +
    geom_violin(aes(group = factor(diff)),
                fill = 'darkgrey',
                color = NA,
                alpha = 1) +
    ylim(0, 1) +
    geom_jitter(size = 1, alpha = 0.7) +
    theme_sfi(lp = 'none',
              y_axis_title_style = 'bold',
              x_axis_title_style = 'bold',
              title_style = 'bold') +
    labs(title = 'Figure 3 Similarity and temporal distance',
         subtitle = '(Grouped every 10 years)',
         caption = paste0('*Distribution represented with mirrored normal density (violin plot).', '\n', 'Data points randomly "jittered" on the X axis.'),
         x = '',
         y = '') +
    theme(axis.text = element_text(size = 10, hjust = 1))

  g2

Frankenreiter 4 (version 1)

# Figure 4 (version 1): similarity against diff with a loess band drawn
# beneath the points.
# Rows with negative diff are dropped; we are making this modification to
# match the vis from the paper.
data <- filter(all_data$frankenreiter$f4, diff >= 0)

g1 <- ggplot(data = data, mapping = aes(x = diff, y = similarity)) +
  # linetype = 0 hides the fitted curve; only the shaded band is shown.
  geom_smooth(method = 'loess', fill = 'darkgrey', alpha = 1, linetype = 0) +
  geom_point(color = 'black', size = 1, alpha = 0.9) +
  theme_sfi(lp = 'none',
            y_axis_title_style = 'bold',
            x_axis_title_style = 'bold',
            title_style = 'bold') +
  labs(title = paste0('Figure 4 Similarity between different', '\n' , 'judges as a function of time'),
       caption = '*Std error from local regression.',
       x = '',
       y = '') +
  theme(axis.text = element_text(size = 10, hjust = 1))

g1

Frankenreiter 4 (version 2)

  # Figure 4 (version 2): jittered points with violins on top, diff rounded to
  # the nearest 10.
  # Rows with negative diff are dropped; we are making this modification to
  # match the vis from the paper.
  data <- filter(all_data$frankenreiter$f4, diff >= 0)

  # Same rows as `data`, with diff rounded to the nearest multiple of 10.
  binned <- data %>%
    mutate(diff = round(diff, digits = -1))

  # Mean similarity at each exact diff value.
  liney <- data %>%
    group_by(diff) %>%
    summarise(similarity = mean(similarity, na.rm = TRUE)) %>%
    ungroup()

  # Mean similarity within each rounded diff bin.
  spiney <- binned %>%
    group_by(diff) %>%
    summarise(similarity = mean(similarity, na.rm = TRUE)) %>%
    ungroup()

  # Mean, median and quartiles of similarity within each rounded diff bin.
  quanty <- binned %>%
    group_by(diff) %>%
    summarise(avg = mean(similarity, na.rm = TRUE),
              med = median(similarity, na.rm = TRUE),
              q75 = quantile(similarity, 0.75, na.rm = TRUE),
              q25 = quantile(similarity, 0.25, na.rm = TRUE)) %>%
    ungroup()

  # Layer order matters here: points first, semi-transparent violins over them.
  g2 <- ggplot(data = binned, mapping = aes(x = diff, y = similarity)) +
    geom_jitter(size = 1, width = 2, alpha = 0.3) +
    geom_violin(aes(group = factor(diff)),
                fill = 'darkgrey',
                color = NA,
                width = 10,
                alpha = 0.8) +
    ylim(0, 1) +
    theme_sfi(lp = 'none',
              y_axis_title_style = 'bold',
              x_axis_title_style = 'bold',
              title_style = 'bold') +
    labs(title = paste0('Figure 4 Similarity between different', '\n' , 'judges as a function of time'),
         subtitle = '(Grouped every 10 years)',
         caption = paste0('*Distribution represented with mirrored normal density (violin plot).', '\n', 'Data points randomly "jittered" on the X axis.'),
         x = '',
         y = '') +
    theme(axis.text = element_text(size = 10, hjust = 1))

  g2

Frankenreiter 5

1955 - 2014

  # Figure 5, first panel: rows of f5 with source == 1.
  # Per the earlier notes in this chunk, source 1 corresponds to
  # 'Metrix MDS (1955-2014)' and source 2 to 'Metrix MDS (1970-2014)'.
  data <- all_data$frankenreiter$f5
  data1 <- data[data$source == 1, ]

  # One grey-scale colour per distinct year (not used by the plot below, which
  # relies on a continuous gradient instead).
  cols <- make_colors(length(unique(data1$year)), bw = TRUE)
  data1$year <- as.numeric(data1$year)

  # Only a handful of years get a text label; the two groups are nudged in
  # opposite directions via vjust/hjust.
  labelled_first <- subset(data1, year %in% c(1973, 1981, 1986, 1995, 2004))
  labelled_second <- subset(data1, year %in% c(2007, 2013))

  g1 <- ggplot(data = data1,
               mapping = aes(x = coord2, y = coord1, color = year)) +
    geom_point(pch = 16, size = 4, alpha = 0.9) +
    ggrepel::geom_text_repel(data = labelled_first,
                             aes(coord2, coord1, label = year),
                             vjust = -1,
                             hjust = 1.5) +
    ggrepel::geom_text_repel(data = labelled_second,
                             aes(coord2, coord1, label = year),
                             vjust = 1,
                             hjust = -1.5) +
    theme_sfi(lp = 'bottom',
              y_axis_title_style = 'bold',
              x_axis_title_style = 'bold',
              title_style = 'bold',
              lkw = TRUE,
              lkt = 'point',
              legend_width = 30) +
    labs(title = 'Figure 5: Metrix MDS (1955 - 2014)',
         subtitle = 'Multidimensional scaling of KL divergences between different years',
         x = 'Coordinate 2',
         y = 'Coordinate 1') +
    scale_color_gradient(name = 'Year', low = "#2C2C2C", high = "#ABABAB")

  g1

Frankenreiter 5

1970 - 2014

# Figure 5, second panel: rows of f5 with source == 2.
data <- all_data$frankenreiter$f5
data2 <- data[data$source == 2, ]

# One grey-scale colour per distinct year (not used by the plot below, which
# relies on a continuous gradient instead).
cols <- make_colors(length(unique(data2$year)), bw = TRUE)
data2$year <- as.numeric(data2$year)

# Only selected years get a text label; the two groups are nudged in opposite
# directions via vjust/hjust.
labelled_first <- subset(data2, year %in% c(1970, 1973, 1981, 1986, 1990, 1995))
labelled_second <- subset(data2, year %in% c(2000, 2004, 2007, 2010, 2014))

g2 <- ggplot(data = data2,
             mapping = aes(x = coord2, y = coord1, color = year)) +
  geom_point(pch = 16, size = 4, alpha = 0.9) +
  ggrepel::geom_text_repel(data = labelled_first,
                           aes(coord2, coord1, label = year),
                           vjust = -1,
                           hjust = 1.5) +
  ggrepel::geom_text_repel(data = labelled_second,
                           aes(coord2, coord1, label = year),
                           vjust = 1,
                           hjust = -1.5) +
  theme_sfi(lp = 'bottom',
            y_axis_title_style = 'bold',
            x_axis_title_style = 'bold',
            title_style = 'bold',
            lkw = TRUE,
            lkt = 'point',
            legend_width = 30) +
  labs(title = 'Figure 5: Metrix MDS (1970-2014)',
       subtitle = 'Multidimensional scaling of KL divergences between different years',
       x = 'Coordinate 2',
       y = 'Coordinate 1') +
  scale_color_gradient(name = 'Year', low = "#2C2C2C", high = "#ABABAB") +
  theme(axis.text = element_text(size = 10, hjust = 1))

g2

Frankenreiter 6 1973 enlargement

# Figure 6, 1973 enlargement: one line per judge, coloured by judgetrad and
# split into old-judge / new-judge facets.
data <- all_data$frankenreiter$f6 %>%
  mutate(year = as.numeric(year))

# Turn the 0/1 indicator into a readable label.
data$new <- as.factor(ifelse(data$new == 1, 'New judges', 'Old judges'))

# Keep only the rows belonging to the 1973 enlargement.
data_1 <- data[data$key == '1973 enlargement', ]

# Shorten compound surnames so the direct labels stay compact.
short_names <- c('Mertens de Wilmars' = 'de Wilmars',
                 'Mackenzie Stuart' = 'Stuart')
for (full_name in names(short_names)) {
  data_1$judgename <- gsub(full_name, short_names[[full_name]], data_1$judgename)
}

# Fix the factor level order: facet order for `new`, legend order for the
# capitalised `judgetrad`.
data_1$new <- factor(data_1$new, levels = c('Old judges', 'New judges'))
data_1$judgetrad <- factor(Hmisc::capitalize(data_1$judgetrad),
                           levels = c('Roman', 'German', 'Nordic', 'Commonlaw'))

# Version 1 with lines at end: judge names are placed at the last point of
# each line, with a separate adjustment for 'Donner'.
g1 <- ggplot(data = data_1,
             mapping = aes(x = year,
                           y = kl.dist3,
                           group = interaction(judgetrad, judgename),
                           color = judgetrad)) +
  geom_line(size = 1) +
  geom_point(color = 'black', size = 1, alpha = 0.6) +
  xlim(c(1973, 1992)) +
  geom_dl(aes(label = judgename),
          method = list(dl.combine("last.points"),
                        cex = 0.6,
                        hjust = -0.1,
                        dl.move('Donner', cex = 0.6, vjust = -0.3)),
          alpha = 0.8,
          color = 'black') +
  scale_color_manual(name = '',
                     breaks = c('Roman', 'German', 'Nordic', 'Commonlaw'),
                     values = c('#000000', '#434343', '#7F7F7F', '#ABABAB')) +
  labs(title = 'Figure 6 (1973 Enlargement)',
       subtitle = 'Development of writing style of ECJ in comparison to the writing of judges between 1973 and 1975',
       x = 'Year',
       y = '') +
  theme_sfi(lp = 'bottom',
            y_axis_title_style = 'bold',
            x_axis_title_style = 'bold',
            title_style = 'bold') +
  theme(axis.text = element_text(size = 10, hjust = 1)) +
  facet_wrap(~new)

g1

Frankenreiter 6 1995 enlargement

# Figure 6, 1995 enlargement: one line per judge, coloured by judgetrad and
# split into old-judge / new-judge facets.
data <- all_data$frankenreiter$f6 %>%
  mutate(year = as.numeric(year))

# Turn the 0/1 indicator into a readable label.
data$new <- ifelse(data$new == 1, 'New judges', 'Old judges')

# Keep only the rows belonging to the 1995 enlargement.
data_2 <- data[data$key == '1995 enlargement', ]
data_2$judgetrad <- Hmisc::capitalize(data_2$judgetrad)

# Fix the factor level order: facet order for `new`, legend order for
# `judgetrad`.
data_2$new <- factor(data_2$new, levels = c('Old judges', 'New judges'))
data_2$judgetrad <- factor(data_2$judgetrad,
                           levels = c('Roman', 'German', 'Nordic', 'Commonlaw'))

# Shorten compound surnames so the direct labels stay compact.
data_2$judgename <- gsub('Moitinho de Almeida', 'de Almeida', data_2$judgename)
data_2$judgename <- gsub('Rodriguez Iglesias', 'Iglesias', data_2$judgename)

# For every judge, sort that judge's rows by kl.dist3 (largest first) and put
# the judge's name into judge_label on the first row only; all other rows keep
# an empty label. The label must go on row 1 of the per-judge subset, not on
# the row whose index equals the judge's position in unique_names.
data_2$judge_label <- ''
unique_names <- unique(data_2$judgename)
data_list <- lapply(unique_names, function(this_name) {
  sub_dat <- data_2[data_2$judgename == this_name, ]
  sub_dat <- sub_dat[order(sub_dat$kl.dist3, decreasing = TRUE), ]
  sub_dat$judge_label[1] <- this_name
  sub_dat
})
# Leave data_2 untouched when there are no judges to stack.
if (length(data_list) > 0) {
  data_2 <- do.call('rbind', data_list)
}

# judge names: Wathelet, Puissochet, Murray, de Almeida, Edward
# data_2$judgetrad <- ifelse(data_2$new == 1, paste0(data_2$judgetrad, ' New'), data_2$judgetrad)
library(ggrepel)

# Three direct-label layers: new judges at the end of their lines (with
# per-name adjustments), three judges additionally labelled at the start of
# their lines, and a fixed list of judges labelled at the end.
g3 <- ggplot(data = data_2,
             aes(x = year,
                 y = kl.dist3,
                 group = interaction(judgetrad, judgename),
                 color = judgetrad)) +
  ylim(c(0, .03)) +
  geom_point(size = 1,
             color = 'black',
             alpha = 0.4) +
  geom_line(size = 1) +
  geom_dl(data = data_2[data_2$new == 'New judges', ],
          aes(label = judgename),
          method = list(dl.combine("last.points"), hjust = -0.1, cex = 0.6,
                        dl.move('Ragnemalm', hjust = 0.1, vjust = -0.4, cex = 0.6),
                        dl.move('Wathelet', hjust = 2, vjust = -0.4, cex = 0.6)),
          alpha = 0.8, color = 'black') +
  geom_dl(data = data_2 %>% filter(judgename %in% c('Puissochet', 'Wathelet', 'Edward')),
          aes(label = judgename),
          method = list(dl.combine("first.points"), hjust = 1.1, cex = 0.6),
          alpha = 0.8, color = 'black') +
  geom_dl(data = data_2 %>% filter(judgename %in% c('Hirsch', 'Mancini', 'Iglesias', 'de Almeida', 'Kakouris', 'Kapteyn', 'Gulmann', 'Murray')),
          aes(label = judgename),
          method = list(dl.combine("last.points"), hjust = -0.1, cex = 0.6),
          alpha = 0.8, color = 'black') +
  scale_color_manual(name = '',
                     breaks = c('Roman', 'German', 'Nordic', 'Commonlaw'),
                     values = c('#000000', '#434343', '#7F7F7F', '#ABABAB')) +
  xlim(c(1992, 2015)) +
  labs(x = 'Year',
       y = '',
       title = 'Figure 6 (1995 Enlargement)',
       subtitle = 'Development of writing style of ECJ in comparison to the writing of judges between 1973 and 1975') +
  theme_sfi(lp = 'bottom',
            y_axis_title_style = 'bold',
            x_axis_title_style = 'bold',
            title_style = 'bold') +
  theme(axis.text = element_text(size = 8, hjust = 1)) +
  facet_wrap(~new)

g3


databrew/sfi documentation built on May 29, 2019, 1:52 a.m.