In databrew/sfi:

# Packages
library(sfi)
library(webshot)
library(ggplot2)
library(dplyr)
library(plotly)
library(ggiraph)
library(scales)
library(tidyverse)
library(knitr)
library(Hmisc)
library(RColorBrewer)
library(extrafont)
library(kableExtra)
library(grid)

# webshot::install_phantomjs()

# Make fonts available to the graphics devices (loadfonts() presumably comes
# from extrafont, which is attached above).
loadfonts()

## Global options
# Cap how much is printed to the console. `max.print` is documented as an
# integer option, so it is given as a number rather than the string "75".
options(max.print = 75)

# knitr defaults applied to every chunk of this document.
opts_chunk$set(
  echo = FALSE,
  cache = FALSE,
  prompt = FALSE,
  tidy = TRUE,
  comment = NA,
  message = FALSE,
  warning = FALSE,
  dpi = 300,
  # dev = "cairo_pdf",
  dev = c("png", "cairo_pdf"), # each figure is written with both devices
  fig.pos = "!h",
  fig.path = "figures/"        # figure files are written under figures/
)
opts_knit$set(width = 75)
options(xtable.comment = FALSE)

This markdown is for Dumas figures.

\newpage

# Dumas 1a (Version 1)

# Dumas 1a, version 1: curves for the f1 data, one facet column per run,
# coloured by filter type.
data <- all_data$dumas$f1 %>%
  mutate(
    # The run is read off the digit found in the label; '1' is tested first,
    # and anything without a '1' or '2' falls through to 'Run 3'.
    run = ifelse(
      grepl("1", .data$label), "Run 1",
      ifelse(grepl("2", .data$label), "Run 2", "Run 3")
    ),
    # Labels that do not mention 'Restrictive' are treated as permissive.
    filter = ifelse(
      grepl("Restrictive", .data$label), "Restrictive", "Permissive"
    )
  )

# recode x and y to fit document
# names(data) <- c('label', 'True negative rate', 'True positive rate', 'run', 'filter')

g1 <- ggplot(
  data,
  aes(x = `True negative rate`, y = `True positive rate`, colour = filter)
) +
  # Layers, in drawing order: points, connecting lines, reference line.
  geom_point(size = 1, alpha = 0.8) +
  geom_line(size = 0.5, alpha = 1) +
  # NOTE(review): with the reversed x scale below this appears to be meant as
  # the diagonal reference line -- confirm on the rendered figure.
  geom_abline(
    intercept = 1, slope = 1,
    color = "black", size = 1, alpha = 0.45
  ) +
  # The x axis runs from 1 down to 0; the y axis from 0 up to 1.
  xlim(1, 0) +
  ylim(0, 1) +
  scale_color_manual(
    name = "Filter type",
    values = c("black", "darkgrey")
  ) +
  facet_wrap(~run, ncol = 3) +
  labs(
    title = "Appeals level",
    subtitle = "Restrictive vs Permissive Filters",
    caption = "The ROC curves of the language model"
  ) +
  theme_sfi(lp = "bottom") +
  theme(axis.text = element_text(size = 10, hjust = 1))

# Dumas 1a (Version 2)

# Dumas 1a, version 2: lines only (no points), legend suppressed on the line
# layer. Reuses the `data` object prepared for version 1.
g1 <- ggplot(
  data,
  aes(x = `True negative rate`, y = `True positive rate`, colour = filter)
) +
  geom_line(size = 1, alpha = 0.8, show.legend = FALSE) +
  geom_abline(
    intercept = 1, slope = 1,
    # linetype = 'dashed',
    color = "black", size = 0.7, alpha = 0.6
  ) +
  # The x axis runs from 1 down to 0; the y axis from 0 up to 1.
  xlim(1, 0) +
  ylim(0, 1) +
  # scale_color_manual(name = 'Filter type',
  #                    values = c('black', 'grey')) +
  scale_color_manual(name = "", values = c("black", "darkgrey")) +
  facet_wrap(~run, ncol = 3) +
  # NOTE(review): the caption mentions local-regression smoothing, but this
  # chunk draws the data with geom_line(); confirm the data are pre-smoothed
  # or adjust the caption.
  labs(
    title = "Appeals level",
    subtitle = "Restrictive vs Permissive Filters",
    caption = paste0(
      "The ROC curves of the language model", "\n",
      "*lines smoothed by a local regression"
    )
  ) +
  theme_sfi(lp = "bottom") +
  theme(axis.text = element_text(size = 10, hjust = 1))

# Dumas 1a (Version 3)

# Dumas 1a, version 3: same layers as version 1, but the runs are stacked
# vertically (three facet rows) and the plot is printed.
data <- all_data$dumas$f1 %>%
  mutate(
    # '1' is tested before '2'; labels with neither digit become 'Run 3'.
    run = ifelse(
      grepl("1", .data$label), "Run 1",
      ifelse(grepl("2", .data$label), "Run 2", "Run 3")
    ),
    # Labels that do not mention 'Restrictive' are treated as permissive.
    filter = ifelse(
      grepl("Restrictive", .data$label), "Restrictive", "Permissive"
    )
  )

# recode x and y to fit document
# names(data) <- c('label', 'True negative rate', 'True positive rate', 'run', 'filter')

g1 <- ggplot(
  data,
  aes(x = `True negative rate`, y = `True positive rate`, colour = filter)
) +
  # Layers, in drawing order: points, connecting lines, reference line.
  geom_point(size = 1, alpha = 0.6) +
  geom_line(size = 0.5, alpha = 1) +
  geom_abline(
    intercept = 1, slope = 1,
    color = "black", size = 1, alpha = 0.45
  ) +
  # The x axis runs from 1 down to 0; the y axis from 0 up to 1.
  xlim(1, 0) +
  ylim(0, 1) +
  scale_color_manual(
    name = "Filter type",
    values = c("black", "darkgrey")
  ) +
  facet_wrap(~run, nrow = 3) +
  labs(
    title = "Appeals level",
    subtitle = "Restrictive vs Permissive Filters",
    caption = "The ROC curves of the language model"
  ) +
  theme_sfi(lp = "bottom") +
  theme(axis.text = element_text(size = 10, hjust = 1))

# Print the figure.
g1

# Dumas 1b (Version 1)

# Data for Dumas 1b, version 1 (f2), tagged with run and filter type. The plot
# that used it is commented out below.
data <- all_data$dumas$f2 %>%
  mutate(
    # '1' is tested before '2'; labels with neither digit become 'Run 3'.
    run = ifelse(
      grepl("1", .data$label), "Run 1",
      ifelse(grepl("2", .data$label), "Run 2", "Run 3")
    ),
    # Case-insensitive match here, unlike the 1a chunks.
    filter = ifelse(
      grepl("Restrictive", .data$label, ignore.case = TRUE),
      "Restrictive", "Permissive"
    )
  )


  # --------------------------------------------------
  # g1
  # # facet wrap
  # g1 <- 
  #   ggplot(data, 
  #          aes(x = `True negative rate`,
  #              y =`True positive rate`,
  #              col = filter)) +
  #   geom_point(size = 0.5, 
  #             alpha = 0.8) +
  #   geom_line(size = 0.5, 
  #             alpha = 1) +
  #   xlim(c(1, 0)) +
  #   ylim(c(0, 1)) +
  #   labs(title = 'District Level',
  #        subtitle  = 'Restrictive vs Permissive Filters',
  #        caption = paste0('The ROC curves of the language model')) +
  #   scale_color_manual(name = 'Filter type',
  #                      values = c('black', 'grey')) +
  #   geom_abline(intercept = 1,
  #               color = 'black',
  #               slope = 1,
  #               size = 1,
  #               alpha = 0.6) +
  #   facet_wrap(~run, ncol = 3) +
  #   theme_sfi(lp = 'bottom') +
  #   theme(axis.text=element_text(size = 10, hjust = 1)) 
  # 
  # 
  # g1

# Get data
# Dumas 1b (Version 2)


  # Data for Dumas 1b, version 2 (f2), tagged with run and filter type. The
  # plot that used it is commented out below.
  data <- all_data$dumas$f2 %>%
    mutate(
      # '1' is tested before '2'; labels with neither digit become 'Run 3'.
      run = ifelse(
        grepl("1", .data$label), "Run 1",
        ifelse(grepl("2", .data$label), "Run 2", "Run 3")
      ),
      # 'Restrictive' is matched regardless of case.
      filter = ifelse(
        grepl("Restrictive", .data$label, ignore.case = TRUE),
        "Restrictive", "Permissive"
      )
    )


  # --------------------------------------------------
  # g1
  # # facet wrap
  # g1 <- 
  #   ggplot(data, 
  #          aes(x = `True negative rate`,
  #              y =`True positive rate`,
  #              col = filter)) +
  #   geom_line(size = 0.5, 
  #             alpha = 1) +
  #   xlim(c(1, 0)) +
  #   ylim(c(0, 1)) +
  #   labs(title = 'District Level',
  #        subtitle  = 'Restrictive vs Permissive Filters',
  #        caption = paste0('The ROC curves of the language model')) +
  #   scale_color_manual(name = 'Filter type',
  #                      values = c('black', 'grey')) +
  #   geom_abline(intercept = 1,
  #               color = 'black',
  #               slope = 1,
  #               size = 0.7,
  #               alpha = 0.6) +
  #   facet_wrap(~run, ncol = 3) +
  #   theme_sfi(lp = 'bottom') +
  #   theme(axis.text=element_text(size = 10, hjust = 1))
  # 
  #

# Dumas 1b (Version 3)

# Dumas 1b, version 3: curves for the f2 data with the runs stacked vertically
# (three facet rows); the plot is printed at the end.
data <- all_data$dumas$f2 %>%
  mutate(
    # '1' is tested before '2'; labels with neither digit become 'Run 3'.
    run = ifelse(
      grepl("1", .data$label), "Run 1",
      ifelse(grepl("2", .data$label), "Run 2", "Run 3")
    ),
    # 'Restrictive' is matched regardless of case.
    filter = ifelse(
      grepl("Restrictive", .data$label, ignore.case = TRUE),
      "Restrictive", "Permissive"
    )
  )

g1 <- ggplot(
  data,
  aes(x = `True negative rate`, y = `True positive rate`, colour = filter)
) +
  # Layers, in drawing order: points, connecting lines, reference line.
  geom_point(size = 1, alpha = 0.6) +
  geom_line(size = 0.5, alpha = 1) +
  geom_abline(
    intercept = 1, slope = 1,
    color = "black", size = 0.7, alpha = 0.6
  ) +
  # The x axis runs from 1 down to 0; the y axis from 0 up to 1.
  xlim(1, 0) +
  ylim(0, 1) +
  scale_color_manual(
    name = "Filter type",
    values = c("black", "grey")
  ) +
  facet_wrap(~run, nrow = 3) +
  labs(
    title = "District Level",
    subtitle = "Restrictive vs Permissive Filters",
    caption = "The ROC curves of the language model"
  ) +
  theme_sfi(lp = "bottom") +
  theme(axis.text = element_text(size = 10, hjust = 1))

# Print the figure.
g1

# Dumas 2a (Version 1)

  # Dumas 2a, version 1: curves for the f3 data, one facet column per run,
  # comparing the permissive filter with and without embeddings.
  data <- all_data$dumas$f3

  # Run number is read off the digit found in the label; '1' is tested first
  # and labels with neither '1' nor '2' become 'Run 3'.
  data$run <- ifelse(grepl('1', data$label), 'Run 1',
                     ifelse(grepl('2', data$label), 'Run 2',
                            'Run 3'))

  # Legend label for the colour scale. The plain-filter label is spelled
  # 'Permissive' (it previously read 'Permisive'); it still sorts before
  # 'Permissive embeddings', so the manual colours below map as before.
  data$filter <- ifelse(grepl('with embeddings', data$label),
                        'Permissive embeddings', 'Permissive')

  # --------------------------------------------------
  # facet wrap
  g1 <-
    ggplot(data,
           aes(x = `True negative rate`,
               y = `True positive rate`,
               color = filter)) +
    geom_point(size = 1,
               alpha = 0.8) +
    geom_line(size = 1,
              alpha = 0.6) +
    # x axis runs from 1 down to 0; y axis from 0 up to 1
    xlim(c(1, 0)) +
    ylim(c(0, 1)) +
    scale_color_manual(name = 'Filter type',
                       values = c('black', 'grey')) +
    labs(title = 'Appeals level',
         subtitle = 'Permissive vs Permissive Filters with embeddings',
         caption = paste0('The ROC curves of the language model')) +
    # NOTE(review): with the reversed x scale this appears to be meant as the
    # diagonal reference line -- confirm on the rendered figure.
    geom_abline(intercept = 1,
                color = 'black',
                slope = 1,
                size = 1,
                alpha = 0.6) +
    facet_wrap(~run, ncol = 3) +
    theme_sfi(lp = 'bottom',
              x_axis_title_style = 'bold',
              y_axis_title_style = 'bold') +
    theme(axis.text = element_text(size = 10, hjust = 1))

# Dumas 2a (Version 2)

  # Data for Dumas 2a, version 2 (f3), tagged with run and filter type. The
  # plot that used it is commented out below.
  data <- all_data$dumas$f3

  # '1' is tested before '2'; labels with neither digit become 'Run 3'.
  data$run <- ifelse(grepl('1', data$label), 'Run 1',
                     ifelse(grepl('2', data$label), 'Run 2',
                            'Run 3'))

  # The plain-filter label is spelled 'Permissive' (it previously read
  # 'Permisive').
  data$filter <- ifelse(grepl('with embeddings', data$label),
                        'Permissive embeddings', 'Permissive')


  # --------------------------------------------------
  # g4
  # # facet wrap
  # g1 <- 
  #   ggplot(data, 
  #          aes(x = `True negative rate`,
  #              y =`True positive rate`,
  #              color = filter)) +
  #   geom_line(size = 1,
  #              alpha = 0.6) +
  #   xlim(c(1, 0)) +
  #   ylim(c(0, 1)) +
  #   scale_color_manual(name = 'Filter type',
  #                      values = c('black', 'grey')) +
  #   labs(title = 'Appeals level',
  #        subtitle = 'Permissive vs Permissive Filters with embeddings',
  #        caption = paste0('The ROC curves of the language model')) +
  #   geom_abline(intercept = 1,
  #               color = 'black',
  #               slope = 1,
  #               size = 0.7,
  #               alpha = 0.6) +
  #   facet_wrap(~run, ncol = 3) +
  #   theme_sfi(lp = 'bottom',
  #             x_axis_title_style = 'bold',
  #             y_axis_title_style = 'bold') +
  #   theme(axis.text=element_text(size = 10, hjust = 1))  
  # 
  # g1

# Dumas 2a (Version 3)

 # Dumas 2a, version 3: curves for the f3 data with the runs stacked
 # vertically (three facet rows); the plot is printed at the end.
  data <- all_data$dumas$f3

  # '1' is tested before '2'; labels with neither digit become 'Run 3'.
  data$run <- ifelse(grepl('1', data$label), 'Run 1',
                     ifelse(grepl('2', data$label), 'Run 2',
                            'Run 3'))

  # Legend label for the colour scale. The plain-filter label is spelled
  # 'Permissive' (it previously read 'Permisive'); it still sorts before
  # 'Permissive embeddings', so the manual colours below map as before.
  data$filter <- ifelse(grepl('with embeddings', data$label),
                        'Permissive embeddings', 'Permissive')

  ## --------------------------------------------------
  # facet wrap
  g1 <-
    ggplot(data,
           aes(x = `True negative rate`,
               y = `True positive rate`,
               color = filter)) +
    geom_point(size = 1,
               alpha = 0.6) +
    geom_line(size = 1,
              alpha = .8) +
    # x axis runs from 1 down to 0; y axis from 0 up to 1
    xlim(c(1, 0)) +
    ylim(c(0, 1)) +
    scale_color_manual(name = 'Filter type',
                       values = c('black', 'grey')) +
    # Title matches the other 2a versions, which label the f3 data
    # 'Appeals level'; this chunk previously said 'District Level', which
    # looks like a leftover from the 2b chunk.
    # NOTE(review): confirm f3 is the appeals-level data.
    labs(title = 'Appeals level',
         subtitle = 'Permissive vs Permissive Filters with embeddings',
         caption = paste0('The ROC curves of the language model')) +
    geom_abline(intercept = 1,
                color = 'black',
                slope = 1,
                size = 0.7,
                alpha = 0.6) +
    facet_wrap(~run, nrow = 3) +
    theme_sfi(lp = 'bottom',
              x_axis_title_style = 'bold',
              y_axis_title_style = 'bold') +
    theme(axis.text = element_text(size = 10, hjust = 1))

  # Print the figure.
  g1

# Dumas 2b (Version 1)

  # Data for Dumas 2b, version 1 (f4), tagged with run and filter type. The
  # plot that used it is commented out below.
  data <- all_data$dumas$f4

  # '1' is tested before '2'; labels with neither digit become 'Run 3'.
  data$run <- ifelse(grepl('1', data$label), 'Run 1',
                     ifelse(grepl('2', data$label), 'Run 2',
                            'Run 3'))

  # The plain-filter label is spelled 'Permissive' (it previously read
  # 'Permisive').
  data$filter <- ifelse(grepl('with embeddings', data$label),
                        'Permissive embeddings', 'Permissive')


  # --------------------------------------------------
  # g4
  # facet wrap
  # g1 <- 
  #   ggplot(data, 
  #          aes(x = `True negative rate`,
  #              y =`True positive rate`,
  #              color = filter)) +
  #   geom_point(size = 1,
  #              alpha = 0.8) +
  #   geom_line(size = 1,
  #              alpha = 0.6) +
  #   xlim(c(1, 0)) +
  #   ylim(c(0, 1)) +
  #   scale_color_manual(name = 'Filter type',
  #                      values = c('black', 'grey')) +
  #   labs(title = 'Appeals level',
  #        subtitle = 'Permissive vs Permissive Filters with embeddings',
  #        caption = paste0('The ROC curves of the language model')) +
  #   geom_abline(intercept = 1,
  #               color = 'black',
  #               slope = 1,
  #               size = 1,
  #               alpha = 0.6) +
  #   facet_wrap(~run, ncol = 3) +
  #   theme_sfi(lp = 'bottom',
  #             x_axis_title_style = 'bold',
  #             y_axis_title_style = 'bold') +
  #   theme(axis.text=element_text(size = 10, hjust = 1))   
  #

# Dumas 2b (Version 2)

 # Data for Dumas 2b, version 2 (f4), tagged with run and filter type. The
 # plot that used it is commented out below.
  data <- all_data$dumas$f4

  # '1' is tested before '2'; labels with neither digit become 'Run 3'.
  data$run <- ifelse(grepl('1', data$label), 'Run 1',
                     ifelse(grepl('2', data$label), 'Run 2',
                            'Run 3'))

  # The plain-filter label is spelled 'Permissive' (it previously read
  # 'Permisive').
  data$filter <- ifelse(grepl('with embeddings', data$label),
                        'Permissive embeddings', 'Permissive')


  ## --------------------------------------------------
  # g1
  # facet wrap
  # g1 <- 
  #   ggplot(data, 
  #          aes(x = `True negative rate`,
  #              y =`True positive rate`,
  #              color = filter)) +
  #   geom_line(size = 1,
  #              alpha = 0.6) +
  #   xlim(c(1, 0)) +
  #   ylim(c(0, 1)) +
  #   scale_color_manual(name = 'Filter type',
  #                      values = c('black', 'grey')) +
  #   labs(title = 'District Level',
  #        subtitle = 'Permissive vs Permissive Filters with embeddings',
  #        caption = paste0('The ROC curves of the language model')) +
  #   geom_abline(intercept = 1,
  #               color = 'black',
  #               slope = 1,
  #               size = 1,
  #               alpha = 0.6) +
  #   facet_wrap(~run, ncol = 3) +
  #   theme_sfi(lp = 'bottom',
  #             x_axis_title_style = 'bold',
  #             y_axis_title_style = 'bold') +
  #   theme(axis.text=element_text(size = 10, hjust = 1))

# Dumas 2b (Version 3)

 # Dumas 2b, version 3: curves for the f4 data with the runs stacked
 # vertically (three facet rows); the plot is printed at the end.
  data <- all_data$dumas$f4

  # '1' is tested before '2'; labels with neither digit become 'Run 3'.
  data$run <- ifelse(grepl('1', data$label), 'Run 1',
                     ifelse(grepl('2', data$label), 'Run 2',
                            'Run 3'))

  # Legend label for the colour scale. The plain-filter label is spelled
  # 'Permissive' (it previously read 'Permisive'); it still sorts before
  # 'Permissive embeddings', so the manual colours below map as before.
  data$filter <- ifelse(grepl('with embeddings', data$label),
                        'Permissive embeddings', 'Permissive')

  ## --------------------------------------------------
  # facet wrap
  g1 <-
    ggplot(data,
           aes(x = `True negative rate`,
               y = `True positive rate`,
               color = filter)) +
    geom_point(size = 1,
               alpha = 0.8) +
    geom_line(size = 1,
              alpha = 0.6) +
    # x axis runs from 1 down to 0; y axis from 0 up to 1
    xlim(c(1, 0)) +
    ylim(c(0, 1)) +
    scale_color_manual(name = 'Filter type',
                       values = c('black', 'grey')) +
    labs(title = 'District Level',
         subtitle = 'Permissive vs Permissive Filters with embeddings',
         caption = paste0('The ROC curves of the language model')) +
    # NOTE(review): with the reversed x scale this appears to be meant as the
    # diagonal reference line -- confirm on the rendered figure.
    geom_abline(intercept = 1,
                color = 'black',
                slope = 1,
                size = 1,
                alpha = 0.6) +
    facet_wrap(~run, nrow = 3) +
    theme_sfi(lp = 'bottom',
              x_axis_title_style = 'bold',
              y_axis_title_style = 'bold') +
    theme(axis.text = element_text(size = 10, hjust = 1))

  # Print the figure.
  g1