In joebrew/vilaweb: Reproducible data analysis for www.vilaweb.cat

# Global chunk defaults for the knitted report
library(knitr)
knitr::opts_chunk$set(
  fig.path = 'figures/',  # prefix for generated figure files
  cache = FALSE,
  error = TRUE,           # show chunk errors in the output instead of aborting
  echo = FALSE,           # do not print the source code
  message = FALSE,
  warning = FALSE,
  comment = NA            # no prefix in front of printed output
)

# Libraries
library(vilaweb)
library(rtweet)
library(tidyverse)
library(databrew)

# Load the data
source('get_data.R')
# After sourcing, an object named tl (all tweets) is available.
# De-duplicate in two passes: first drop rows that are identical in every
# column, then keep only the first row for each tweet id.
tl <- filter(dplyr::distinct(tl, .keep_all = TRUE), !duplicated(id))

# # Read temp data <-
# out_list <- list()
# peeps <- dir('data')
# # peeps <- gsub('_tweets', '', peeps)
# for(i in 1:length(peeps)){
#   peep <- peeps[i]
#   dat <- read_csv(paste0('data/', peep, '/tweets.csv'))
#   out_list[[i]] <- dat
# }
# tl <-bind_rows(out_list)

# Accounts dropped from the analysis (low-posters)
removers <- c('eva_granados',
              'carmencalvo_',
              'meritxell_batet',
              'alejandrotgn')

# Restrict the accounts, then add one logical column per keyword pattern plus
# the account classification (newspaper / party / person).
tl <- tl %>%
  filter(!username %in% removers) %>%
  # Only keep those who have posts going back to Sep 2017
  group_by(username) %>%
  filter(min(date) <= '2017-09-01') %>%
  ungroup() %>%
  mutate(
    # Lower-case the text and the handle once; both helper columns are
    # temporary and removed at the end of the pipeline.
    .tweet_lower = tolower(tweet),
    # Identify violence posts, but filter out the violencia de genero ones
    violence = grepl('violen', .tweet_lower),
    genero = grepl('machis|masclis|mujer|dona|done|géner|gèner|genero|marido|marit|novio|esposa|novia|doméstic|domèstic', .tweet_lower),
    violence_not_genero = violence & !genero,
    tumultos = grepl('tumult', .tweet_lower),
    atac = grepl('atac|ataq', .tweet_lower),
    rebelion = grepl('rebel', .tweet_lower),
    comandos = grepl('comando', .tweet_lower),
    asalt = grepl('asalt', .tweet_lower),
    indepe = grepl('indepe', .tweet_lower),
    violence_indepe = violence_not_genero & indepe,
    ramblas = grepl('ramblas', .tweet_lower),
    terror = grepl('terroris', .tweet_lower),
    .username_lower = tolower(username),
    # Flag the newspaper posts
    newspaper = .username_lower %in% tolower(c(
      'ElMundoEspana',
      'elpais_espana',
      'LaVanguardia',
      'cronicaglobal',
      'elespanolcom',
      'elperiodico',
      'elconfidencial',
      'OKDIARIO',
      'enoticiescat')),
    # Flag the party / organisation accounts
    party = .username_lower %in% tolower(c(
      'PSOE',
      'socialistes_cat',
      'CiutadansCs',
      'PPopular',
      'PPCatalunya',
      'vox_es',
      'CiudadanosCs',
      'Societatcc')),
    # Newspaper takes precedence over party; everything else is a person
    type = case_when(newspaper ~ 'Newspaper',
                     party ~ 'Party/group',
                     TRUE ~ 'Person')
  ) %>%
  dplyr::select(-.tweet_lower, -.username_lower)




# Weekly summary per account: number of flagged tweets for each keyword
# family (n = violence excluding gender violence), total tweets (d) and the
# corresponding percentages, restricted to Aug 2017 - Jul 2018.
bt <- tl %>%
  group_by(type,
           date = date_truncate(date, level = 'week') + 3,
           person = username) %>%
  summarise(n = sum(violence_not_genero, na.rm = TRUE),
            n_atac = sum(atac, na.rm = TRUE),
            n_violence_indepe = sum(violence_indepe, na.rm = TRUE),
            n_asalt = sum(asalt, na.rm = TRUE),
            n_comandos = sum(comandos, na.rm = TRUE),
            n_rebelion = sum(rebelion, na.rm = TRUE),
            d = n()) %>%
  ungroup() %>%
  mutate(p = n / d * 100,
         p_atac = n_atac / d * 100,
         p_asalt = n_asalt / d * 100,
         p_comandos = n_comandos / d * 100,
         p_rebelion = n_rebelion / d * 100) %>%
  filter(date >= '2017-08-01' & date <= '2018-07-31')

# Weekly totals of violence-flagged tweets: across all accounts (agg) and
# split by account type (agg_type)
agg <- ungroup(summarise(group_by(bt, date), n = sum(n)))
agg_type <- ungroup(summarise(group_by(bt, date, type), n = sum(n)))



# Define the important labels: one row per key event annotated on the weekly
# charts, with Catalan and English captions.
#   label      - caption currently in use (English by default; the plotting
#                functions overwrite it according to their language argument)
#   date_merge - the event date bucketed the same way as the weekly data
#   y          - alternates between -2 and -1 so that neighbouring captions
#                are placed at different heights
#   yend       - fixed at -0.5
# Events later than the last week present in bt are dropped.
label_df <-
  tibble(date = as.Date(c('2017-09-20',
                          '2017-10-01',
                          # '2017-10-17',
                          '2017-10-27',
                          # '2017-11-02',
                          '2017-12-21',
                          '2018-03-25',
                          # '2018-07-20',
                          '2018-10-01')),
         label_ca = c('20-S',
                      '1-O',
                      # 'Jordis entren a la presó',
                      '155',
                      # 'Ministres a la presó',
                      'Eleccions',
                      'Puigdemont detingut',
                      # 'Llarena cancela\nordres de detenció',
                      'Manifestacions al Parlament'),
         label_en = c('20-S',
                      '1-O',
                      # 'Jordis imprisoned',
                      '155',
                      # 'Ministers imprisoned',
                      'Elections',
                      'Puigdemont arrested',
                      # 'Llarena cancels\narrest warrants',
                      'Protests at Parlament')) %>%
  mutate(label = label_en,
         date_merge = date_truncate(date, 'week') + 3,
         y = (seq_along(label_ca) %% 2 * -1) - 1,
         yend = -0.5) %>%
  filter(date <= max(bt$date))

# ggplot(data = bt,
#        aes(x = date,
#            y = n_atac)) +
#   geom_bar(stat = 'identity',
#            width = 7) +
#     facet_wrap(~person, scales = 'free_y')

Executive summary

This document contains a technical / methodological explanation of the analysis carried out for the article "The tall tale of violence", published HERE.

Overview

The study examined the frequency of use of the term "violence" (as well as the related terms "violent", etc.) on twitter among well-known pro-union Spanish and Catalan politicians. Given that there is a great deal of controversy in Spain regarding whether the charge of "rebellion" - a crime which, by definition in the Spanish Penal Code, requires violence - is appropriate, the purpose of this study was to examine the frequency of the word "violence" on twitter to see if there was notable violence in Catalonia.

The hypothesis

If violence took place, then politicians will have talked about it.

If there was indeed a violent uprising or violent rebellion in Catalonia in September and October of 2017, we would expect that Catalan and Spanish politicians, particularly those most opposed to independence, will have noticed, talked, and tweeted about the violence.

The data

We examined tweets from the year beginning in August 2017. We limited our analysis to individuals and groups which are most opposed to the Catalan independence movement, since these accounts were most likely to discuss the supposed violence of independentists. In total, our analysis covered `r sum(bt$d)` tweets from `r length(unique(bt$person))` twitter accounts.

Acquiring data

We "scraped" twitter for all publicly available tweets from the following accounts. Data were acquired on 5 December 2018. Tools from the open source python twint library were used, as well as custom code in R (publicly available here).

The methods

Filtering data

Having acquired data, we searched all tweets for the string "violen". As the root of the word "violence", "violencia", "violentos", etc. it captured tweets about violence. In order to reduce noise, we filtered out tweets about domestic/gender violence, using the following search string:

'machis|masclis|mujer|dona|done|géner|gèner|genero|marido|marit|novio|esposa|novia|doméstic|domèstic'

(In the above, the vertical bar can be understood as meaning "OR")

We restricted our analysis to the 1 year period between August 1, 2017 and July 31, 2018.

Aggregation and counting

We grouped the resultant data by person/account and week, and calculated the sum of the total number of violence-flagged tweets (excluding those flagged as domestic/gender violence) for each person-week.

The results

Tweets from politicians and parties

The below chart shows all tweets containing the words violence/violent during the time period in question (excluding tweets algorithmically identified as being specific to gender/domestic violence).

# PEOPLE/PARTY PLOT
# Weekly stacked bar chart of violence-flagged tweets (gender-violence tweets
# excluded) for the 'Person' and 'Party/group' accounts, annotated with the
# key events in label_df.
#
# Arguments:
#   language      'en' for English text; any other value gives Catalan text.
#   return_table  if TRUE, no chart is built: the per-account weekly counts
#                 (all account types, all dates) are returned instead.
# Returns: a ggplot object, or a data frame when return_table = TRUE.
# Reads the objects tl, agg_type and label_df from the enclosing environment.
make_people_plot <- function(language = 'en', return_table = FALSE){
  plot_data <- tl %>%
    mutate(year_month = date_truncate(date, level = 'week') + 3) %>%
    group_by(type, date = year_month,
             person = username) %>%
    summarise(n = length(which(violence_not_genero)),
              n_atac = length(which(atac)),
              n_violence_indepe = length(which(violence_indepe)),
              n_asalt = length(which(asalt)),
              n_comandos = length(which(comandos)),
              n_rebelion = length(which(rebelion)),
              d = n()) %>%
    ungroup() %>%
    mutate(p = n / d * 100,
           p_atac = n_atac / d * 100,
           p_asalt = n_asalt / d * 100,
           p_comandos = n_comandos / d * 100,
           p_rebelion = n_rebelion / d * 100)
  plot_data$person <- paste0('@', plot_data$person)

  if (return_table) {
    # The column is called `month` although it holds the weekly bucket date;
    # the name is kept so the exported table keeps its existing columns.
    plot_data <- plot_data %>%
      dplyr::rename(month = date,
                    total_tweets = d,
                    violence_tweets = n,
                    assault_tweets = n_asalt,
                    comandos_tweets = n_comandos,
                    rebelion_tweets = n_rebelion) %>%
      dplyr::select(person, month, violence_tweets, assault_tweets,
                    comandos_tweets, rebelion_tweets, total_tweets)
    return(plot_data)
  }

  plot_data <- plot_data %>%
    filter(date >= '2017-08-01',
           date <= '2018-07-31') %>%
    filter(type %in% c('Person', 'Party/group'))
  # Vertical unit for the annotation area below the bars: a tenth of the
  # largest weekly per-type total
  gapper <- max(agg_type$n[agg_type$type %in% c('Person', 'Party/group')]) / 10

  # Chart text; label_df is modified on a local copy only
  if (language == 'en') {
    title <- 'Tweets containing the word "violence"*'
    subtitle <- 'Weekly, August 2017-July 2018'
    caption <- '*Removed tweets identified as being specific to gender violence: "machista, masclisme, mujer, dona, género, marido, etc.".\n*A tweet was flagged as containing the word "violence" if it contained the sub-string "violen", ie "violencia", "violentos"...\nData extracted/processed and chart created on 5 December 2018. Joe Brew. @joethebrew.'
    x <- 'Week'
    y <- 'Tweets'
    label_df$label <- label_df$label_en
  } else {
    title <- 'Tuits amb la paraula "violència"*'
    subtitle <- 'Setmanal, Agost 2017-Juliol 2018'
    caption <- '*Es va treure els tuits identificats com relacionats amb la violència de gènere fent servir les paraules: "machista, masclisme, mujer, dona, género, marido, etc.".\n*Es considera que un tuit conté la paraula "violència" si conté les lletres "violen", o sigui "violencia", "violento", etc.\nDades descarregades/processades i gràfic creat el 5 de desembre 2018. Joe Brew. @joethebrew.'
    x <- 'Setmana'
    y <- 'Tuits'
    label_df$label <- label_df$label_ca
  }

  g <- ggplot(data = plot_data,
              aes(x = date,
                  y = n)) +
    # One stacked segment per account
    geom_bar(stat = 'identity',
             aes(group = person,
                 fill = person),
             width = 7) +
    theme_vilaweb() +
    labs(title = title,
         caption = caption,
         subtitle = subtitle,
         x = x,
         y = y) +
    # Event captions, placed below zero
    geom_text(data = label_df,
              aes(x = date,
                  y = (gapper * y) - (gapper * 0.5),
                  label = label),
              alpha = 0.6,
              size = 4) +
    # Drawn with an empty plotting character, so nothing is visible
    geom_point(data = label_df,
               pch = '',
               aes(x = date,
                   y = yend)) +
    scale_fill_manual(name = '',
                      values = databrew::make_colors(n = length(unique(plot_data$person)))) +
    # Vertical connector for each event, from y * gapper up to yend
    geom_path(data = label_df %>%
                mutate(y = y * gapper) %>%
                gather(key, value, y:yend),
              aes(x = date,
                  y = value,
                  group = date),
              alpha = 0.6) +
    guides(fill = guide_legend(ncol = 4)) +
    theme(axis.text.x = element_text(size = 13),
          plot.caption = element_text(size = 7),
          plot.subtitle = element_text(size = 16),
          plot.title = element_text(size = 20),
          legend.text = element_text(size = 10)) +
    # Black outline showing the weekly total across persons and parties
    geom_bar(data = agg_type %>%
               filter(type %in% c('Person', 'Party/group')) %>%
               group_by(date) %>%
               summarise(n = sum(n)),
             stat = 'identity',
             width = 7,
             fill = NA,
             color = 'black',
             lwd = 0.3) +
    scale_y_continuous(breaks = seq(0, 100, 5))
  return(g)
}

make_people_plot('en')

Tweets about violence from newspapers

Newspapers followed a similar trend, albeit with more emphasis on violence in September and October of 2017. However, nearly all coverage of September/October 2017 violence by newspapers was of violence carried out by the State, not against the State.

# NEWSPAPER PLOT
# Weekly stacked bar chart of violence-flagged tweets (gender-violence tweets
# excluded) for the newspaper accounts, annotated with the key events in
# label_df.
#
# Arguments:
#   language  'en' for English text; any other value gives Catalan text.
# Returns: a ggplot object.
# Reads the objects bt, agg_type and label_df from the enclosing environment.
make_newspaper_plot <- function(language = 'en'){

  # Chart text; label_df is modified on a local copy only
  if (language == 'en') {
    title <- 'Tweets containing the word "violence"*'
    subtitle <- 'Weekly, August 2017-July 2018'
    caption <- '*Removed tweets identified as being specific to gender violence: "machista, masclisme, mujer, dona, género, marido, etc.".\n*A tweet was flagged as containing the word "violence" if it contained the sub-string "violen", ie "violencia", "violentos"...\nData extracted/processed and chart created on 5 December 2018. Joe Brew. @joethebrew.'
    x <- 'Week'
    y <- 'Tweets'
    label_df$label <- label_df$label_en
  } else {
    title <- 'Tuits amb la paraula "violència"*'
    subtitle <- 'Setmanal, Agost 2017-Juliol 2018'
    caption <- '*Es va treure els tuits identificats com relacionats amb la violència de gènere fent servir les paraules: "machista, masclisme, mujer, dona, género, marido, etc.".\n*Es considera que un tuit conté la paraula "violència" si conté les lletres "violen", o sigui "violencia", "violento", etc.\nDades descarregades/processades i gràfic creat el 5 de desembre 2018. Joe Brew. @joethebrew.'
    x <- 'Setmana'
    y <- 'Tuits'
    label_df$label <- label_df$label_ca
  }

  # Newspaper plot
  plot_data <- bt %>% filter(type == 'Newspaper')
  plot_data$person <- paste0('@', plot_data$person)

  # Vertical unit for the annotation area below the bars: a fifteenth of the
  # largest weekly newspaper total
  gapper <- max(agg_type$n[agg_type$type == 'Newspaper']) / 15

  g <- ggplot(data = plot_data,
              aes(x = date,
                  y = n)) +
    # One stacked segment per account
    geom_bar(stat = 'identity',
             aes(group = person,
                 fill = person),
             width = 7) +
    theme_vilaweb() +
    labs(title = title,
         subtitle = subtitle,
         x = x,
         y = y,
         caption = caption) +
    # Event captions, placed below zero
    geom_text(data = label_df,
              aes(x = date,
                  y = (gapper * y) - (gapper * 0.5),
                  label = label),
              alpha = 0.6,
              size = 4) +
    # NOTE(review): make_people_plot draws this layer with pch = '' (nothing
    # visible); here the points are drawn - confirm which is intended.
    geom_point(data = label_df,
               aes(x = date,
                   y = yend)) +
    scale_fill_manual(name = '',
                      values = databrew::make_colors(n = length(unique(plot_data$person)))) +
    # Vertical connector for each event, from y * gapper up to yend
    geom_path(data = label_df %>%
                mutate(y = y * gapper) %>%
                gather(key, value, y:yend),
              aes(x = date,
                  y = value,
                  group = date),
              alpha = 0.6) +
    guides(fill = guide_legend(ncol = 3)) +
    theme(axis.text.x = element_text(size = 13),
          plot.caption = element_text(size = 7),
          plot.subtitle = element_text(size = 16),
          plot.title = element_text(size = 20),
          legend.text = element_text(size = 10)) +
    # Black outline showing the weekly total across newspapers
    geom_bar(data = agg_type %>% filter(type == 'Newspaper'),
             stat = 'identity',
             width = 7,
             fill = NA,
             color = 'black',
             lwd = 0.3)
  return(g)
}
make_newspaper_plot('en')

Other violence related words

Among certain accounts, a similar trend emerges if we examine other violence-related words, such as "assault", "attack", "comandos" and "rebellion". The exact search strings used for this were:

attack: 'atac|ataq' (ie, atacar, ataque, ataques, atacado)
rebellion: 'rebel' (ie, rebelión, rebeldes, rebelar)
comandos: 'comando' (ie, comandos)
assault: 'asalt' (ie, asaltar, asalto, asaltan)

With the exception of the August 2017 peak with the word "attack" (a function of the terrorist attack on the Ramblas at that time), references to violent words among mainstream Spanish and Catalan politicians were low during the supposed violent rebellion of the Catalan independence movement, but have been increasing since. The below chart shows the number of tweets per month containing the titled words.

# OTHER VIOLENCE-RELATED WORDS PLOT
# Monthly number of tweets per account containing the attack / assault /
# comandos / rebellion search strings, one facet per word family, for a fixed
# list of politician and party accounts (newspapers excluded), Aug 2017 to
# Nov 2018.
#
# Arguments:
#   language  'en' for English text; any other value gives Catalan titles
#             with Spanish facet names.
# Returns: a ggplot object.
# Reads the object tl from the enclosing environment.
make_ciudadanos_plot <- function(language = 'en'){

  if (language == 'en') {
    title <- 'Tweets containing violence-related words'
    subtitle <- 'Monthly, August 2017-November 2018'
    caption <- 'Data extracted/processed and chart created on 5 December 2018. Lines smoothed via local regression. Joe Brew. @joethebrew.'
    x <- 'Month'
    y <- 'Tweets'
  } else {
    title <- 'Tuits amb paraules relacionades amb la violència'
    subtitle <- 'Mensual, Agost 2017-Novembre 2018'
    caption <- 'Dades descarregades/processades i gràfic creat el 5 de desembre 2018. Línies suavitzades a través de regressió local. Joe Brew. @joethebrew.'
    x <- 'Mes'
    y <- 'Tuits'
  }

  # Accounts shown in the chart
  accounts <- c('albert_rivera', 'carrizosacarlos', 'ciutadanscs',
                'inesarrimadas', 'ciudadanoscs', 'societatcc',
                'albiol_xg', 'alevysoler', 'miqueliceta', 'pablocasado_',
                'ppcatalunya', 'ppopular', 'psoe', 'sanchezcatejon',
                'socialistes_cat')

  # Monthly counts per account; only the four plotted counts are computed
  other <- tl %>%
    filter(type != 'Newspaper') %>%
    filter(username %in% accounts) %>%
    mutate(person = paste0('@', username)) %>%
    mutate(year_month = date_truncate(date, level = 'month')) %>%
    group_by(type, date = year_month,
             person) %>%
    summarise(n_atac = length(which(atac)),
              n_asalt = length(which(asalt)),
              n_comandos = length(which(comandos)),
              n_rebelion = length(which(rebelion))) %>%
    ungroup() %>%
    dplyr::select(date, type, person, n_atac, n_asalt, n_rebelion, n_comandos) %>%
    filter(date >= '2017-08-01', date <= '2018-11-30')

  # Long format: one row per account, month and word family
  pd <- other %>% gather(key, value, n_atac:n_comandos)

  # Facet titles for each count column
  right <- tibble(key = c('n_atac',
                          'n_asalt',
                          'n_comandos',
                          'n_rebelion'),
                  new_key = c('Atacar/ataque',
                              'Asaltar/asalto',
                              'Comandos',
                              'Rebelión/rebeldes'))
  if (language == 'en') {
    right$new_key <- c('Attack', 'Assault', 'Comandos', 'Rebellion/rebels')
  }

  pd <- left_join(pd, right, by = 'key') %>%
    dplyr::select(-key) %>%
    dplyr::rename(key = new_key)

  g <- ggplot(data = pd,
              aes(x = date,
                  y = value,
                  color = person,
                  group = person)) +
    geom_line(alpha = 0.6, size = 0.3) +
    geom_point(alpha = 0.4) +
    facet_wrap(~key, scales = 'free_y') +
    theme_vilaweb() +
    # Smoothed trend per account on top of the raw monthly series
    geom_line(stat = 'smooth', method = 'auto',
              formula = y ~ poly(x, 1),
              size = 0.5, alpha = 0.8) +
    labs(title = title,
         subtitle = subtitle,
         x = x,
         y = y,
         caption = caption) +
    theme(axis.text.x = element_text(size = 8)) +
    scale_x_date(labels = function(x){format(x, '%b\n%Y')}) +
    theme(plot.caption = element_text(size = 7),
          strip.text = element_text(size = 18)) +
    scale_color_manual(name = '',
                       values = databrew::make_colors(n = length(unique(pd$person)))) +
    guides(color = guide_legend(ncol = 1)) +
    theme(legend.position = 'right')

  return(g)

}

make_ciudadanos_plot()

Comparison with actual violent event

The trend of using event-related violence words at a low frequency, but then increasing their usage over time is the exact opposite of how reality is usually perceived and discussed. For example, if we examine the use of the words "terror" ("terrorism", "terrorist", etc.) and "rambla" (the location of the August 2017 terrorist attack) among the same group of accounts, we see that there is an initial spike in attention, followed by a long decline (see below chart with monthly tweets mentioning the strings "rambla" and "terror" among the same politicians as the above chart). This is normal for newsworthy events.

# TERRORISM PLOT
# Monthly number of tweets per account containing the "terroris" and
# "ramblas" search strings, one facet per string, for a fixed list of
# politician and party accounts, Jul 2017 to Feb 2018.
#
# Arguments:
#   language  'en' for English text; any other value gives Catalan text.
# Returns: a ggplot object.
# Reads the object tl from the enclosing environment.
make_terror_plot <- function(language = 'en'){

  if (language == 'en') {
    title <- 'Twitter and terrorism'
    subtitle <- 'Monthly, July 2017-February 2018'
    caption <- 'Data extracted/processed and chart created on 5 December 2018. Lines smoothed via local regression. Joe Brew. @joethebrew.'
    x <- 'Month'
    y <- 'Tweets'
  } else {
    title <- 'Twitter i el terrorisme'
    subtitle <- 'Mensual, Juliol 2017-Febrer 2018'
    caption <- 'Dades descarregades/processades i gràfic creat el 5 de desembre 2018. Línies suavitzades a través de regressió local. Joe Brew. @joethebrew.'
    x <- 'Mes'
    y <- 'Tuits'
  }

  # Accounts shown in the chart
  accounts <- c('albert_rivera', 'carrizosacarlos', 'ciutadanscs',
                'inesarrimadas', 'ciudadanoscs', 'societatcc',
                'albiol_xg', 'alevysoler', 'miqueliceta', 'pablocasado_',
                'ppcatalunya', 'ppopular', 'psoe', 'sanchezcatejon',
                'socialistes_cat')

  # Monthly counts per account; only the two plotted counts are computed
  other <- tl %>%
    filter(username %in% accounts) %>%
    mutate(person = paste0('@', username)) %>%
    mutate(year_month = date_truncate(date, level = 'month')) %>%
    group_by(type, date = year_month,
             person) %>%
    summarise(n_terror = length(which(terror)),
              n_rambla = length(which(ramblas))) %>%
    ungroup() %>%
    dplyr::select(date, type, person, n_terror, n_rambla) %>%
    filter(date >= '2017-07-01', date <= '2018-02-01')

  # Long format: one row per account, month and search string
  pd <- other %>% gather(key, value, n_terror:n_rambla)

  # Facet titles for each count column
  right <- tibble(key = c('n_terror',
                          'n_rambla'),
                  new_key = c('Terrorisme/Terrorista', 'Ramblas'))
  if (language == 'en') {
    right$new_key <- c('Terrorism/Terrorist', 'Ramblas')
  }

  pd <- left_join(pd, right, by = 'key') %>%
    dplyr::select(-key) %>%
    dplyr::rename(key = new_key)

  g <- ggplot(data = pd,
              aes(x = date,
                  y = value,
                  color = person,
                  group = person)) +
    geom_line(alpha = 0.6, size = 0.3) +
    geom_point(alpha = 0.4) +
    facet_wrap(~key, scales = 'free_y') +
    theme_vilaweb() +
    # Smoothed trend per account on top of the raw monthly series
    geom_line(stat = 'smooth', method = 'auto',
              formula = y ~ poly(x, 1),
              size = 0.5, alpha = 0.8) +
    labs(title = title,
         subtitle = subtitle,
         x = x,
         y = y,
         caption = caption) +
    theme(axis.text.x = element_text(size = 8)) +
    scale_x_date(labels = function(x){format(x, '%b\n%Y')}) +
    theme(plot.caption = element_text(size = 7),
          strip.text = element_text(size = 18)) +
    scale_color_manual(name = '',
                       values = databrew::make_colors(n = length(unique(pd$person)))) +
    guides(color = guide_legend(ncol = 3)) +
    theme(legend.position = 'bottom')

  return(g)

}

make_terror_plot()

The above chart shows the normal relationship between an event and words related to the event. The terrorist attack of August 2017 certainly occurred - there is no debate about that. And the tweet-trail of the terrorist attack is normal - high attention immediately after the event, followed by a decline. What is abnormal is the fact that there was a supposed, major event (violent rebellion/uprising) which very few perceived as violent at the moment of the event. And the tweet trail of the violent rebellion takes on the opposite form: more people talked about the "violence" of the "rebellion" many months after it took place than in the immediate days after.

## Direct comparison: real violence vs. invented violence

# TERROR VS. REBELLION PLOT
# Side-by-side monthly counts per account of tweets containing the
# "terroris" and "rebel" search strings, for a fixed list of politician and
# party accounts. Each facet keeps its own date window (terror: Jul 2017 to
# Jan 2018; rebellion: Aug 2017 to Nov 2018).
#
# Arguments:
#   language  'en' for English text; any other value gives Catalan titles.
#             Facet names are English for 'en' and Spanish otherwise; for
#             'ca' and 'en' they are factors so that the terror facet comes
#             first.
# Returns: a ggplot object.
# Reads the object tl from the enclosing environment.
make_comparison_plot <- function(language = 'en'){

  if (language == 'en') {
    title <- 'Tweets containing the following words'
    subtitle <- 'Terror (terrorismo, terrorista, terrorisme) vs. Rebellion (rebellion, rebeldes, rebelar)'
    caption <- 'Data extracted/processed and chart created on 5 December 2018. Lines smoothed via local regression. Joe Brew. @joethebrew.'
    x <- 'Month'
    y <- 'Tweets'
  } else {
    title <- 'Tuits que contenen les paraules següents'
    subtitle <- 'Terrorismo   (terrorismo, terrorista, terrorisme, etc.)   vs.\nRebelión        (rebeldes, rebelar, rebel·lió, etc.)'
    caption <- 'Dades descarregades/processades i gràfic creat el 5 de desembre 2018. Línies suavitzades a través de regressió local. Joe Brew. @joethebrew.'
    x <- 'Mes'
    y <- 'Tuits'
  }

  # Accounts shown in the chart
  accounts <- c('albert_rivera', 'carrizosacarlos', 'ciutadanscs',
                'inesarrimadas', 'ciudadanoscs', 'societatcc',
                'albiol_xg', 'alevysoler', 'miqueliceta', 'pablocasado_',
                'ppcatalunya', 'ppopular', 'psoe', 'sanchezcatejon',
                'socialistes_cat')

  # Monthly counts per account; only the two plotted counts are computed
  other <- tl %>%
    filter(username %in% accounts) %>%
    mutate(person = paste0('@', username)) %>%
    mutate(year_month = date_truncate(date, level = 'month')) %>%
    group_by(type, date = year_month,
             person) %>%
    summarise(n_terror = length(which(terror)),
              n_rebelion = length(which(rebelion))) %>%
    ungroup() %>%
    dplyr::select(date, type, person, n_terror, n_rebelion) %>%
    filter(date >= '2017-07-01', date <= '2018-11-30')

  # Long format: one row per account, month and search string
  pd <- other %>% gather(key, value, n_terror:n_rebelion)

  # Facet titles for each count column
  right <- tibble(key = c('n_terror',
                          'n_rebelion'),
                  new_key = c('Terrorismo', 'Rebelión'))
  if (language == 'ca') {
    right$new_key <- factor(right$new_key,
                            levels = c('Terrorismo', 'Rebelión'))
  } else if (language == 'en') {
    right$new_key <- c('Terrorism', 'Rebellion')
    right$new_key <- factor(right$new_key,
                            levels = c('Terrorism', 'Rebellion'))
  }

  # Keep only relevant dates
  pd <- pd %>%
    filter(
      (grepl('error', key) & date >= '2017-07-01' & date <= '2018-01-01') |
        (grepl('ebel', key) & date >= '2017-08-01' & date <= '2018-11-30')
    )

  pd <- left_join(pd, right, by = 'key') %>%
    dplyr::select(-key) %>%
    dplyr::rename(key = new_key)

  g <- ggplot(data = pd,
              aes(x = date,
                  y = value,
                  color = person,
                  group = person)) +
    geom_line(alpha = 0.6, size = 0.8) +
    geom_point(alpha = 0.8) +
    facet_wrap(~key,
               scales = 'free') +
    theme_vilaweb() +
    # Smoothed trend per account on top of the raw monthly series
    geom_line(stat = 'smooth', method = 'auto',
              formula = y ~ poly(x, 1),
              size = 0.8, alpha = 0.8) +
    labs(title = title,
         subtitle = subtitle,
         x = x,
         y = y,
         caption = caption) +
    scale_x_date(labels = function(x){format(x, '%b\n%Y')}) +
    theme(plot.caption = element_text(size = 10),
          strip.text = element_text(size = 18),
          legend.text = element_text(size = 11),
          axis.text.x = element_text(size = 14),
          plot.subtitle = element_text(size = 20),
          plot.title = element_text(size = 24)) +
    scale_color_manual(name = '',
                       values = databrew::make_colors(n = length(unique(pd$person)))) +
    guides(color = guide_legend(ncol = 5)) +
    theme(legend.position = 'bottom') +
    # Same fixed y range for both facets
    ylim(-1, 35)

  return(g)
}

make_comparison_plot('ca')
# NOTE(review): writes to a hard-coded location in the user's home directory
ggsave('~/Desktop/violence.png')

Raw data

The raw data (number of tweets of each type for each account for each month) is viewable here

# Make sure the output folder exists before writing into it
if (!dir.exists('data_outputs')) dir.create('data_outputs')

# Export the per-account counts table produced by make_people_plot
x <- make_people_plot(return_table = TRUE)
write_csv(x, file.path('data_outputs', 'raw_data.csv'))

joebrew/vilaweb documentation built on Sept. 11, 2020, 3:42 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

joebrew/vilaweb
Reproducible data analysis for www.vilaweb.cat

In joebrew/vilaweb: Reproducible data analysis for www.vilaweb.cat

Executive summary

Overview

The hypothesis

If violence took place, then politicians will have talked about it.

The data

Acquiring data

The methods

Filtering data

Aggregation and counting

The results

Tweets from politicians and parties

Tweets about violence from newspapers

Other violence related words

Comparison with actual violent event

Raw data

R Package Documentation

Browse R Packages

We want your feedback!

joebrew/vilaweb Reproducible data analysis for www.vilaweb.cat

In joebrew/vilaweb: Reproducible data analysis for www.vilaweb.cat

Executive summary

Overview

The hypothesis

If violence took place, then politicians will have talked about it.

The data

Acquiring data

The methods

Filtering data

Aggregation and counting

The results

Tweets from politicians and parties

Tweets about violence from newspapers

Other violence related words

Comparison with actual violent event

Raw data

R Package Documentation

Browse R Packages

We want your feedback!

joebrew/vilaweb
Reproducible data analysis for www.vilaweb.cat