# From joebrew/vilaweb: Reproducible data analysis for www.vilaweb.cat

# Default chunk options for the knitted report
library(knitr)
opts_chunk$set(
  # what gets shown for each chunk
  echo = FALSE, comment = NA,
  # hide warnings and messages; keep knitting when a chunk errors
  warning = FALSE, message = FALSE, error = TRUE,
  # no caching; figures are written under figures/
  cache = FALSE, fig.path = 'figures/'
)

# Libraries
library(vilaweb)
library(rtweet)
library(tidyverse)
library(databrew)
library(translateR)
library(sentimentr) # https://github.com/trinker/sentimentr
require(RPostgreSQL)
require(readr)  
require(DBI)

# Add time-of-day, calendar-day and hour columns to a timeline.
#
# tmls: data frame with a date-time column `created_at` (it has to be a
#   date-time class because cut() is applied to it).
# Returns `tmls` with three extra columns:
#   time_only - time of day in decimal hours (whole seconds only)
#   date      - calendar day, as Date
#   hour      - timestamp cut down to the hour, as POSIXct
improve_timeline <- function(tmls) {
  # Read the clock fields directly instead of slicing the character rendering
  # of the timestamp: this is vectorised, yields NA for NA timestamps, and does
  # not depend on how as.character() happens to print a date-time.
  clock <- as.POSIXlt(tmls$created_at)
  # floor() keeps whole seconds only, matching the "HH:MM:SS" rendering
  tmls$time_only <-
    (clock$hour * 3600 + clock$min * 60 + floor(clock$sec)) / 3600

  # With no timestamps there is no range for cut() to build breaks from, so
  # return correctly typed empty columns instead.
  if (nrow(tmls) == 0L) {
    tmls$date <- as.Date(character(0))
    tmls$hour <- as.POSIXct(character(0))
    return(tmls)
  }

  tmls$date <- as.Date(cut(tmls$created_at, 'day'))
  tmls$hour <- as.POSIXct(cut(tmls$created_at, 'hour'))
  tmls
}
# Twitter handles of the people of interest, stored lower-cased so they can be
# matched against lower-cased usernames
people <- c(
  'Santi_ABASCAL', 'Albert_Rivera', 'InesArrimadas',
  'sanchezcastejon', 'pablocasado_', 'KRLS',
  'QuimTorraiPla', 'perearagones', 'ALevySoler',
  'elsa_artadi', 'miqueliceta', 'Pablo_Iglesias_'
)
people <- tolower(people)

# Load the tweets table `tl`: use the local cache when present, otherwise read
# the whole `twitter` table from the PostgreSQL database and cache it.
if (file.exists('tl.RData')) {
  load('tl.RData')
} else {
  # Connect to the db
  pg <- DBI::dbDriver("PostgreSQL")
  con <- DBI::dbConnect(pg, dbname = "twitter")
  # `finally` closes the connection whether or not the query succeeds, so a
  # failed query does not leave it open
  tl <- tryCatch(
    DBI::dbGetQuery(con, "SELECT * FROM twitter"),
    finally = DBI::dbDisconnect(con)
  )
  save(tl, file = 'tl.RData')
}


# Flag texts that mention humiliation.
#
# x: character vector of texts.
# Returns a logical vector, TRUE where the lower-cased text contains
# "humilla" or "humilia".
find_humil <- function(x) {
  lowered <- tolower(x)
  grepl(pattern = 'humilla|humilia', x = lowered)
}
# Mark every tweet that mentions humiliation
tl$humil <- find_humil(tl$tweet)

# Plot data: share of each person's tweets that mention humiliation.
# Summarising per person directly gives exactly one row per person and a real
# 0 for people with no matching tweets; joining onto a person x humil grid
# instead duplicated every person and left NA percentages behind.
pd <- tl %>%
  # filter(tolower(username) %in% tolower(people)) %>%
  group_by(person = username) %>%
  summarise(n = sum(humil),            # tweets mentioning humiliation
            p = mean(humil) * 100) %>%  # ... as a % of that person's tweets
  ungroup() %>%
  arrange(desc(p))
# Fix the bar order to descending percentage
pd$person <- factor(pd$person, levels = unique(pd$person))
ggplot(data = pd,
       aes(x = person,
           y = p)) +
  geom_bar(stat = 'identity',
           aes(fill = person)) +
  theme_vilaweb() +
  theme(legend.position = 'none') +
  labs(x = '',
       y = '%',
       title = '% de tweets con la palabra "humillación"',
       subtitle = 'Últimos 3.200 tweets por cada persona',
       caption = 'Incluye "humillar", "humillado", "humiliar", "humiliate", etc. Datos del Twitter API. Gráfico @joethebrew.') +
  # scale_y_continuous(breaks = seq(0,1, 0.1)) +
  theme(axis.text.x = element_text(angle = 90))


# Monthly number of tweets mentioning humiliation, people of interest only
pd <- tl %>%
  filter(tolower(username) %in% tolower(people)) %>%
  filter(humil) %>%
  # Collapse each date to the first day of its month
  mutate(date = as.Date(format(date, '%Y-%m-01'))) %>%
  group_by(date, person = username) %>%
  tally %>%
  ungroup

# Complete grid: every person of interest x every observed month.
# The grid deliberately has no `humil` column: `pd` has none either, so a grid
# with both humil values matched every (person, month) count twice and the
# stacked bars showed double the real number of tweets.
left <- expand.grid(
  person = sort(unique(tl$username[tolower(tl$username) %in% tolower(people)])),
  date = sort(unique(pd$date)),
  stringsAsFactors = FALSE
)
pd <- left_join(left, pd, by = c('person', 'date')) %>%
  # months without a matching tweet count as zero rather than missing
  mutate(n = coalesce(n, 0L))

# Keep only relevant year
pd <- pd %>% filter(date >= as.Date('2018-01-01'))

ggplot(data = pd,# %>% filter(person %in% c('Albert_Rivera',
                                          # 'pablocasado_',
                                          # 'Santi_ABASCAL')),
       aes(x = date,
           y = n)) +
  geom_bar(stat = 'identity',
           aes(fill = person)) +
  facet_wrap(~person,
             ncol = 4) +
  theme_vilaweb() +
  labs(x = 'Fecha',
       y = 'Tweets (mensual)',
       title = 'Tweets con la palabra "humillación"*',
       subtitle = 'Últimos 3.200 tweets por cada persona',
       caption = 'Incluye "humillar", "humillado", "humiliar", "humiliate", etc. Datos del Twitter API. Gráfico @joethebrew.') +
  # scale_fill_manual(name = ' ',
  #                   values = c('darkorange',
  #                              'blue',
  #                              'green')) +
  theme(legend.position = 'none') +
  # one axis break per month present in the data
  scale_x_date(breaks = sort(unique(pd$date)),
               labels = spanish_date(new_line = FALSE)) +
  theme(axis.text.x = element_text(angle = 90,
                                   size = 6))
# ggsave('humiliate.png')