# Global knitr chunk options for this document: collapse source and output
# into a single block, do not show warnings, and prefix output with "#>".
knitr::opts_chunk$set(
  comment = "#>",
  warning = FALSE,
  collapse = TRUE
)
The ISMB/ECCB 2019 conference was held from July 21 to July 25, 2019, in Basel, Switzerland.
Twitter data for the conference days (July 21 - July 25, 2019) were collected and cleaned on the evening of July 25th, after the last keynote.
library(rtweet)
library(dplyr)

# Search a single hashtag with the settings shared by every query below:
# retweets are excluded and `retryonratelimit = TRUE` is passed to rtweet.
search_tag <- function(tag) {
  search_tweets(tag, include_rts = FALSE, retryonratelimit = TRUE)
}

# One query per hashtag variant used for the conference.
all_ismb <- search_tag("#ISMBECCB")
all_ismb_one <- search_tag("#ismbeccb")
all_ismb_two <- search_tag("#ISMBECCB2019")
all_ismb_three <- search_tag("#ISMBECCB19")
all_ismb_four <- search_tag("#ISMB2019")

# Stack all result sets and drop rows returned by more than one query.
all_ismb <- unique(
  rbind(
    all_ismb,
    all_ismb_one,
    all_ismb_two,
    all_ismb_three,
    all_ismb_four
  )
)

# Persist the combined table so the analysis can be re-run offline.
saveRDS(all_ismb, file = "all_ismb_lastday_allincluded.rds")
I used only the following columns from the data above:

- `created_at`: the time at which the tweet was generated
- `screen_name`: the Twitter handle name
- `text`: the actual tweet

library(ISMB2019)
data(ismb_dat2019)
# Load the combined tweet table written by the collection step.
ismb_dat <- readRDS("all_ismb_lastday_allincluded.rds")

# Keep only the three columns used in the analysis: timestamp, handle, text.
ismb_dat_clean <- dplyr::select(ismb_dat, created_at, screen_name, text)
suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(library(extrafont))
suppressPackageStartupMessages(library(lubridate))
suppressMessages(loadfonts())

## time function
# Return the time-of-day part of `time` as a lubridate Period.
#
# `time` is either a date-time (POSIXct/POSIXlt) or a character vector of
# the form "<date> <HH:MM:SS>".
#
# Date-times are formatted explicitly with "%H:%M:%S" instead of being split
# on a space after as.character(): as.character() omits the time part of an
# exact-midnight date-time, which leaves nothing to extract for that value.
# Character input keeps the original split-on-space behaviour.
get_time <- function(time = now()) {
  if (inherits(time, "POSIXt")) {
    clock <- format(time, "%H:%M:%S")
  } else {
    clock <- time %>%
      str_split(" ") %>%
      map_chr(2)
  }
  hms(clock)
}

# Add the conference day label ("July <day>") and the time of day.
ismb_dat2019_2 <- ismb_dat2019 %>%
  dplyr::mutate(
    conf_day = lubridate::day(created_at),
    conf_day = paste0("July ", conf_day),
    conf_time = get_time(created_at)
  )

# frequency of tweets -----------------------------------------------------

## frequency of tweets along 24hr of each day
## binned by per-hour
#
# separate() splits the printed Period (e.g. "13H 5M 2S") into pieces. When
# the hour is zero the Period prints without an "H" piece, so the first
# piece is minutes or seconds; those are mapped to "0H".
# The mapping is done BEFORE counting so that all tweets in hour 0 of a day
# are aggregated into one row instead of one row per distinct first piece.
conf_days_freq <- ismb_dat2019_2 %>%
  dplyr::select(conf_day, conf_time) %>%
  tidyr::separate(conf_time, into = c("Hr", "Mn", "Sc")) %>%
  ## replace M with 0H
  dplyr::mutate(
    Hr_cl = dplyr::case_when(
      grepl("H", Hr) ~ Hr,
      !grepl("H", Hr) ~ "0H"
    )
  ) %>%
  dplyr::count(conf_day, Hr_cl) %>%
  dplyr::filter(grepl("21|22|23|24|25", conf_day))

# Order the hour labels numerically ("2H" before "10H") for the x axis.
hr_order <- conf_days_freq$Hr_cl %>%
  gtools::mixedsort() %>%
  unique()

conf_days_freq$Hr_cl <- factor(conf_days_freq$Hr_cl, levels = hr_order)

# One panel per conference day, one line per panel.
# NOTE(review): recent ggplot2 releases prefer `linewidth` over `size` for
# lines; `size` is kept here so the code runs on older versions too.
ggplot(conf_days_freq, aes(Hr_cl, n, color = conf_day)) +
  geom_line(group = 1, size = 1.2) +
  theme_minimal(base_size = 18) +
  scale_y_continuous(breaks = c(0, 75, 150), labels = c(0, 75, 150)) +
  scale_x_discrete(
    breaks = c("0H", "2H", "4H", "6H", "8H", "10H",
               "12H", "14H", "16H", "18H", "20H", "22H"),
    labels = c("0H", "2H", "4H", "6H", "8H", "10H",
               "12H", "14H", "16H", "18H", "20H", "22H")
  ) +
  scale_color_manual(values = c("blue", "red", "green", "black", "purple")) +
  facet_grid(conf_day ~ ., scales = "free_x") +
  ylab("# Tweets") +
  xlab("Hour of the day") +
  labs(caption = '@nerd_yie / itsvenu.github.io') +
  theme(
    panel.grid.minor = element_blank(),
    legend.position = "none",
    axis.text = element_text(family = "Seravek", color = "black"),
    axis.title = element_text(family = "Seravek", color = "black"),
    strip.text.y = element_text(angle = 0, family = "Seravek", color = "red"),
    plot.caption = element_text(color = "#EC0108", family = "Seravek"),
    plot.title = element_text(hjust = 0.5, family = "Seravek",
                              color = "#EC0108")
  ) +
  ggtitle("Frequency of tweets")
Identifying the trends of topics in talks
# Count the tweets in `ismb_dat2019` whose text matches `pattern`
# (a case-insensitive regular expression).
count_topic_tweets <- function(pattern) {
  sum(grepl(pattern, ismb_dat2019$text, ignore.case = TRUE))
}

ml_count <- count_topic_tweets(
  "machine-learning|machine learning|#Machinelearning"
)
dl_count <- count_topic_tweets(
  "deep-learning|deep learning|#deeplearning"
)
# "single[- ]?cell" covers "singlecell", "single cell" and the hyphenated
# "single-cell", which the earlier separate patterns did not match.
sc_count <- count_topic_tweets("single[- ]?cell|scRNA|scATAC")

topic_freq <- data.frame(
  Topic = c("Machine Learning", "Deep Learning", "Single-cell*"),
  Freq = c(ml_count, dl_count, sc_count)
)

# Horizontal bar chart of the number of tweets per topic.
ggplot(topic_freq, aes(Topic, Freq, fill = Topic)) +
  geom_bar(stat = "identity") +
  xlab("Topic") +
  ylab("# Tweets") +
  coord_flip() +
  theme_minimal(base_size = 18) +
  scale_fill_manual(values = c("gray", "dimgray", "black")) +
  labs(caption = '@nerd_yie / itsvenu.github.io') +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_blank(),
    legend.position = "none",
    axis.text = element_text(family = "Seravek", color = "black"),
    axis.title = element_text(family = "Seravek", color = "black"),
    strip.text.y = element_text(angle = 0, family = "Seravek", color = "red"),
    plot.caption = element_text(color = "#EC0108", family = "Seravek"),
    plot.title = element_text(hjust = 0.5, family = "Seravek",
                              color = "#EC0108")
  ) +
  ggtitle("Frequency of topics")
# Phrases that mark a tweet as a job / open-position announcement.
job_phrases <- c(
  "open position",
  "postdoctoral position",
  "open postdoc",
  "looking for postdoc",
  "looking for #postdoc",
  "#Job"
)

# A single alternation regex: a tweet is kept if it contains any phrase.
job_regex <- paste(job_phrases, collapse = "|")

# Interactive table of handle + text for the matching tweets.
ismb_dat2019 %>%
  dplyr::select(screen_name, text) %>%
  dplyr::filter(grepl(job_regex, text, ignore.case = TRUE)) %>%
  DT::datatable()
# Interactive table of tweets that mention GitHub or bioRxiv
# (case-insensitive). The bioRxiv pattern is spelled "biorxiv"; the earlier
# "biorxv" spelling could not match the word.
ismb_dat2019 %>%
  dplyr::filter(grepl("github|biorxiv", text, ignore.case = TRUE)) %>%
  DT::datatable()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.