# Global knitr chunk options for this vignette.
knitr::opts_chunk$set(
  collapse = TRUE,   # merge source and output into a single block
  warning  = FALSE,  # do not render warnings in the document
  comment  = "#>"    # prefix for printed output lines
)

The ISMB/ECCB 2019 conference was held from July 21 to July 25, 2019, in Basel, Switzerland.

Data collection

Twitter data for the conference days (July 21 - July 25, 2019) were collected and cleaned on the evening of July 25, after the last keynote.

library(rtweet)
library(dplyr)

# Query every hashtag variant used for the conference. Retweets are excluded.
# NOTE(review): no `n` is passed, so each call is capped at rtweet's default
# number of results -- confirm that this is intended.
all_ismb <- search_tweets("#ISMBECCB", include_rts = FALSE, retryonratelimit = TRUE)
all_ismb_one <- search_tweets("#ismbeccb", include_rts = FALSE, retryonratelimit = TRUE)
all_ismb_two <- search_tweets("#ISMBECCB2019", include_rts = FALSE, retryonratelimit = TRUE)
all_ismb_three <- search_tweets("#ISMBECCB19", include_rts = FALSE, retryonratelimit = TRUE)
all_ismb_four <- search_tweets("#ISMB2019", include_rts = FALSE, retryonratelimit = TRUE)

all_ismb <- rbind(all_ismb, all_ismb_one, all_ismb_two, all_ismb_three, all_ismb_four)

# De-duplicate on the tweet id instead of on whole rows: a tweet carrying
# several of the hashtags is returned by several queries, and its like/retweet
# counts can change between calls, so `unique()` would keep both copies.
# The id column is assumed to be `status_id` (older rtweet) or `id_str`
# (newer rtweet) -- TODO confirm against the installed version.
id_col <- intersect(c("status_id", "id_str"), names(all_ismb))
if (length(id_col) > 0) {
  all_ismb <- all_ismb[!duplicated(all_ismb[[id_col[1]]]), ]
} else {
  # No known id column: fall back to whole-row de-duplication.
  all_ismb <- unique(all_ismb)
}

saveRDS(all_ismb, file = "all_ismb_lastday_allincluded.rds")

Data cleaning

Only the following columns from the data above are used:

library(ISMB2019)
# Load the dataset shipped with the ISMB2019 package.
data("ismb_dat2019")

# Read back the raw collection saved in the data-collection step.
ismb_dat <- readRDS("all_ismb_lastday_allincluded.rds")

# Keep only the timestamp, the author handle and the tweet text.
ismb_dat_clean <- dplyr::select(ismb_dat, created_at, screen_name, text)

Frequency of tweets per day

suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(library(extrafont))
suppressPackageStartupMessages(library(lubridate))

# Register the fonts known to extrafont with the graphics devices, quietly.
suppressMessages(extrafont::loadfonts())

## time function
## Extract the time-of-day part of one or more timestamps as a lubridate
## Period (hours, minutes, seconds).
##
## time: a date-time vector (POSIXct/POSIXlt) or character strings of the form
##       "<date> <HH:MM:SS>". Defaults to the current time.
## Returns a lubridate Period vector of the same length as `time`.
get_time <- function(time = now()) {
  if (inherits(time, "POSIXt")) {
    # Format explicitly instead of relying on the implicit string form of a
    # date-time: that form can omit the time part for midnight timestamps
    # (R >= 4.3), which made picking the second space-separated field fail.
    clock <- format(time, "%H:%M:%S")
  } else {
    # Character input: keep the original behaviour and take the second
    # space-separated field as the clock time.
    clock <- time %>%
      str_split(" ") %>%
      map_chr(2)
  }
  hms(clock)
}

# Annotate every tweet with its conference day label ("July <day>") and its
# time of day as returned by get_time().
ismb_dat2019_2 <- dplyr::mutate(
  ismb_dat2019,
  conf_day = paste0("July ", lubridate::day(created_at)),
  conf_time = get_time(created_at)
)

# frequency of tweets -----------------------------------------------------
## Number of tweets per conference day and per leading time component.
## `conf_time` is split on non-alphanumeric characters; the first piece is the
## hour label whenever the time has a non-zero hour.

conf_days_freq <- ismb_dat2019_2 %>%
  dplyr::select(conf_day, conf_time) %>%
  tidyr::separate(conf_time, into = c("Hr", "Mn", "Sc")) %>%
  # count() keeps only the grouping columns plus `n`
  dplyr::count(conf_day, Hr) %>%
  # keep the five conference days only
  dplyr::filter(grepl("21|22|23|24|25", conf_day))

## Tweets posted during hour 0 have no hour component, so their first field is
## a minute or second label (e.g. "23M", "5S") instead of "<n>H". Relabel all
## of those as "0H".
## The counts were computed per original label, so after relabelling there can
## be several "0H" rows per day; sum them so that each (day, hour) pair has
## exactly one row.
conf_days_freq <- conf_days_freq %>%
  dplyr::mutate(Hr_cl = dplyr::case_when(grepl("H", Hr) ~ Hr,
                                         !grepl("H", Hr) ~ "0H")) %>%
  dplyr::group_by(conf_day, Hr_cl) %>%
  dplyr::summarise(n = sum(n)) %>%
  dplyr::ungroup()

# Order the hour labels numerically ("2H" before "10H") and use that order as
# the factor levels, so the x axis is chronological.
hr_order <- unique(gtools::mixedsort(conf_days_freq$Hr_cl))
conf_days_freq$Hr_cl <- factor(conf_days_freq$Hr_cl, levels = hr_order)


# Tweets per hour, one facet row (and one colour) per conference day.
ggplot(conf_days_freq, aes(x = Hr_cl, y = n, color = conf_day)) +
  # group = 1 joins the discrete hour levels into one line per facet
  geom_line(group = 1, size = 1.2) +
  facet_grid(conf_day ~ ., scales = "free_x") +
  # label every second hour only
  scale_x_discrete(
    breaks = c("0H", "2H", "4H", "6H", "8H", "10H", "12H", "14H", "16H", "18H", "20H", "22H"),
    labels = c("0H", "2H", "4H", "6H", "8H", "10H", "12H", "14H", "16H", "18H", "20H", "22H")
  ) +
  scale_y_continuous(
    breaks = c(0, 75, 150),
    labels = c(0, 75, 150)
  ) +
  scale_color_manual(values = c("blue", "red", "green", "black", "purple")) +
  labs(
    x = "Hour of the day",
    y = "# Tweets",
    title = "Frequency of tweets",
    caption = "@nerd_yie / itsvenu.github.io"
  ) +
  theme_minimal(base_size = 18) +
  theme(
    panel.grid.minor = element_blank(),
    legend.position = "none",
    axis.text = element_text(family = "Seravek", color = "black"),
    axis.title = element_text(family = "Seravek", color = "black"),
    strip.text.y = element_text(angle = 0, family = "Seravek", color = "red"),
    plot.caption = element_text(color = "#EC0108", family = "Seravek"),
    plot.title = element_text(hjust = 0.5, family = "Seravek", color = "#EC0108")
  )

Tweets that contain ML/DL/Single-cell

Identifying the trends of topics in talks

# Number of tweets mentioning machine learning (hyphenated, spaced or as a
# hashtag), matched case-insensitively.
ml_count <- with(
  ismb_dat2019,
  sum(
    grepl("machine-learning", text, ignore.case = TRUE) |
      grepl("machine learning", text, ignore.case = TRUE) |
      grepl("#Machinelearning", text, ignore.case = TRUE)
  )
)

# Number of tweets mentioning deep learning (hyphenated, spaced or as a
# hashtag), matched case-insensitively.
dl_count <- nrow(
  dplyr::filter(
    ismb_dat2019,
    grepl("deep-learning", text, ignore.case = TRUE) |
      grepl("deep learning", text, ignore.case = TRUE) |
      grepl("#deeplearning", text, ignore.case = TRUE)
  )
)


# Number of tweets mentioning single-cell topics, matched case-insensitively.
# "single[- ]?cell" covers "singlecell", "single cell" and the hyphenated
# "single-cell", which the earlier patterns missed.
sc_count <- ismb_dat2019 %>%
  dplyr::filter(grepl("single[- ]?cell", text, ignore.case = TRUE) |
                  grepl("scRNA", text, ignore.case = TRUE) |
                  grepl("scATAC", text, ignore.case = TRUE)) %>%
  nrow()

# One row per topic with its tweet count, in the order ML, DL, single-cell.
topic_freq <- data.frame(
  Topic = c("Machine Learning", "Deep Learning", "Single-cell*"),
  Freq = c(ml_count, dl_count, sc_count)
)

# Horizontal bar chart of the number of tweets per topic.
ggplot(topic_freq, aes(x = Topic, y = Freq, fill = Topic)) +
  # bar heights are the precomputed counts
  geom_col() +
  coord_flip() +
  scale_fill_manual(values = c("gray", "dimgray", "black")) +
  labs(
    x = "Topic",
    y = "# Tweets",
    title = "Frequency of topics",
    caption = "@nerd_yie / itsvenu.github.io"
  ) +
  theme_minimal(base_size = 18) +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_blank(),
    legend.position = "none",
    axis.text = element_text(family = "Seravek", color = "black"),
    axis.title = element_text(family = "Seravek", color = "black"),
    strip.text.y = element_text(angle = 0, family = "Seravek", color = "red"),
    plot.caption = element_text(color = "#EC0108", family = "Seravek"),
    plot.title = element_text(hjust = 0.5, family = "Seravek", color = "#EC0108")
  )

Open positions advertised during the conference

# Interactive table of tweets that look like job or postdoc advertisements
# (case-insensitive phrase matching), showing the author and the tweet text.
ismb_dat2019 %>%
  dplyr::filter(
    grepl("open position", text, ignore.case = TRUE) |
      grepl("postdoctoral position", text, ignore.case = TRUE) |
      grepl("open postdoc", text, ignore.case = TRUE) |
      grepl("looking for postdoc", text, ignore.case = TRUE) |
      grepl("looking for #postdoc", text, ignore.case = TRUE) |
      grepl("#Job", text, ignore.case = TRUE)
  ) %>%
  dplyr::select(screen_name, text) %>%
  DT::datatable()

Potential software tool links and bioRxiv papers

# Interactive table of tweets that mention GitHub or bioRxiv, matched
# case-insensitively. "biorxi?v" matches the correct spelling "biorxiv" and
# still accepts the misspelled "biorxv" that the earlier pattern looked for.
ismb_dat2019 %>%
  dplyr::filter(grepl("github", text, ignore.case = TRUE) |
                  grepl("biorxi?v", text, ignore.case = TRUE)) %>%
  DT::datatable()

END



itsvenu/ISMB2019 documentation built on Nov. 4, 2019, 2:13 p.m.