library("rvest")
if (Sys.info()[["user"]] == "jcrodriguez") {
setwd("~/mytmp/emoji_ds/")
}
emojipedia_url <- "https://emojipedia.org/"
## download uncategorized emojis
uncateg_url <- paste0(emojipedia_url, "unicode-12.0")
uncateg_emos <- read_html(uncateg_url) %>%
html_node(".content") %>%
html_children()
uncateg_emos <- uncateg_emos[[1]] %>%
html_children()
uncateg_emos <- uncateg_emos[[3]] %>%
html_children()
uncateg_emo_urls <- unlist(lapply(uncateg_emos, function(act_emo) {
act_emo %>%
html_children() %>%
html_attr("href")
}))
uncateg_save_dir <- "uncategorized"
dir.create(uncateg_save_dir, showWarnings = FALSE)
invisible(lapply(uncateg_emo_urls, function(act_url) {
act_emo_url <- paste0(emojipedia_url, act_url)
act_emo_vers <- read_html(act_emo_url) %>%
html_node("section.vendor-list") %>%
html_nodes("div.vendor-container.vendor-rollout-target")
lapply(act_emo_vers, function(act_emo) {
vers <- act_emo %>%
html_node("div.vendor-info") %>%
html_text() %>%
trimws()
ver_save_dir <- paste0(uncateg_save_dir, "/", gsub(" ", "_", vers))
dir.create(ver_save_dir, showWarnings = FALSE)
img <- act_emo %>%
html_node("div.vendor-image") %>%
html_node("img") %>%
html_attr("src")
save_file <- paste0(
ver_save_dir, "/", gsub("/", "", act_url), ".",
gsub(".*\\.", "", basename(img))
)
if (!file.exists(save_file)) {
download.file(img, save_file, quiet = TRUE)
}
})
}))
## get emojis categories
categories <- read_html(emojipedia_url) %>%
html_node(".block") %>%
html_children()
categories <- categories[[2]] %>%
html_children()
categories <- do.call(rbind, lapply(categories, function(act_cat) {
categ_node <- act_cat %>% html_children()
c(
html_text(categ_node),
html_attr(categ_node, "href")
)
}))
## download each category emojis
# get each emoji url
categ_emo_urls <- lapply(seq_len(nrow(categories)), function(i) {
act_categ <- categories[i, ]
act_categ_url <- paste0(emojipedia_url, act_categ[[2]])
act_categ_emos <- read_html(act_categ_url) %>%
html_node(".emoji-list") %>%
html_children()
unlist(lapply(act_categ_emos, function(act_emo) {
act_emo %>%
html_children() %>%
html_attr("href")
}))
})
my_vers <- dir("uncategorized/")
invisible(lapply(seq_along(categ_emo_urls), function(i) {
categ_save_dir <- gsub("/", "", categories[i, 2])
dir.create(categ_save_dir, showWarnings = FALSE)
act_urls <- categ_emo_urls[[i]]
print(c(categ_save_dir, length(act_urls)))
lapply(act_urls, function(act_url) {
act_emo_url <- paste0(emojipedia_url, act_url)
act_emo_vers <- read_html(act_emo_url) %>%
html_node("section.vendor-list") %>%
html_nodes("div.vendor-container.vendor-rollout-target")
lapply(act_emo_vers, function(act_emo) {
vers <- act_emo %>%
html_node("div.vendor-info") %>%
html_text() %>%
trimws()
if (!vers %in% my_vers) {
return()
}
ver_save_dir <- paste0(categ_save_dir, "/", gsub(" ", "_", vers))
dir.create(ver_save_dir, showWarnings = FALSE)
img <- act_emo %>%
html_node("div.vendor-image") %>%
html_node("img") %>%
html_attr("src")
save_file <- paste0(
ver_save_dir, "/", gsub("/", "", act_url), ".",
gsub(".*\\.", "", basename(img))
)
if (!file.exists(save_file)) {
download.file(img, save_file, quiet = TRUE)
}
})
})
}))
## explore downloaded data
library("dplyr")
n_emojis <- do.call(rbind, lapply(dir(), function(act_categ) {
act_categ_vers <- dir(act_categ)
data.frame(
Version = act_categ_vers,
Category = act_categ,
Emojis = sapply(act_categ_vers, function(act_vers) {
length(dir(paste0(act_categ, "/", act_vers)))
})
)
}))
n_emojis <- n_emojis %>% filter(Category != "uncategorized")
n_emojis %>%
group_by(Version) %>%
summarise(nemo = sum(Emojis)) %>%
arrange(nemo)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.