# Default knitr chunk options applied to every code chunk in this vignette.
knitr::opts_chunk$set(
  # Output formatting
  collapse = TRUE,
  comment = "#>",
  echo = TRUE,
  # Figure location and sizing
  fig.path = "figure/",
  fig.width = 8,
  fig.height = 4,
  out.width = "100%"
)
Emojis are increasingly important in social media communication. This vignette demonstrates how to analyze emoji usage patterns in YouTube comments using tuber's built-in emoji functions.
library(tuber) library(dplyr) library(ggplot2)
# Authenticate first; substitute your own OAuth app id and secret for the
# placeholder strings.
yt_oauth("your_app_id", "your_app_secret")

# Download the comments of the video to analyse (placeholder video id).
comments <- get_all_comments(
  video_id = "your_video_id",
  max_results = 500
)
# Flag each comment for emoji presence and count its emojis.
comments <- mutate(
  comments,
  has_emoji = has_emoji(textDisplay),
  emoji_count = count_emojis(textDisplay)
)

# Distribution of emojis per comment.
summary(comments$emoji_count)

# Share of comments containing at least one emoji, as a percentage.
emoji_rate <- 100 * mean(comments$has_emoji, na.rm = TRUE)
cat("Comments with emojis:", round(emoji_rate, 1), "%\n")
# Histogram of emojis per comment, restricted to comments that contain at
# least one emoji so the zero bar does not dominate the plot.
ggplot(
  data = filter(comments, emoji_count > 0),
  mapping = aes(x = emoji_count)
) +
  geom_histogram(binwidth = 1, fill = "steelblue", color = "white") +
  labs(
    title = "Distribution of Emojis per Comment",
    x = "Number of Emojis",
    y = "Number of Comments"
  ) +
  theme_minimal()
# Flatten the per-comment emoji lists into one vector of every emoji used.
all_emojis <- unlist(extract_emojis(comments$textDisplay))

# Tabulate occurrences into a two-column data frame, most frequent first.
emoji_freq <- setNames(
  as.data.frame(table(all_emojis), stringsAsFactors = FALSE),
  c("emoji", "count")
)
emoji_freq <- emoji_freq[order(-emoji_freq$count), ]
head(emoji_freq, 15)

# Horizontal bar chart of the ten most frequent emojis; reorder() sorts the
# bars by count rather than alphabetically.
ggplot(
  data = head(emoji_freq, 10),
  mapping = aes(x = reorder(emoji, count), y = count)
) +
  geom_col(fill = "steelblue") +
  coord_flip() +
  labs(
    title = "Top 10 Most Used Emojis",
    x = "Emoji",
    y = "Count"
  ) +
  theme_minimal()
# Track how emoji usage changes over time, one row per calendar day.
# Both emoji columns are derived here so the chunk runs on its own rather
# than depending on has_emoji having been added by an earlier chunk.
comments <- comments |>
  mutate(
    date = as.Date(publishedAt),
    has_emoji = has_emoji(textDisplay),
    emoji_count = count_emojis(textDisplay)
  )

daily_emoji <- comments |>
  group_by(date) |>
  summarise(
    total_comments = n(),
    comments_with_emoji = sum(has_emoji, na.rm = TRUE),
    total_emojis = sum(emoji_count, na.rm = TRUE),
    # The two ratios below reuse the summaries computed just above.
    emoji_rate = comments_with_emoji / total_comments * 100,
    avg_emojis = total_emojis / total_comments
  )

# Daily emoji rate with a loess trend line and its confidence band.
ggplot(daily_emoji, aes(x = date, y = emoji_rate)) +
  geom_line(color = "steelblue") +
  geom_smooth(method = "loess", se = TRUE, alpha = 0.2) +
  labs(
    title = "Emoji Usage Rate Over Time",
    x = "Date",
    y = "% of Comments with Emojis"
  ) +
  theme_minimal()
Emojis can indicate sentiment. Here's a simple categorization approach:
# Hand-picked emoji code points treated as positive signals.
positive_emojis <- c(
  "\U0001F600", "\U0001F601", "\U0001F602", "\U0001F603", "\U0001F604",
  "\U0001F605", "\U0001F606", "\U0001F60A", "\U0001F60D", "\U0001F618",
  "\U0001F44D", "\U0001F44F", "\U00002764", "\U0001F389", "\U0001F38A"
)

# Hand-picked emoji code points treated as negative signals.
negative_emojis <- c(
  "\U0001F620", "\U0001F621", "\U0001F622", "\U0001F623", "\U0001F624",
  "\U0001F625", "\U0001F62D", "\U0001F44E", "\U0001F4A9", "\U0001F61E"
)

# Classify each comment by comparing its positive and negative emoji counts.
# vapply() pins the per-comment result to a single integer, so the count
# columns stay integer vectors even when there are no comments at all
# (sapply() would return an empty list in that case).
comments <- comments |>
  mutate(
    emojis = extract_emojis(textDisplay),
    pos_emoji = vapply(
      emojis,
      function(e) sum(e %in% positive_emojis),
      integer(1),
      USE.NAMES = FALSE
    ),
    neg_emoji = vapply(
      emojis,
      function(e) sum(e %in% negative_emojis),
      integer(1),
      USE.NAMES = FALSE
    ),
    emoji_sentiment = case_when(
      pos_emoji > neg_emoji ~ "positive",
      neg_emoji > pos_emoji ~ "negative",
      # Tied at zero: no listed emoji appeared in the comment.
      pos_emoji == 0 & neg_emoji == 0 ~ "none",
      # Tied at a non-zero count: mixed signals.
      TRUE ~ "neutral"
    )
  )

table(comments$emoji_sentiment)
# Compare like counts between comments with and without emojis.
engagement_summary <- summarise(
  group_by(comments, has_emoji),
  n = n(),
  mean_likes = mean(likeCount, na.rm = TRUE),
  median_likes = median(likeCount, na.rm = TRUE)
)
print(engagement_summary)

# Boxplot on a log scale; 1 is added to every like count so that comments
# with zero likes remain plottable after the log transform.
ggplot(
  data = comments,
  mapping = aes(x = has_emoji, y = likeCount + 1)
) +
  geom_boxplot(fill = "steelblue", alpha = 0.7) +
  scale_y_log10() +
  labs(
    title = "Like Counts: Emoji vs Non-Emoji Comments",
    x = "Contains Emoji",
    y = "Likes (log scale)"
  ) +
  theme_minimal()
# Compare emoji usage across several videos (placeholder ids).
video_ids <- c("video_id_1", "video_id_2", "video_id_3")

# Fetch each video's comments and tag every row with its source video.
all_comments <- lapply(video_ids, function(vid) {
  video_comments <- get_all_comments(video_id = vid, max_results = 200)
  # rep() sizes the tag to the number of rows, so a video that returns no
  # comments yields a zero-row result instead of a replacement-length error.
  video_comments$video_id <- rep(vid, nrow(video_comments))
  video_comments
})
all_comments <- bind_rows(all_comments)

# Per-video totals, percentage of comments with emojis, and mean emojis per
# comment. Missing counts are skipped, matching the other summaries in this
# vignette.
video_emoji_stats <- all_comments |>
  mutate(emoji_count = count_emojis(textDisplay)) |>
  group_by(video_id) |>
  summarise(
    total_comments = n(),
    emoji_rate = mean(emoji_count > 0, na.rm = TRUE) * 100,
    avg_emojis = mean(emoji_count, na.rm = TRUE)
  )
print(video_emoji_stats)
For text analysis that should exclude emojis:
# Build an emoji-free copy of each comment: strip the emojis, collapse the
# runs of whitespace left behind into single spaces, then trim the ends.
comments <- mutate(
  comments,
  clean_text = remove_emojis(textDisplay) |>
    gsub(pattern = "\\s+", replacement = " ") |>
    trimws()
)

# Preview the cleaned text of the first few comments that contained emojis.
comments$clean_text[comments$has_emoji] |>
  head(3)
For large datasets, estimate the emoji rate from a random sample of comments instead of processing every comment:
# Draw a random sample of at most 1000 comments (all of them if fewer exist).
# Call set.seed() beforehand if the sample needs to be reproducible.
sample_size <- min(1000, nrow(comments))
comments_sample <- comments[sample(nrow(comments), sample_size), ]

# Count emojis on the sample only.
comments_sample <- comments_sample |>
  mutate(emoji_count = count_emojis(textDisplay))

# Estimated percentage of comments containing at least one emoji; missing
# counts are skipped, as in the full-data rate calculation.
emoji_rate_estimate <- mean(comments_sample$emoji_count > 0, na.rm = TRUE) * 100
Key functions used in this analysis:
| Function | Purpose |
|----------|---------|
| has_emoji() | Check if text contains emojis |
| count_emojis() | Count emojis in text |
| extract_emojis() | Get list of emojis from text |
| remove_emojis() | Strip emojis from text |
| replace_emojis() | Replace emojis with custom text |
These functions work directly on character vectors, making them easy to use with dplyr::mutate() and other tidyverse workflows.
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.