# Start compliance job
#
# This function attempts to start a compliance job with a given list of tweet status ids.
#
# return string ID of the compliance job. Save this to check the jobs status and download the results.
#
# param ids A character vector containing the status ids of the tweets that will be checked.
# param type Currently only "tweets" is allowed here.
# @param verbose If FALSE the function will not print any messages.
# @param bearer_token API bearer token.
#
start_job <- function(
ids,
type = "tweets",
verbose = TRUE,
bearer_token = get_bearer()
) {
# check arguments
if (!type %in% c("users", "tweets")) {
stop("Wrong type")
}
r <- httr::GET(
sprintf(
"https://api.twitter.com/2/compliance/jobs?type=%s",
type
),
httr::add_headers("Authorization" = paste0("Bearer ", bearer_token))
)
if ("in_progress" %in% (unlist(lapply(httr::content(r)$data, function(x) {
x[["status"]]
})))) {
compliance_job <-
httr::content(r)$data[[match("in_progress", (unlist(
lapply(httr::content(r)$data, function(x) {
x[["status"]]
})
)))]]
stop(
sprintf(
"Cannot start a new job, as there is an ongoing job with id `%s`",
compliance_job$id
)
)
}
if (!"created" %in% (unlist(lapply(httr::content(r)$data, function(x) {
x[["status"]]
})))) {
r <- httr::POST(
"https://api.twitter.com/2/compliance/jobs",
httr::add_headers(
"Authorization" = paste0("Bearer ", bearer_token),
"Content-Type" = "application/json"
),
body = sprintf('{ "type": "%s" }', type)
)
if (httr::status_code(r) != 200) {
stop("There was an error starting the compliance job")
} else {
compliance_job <- httr::content(r)$data
}
} else {
compliance_job <-
httr::content(r)$data[[match("created", (unlist(
lapply(httr::content(r)$data, function(x) {
x[["status"]]
})
)))]]
}
f <- tempfile()
con <- file(f)
writeLines(as.character(ids), f)
close(con)
r <- httr::PUT(
compliance_job$upload_url,
httr::add_headers("Content-Type" = "text/plain"),
body = httr::upload_file(f)
)
if (verbose) {
if (httr::status_code(r) != 200) {
stop("There was an error starting the compliance job")
} else {
usethis::ui_info(
"Job started with {usethis::ui_field('job ID')}: {usethis::ui_value(compliance_job$id)}"
)
expires_id <-
round(as.numeric(
difftime(
lubridate::fast_strptime(
compliance_job$download_expires_at,
"%Y-%m-%dT%H:%M:%OS%z"
),
lubridate::now(),
units = "hours"
)
), 0)
usethis::ui_line(
" {usethis::ui_field('Expires in')}: {usethis::ui_value(expires_id)} hours"
)
usethis::ui_line(
"You can check the job status with {usethis::ui_code(paste0('check_job(\"', compliance_job$id, '\")'))} and when it is ready download the data with {usethis::ui_code(paste0('download_job(\"', compliance_job$id, '\")'))}"
)
return(compliance_job$id)
}
} else {
return(compliance_job$id)
}
}
# Check status of a compliance job
#
# This function checks the status of a given compliance job.
#
# @return If `verbose` is set to FALSE, return the jobs download URL if ready or FALSE if still in progress.
#
# @param job_id A string containing the ID of the compliance job that will be checked.
# @param bearer_token API bearer token.
# @param verbose If FALSE, the function will not print any messages.
#
check_job <- function(job_id,
bearer_token = get_bearer(),
verbose = TRUE) {
if (typeof(job_id) == "list") {
job_tweets <- FALSE
job_users <- FALSE
if (!is.null(job_id$tweets)) {
job_tweets <- check_job(job_id$tweets, verbose = FALSE)
}
if (!is.null(job_id$users)) {
job_users <- check_job(job_id$users, verbose = FALSE)
}
return(list("tweets" = job_tweets, "users" = job_users))
} else {
r <- httr::GET(
sprintf(
"https://api.twitter.com/2/compliance/jobs/%s",
job_id
),
httr::add_headers(
"Authorization" = paste0("Bearer ", bearer_token),
"Content-Type" = "application/json"
)
)
job <- httr::content(r)$data
if (verbose) {
print_job(job)
} else {
if (job$status == "complete") {
return(job$download_url)
} else {
return(FALSE)
}
}
}
}
# Download compliance job
#
# This function attempts to download a compliance job if it is ready to download.
#
# @return dataframe A dataframe of all returned tweets.
#
# @param job_id A string containing the ID of the compliance job that will be downloaded.
# @param verbose If FALSE, the function will not print any messages.
#
download_job <- function(job_id,
verbose = FALSE) {
job_status <- check_job(job_id, verbose = FALSE)
if (job_status == FALSE) {
stop("The job is not complete yet.")
}
if (verbose) {
usethis::ui_info("Downloading results, this might take a while ...")
}
r <- httr::GET(job_status)
if (httr::status_code(r) != 200) {
stop("There was an error downloading the compliance job")
}
tweet_status <- httr::content(r, "text", encoding = "UTF-8")
tweet_status <- stringr::str_split(tweet_status, "\\n")[[1]]
tweet_status <- tweet_status[tweet_status != ""]
tweet_status <-
jsonlite::fromJSON(paste0("[", paste(tweet_status, collapse = ", "), "]"))
# return early if no items in response
if (length(tweet_status) == 0) {
if (verbose) {
usethis::ui_oops(
"All tweets/users are still online and publicly reachable, nothing to download."
)
}
return(NULL)
}
tweet_status <- tweet_status[!is.na(tweet_status["id"]), ]
tweet_status <- tweet_status[tweet_status["id"] != "", ]
rownames(tweet_status) <- NULL
if (nrow(tweet_status) > 0) {
tweet_status$redacted_at <-
lubridate::fast_strptime(tweet_status$redacted_at, "%Y-%m-%dT%H:%M:%OS%z")
tweet_status$redacted_at <-
as.POSIXct(tweet_status$redacted_at)
tweet_status$created_at <-
lubridate::fast_strptime(tweet_status$created_at, "%Y-%m-%dT%H:%M:%OS%z")
tweet_status$created_at <-
as.POSIXct(tweet_status$created_at)
return(tweet_status)
} else {
if (verbose) {
usethis::ui_oops(
"All tweets/users are still online and publicly reachable, nothing to download."
)
}
return(NULL)
}
}
print_job <- function(job) {
usethis::ui_line("{usethis::ui_field('Job ID')}: {usethis::ui_value(job$id)}")
usethis::ui_line(" {usethis::ui_field('Type')}: {usethis::ui_value(job$type)}")
created_at_minutes <-
round(as.numeric(
difftime(
lubridate::now(),
lubridate::fast_strptime(job$created_at, "%Y-%m-%dT%H:%M:%OS%z"),
units = "mins"
)
), 0)
created_at_hours <-
round(as.numeric(
difftime(
lubridate::now(),
lubridate::fast_strptime(job$created_at, "%Y-%m-%dT%H:%M:%OS%z"),
units = "hours"
)
), 0)
created_at_days <-
round(as.numeric(
difftime(
lubridate::now(),
lubridate::fast_strptime(job$created_at, "%Y-%m-%dT%H:%M:%OS%z"),
units = "days"
)
), 0)
if (created_at_minutes < 120) {
usethis::ui_line(
" {usethis::ui_field('Created at')}: {usethis::ui_value(created_at_minutes)} minutes ago"
)
} else if (created_at_minutes < 48) {
usethis::ui_line(
" {usethis::ui_field('Created at')}: {usethis::ui_value(created_at_hours)} hours ago"
)
} else {
usethis::ui_line(
" {usethis::ui_field('Created at')}: {usethis::ui_value(created_at_days)} days ago"
)
}
usethis::ui_line(" {usethis::ui_field('Status')}: {usethis::ui_value(job$status)}")
if (job$status != "expired") {
expires_id <-
round(as.numeric(
difftime(
lubridate::fast_strptime(job$download_expires_at, "%Y-%m-%dT%H:%M:%OS%z"),
lubridate::now(),
units = "hours"
)
), 0)
usethis::ui_line(
" {usethis::ui_field('Expires in')}: {usethis::ui_value(expires_id)} hours"
)
}
usethis::ui_line("")
}
# Start compliance job
#
# @param all_tweets Currently only "tweets" is allowed here.
# @param compliance_tweets If FALSE the function will not print any messages.
# @param compliance_users API bearer token.
#
#' @importFrom magrittr %>%
combine_jobs <- function(
all_tweets,
compliance_tweets,
compliance_users
) {
usethis::ui_info("Joining datasets, this might take a while ...")
#data.table::setDT(all_tweets)
if (!is.null(compliance_users)) {
#data.table::setDT(compliance_users)
compliance_users$created_at <- NULL
}
if (!is.null(compliance_tweets)) {
#data.table::setDT(compliance_tweets)
compliance_tweets$created_at <- NULL
}
selected_vars <- list()
if (!is.null(compliance_tweets)) {
selected_vars <- append(selected_vars, "status_id")
}
if (!is.null(compliance_users)) {
selected_vars <- append(selected_vars, "user_id")
}
if (all(c("created_at", "account_created_at") %in% names(all_tweets))) {
if (lubridate::is.POSIXct(all_tweets$created_at) &
lubridate::is.POSIXct(all_tweets$account_created_at)) {
selected_vars <-
append(selected_vars,
c("created_at", "account_created_at"))
selected_vars <- unlist(selected_vars)
combined_tweets <- all_tweets %>% dplyr::select(tidyr::all_of(selected_vars))
} else {
usethis::ui_oops(
"The variables {usethis::ui_field('created_at')} and {usethis::ui_field('account_created_at')} must be object of class POSIXct."
)
usethis::ui_info(
"Only {usethis::ui_field('status_id')} and {usethis::ui_field('user_id')} will be used."
)
selected_vars <- unlist(selected_vars)
combined_tweets <- all_tweets %>% dplyr::select(tidyr::all_of(selected_vars))
}
} else if ("created_at" %in% names(all_tweets)) {
if (lubridate::is.POSIXct(all_tweets$created_at)) {
selected_vars <- append(selected_vars, "created_at")
selected_vars <- unlist(selected_vars)
combined_tweets <- all_tweets %>% dplyr::select(tidyr::all_of(selected_vars))
} else {
usethis::ui_oops(
"The variables {usethis::ui_field('created_at')} and {usethis::ui_field('account_created_at')} must be object of class POSIXct."
)
usethis::ui_info(
"Only {usethis::ui_field('status_id')} and {usethis::ui_field('user_id')} will be used."
)
selected_vars <- unlist(selected_vars)
combined_tweets <- all_tweets %>% dplyr::select(tidyr::all_of(selected_vars))
}
} else if ("account_created_at" %in% names(all_tweets)) {
if (lubridate::is.POSIXct(all_tweets$account_created_at)) {
selected_vars <- append(selected_vars,"account_created_at")
selected_vars <- unlist(selected_vars)
combined_tweets <- all_tweets %>% dplyr::select(tidyr::all_of(selected_vars))
} else {
usethis::ui_oops(
"The variables {usethis::ui_field('created_at')} and {usethis::ui_field('account_created_at')} must be object of class POSIXct."
)
usethis::ui_info(
"Only {usethis::ui_field('status_id')} and {usethis::ui_field('user_id')} will be used."
)
selected_vars <- unlist(selected_vars)
combined_tweets <- all_tweets %>% dplyr::select(tidyr::all_of(selected_vars))
}
} else {
selected_vars <- unlist(selected_vars)
combined_tweets <- all_tweets %>% dplyr::select(tidyr::all_of(selected_vars))
}
combined_tweets <- unique(combined_tweets)
if (!is.null(compliance_users) & !is.null(compliance_tweets)) {
compliance_users <- compliance_users %>%
dplyr::arrange(redacted_at) %>%
dplyr::group_by(id) %>%
dplyr::slice(1) %>%
dplyr::ungroup() %>%
dplyr::filter(id %in% unique(all_tweets$user_id))
compliance_tweets <- compliance_tweets %>%
dplyr::arrange(redacted_at) %>%
dplyr::group_by(id) %>%
dplyr::slice(1) %>%
dplyr::ungroup() %>%
dplyr::filter(id %in% unique(all_tweets$status_id)) %>%
dplyr::mutate(reason = ifelse(reason == "geo_scrubbed", NA, reason))
combined_tweets <- combined_tweets %>%
dplyr::left_join((compliance_tweets %>% dplyr::select(-action)),
by = c("status_id" = "id")) %>%
dplyr::left_join((compliance_users %>% dplyr::select(-action)),
by = c("user_id" = "id"),
suffix = c("_tweets", "_users")
) %>%
dplyr::filter(!duplicated(.))
} else if (is.null(compliance_users)) {
compliance_tweets <- compliance_tweets %>%
dplyr::group_by(id) %>%
dplyr::arrange(redacted_at) %>%
dplyr::slice(1) %>%
dplyr::ungroup() %>%
dplyr::filter(id %in% unique(all_tweets$status_id)) %>%
dplyr::mutate(reason = ifelse(reason == "geo_scrubbed", NA, reason))
combined_tweets <- combined_tweets %>%
dplyr::left_join((compliance_tweets %>% dplyr::select(-action)),
by = c("status_id" = "id"),
suffix = c("", "_tweets")
) %>%
dplyr::mutate(
reason_users = rep(c(reason[0], NA), length(reason)), # needs to be character(0)
redacted_at_users = rep(c(redacted_at[0], NA), length(redacted_at)) # needs to be character(0)
) %>%
dplyr::rename(reason_tweets = reason,
redacted_at_tweets = redacted_at) %>%
dplyr::filter(!duplicated(.))
} else if (is.null(compliance_tweets)) {
compliance_users <- compliance_users %>%
dplyr::group_by(id) %>%
dplyr::arrange(redacted_at) %>%
dplyr::slice(1) %>%
dplyr::ungroup() %>%
dplyr::filter(id %in% unique(all_tweets$user_id))
combined_tweets <- combined_tweets %>%
dplyr::left_join((compliance_users %>% dplyr::select(-action)),
by = c("user_id" = "id"),
suffix = c("", "_users")
) %>%
dplyr::mutate(
reason_tweets = rep(c(reason[0], NA), length(reason)),
redacted_at_tweets = rep(c(redacted_at[0], NA), length(redacted_at))
) %>%
dplyr::rename(reason_users = reason,
redacted_at_users = redacted_at) %>%
dplyr::filter(!duplicated(.))
}
combined_tweets <- combined_tweets %>%
# take the available OR first time of redaction
dplyr::mutate(
redacted_at = dplyr::case_when(
(is.na(redacted_at_tweets) &
!is.na(redacted_at_users)) ~ redacted_at_users,
(is.na(redacted_at_users) &
!is.na(redacted_at_tweets)) ~ redacted_at_tweets,
(redacted_at_users < redacted_at_tweets) ~ redacted_at_users,
(redacted_at_users > redacted_at_tweets) ~ redacted_at_tweets
)
) %>%
# take the available OR first reason for redaction
dplyr::mutate(
online_status = dplyr::case_when(
is.na(reason_users) & is.na(reason_tweets) ~ "online",
is.na(reason_tweets) &
!is.na(reason_users) ~ reason_users,
is.na(reason_users) &
!is.na(reason_tweets) ~ reason_tweets,
is.na(redacted_at_tweets) |
redacted_at_tweets > redacted_at_users ~ reason_users,
is.na(redacted_at_users) |
redacted_at_users < redacted_at_tweets ~ reason_tweets,
(!is.na(reason_users) & !is.na(reason_tweets)) &
redacted_at_tweets > redacted_at_users~ reason_users,
(!is.na(reason_users) & !is.na(reason_tweets)) &
redacted_at_tweets < redacted_at_users~ reason_tweets
)
) %>%
dplyr::mutate(
online_status = dplyr::case_when(
online_status == "deleted" & is.na(reason_users) ~ "tweet_deleted",
online_status == "deleted" & is.na(reason_tweets) ~ "user_deleted",
online_status == "deleted" &
(
is.na(redacted_at_tweets) |
redacted_at_tweets > redacted_at_users
) ~ "user_deleted",
online_status == "deleted" &
(
is.na(redacted_at_users) |
redacted_at_tweets < redacted_at_users
) ~ "tweet_deleted",
reason_users == reason_tweets & redacted_at_users < redacted_at_tweets ~ paste0("user_", reason_users),
reason_users == reason_tweets & redacted_at_tweets < redacted_at_users ~ paste0("tweet_", reason_users),
TRUE ~ online_status
)
)
combined_tweets <- combined_tweets %>%
dplyr::select(
tidyr::all_of(selected_vars),
tidyr::ends_with("created_at"),
online_status,
redacted_at
)
return(combined_tweets)
}
#' Prints a twCompliance object
#'
#' @param x A twCompliance object.
#' @param ... further arguments passed to or from other methods.
#'
#' @export
print.twCompliance <- function(x, ...) {
cat(x$print_string)
}
#' Generate print string
#'
#' @param check_id A twCompliance object.
#'
gen_print <- function(check_id) {
if ("user_id" %in% names(check_id$data)) {
n_users <- format(length(unique(check_id$data$user_id)), big.mark = ",")
type <- "users"
}
if ("status_id" %in% names(check_id$data)) {
n_tweets <- format(length(unique(check_id$data$status_id)), big.mark = ",")
type <- "tweets"
}
if ("user_id" %in% names(check_id$data) & ("status_id" %in% names(check_id$data))) {
return(sprintf(
"A twCompliance check for\n\t %s tweets\n\t %s users\nStarted at %s",
n_tweets,
n_users,
check_id$checked_at
))
} else {
if (type == "users"){n <- n_users}
if (type == "tweets"){n <- n_tweets}
return(sprintf(
"A twCompliance check for\n\t %s %s\nStarted at %s",
n,
type,
check_id$checked_at
))
}
}
report_text <- function(df, type = "tweets") {
tbl <- df %>%
dplyr::count(online_status, sort = T) %>%
dplyr::mutate(p = round((n/sum(n))*100, 2)) %>%
dplyr::select(-n) %>%
tidyr::pivot_wider(names_from = online_status, values_from = p)
report_list <- c(sprintf("On %s", format(lubridate::date(unique(df$checked_at)), "%b %d, %Y")))
if ("online" %in% names(tbl)) {
if (length(names(tbl)) == 1) {
report_list <- c(report_list, sprintf("all %s were still available through the API.", type))
return(paste(report_list, collapse = " "))
}
report_list <- c(report_list, paste0(tbl$online, "% of all ", type))
} else {
report_list <- c(report_list, "no ", type)
}
report_list <- c(report_list, "were still available through the API. Unavailability was due to")
unavailability_list <- c()
unavailability_reasons <- names(tbl[names(tbl) != "online"])
for (x in unavailability_reasons) {
if (x == "suspended") {
unavailability_list <- c(unavailability_list, paste0("account suspension (", tbl[[x]], "%)"))
} else if (x == "user_deleted") {
unavailability_list <- c(unavailability_list, paste0("deleted user accounts (", tbl[[x]], "%)"))
} else if (x == "tweet_deleted") {
unavailability_list <- c(unavailability_list, paste0("deleted tweets (", tbl[[x]], "%)"))
} else if (x == "protected") {
unavailability_list <- c(unavailability_list, paste0("protected user accounts (", tbl[[x]], "%)"))
} else if (x == "deactivated") {
unavailability_list <- c(unavailability_list, paste0("deactivated user accounts (", tbl[[x]], "%)"))
}
}
if (length(unavailability_reasons) == 2) {
unavailability_list <- paste0(unavailability_list, collapse = " and ")
} else if (length(unavailability_reasons) > 2) {
unavailability_list <- paste0(
paste(unavailability_list[1:length(unavailability_list)-1], collapse = ", "),
", and ",
unavailability_list[length(unavailability_list)]
)
}
if (any(c("created_at", "account_created_at") %in% names(df))) {
if ("account_created_at" %in% names(df) & !"created_at" %in% names(df)) {
df <- df %>% dplyr::rename(created_at = account_created_at)
}
tbl_age <- df %>%
dplyr::filter(online_status != "online") %>%
dplyr::select(created_at, checked_at) %>%
dplyr::mutate(delta = checked_at-created_at) %>%
dplyr::summarise(M = mean(delta), MIN = min(delta), MAX = max(delta))
report_age <- sprintf("At the time of checking, the %s were on average %s %s old.", type, round(tbl_age$M, 0), units(tbl_age$M))
p_has_redacted <- df %>% dplyr::filter(online_status != "online") %>% dplyr::summarise(p = mean(!is.na(redacted_at)))
tbl_delta <- df %>%
dplyr::filter(online_status != "online") %>%
dplyr::mutate(delta = redacted_at - created_at) %>%
dplyr::filter(!is.na(delta)) %>%
dplyr::summarise(M = mean(delta), MIN = min(delta), MAX = max(delta))
report_delta <- sprintf(
" The %s percent of %s where an redacted_at time was available, were on average made unavailabe %s %s after creation (MIN = %s %s, MAX = %s %s)",
round(p_has_redacted*100, 2),
type,
round(tbl_delta$M, 0),
units(tbl_delta$M),
round(tbl_delta$MIN, 0),
units(tbl_delta$MIN),
round(tbl_delta$MAX, 0),
units(tbl_delta$MAX)
)
return(paste0(paste(c(report_list, unavailability_list), collapse = " "), ". ", report_age, report_delta, "\n"))
} else {
return(paste0(paste(c(report_list, unavailability_list), collapse = " "), ".\n"))
}
}
#' @importFrom patchwork plot_layout
report_plot <- function(df, type = "tweets", combine_plots = TRUE) {
clr_plt <- list(
"online" = "#66a61e",
"tweet_deleted" = "#d95f02",
"suspended" = "#7570b3",
"user_deleted" = "#e7298a",
"deactivated" = "#e6ab02",
"protected" = "#a6761d"
)
clr_plt <- clr_plt[names(clr_plt) %in% unique(df$online_status)]
if (type == "users") {
viz <- dplyr::rename(df, created_at = account_created_at) |> dplyr::filter(!is.na(created_at))
} else {
viz <- dplyr::filter(df, !is.na(created_at))
}
time_span <- max(viz$created_at, na.rm = T) - min(viz$created_at, na.rm = T)
time_span_units <- units(time_span)
if (time_span_units == "days") {
if (time_span > 365*7) {
viz <- viz |>
dplyr::mutate(y = format(created_at, "%Y")) |>
dplyr::mutate(w = format(created_at, "%U")) |>
dplyr::mutate(created_at = lubridate::ymd(paste0(y,"-01-01")) + lubridate::weeks(w))
time_span_units <- "weeks"
}
viz <- dplyr::mutate(viz, created_at = lubridate::date(created_at))
} else if (time_span_units == "hours") {
viz <- dplyr::mutate(viz, created_at = as.POSIXct(format(created_at, "%Y-%m-%dT%H:00:00"), format = "%Y-%m-%dT%H:%M:%S"))
} else if (time_span_units == "minutes") {
viz <- dplyr::mutate(viz, created_at = as.POSIXct(format(created_at, "%Y-%m-%dT%H:%M:00"), format = "%Y-%m-%dT%H:%M:%S"))
}
fig.1 <- viz |>
dplyr::group_by(created_at) |>
dplyr::count(online_status) |>
dplyr::ungroup() |>
tidyr::pivot_wider(names_from = online_status, values_from = n, values_fill = 0) |>
tidyr::pivot_longer(-created_at, names_to = "online_status", values_to = "n") |>
dplyr::mutate(online_status = factor(online_status, levels = c(
"online",
"tweet_deleted",
"suspended",
"user_deleted",
"deactivated",
"protected"
))) |>
ggplot2::ggplot(ggplot2::aes(x = created_at, y = n, fill = online_status)) +
ggplot2::geom_area(position = "stack") +
ggplot2::scale_fill_manual(values = clr_plt) +
ggplot2::labs(
title = "Absolute counts by time of publishing",
subtitle = sprintf("Aggregated by %s", time_span_units),
y = "Count",
x = sprintf("Created at (by %s)", time_span_units),
fill = "Online Status"
)
fig.2 <- viz |>
dplyr::group_by(created_at) |>
dplyr::count(online_status) |>
dplyr::mutate(p = n/sum(n)) |>
dplyr::ungroup() |>
dplyr::select(-n) |>
tidyr::pivot_wider(names_from = online_status, values_from = p, values_fill = 0) |>
tidyr::pivot_longer(-created_at, names_to = "online_status", values_to = "p") |>
dplyr::mutate(online_status = factor(online_status, levels = c(
"online",
"tweet_deleted",
"suspended",
"user_deleted",
"deactivated",
"protected"
))) |>
ggplot2::ggplot(ggplot2::aes(x = created_at, y = p, fill = online_status)) +
ggplot2::geom_area(position = "stack") +
ggplot2::scale_fill_manual(values = clr_plt) +
ggplot2::scale_y_continuous(labels = scales::percent) +
ggplot2::labs(
title = "Relative shares by time of publishing",
subtitle = sprintf("Aggregated by %s", time_span_units),
y = "Share",
x = sprintf("Created at (by %s)", time_span_units),
fill = "Online Status"
)
time_span_units <- units(as.POSIXct(viz$redacted_at) - as.POSIXct(viz$created_at))
p_redacted_at <- format(mean(!is.na(viz$redacted_at))*100, digits = 2, nsmall = 2)
quantiles_delta <- stats::quantile(as.numeric(as.POSIXct(viz$redacted_at) - as.POSIXct(viz$created_at)), c(.25, .5, .75), na.rm = T)
fig.3 <- viz |>
dplyr::filter(!is.na(redacted_at)) |>
dplyr::filter(online_status != "online") |>
dplyr::mutate(delta = as.numeric(as.POSIXct(redacted_at) - as.POSIXct(created_at))) |>
ggplot2::ggplot(ggplot2::aes(x = delta)) +
ggplot2::geom_density() +
ggplot2::geom_vline(ggplot2::aes(xintercept = quantiles_delta[1]), linetype = "dotted") +
ggplot2::geom_vline(ggplot2::aes(xintercept = quantiles_delta[2]), size = 1, linetype = "dotted") +
ggplot2::geom_vline(ggplot2::aes(xintercept = quantiles_delta[3]), linetype = "dotted") +
ggplot2::scale_x_continuous() +
ggplot2::labs(
title = sprintf("Time difference between creation and retraction of %s", type),
subtitle = sprintf(
"Redaction time information available for %s%% of %s. \nDotted lines represent 25%%, 50%%, and 75%% quantiles.",
p_redacted_at, type
),
x = sprintf("Time difference (%s)", time_span_units),
y = "Density"
)
if (combine_plots == TRUE) {
fig.1 <- (fig.1 + ggplot2::theme(legend.position = "none"))
fig.2 <- (fig.2 + ggplot2::theme(legend.position = "bottom", legend.box = "horizontal"))
fig <- (fig.1 / fig.2 / fig.3)
fig + patchwork::plot_annotation(
title = sprintf("Online status of %s", type)
)
} else {
return(list("abs_counts"=fig.1, "rel_shares"=fig.2, "redacted_delta"=fig.3))
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.