Nothing
#' @title Add columns containing text data to twitter activity network dataframes
#'
#' @param net A named list of dataframes \code{nodes} and \code{edges} generated by \code{Create}.
#' @param data A dataframe generated by \code{Collect}.
#' @param hashtags Logical. Add tweet hashtags to dataframes. Default is \code{FALSE}.
#' @param ... Additional parameters passed to function. Not used in this method.
#'
#' @return Network as a named list of two dataframes containing \code{$nodes} and \code{$edges} including columns
#' containing text data.
#'
#' @aliases AddText.activity.twitter
#' @name AddText.activity.twitter
#' @export
AddText.activity.twitter <- function(net, data, hashtags = FALSE, ...) {
net$nodes <- add_tweet_text(net$nodes, data$tweets, hashtags)
class(net) <- union(class(net), c("voson.text"))
msg("Done.\n")
net
}
#' @title Add columns containing text data to twitter actor network dataframes
#'
#' @param net A named list of dataframes \code{nodes} and \code{edges} generated by \code{Create}.
#' @param data A dataframe generated by \code{Collect}.
#' @param hashtags Logical. Add tweet hashtags to dataframes. Default is \code{FALSE}.
#' @param ... Additional parameters passed to function. Not used in this method.
#'
#' @return Network as a named list of two dataframes containing \code{$nodes} and \code{$edges} including columns
#' containing text data.
#'
#' @aliases AddText.actor.twitter
#' @name AddText.actor.twitter
#' @export
AddText.actor.twitter <- function(net, data, hashtags = FALSE, ...) {
net$edges <- add_tweet_text(net$edges, data$tweets, hashtags)
class(net) <- union(class(net), c("voson.text"))
msg("Done.\n")
net
}
#' @title Add columns containing text data to twitter semantic network dataframes
#'
#' @param net A named list of dataframes \code{nodes} and \code{edges} generated by \code{Create}.
#' @param data A dataframe generated by \code{Collect}.
#' @param hashtags Logical. Add tweet hashtags to dataframes. Default is \code{FALSE}.
#' @param ... Additional parameters passed to function. Not used in this method.
#'
#' @return Network as a named list of two dataframes containing \code{$nodes} and \code{$edges} including columns
#' containing text data.
#'
#' @aliases AddText.semantic.twitter
#' @name AddText.semantic.twitter
#' @export
AddText.semantic.twitter <- function(net, data, hashtags = FALSE, ...) {
net$edges <- add_tweet_text(net$edges, data$tweets, hashtags)
class(net) <- union(class(net), c("voson.text"))
msg("Done.\n")
net
}
#' @title Add columns containing text data to twitter 2mode network dataframes
#'
#' @param net A named list of dataframes \code{nodes} and \code{edges} generated by \code{Create}.
#' @param data A dataframe generated by \code{Collect}.
#' @param hashtags Logical. Add tweet hashtags to dataframes. Default is \code{FALSE}.
#' @param ... Additional parameters passed to function. Not used in this method.
#'
#' @return Network as a named list of two dataframes containing \code{$nodes} and \code{$edges} including columns
#' containing text data.
#'
#' @aliases AddText.twomode.twitter
#' @name AddText.twomode.twitter
#' @export
AddText.twomode.twitter <- function(net, data, hashtags = FALSE, ...) {
net$edges <- add_tweet_text(net$edges, data$tweets, hashtags)
class(net) <- union(class(net), c("voson.text"))
msg("Done.\n")
net
}
# extract text from nested tweet fields
extract_nested_tweet_text <- function(x, var, hashtags = FALSE) {
df <- x |>
dplyr::select(parent_status_id = .data$status_id, {{ var }}) |>
tidyr::unnest(cols = c({{ var }}))
if (ncol(df) == 1) return(NULL)
df <- df |>
dplyr::filter(!is.na(.data$id_str)) |>
dplyr::select(.data$parent_status_id, status_id = .data$id_str, .data$full_text, .data$entities) |>
dplyr::distinct(.data$parent_status_id, .keep_all = TRUE)
if (hashtags) {
df <- df |>
dplyr::mutate(hashtags = .data$entities$hashtags) |>
tidyr::hoist(
.col = .data$hashtags,
hashtags = list("text"),
.remove = FALSE
)
}
df <- df |> dplyr::select(-.data$entities)
df
}
# extract and add text to twitter network nodes or edges
add_tweet_text <- function(objs, data, hashtags = FALSE) {
objs <-
dplyr::left_join(
objs,
dplyr::select(
data,
.data$status_id,
t.is_reply = .data$is_reply,
t.is_quote = .data$is_quote,
t.is_retweet = .data$is_retweet,
t.full_text = .data$full_text,
entities = .data$entities
) |> dplyr::distinct(.data$status_id, .keep_all = TRUE),
by = "status_id")
if (hashtags) {
objs <- objs |>
tidyr::hoist(
.col = .data$entities,
t.hashtags = list("hashtags", "text"),
.remove = FALSE
)
}
objs <- objs |>
dplyr::select(-.data$entities) |>
dplyr::mutate(
t.full_text = data.table::fifelse(!is.na(.data$t.full_text), .data$t.full_text, NA_character_)
)
# separate to simplify joins
# tweets are present in data and refs are referenced tweets derived from data
objs_tweets <- objs |>
dplyr::filter(!is.na(.data$t.is_retweet) & !is.na(.data$t.is_quote))
objs_refs <- objs |>
dplyr::filter(is.na(.data$t.is_retweet) & is.na(.data$t.is_quote))
# nested quote tweets
qs <- extract_nested_tweet_text(data, "qs", hashtags)
if (!is.null(qs)) {
qs <- qs |> dplyr::rename_with(function(x) paste0("t.quoted.", x))
objs_tweets <- objs_tweets |>
dplyr::left_join(qs, by = c("status_id" = "t.quoted.parent_status_id"))
objs_refs <- objs_refs |>
dplyr::left_join(
qs |>
dplyr::select(-.data$t.quoted.parent_status_id) |>
dplyr::distinct(.data$t.quoted.status_id, .keep_all = TRUE),
by = c("status_id" = "t.quoted.status_id")
)
}
# nested retweets
rts <- extract_nested_tweet_text(data, "rts", hashtags)
if (!is.null(rts)) {
rts <- rts |> dplyr::rename_with(function(x) paste0("t.retweeted.", x))
objs_tweets <- objs_tweets |>
dplyr::left_join(rts, by = c("status_id" = "t.retweeted.parent_status_id"))
objs_refs <- objs_refs |>
dplyr::left_join(
rts |>
dplyr::select(-.data$t.retweeted.parent_status_id) |>
dplyr::distinct(.data$t.retweeted.status_id, .keep_all = TRUE),
by = c("status_id" = "t.retweeted.status_id")
)
}
objs <- dplyr::bind_rows(objs_tweets, objs_refs)
# set a default text
objs <- objs |> dplyr::mutate(
vosonTxt_tweet = data.table::fcase(
.data$t.is_retweet == TRUE, .data$t.retweeted.full_text,
is.na(.data$t.full_text) & !is.na(.data$t.retweeted.full_text), .data$t.retweeted.full_text,
is.na(.data$t.full_text) & !is.na(.data$t.quoted.full_text), .data$t.quoted.full_text,
!is.na(.data$t.full_text), .data$t.full_text,
default = NA_character_
)
)
if (hashtags) {
objs$t.hashtags <- lapply(objs$t.hashtags, null_to_na)
objs$t.quoted.hashtags <- lapply(objs$t.quoted.hashtags, null_to_na)
objs$t.retweeted.hashtags <- lapply(objs$t.retweeted.hashtags, null_to_na)
objs <- objs |>
dplyr::rowwise() |>
dplyr::mutate(
vosonTxt_hashtags = data.table::fcase(
.data$t.is_retweet == TRUE, paste0(.data$t.retweeted.hashtags, collapse = ","),
is.na(.data$t.full_text) & !is.na(.data$t.retweeted.full_text),
paste0(.data$t.retweeted.hashtags, collapse = ","),
is.na(.data$t.full_text) & !is.na(.data$t.quoted.full_text),
paste0(.data$t.quoted.hashtags, collapse = ","),
!is.na(.data$t.full_text), paste0(.data$t.hashtags, collapse = ","),
default = NA_character_
)
)
}
objs$vosonTxt_tweet <- textutils::HTMLdecode(objs$vosonTxt_tweet)
objs <- objs |> dplyr::relocate(.data$vosonTxt_tweet, .after = dplyr::last_col())
if (hashtags) objs <- objs |> dplyr::relocate(.data$vosonTxt_hashtags, .after = dplyr::last_col())
objs
}
# retweet text header
retweet_header_regex <- function() {
"^(RT\\s@[A-Za-z0-9_]+:)?\\s"
}
# replaces truncated retweet full_text with embedded entities full_text
# extracts and prepends existing retweet header to tweet text
retweet_full_text <- function(x) {
rts <- extract_nested_tweet_text(x, "rts", FALSE)
if (!is.null(rts)) {
rts <- rts |> dplyr::rename_with(function(x) paste0("t.retweeted.", x)) |>
dplyr::select(.data$t.retweeted.parent_status_id, .data$t.retweeted.full_text)
x <- x |>
dplyr::left_join(rts, by = c("status_id" = "t.retweeted.parent_status_id")) |>
dplyr::mutate(full_text = ifelse(.data$is_retweet == TRUE & !is.na(.data$t.retweeted.full_text),
paste(
stringr::str_match(.data$full_text, retweet_header_regex()),
.data$t.retweeted.full_text
),
.data$full_text)) |>
dplyr::select(-.data$rts, -.data$t.retweeted.full_text)
}
x
}
# joins add nulls into dataframes
null_to_na <- function(x) {
if (length(x) < 1) return(c(NA_character_))
x
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.