Nothing
#' account_stats
#'
#' @description
#' Calculate account statistics: total posts shared, average time delta,
#' average edge symmetry score.
#'
#' @details
#' With this helper function, you can obtain summary statistics for the accounts in
#' the network. When applied to a network for which a narrower `time_window` has
#' been calculated using the \link{flag_speed_share} function, the summary statistics
#' are computed separately for the full and faster networks depending on the
#' 'weight_threshold' option. When this option is set to "full", metrics are
#' computed on the set of nodes and edges surpassing the user-defined
#' edge_weight threshold in the \link{generate_coordinated_network} function. Also,
#' metrics for nodes and edges in the fastest network are returned, but they are
#' calculated on the specified subgraph. The same applies when the 'weight_threshold'
#' option is set to "fast". In this case, metrics are calculated on the fast subgraph.
#' When the option is set to "null", the entire inputted graph without further
#' subsetting is considered.
#'
#' The node share count is performed thanks to the table resulting from the
#' \link{detect_groups} function. If the user has used the optional
#' \link{flag_speed_share} function and decides to calculate statistics on
#' the fastest graph (by setting weight_threshold = "fast"), the share count
#' is calculated considering only shares made in the fastest time window.
#' Alternatively, shares in the largest time window are considered (option
#' weight_threshold = "full" or weight_threshold = "none"). When calculating the
#' share count, all shares made by accounts are considered, regardless of
#' whether they are shares of posts shared in a coordinated fashion or not,
#' according to the edge weight threshold. In other words, this is a measure of
#' an account's activity in the time window under consideration.
#'
#' @param coord_graph an `igraph` object generated by \link{generate_coordinated_network}
#' @param result a table generated by \link{detect_groups}
#' @param weight_threshold The threshold to be used for filtering the graph
#' (options: "full", "fast", or "none").
#' @return a data.table with summary statistics for each account
#'
#' @import data.table
#' @import igraph
#' @export
#'
#'
account_stats <- function(coord_graph, result, weight_threshold = c("full", "fast", "none")) {
weight_threshold_full <- weight_threshold_fast <- from <- n_content_id_full <-
n_content_id_fast <- avg_time_delta_full <- avg_time_delta_fast <- edge_symmetry_score_full <-
edge_symmetry_score_fast <- to <- n_content_id_y_full <- n_content_id_y_fast <- n_content_full <-
n_content_fast <- account_id <- n_content_id <- avg_time_delta <- edge_symmetry_score <- n_content_id_y <-
n_content <- shares <- NULL
# Check if "_fast" and "_full" are in column names
if (any(grepl("_fast", igraph::edge_attr_names(coord_graph))) &&
any(grepl("_full", igraph::edge_attr_names(coord_graph)))) {
# Check if "object_ids" are included and remove the relative attributes
if (any(grepl("object_ids", igraph::edge_attr_names(coord_graph)))) {
# List of edge attributes to be removed
edge_attrs_to_remove <-
c("object_ids_full", "object_ids_fast")
# Removing edge attributes using a loop
for (attr in edge_attrs_to_remove) {
coord_graph <- igraph::delete_edge_attr(coord_graph, attr)
}
}
x <-
data.table::as.data.table(igraph::as_data_frame(coord_graph))
# Filter by edge weight threshold
if (weight_threshold == "full") {
x <- x[weight_threshold_full == 1]
}
if (weight_threshold == "fast") {
x <- x[weight_threshold_fast == 1]
}
# Reshape and summarize
reshaped <- rbind(x[, .(
account_id = from,
avg_time_delta_full,
avg_time_delta_fast,
edge_symmetry_score_full,
edge_symmetry_score_fast
)],
x[, .(
account_id = to,
avg_time_delta_full,
avg_time_delta_fast,
edge_symmetry_score_full,
edge_symmetry_score_fast
)])
reshaped <- unique(reshaped)
account_summary <- reshaped[, .(
avg_time_delta_full = mean(avg_time_delta_full, na.rm = TRUE),
avg_time_delta_fast = mean(avg_time_delta_fast, na.rm = TRUE),
avg_edge_symmetry_score_full = mean(edge_symmetry_score_full, na.rm = TRUE),
avg_edge_symmetry_score_fast = mean(edge_symmetry_score_fast, na.rm = TRUE)
), by = .(account_id)]
account_summary <-
account_summary[, lapply(.SD, function(x) {
x[is.nan(x)] <- NA
return(x)
})]
# get unique shares count from the result table ------------------------
# Filter by edge weight threshold
if (weight_threshold == "fast") {
# Find the first column whose name contains "time_window_"
fast_flag_column_index <- grep("time_window_", names(result))[1]
# Get the actual column name
fast_flag_column <- names(result)[fast_flag_column_index]
# Filter rows where this column is equal to 1
result <- result[result[[fast_flag_column]] == 1, ]
}
shares_count <- as.data.table(data.frame(
account_id = c(result$account_id,
result$account_id_y),
shares = c(result$content_id,
result$content_id_y)
))
shares_count <-
shares_count[, .(unique_shares_count = length(unique(shares))),
by = .(account_id)]
# merge shares' count to account summary
account_summary <-
account_summary[shares_count, on = "account_id", nomatch = 0]
data.table::setorder(account_summary, "account_id")
return(account_summary)
}
# --------------------------------------------------------------------------
# if the data has not been updated with the `flag_speed_share` function ----
if (!any(grepl("_fast", igraph::edge_attr_names(coord_graph))) &&
!any(grepl("_full", igraph::edge_attr_names(coord_graph)))) {
{
# If data includes object_ids, remove unused edge attributes
if (any(grepl("object_ids", igraph::edge_attr_names(coord_graph)))) {
coord_graph <- igraph::delete_edge_attr(coord_graph, "object_ids")
}
x <-
data.table::as.data.table(igraph::as_data_frame(coord_graph))
# Filter by edge weight threshold
if (weight_threshold == "full") {
x <- x[weight_threshold == 1]
}
if (weight_threshold == "fast") {
warning(
"The data does not include the fastest shares. Metrics are calculated based on all the data.\nIf you wish to calculate metrics for a narrower time window, be sure to use the `flag_speed_share` function before creating the network."
)
}
reshaped <- rbind(x[, .(
account_id = from,
avg_time_delta,
edge_symmetry_score
)],
x[, .(
account_id = to,
avg_time_delta,
edge_symmetry_score
)])
# Remove duplicates
reshaped <- unique(reshaped)
# Aggregate to find the average values for each account and content pair
account_summary <- reshaped[, .(
avg_time_delta = mean(avg_time_delta, na.rm = TRUE),
avg_edge_symmetry_score = mean(edge_symmetry_score, na.rm = TRUE)
), by = .(account_id)]
account_summary <-
account_summary[, lapply(.SD, function(x) {
# Replace NaN with NA
x[is.nan(x)] <- NA
return(x)
})]
# get unique shares count from the result table --------------------
shares_count <- data.table::as.data.table(data.frame(
account_id = c(result$account_id,
result$account_id_y),
shares = c(result$content_id,
result$content_id_y)
))
shares_count <-
shares_count[, .(unique_shares_count = length(unique(shares))),
by = .(account_id)]
# merge shares' count to account summary
account_summary <-
account_summary[shares_count, on = "account_id", nomatch = 0]
data.table::setorder(account_summary, "account_id")
return(account_summary)
}
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.