R/group_stats.r

Defines functions group_stats

Documented in group_stats

#' group_stats
#'
#' @description
#' With this helper function, you can obtain summary statistics for the objects
#' in the network.
#'
#' @param coord_graph An `igraph` object generated by \link{generate_coordinated_network}
#' @param weight_threshold The level of the network for which to calculate the statistic.
#' It can be `"full"`, `"fast"`, or `"none"`. The `"fast"` option is applicable only
#' if the data includes information on a faster network, as calculated with the
#' \link{flag_speed_share} function. The options preliminarily filter the edges
#' based on their inclusion in the subgraph filtered by edge weight threshold
#' (`"full"`), filtered by edges created in the faster time window and surpassing
#' the edge weight threshold in that network (`"fast"`), or apply to the unfiltered
#' graph (`"none"`).
#'
#' @return a `data.table` with the columns `object_id` and `num_accounts`,
#' sorted by `num_accounts` in descending order
#'
#' @import data.table
#' @import igraph
#' @export
#'
#'

group_stats <- function(coord_graph, weight_threshold = c("full", "fast", "none")) {
    # Summarise, for every object id attached to the edges of a coordinated
    # network, how many distinct accounts are connected through it.
    #
    # Arguments:
    #   coord_graph       igraph object whose edge attributes include at least
    #                     one column starting with 'object_ids' holding
    #                     comma-separated object ids.
    #   weight_threshold  one of "full", "fast" or "none"; when left at its
    #                     default the first choice ("full") is used.
    #
    # Returns a data.table with the columns 'object_id' and 'num_accounts',
    # sorted by 'num_accounts' in descending order.
    #
    # Errors when 'weight_threshold' is not one of the allowed values, when no
    # 'object_ids*' column exists, when "fast" is requested without '_fast'
    # columns, or when a column required by the selected option is missing.

    # Silence R CMD check notes about data.table's non-standard evaluation.
    from <- to <- object_id <- NULL

    # Collapse the default choice vector to one validated string. Without this
    # the `if` conditions below would receive a length-3 logical vector, which
    # is an error in R >= 4.2.
    weight_threshold <- match.arg(weight_threshold)

    x <- data.table::as.data.table(igraph::as_data_frame(coord_graph))

    # Identify the column name(s) starting with 'object_ids'
    object_ids_column <- names(x)[startsWith(names(x), "object_ids")]

    # Ensure that the column exists
    if (length(object_ids_column) == 0) {
        stop("Column starting with 'object_ids' not found in the data.table. This function only applies when the object_ids are retained in the 'generate_coordinated_network' function (objects = TRUE). If you wish to use this function, ensure you select this option.")
    }

    if (weight_threshold == "fast" && !any(grepl("_fast", names(x)))) {
        stop("weight_threshold = 'fast' has been selected, but the provided network was not created using a table updated with the 'flag_speed_share' function. Please use the weight_threshold = 'none' option or provide a table containing the necessary information.")
    }

    # When both the full and the fast variants are present, pick the one that
    # matches the requested level ("none" uses the full variant).
    if (length(object_ids_column) > 1) {
        object_ids_column <-
            if (weight_threshold == "fast") "object_ids_fast" else "object_ids_full"
    }
    if (!object_ids_column %in% names(x)) {
        stop("Column '", object_ids_column, "' not found in the edge table of 'coord_graph'.")
    }

    # Filtering based on 'weight_threshold' option -----------------------------
    filter_column <- switch(weight_threshold,
        full = if ("weight_threshold_full" %in% names(x)) {
            "weight_threshold_full"
        } else {
            "weight_threshold"
        },
        fast = "weight_threshold_fast",
        none = NULL
    )

    if (!is.null(filter_column)) {
        # Look the column up by name: the bare symbol `weight_threshold` would
        # silently fall back to this function's argument when the column is
        # missing and yield an empty result instead of an error.
        if (!filter_column %in% names(x)) {
            stop("Column '", filter_column, "' not found in the edge table of 'coord_graph'.")
        }
        # which() drops NA comparisons, so rows with a missing flag are excluded.
        x <- x[which(x[[filter_column]] == 1)]
    }

    # Extract object ids and calculate summary ----------------------------------
    # The ids are split outside of `:=` because data.table unwraps a length-1
    # list on the right-hand side, which breaks for single-edge tables.
    # as.character() guards against non-character columns (e.g. all-NA logical).
    id_list <- strsplit(as.character(x[[object_ids_column]]), split = ",", fixed = TRUE)
    n_ids <- lengths(id_list)

    unnested <- data.table::data.table(
        from = rep(x[["from"]], n_ids),
        to = rep(x[["to"]], n_ids),
        object_id = as.character(unlist(id_list, use.names = FALSE))
    )

    # Count both endpoints of every edge: an account that only ever appears in
    # the 'to' column also shared the object.
    object_id_summary <- unnested[
        ,
        list(num_accounts = data.table::uniqueN(c(from, to))),
        by = "object_id"
    ]

    # Filter out NA values in 'object_id'
    object_id_summary <- object_id_summary[!is.na(object_id)]

    # Order by 'num_accounts' in descending order
    data.table::setorderv(object_id_summary, "num_accounts", order = -1L)

    object_id_summary
}

Try the CooRTweet package in your browser

Any scripts or data that you put into this service are public.

CooRTweet documentation built on April 4, 2025, 2:25 a.m.