R/graph_metrics/metric_strategies.R

Defines functions node_metric_cumulative_importance update_graph_with_metrics calculate_metric_importance

library(randomcoloR)
library(logger)

calculate_metric_importance <- function(g, metric_name) {
  #' @param g graph
  #' @param metric_name string holding the attribute name
  #' @description return a dataframe with the % contribution to the overall
  #' metric value
  
  metric_data <- data.frame(station = vertex_attr(g, 'name'),
                            metric = vertex_attr(g, metric_name))
  metric_data <- metric_data %>%
    arrange(desc(metric)) %>% # sort metric in descending order
    mutate(metric_rate = (metric / sum(metric)) * 100) %>% # calculate metric contribution %
    mutate (metric_rate = round(metric_rate, 2)) # Round metric_range to 2 decimals
    
  metric_data <- set_extreme_vals_to_zero(metric_data)
  return(metric_data)
  
}

calculate_importance_by_cumulative_percentage <- function (g,
                                                           metric_data,
                                                           thresholds) {
  #' @param g graph
  #' @param metric_data a data_frame with a summary of the metric
  #' @param threshold a float indicating the % above which nodes are no longer important
  #' @description sets what nodes will be chosen as important and calculate the
  #' metric in % for cumulative strategies
  
  original_station_order <- vertex_attr(g, 'name')
  th_percentage <- thresholds[1]
  th_min_num <- thresholds[2]
  
  # (1) Generate by-importance node selecting column
  metric_data <- metric_data %>%
    mutate(metric_func = cumsum(metric_rate)) %>%
    mutate (selected = ifelse(metric_func <= th_percentage, 1, 0))
  
  # (2) If only one node gets more than the threshold then make it the
  # protagonist
  if (length(which(metric_data$selected == 1)) == 0) {
    metric_data$selected[1] = 1
  }
  
  # (3) is the th_percentage producing too little? --> apply th_min_num
  else if (length(which(metric_data$selected == 1)) < th_min_num) {
    num_important_nodes <- min(th_min_num, nrow(metric_data))
    selected <- c(rep(1, num_important_nodes),
                  rep(0, abs(
                    nrow(metric_data) - num_important_nodes
                  )))
    metric_data$selected <- selected
  }
  
  # (4) Generate unique distinct color important nodes and black for the rest
  metric_data_col_length <- nrow(metric_data)
  num_colors <- length(which(metric_data$selected == 1))
  palette <- distinctColorPalette(num_colors)
  node_colors <- c(palette,
                   rep('#000000', abs(metric_data_col_length - num_colors)))
  metric_data$node_color <- node_colors
  
  
  # (5) Re-order the dataframe following the order of the original graph vertex matrix
  metric_data <- metric_data[match(original_station_order,
                                   metric_data$station), ]
  
  return(metric_data)
  
}

update_graph_with_metrics <- function(g, metric_data, metric_name) {
  #' @param g graph
  #' @param metric_data a data_frame with a summary of the metric according to
  #' the chosen strategy
  #' @param metric_name string holding the name of the metric to be processed
  
  
  g <- g %>%
    set_vertex_attr(paste(metric_name, "_rate", sep = ""),
                    value = metric_data$metric_rate) %>%
    set_vertex_attr(paste(metric_name, "_selected", sep = ""),
                    value = metric_data$selected) %>%
    set_vertex_attr(paste(metric_name, "_node_color", sep = ""),
                    value = metric_data$node_color)
  return(g)
}


node_metric_cumulative_importance <-
  function(g, metric_name, threshold) {
    #' @param g graph
    #' @param metric_name string holding the name of the metric to be processed
    #' @param threshold a float indicating the % above which nodes are no longer important
    #' @description establish a cumulative contribution threshold as the strategy to
    #' determine what nodes are important or not
    
    metric_data <- calculate_metric_importance(g, metric_name)
    metric_data <- calculate_importance_by_cumulative_percentage (g,
                                                                  metric_data,
                                                                  threshold)
    g <- update_graph_with_metrics(g, metric_data, metric_name)
    return (list(graph = g, metric_data = metric_data))
    
  }
d2gex/seabasstfm documentation built on July 29, 2022, 2:20 a.m.