R/graph_metrics/grid_plotting_metric_utils.R

Defines functions generate_all_graph_grid_metrics_stations_whole_series generate_full_graph_grid_metrics_whole_series generate_full_graph_grid_metrics_yearly get_metric_top_scores_at_most get_all_metrics_sum_by_group_name get_yearly_metrics merge_metrics_detections rbind_all_metrics get_apl_monthly_metrics rbind_av_path_length get_node_monthly_metrics

source("R/detections/detection_views.R")

get_node_monthly_metrics <- function(data) {
  month_names <- names(data)
  monthly_metrics <- list()
  
  for (i in 1:length(data)) {
    month_name <- month_names[i]
    month_data <- data[[i]]
    
    if (nrow(month_data) > 0) {
      graph_context <- build_graph_context(month_data)
      monthly_metrics[[month_name]] <-
        get_montly_metrics (graph_context, month_name)
      
      if (is.null(graph_context$simple_g)) {
        log_warn("No-loops graph for month ",
                 month_name,
                 " is empty")
        
      }
      
      if (is.null(graph_context$only_loops_g)) {
        log_warn("Only-loops graph for individual ",
                 month_name,
                 " is empty")
      }
    }
    else {
      log_warn(month_name, " with no individual activity")
    }
  }
  return(monthly_metrics)
  
}

rbind_av_path_length <-
  function(av_data,
           monthly_av_data_vector,
           individual_name,
           month_name) {
    df <-
      data.frame(mean_distance = monthly_av_data_vector,
                 ind_name = individual_name,
                 period = month_name)
    av_data <-
      rbind(av_data, df) %>% mutate_if(is.numeric, round, 2)
    return(av_data)
    
  }

get_apl_monthly_metrics <- function(data) {
  individuals <- unique(data$ind_name)
  average_path_length <- data.frame()
  for (i in seq_along(individuals)) {
    individual_name <- individuals[i]
    
    # (2) slice data by individual and moths
    individual_monthly_data <- data %>%
      fetch_individual(individual_name) %>%
      slide_data_by_date_interval(start, end, granularity = granularity)
    
    # (3) Get month names
    month_names <- names(individual_monthly_data)
    monthly_av_path_length <- c()
    for (j in seq_along(individual_monthly_data)) {
      month_name <- month_names[j]
      month_data <- individual_monthly_data[[j]]
      monthly_av_path_length[j] <- 0
      if (nrow(month_data) > 1) {
        graph_context <- build_graph_context(month_data)
        if (!is.null(graph_context$simple_g)) {
          monthly_av_path_length[j] <- graph_context$average_distance
        }
      }
    }
    average_path_length <-
      rbind_av_path_length(average_path_length,
                           monthly_av_path_length,
                           individual_name,
                           month_names)
  }
  return(average_path_length)
}

rbind_all_metrics <- function(metrics) {
  full_metrics <- list ()
  simple_metrics <- list ()
  loops_details <- list ()
  for (i in seq_along(metrics)) {
    month_metrics <- metrics[[i]]
    full_metrics[[i]] <- month_metrics$full
    simple_metrics[[i]] <- month_metrics$simple_graph
    loops_details[[i]] <- month_metrics$loops
  }
  
  full_metrics = bind_rows(full_metrics)
  loops_details = bind_rows(loops_details)
  
  full_metrics <- merge(full_metrics, loops_details, all = T)
  full_metrics[sapply(full_metrics, is.na)] <- 0
  simple_metrics <- bind_rows(simple_metrics)
  return (list(full_metrics = full_metrics, simple_metrics = simple_metrics))
  
}

merge_metrics_detections <-
  function(metrics, detections, is_full_graph = TRUE) {
    metrics <- merge(metrics, detections, all = T)
    metrics[sapply(metrics, is.na)] <- 0
    if (is_full_graph) {
      col_names <- c(
        'period',
        'degree',
        'betweenness',
        'eigenvector',
        'loops',
        'detections',
        'station'
      )
    }
    else {
      col_names <- c('period',
                     'degree',
                     'betweenness',
                     'eigenvector',
                     'detections',
                     'station')
    }
    metrics <- metrics [, col_names]
    
  }

get_yearly_metrics <- function(metrics, year) {
  return(filter(metrics, grepl(year, period, ignore.case = TRUE)))
}

get_top_score_single_metric <-
  function (metrics, group_name,  metric_name, max_num) {
    return (metrics %>%
              group_by_at(c(group_name, metric_name)) %>%
              arrange(desc(!!sym(metric_name))) %>%
              head(max_num))
  }

get_all_metrics_sum_by_group_name <- function(metrics, group_name) {
  return(metrics %>%
           group_by_at(group_name) %>%
           summarise_each(list(sum)))
  
}
get_metric_top_scores_at_most <-
  function(metrics,  metric_names, max_num) {
    # Gets up to max_num rows with highest scores for each metric
    metric_top_scorers <- lapply(metric_names, function (x) {
      return(metrics %>% arrange(desc(!!sym(x))) %>% head(max_num))
    })
    return(unique(bind_rows(metric_top_scorers)))
    
  }

generate_metric_bar_plots_for_one_row <- function (metrics,
                                                   group_name,
                                                   year,
                                                   metric_names,
                                                   max_bars,
                                                   show_x_axis_label = TRUE,
                                                   is_full_graph = TRUE,
                                                   is_whole_series = FALSE,
                                                   main_title =  NULL,
                                                   title_position = 0,
                                                   p_title_size = 0.5) {
  all_plots <- list()
  
  if (is.null(main_title)) {
    main_title <- paste("Year ", year)
  }
  
  if (group_name == 'period') {
    month_as_name <- TRUE
    x_lab <- "Months"
  }
  else {
    month_as_name <- FALSE
    x_lab <- "Stations"
  }
  
  for (i in seq_along(metric_names)) {
    m_name <- metric_names[i]
    if ((title_position > 0 & title_position == i) |
        (is_full_graph & !is_whole_series & i == 3)) {
      title = main_title
    }
    else {
      title = ""
    }
    
    # Get only up to max_bars rows per metric
    df <- metrics %>%
      select(!!m_name,!!group_name) %>%
      arrange(desc(!!sym(m_name))) %>%
      head(max_bars)
    
    all_plots[[i]] <-
      generate_bar_plot(
        df,
        group_name,
        m_name,
        title,
        NULL,
        x_lab,
        str_to_title(str_replace(m_name, "_", " ")),
        month_as_name = month_as_name,
        show_bar_values = FALSE,
        show_legend = FALSE,
        show_title = TRUE,
        show_x_axis_label = show_x_axis_label,
        p_title_size = p_title_size
      )
  }
  return(all_plots)
  
}

generate_full_graph_grid_metrics_yearly <-
  function(node_metric_details,
           apl_metric_details,
           years,
           max_bars) {
    grid_plots <- list()
    for (i in seq_along(years)) {
      # (a) get metrics for node and apl
      annual_node_data <-
        get_yearly_metrics (node_metric_details, years[i])
      annual_apl_data <-
        get_yearly_metrics(apl_metric_details, years[i])
      
      # (b) sums of node and apl metrics by period and merge both
      node_metric_to_bar_plot <-
        get_all_metrics_sum_by_group_name (annual_node_data, 'period')
      
      node_apl_to_bar_plot <-
        get_all_metrics_sum_by_group_name (annual_apl_data, 'period')
      
      data_to_bar_plot <-
        merge(node_metric_to_bar_plot, node_apl_to_bar_plot, all = TRUE)
      data_to_bar_plot[sapply(data_to_bar_plot, is.na)] <- 0
      data_to_bar_plot <-
        get_metric_top_scores_at_most (data_to_bar_plot,
                                       metric_names, max_bars)
      
      # (c) Draw the plots
      annual_plots <-
        generate_metric_bar_plots_for_one_row(
          data_to_bar_plot,
          'period',
          years[i],
          metric_names,
          max_bars,
          show_x_axis_label = ifelse(i == 3, TRUE, FALSE)
        )
      
      grid_plots[[i]] <- ggarrange(
        plotlist = annual_plots,
        ncol = length(metric_names),
        nrow = 1
      )
      
    }
    return (grid_plots)
  }

generate_full_graph_grid_metrics_whole_series <-
  function(node_metric_details,
           apl_metric_details,
           max_bars) {
    grid_plots <- list()
    
    # Merge node metrics and APL
    node_metric_to_bar_plot <-
      get_all_metrics_sum_by_group_name (node_metric_details, 'period')
    
    node_apl_to_bar_plot <-
      get_all_metrics_sum_by_group_name (apl_metric_details, 'period')
    
    data_to_bar_plot <-
      merge(node_metric_to_bar_plot, node_apl_to_bar_plot, all = TRUE)
    data_to_bar_plot[sapply(data_to_bar_plot, is.na)] <- 0
    data_to_bar_plot <-
      get_metric_top_scores_at_most (data_to_bar_plot,
                                     metric_names, max_bars)
    
    # Generate data for each of the two rows
    row_1 <-
      data_to_bar_plot %>% select(period, detections, mean_distance, degree)
    row_1_col_names <- colnames(row_1)[c(-1)]
    row_2 <-
      data_to_bar_plot %>% select(period, betweenness, eigenvector, loops)
    row_2_col_names <- colnames(row_2)[c(-1)]
    
    # Get row plots
    first_row_plots  <-
      generate_metric_bar_plots_for_one_row(
        data_to_bar_plot,
        'period',
        NULL,
        row_1_col_names,
        max_bars,
        show_x_axis_label = TRUE,
        is_whole_series = TRUE,
        title_position = 0
      )
    grid_plots[[1]] <- ggarrange(
      plotlist = first_row_plots,
      ncol = length(row_1_col_names),
      nrow = 1
    )
    
    
    second_row_plots  <-
      generate_metric_bar_plots_for_one_row(
        row_2,
        'period',
        NULL,
        row_2_col_names,
        max_bars,
        show_x_axis_label = TRUE,
        is_whole_series = TRUE,
        title_position = 0
      )
    
    grid_plots[[2]] <- ggarrange(
      plotlist = second_row_plots,
      ncol = length(row_2_col_names),
      nrow = 1
    )
    
    return(grid_plots)
    
  }

generate_all_graph_grid_metrics_stations_whole_series <-
  function(node_metric_details,
           max_bars,
           is_full_graph = TRUE) {
    grid_plots <- list()
    
    # Merge node metrics and APL
    data_to_bar_plot <-
      get_all_metrics_sum_by_group_name (node_metric_details, 'station')
    
    data_to_bar_plot <-
      get_metric_top_scores_at_most (data_to_bar_plot,
                                     metric_names, max_bars)
    
    
    # Generate data for each of the two rows
    if (is_full_graph) {
      row_1 <-
        data_to_bar_plot %>% select(station, detections, degree, betweenness)
    }
    else {
      row_1 <-
        data_to_bar_plot %>% select(station, degree, betweenness, eigenvector)
    }
    row_1_col_names <- colnames(row_1)[c(-1)]
    
    # Get row plots
    first_row_plots  <-
      generate_metric_bar_plots_for_one_row(
        data_to_bar_plot,
        'station',
        NULL,
        row_1_col_names,
        max_bars,
        show_x_axis_label = TRUE,
        is_whole_series = TRUE,
        p_title_size = 0.3
      )
    grid_plots[[1]] <- ggarrange(
      plotlist = first_row_plots,
      ncol = length(row_1_col_names),
      nrow = 1
    )
    
    if (is_full_graph) {
      row_2 <-
        data_to_bar_plot %>% select(station, eigenvector, loops)
      row_2_col_names <- colnames(row_2)[c(-1)]
      second_row_plots  <-
        generate_metric_bar_plots_for_one_row(
          row_2,
          'station',
          NULL,
          row_2_col_names,
          max_bars,
          show_x_axis_label = TRUE,
          is_whole_series = TRUE
        )
      
      grid_plots[[2]] <- ggarrange(
        plotlist = second_row_plots,
        ncol = length(row_2_col_names),
        nrow = 1
      )
    }
    
    return(grid_plots)
    
  }
d2gex/seabasstfm documentation built on July 29, 2022, 2:20 a.m.