R/detections/detection_views.R

Defines functions generate_heatmap_length_zones merge_extra_data_with_detections generate_heatmaps correct_ind_names build_all_detection_matrices build_detection_map_by_period slide_data_by_date_interval

source("R/utils.R")
source("R/heatmap_utils.R")
source("R/graph_utils.R")
library(dplyr)

slide_data_by_date_interval <-
  function(data,
           start,
           end,
           granularity = 'hours',
           as_list = TRUE) {
    #' @description Slice a dataframe into a list with as many dataframes as the granularity
    #' dictates
    #' @param data dataframe with a 'date_time' column
    #' @param start string YYYY-dd-mm HH:mm:SS
    #' @param end  string YYYY-dd-mm HH:mm:SS
    #' @param granularity string that takes values: months, weeks, days or hours
    #' @return a list of dataframes
    
    # Narrow down the interval
    data = data %>%
      dplyr::filter(date_time >= start & date_time <= end)
    
    # filter by the right criteria_interval
    interval_criteria <- get_interval_date_criteria(granularity)
    # Get unique interval criteria
    data <- data %>%
      dplyr::mutate(date_interval = format(as.POSIXct(data$date_time), interval_criteria))
    interval_criteria <- unique(data$date_interval)
    
    # Generate list of dataframes according to criteria
    data_views <- lapply(interval_criteria, function(x) {
      data %>%
        dplyr::filter(date_interval == x) %>%
        select(-date_interval)
    })
    
    names(data_views) <- interval_criteria
    
    if (as_list) {
      return (data_views)
    }
    
    return(bind_rows(data_views,  .id = "period"))
    
  }

build_detection_map_by_period <-
  function(data,
           ind_or_station = TRUE,
           loop_or_transition = FALSE,
           keep_duplicated_edges = FALSE) {
    #' @description Build a detection datframe by individuals or stations
    #' @param  ind_or_station boolean indicating whether the resulting dataframe
    #' must group by individuals or by stations
    #' @param loop_or_transition boolean parameter that indicates if we want an
    #' only-loop graph or only-transition-graph
    #' @param keep_duplicated_edges boolean indicating whether we keep duplicated
    #' edges or not
    
    if (ind_or_station) {
      additional_graph_cols <- c('period', 'ind_name')
      detection_cols <- additional_graph_cols
    }
    else {
      additional_graph_cols <- c('period')
      detection_cols <- c('period', 'to')
    }
    # (1) Build graph-like matrix with additional columns
    graph_df <- build_graph_matrix(
      data,
      loop_or_transition = loop_or_transition,
      keep_duplicated_edges = keep_duplicated_edges,
      keep_columns = additional_graph_cols
    )
    
    # (2) Build detection matrix indexed by individual name or stations out of
    # graph-like matrix
    detections_df <- graph_df %>%
      select(!!detection_cols) %>%
      group_by_at(detection_cols) %>%
      summarise(detections = n(), .groups = "drop") %>%
      rename(any_of(c('station' = 'to')))
    
    return (detections_df)
    
  }

build_all_detection_matrices <-
  function(data, ind_or_station = TRUE) {
    only_full_loops <- build_detection_map_by_period(
      data,
      ind_or_station,
      loop_or_transition = TRUE,
      keep_duplicated_edges = TRUE
    )
    
    only_full_transitions <- build_detection_map_by_period(
      data,
      ind_or_station,
      loop_or_transition = FALSE,
      keep_duplicated_edges = TRUE
    )
    
    group_by_columns <-
      c('period', ifelse(ind_or_station, 'ind_name', 'station'))
    loops_and_transitions <-
      rbind(only_full_loops, only_full_transitions) %>%
      group_by_at(group_by_columns) %>% summarise_all(sum)
    
    return (
      list(
        only_full_loops = only_full_loops,
        only_full_transitions = only_full_transitions,
        all = loops_and_transitions
      )
    )
    
  }
correct_ind_names <- function(detection_matrices) {
  corrected <- list()
  m_names <- names(detection_matrices)
  for (i in seq_along(detection_matrices)) {
    m_name <- m_names[i]
    data <- detection_matrices[[i]]
    data[data == "DICLAB-CONNECTMED-19803"] <- "DICLAB−19803"
    corrected[[m_name]] <- data
  }
  return(corrected)
}

generate_heatmaps <-
  function(data,
           main_title,
           legend_title,
           is_individual = TRUE,
           by_col_rows_order = FALSE) {
    if (is_individual) {
      row_name <- "ind_name"
      min_row_sum = 0
    }
    else {
      row_name <- "station"
      min_row_sum = 1
    }
    
    hmp <- list()
    det_types <- names(data)
    
    for (i in seq_along(data)) {
      detection_type <- det_types[[i]]
      data_view <- data[[i]]
      if (grepl('loops', detection_type)) {
        title_suffix <- 'the full graph (only loops)'
      }
      else if (grepl('transitions', detection_type)) {
        title_suffix <- 'the full graph (only transition edges)'
      }
      else {
        title_suffix <- 'the full graph (all edges)'
      }
      heatmap_matrix <-
        prepare_heatmap_data(data_view,
                             'detections',
                             row_name,
                             'period',
                             min_row_sum = min_row_sum)
      hmp[[i]] <-
        draw_interval_heatmap(
          heatmap_matrix,
          paste(main_title, "for", title_suffix),
          legend_title,
          by_col_rows_order
        )
      
    }
    return(hmp)
  }

merge_extra_data_with_detections <-
  function(detections, extra_data) {
    
    # (1) Get seabass species and add suffix to identifier
    extra_data <- extra_data %>%
      rename(ind_name = "ID.Code") %>%
      filter(Species == "Dicentrarchus labrax") %>%
      select(ind_name, Length, Zone) %>%
      mutate(ind_name = paste0("DICLAB-", ind_name))
    
    # (2) Generate detections by Length and Zone
    det_extra_data <- merge(detections,
                            extra_data[, c("ind_name", "Length", "Zone")],
                            by = "ind_name") %>%
      select(ind_name, Length, Zone)
    
    # (3) Clean string zone names
    det_extra_data <- det_extra_data %>% 
      mutate(Zone=str_replace_all(Zone, "[^a-zA-Z0-9\\s]", ""))
    
    return (det_extra_data)
  }

generate_heatmap_length_zones <- function(data,
                                          main_title,
                                          legend_title,
                                          row_name,
                                          col_name,
                                          by_percentage = FALSE,
                                          by_col_rows_order = FALSE,
                                          font_size = 5) {
  heatmap_matrix <-
    prepare_heatmap_data(
      data,
      'detections',
      row_name,
      col_name,
      min_row_sum = 0,
      month_as_name = FALSE,
      by_percentage = by_percentage
    )
  return(
    draw_interval_heatmap(heatmap_matrix,
                          main_title,
                          legend_title,
                          by_col_rows_order = by_col_rows_order,
                          font_size = font_size)
  )
  
}
d2gex/seabasstfm documentation built on July 29, 2022, 2:20 a.m.