R/plot_total.R

Defines functions plot_total

Documented in plot_total

#' Plot cumulative totals over time in your Hyfe dataset
#'
#' @param ho A `hyfe` object, which is generated by `process_hyfe_data()`.
#' See full details and examples in the [package vignette](https://hyfe-ai.github.io/hyfer/#hyfe_object).
#' @param type The variable to plot: `"sessions"` (the default), `"sounds"`, or `"coughs"`.
#' If a vector is supplied, only its first element is used.
#' @param unit The time unit by which to plot it: `"days"` (the default), `"hours"`, or `"weeks"`.
#' If a vector is supplied, only its first element is used.
#' @param date_min Optionally filter the data to a minimum date.
#' Provide as a character vector of length one, with the format `"YYYY-MM-DD HH:MM:SS"`.
#' @param date_max Optionally filter to a date maximum. Same format as `date_min` above.
#' @param by_user If `FALSE` (the default), a single line will be plotted that pools all users together.
#' If `TRUE`, a line will be plotted for each user separately.
#' Note that this is only possible if the call to `process_hyfe_data()`
#' that created the `hyfe` object used the argument `by_user=TRUE`.
#' @param print_plot If `TRUE` (the default), the plot will be printed for you.
#' @param return_plot If `TRUE` (*not* the default), the `ggplot` plot object will be returned.
#' This can be useful if you want to modify/add to the plot (e.g., change axis titles, add a plot title, etc.).
#' @param return_data If `TRUE` (*not* the default), a simple dataframe will be returned
#' that provides you with the exact values used to produce the plot.
#'
#' @return If `return_plot` and/or `return_data` is `TRUE`, a list with the
#' corresponding elements `plot` (the `ggplot` object) and/or `data`
#' (a dataframe with columns `x`, `y` and, when present, `uid`).
#' Otherwise `NULL`, invisibly.
#'
#' @examples
#' \dontrun{
#' data(ho)
#' plot_total(ho)
#' plot_total(ho, type = 'sounds', unit = 'hours')
#' plot_total(ho, type = 'coughs', unit = 'weeks')
#'
#' data(ho_by_user)
#' plot_total(ho_by_user, type = 'coughs', unit = 'days', by_user = TRUE)
#' }
#'
#' @export
#'
plot_total <- function(ho,
                       type = c('sessions','sounds','coughs'),
                       unit = c('days','hours','weeks'),
                       date_min = NULL,
                       date_max = NULL,
                       by_user = FALSE,
                       print_plot = TRUE,
                       return_plot = FALSE,
                       return_data = FALSE){

  # Validate the enumerated inputs ============================================
  # Only the first element is used (as before), but an unknown value now fails
  # immediately with a clear message instead of a later "object not found".
  plot_type <- match.arg(type[1], c('sessions', 'sounds', 'coughs'))
  time_unit <- match.arg(unit[1], c('days', 'hours', 'weeks'))

  # Stage a working copy of the hyfe object
  hoi <- ho

  # Test to see if `ho` is user-separated
  this_by_user <- 'user_summaries' %in% names(hoi)

  # If so, pool data (unless the plot inputs say otherwise)
  if(this_by_user){
    hoi <- pool_user_data(hoi,
                          group_users = !by_user,
                          verbose = TRUE)
  }

  # Source dataset from correct time unit and variable type ====================

  df <- hoi[[time_unit]]
  if(is.null(df)){
    stop('`ho` does not contain a `', time_unit, '` table.', call. = FALSE)
  }

  # x axis: the time column that belongs to the chosen table
  df$x <- switch(time_unit,
                 hours = df$date_time,
                 days  = df$date,
                 weeks = lubridate::as_datetime(df$date_floor))

  # y axis and its label
  if(plot_type == 'sessions'){
    # The weekly table carries session time in days, hence the division by 7
    df$y <- switch(time_unit,
                   hours = df$session_hours_tot,
                   days  = df$session_days_tot,
                   weeks = df$session_days_tot / 7)
    ylabel <- switch(time_unit,
                     hours = 'Monitoring (person-hours)',
                     days  = 'Monitoring (person-days)',
                     weeks = 'Monitoring (person-weeks)')
  }else if(plot_type == 'sounds'){
    df$y <- df$peaks_tot
    ylabel <- 'Explosive sounds (n)'
  }else{
    df$y <- df$coughs_tot
    ylabel <- 'Cough detections (n)'
  }

  # Handle date filters ========================================================
  # Both sides of the comparison are converted to seconds since the epoch, so
  # the filter behaves the same whether `x` is stored as a Date, a date-time or
  # a numeric timestamp (comparing a Date directly to seconds would be wrong).

  if(!is.null(date_min)){
    dmin <- as.numeric(lubridate::as_datetime(date_min))
    df <- dplyr::filter(df, as.numeric(lubridate::as_datetime(x)) >= dmin)
  }
  if(!is.null(date_max)){
    dmax <- as.numeric(lubridate::as_datetime(date_max))
    df <- dplyr::filter(df, as.numeric(lubridate::as_datetime(x)) <= dmax)
  }

  # Keep uid, if ho was aggregated by user =====================================

  if('uid' %in% names(df)){
    df <- dplyr::select(df, x, y, uid)
  }else{
    df <- dplyr::select(df, x, y)
  }

  # Build plot =================================================================

  if(by_user && this_by_user){
    # Keep users separate
    p <- ggplot2::ggplot(df, ggplot2::aes(x=x, y=y, color=uid)) +
      ggplot2::theme(legend.text = ggplot2::element_text(size=4)) +
      ggplot2::geom_line()
  }else{
    # Pool all users
    if(by_user){message('Sorry, cannot plot by user -- `hyfe` object is an aggregation.')}
    p <- ggplot2::ggplot(df, ggplot2::aes(x=x, y=y)) +
      ggplot2::geom_line(alpha=.5,lwd=1.5,col='darkblue') +
      ggplot2::geom_area(alpha=.3,col='darkblue',fill='darkblue')
  }

  # add labels
  p <- p +
    ggplot2::xlab(NULL) +
    ggplot2::ylab(ylabel)

  # Return =====================================================================
  return_list <- list()
  if(return_plot){return_list$plot <- p}
  if(return_data){return_list$data <- df}
  if(print_plot){print(p)}
  if(length(return_list) > 0){
    return(return_list)
  }
  invisible(NULL)
}
hyfe-ai/hyfer documentation built on Dec. 20, 2021, 5:53 p.m.