# R/plot_circadian.R

# Defines functions plot_circadian

# Documented in plot_circadian

#' Plot circadian patterns in monitoring or cough (Hyfe)
#'
#' Sums the hourly table of a `hyfe` object (`ho$hours`) by hour of day and
#' plots the requested variable.
#' In these plots, the x axis is hour of day, 0 - 24.
#'
#' @param ho A `hyfe` object, which is generated by `process_hyfe_data()`.
#' See full details and examples in the [package vignette](https://hyfe-ai.github.io/hyfer/#hyfe_object).
#' @param type The variable to plot: one of `'coughs'` (the default), `'sessions'`,
#' `'sounds'`, or `'rate'`. If a vector is supplied, only its first element is used.
#' An unrecognized value raises an error.
#' @param date_min Optionally filter the data to a minimum date.
#' Provide as a character vector of length one, with the format `"YYYY-MM-DD HH:MM:SS"`.
#' The filter is applied to the hourly records *before* they are summed by hour of day.
#' @param date_max Optionally filter to a date maximum. Same format as `date_min` above.
#' @param by_user If `FALSE` (the default), a single line will be plotted that pools all users together.
#' If `TRUE`, a line will be plotted for each user separately.
#' Note that this is only possible if the call to `process_hyfe_data()`
#' that created the `hyfe` object used the argument `by_user=TRUE`.
#' @param print_plot If `TRUE` (the default), the plot will be printed for you.
#' @param return_plot If `TRUE` (*not* the default), the `ggplot` plot object will be returned.
#' This can be useful if you want to modify/add to the plot (e.g., change axis titles, add a plot title, etc.).
#' @param return_data If `TRUE` (*not* the default), a simple dataframe will be returned
#' that provides you with the exact values used to produce the plot.
#' @param verbose Print status updates?
#'
#' @return If `return_plot` and/or `return_data` is `TRUE`, a named list holding
#' `plot` (the `ggplot` object) and/or `data` (a dataframe with columns `x`, the
#' hour of day as a factor, `y`, the plotted value, and `uid` when plotting by user).
#' Otherwise `NULL`, invisibly.
#'
#' @examples
#' \dontrun{
#' data(hyfe_data)
#' ho <- process_hyfe_data(hyfe_data)
#' ho_by_user <- process_hyfe_data(hyfe_data, by_user = TRUE)
#'
#' plot_circadian(ho)
#' plot_circadian(ho_by_user, by_user = TRUE)
#' plot_circadian(ho, type = 'sessions')
#' plot_circadian(ho, type = 'sounds')
#' plot_circadian(ho, type = 'coughs')
#' plot_circadian(ho, type = 'rate')
#' plot_circadian(ho, date_min = '2021-01-01 00:00:00')
#' }
#'
#' @export
#'
plot_circadian <- function(ho,
                       type = c('coughs','sessions','sounds','rate'),
                       date_min = NULL,
                       date_max = NULL,
                       by_user = FALSE,
                       print_plot = TRUE,
                       return_plot = FALSE,
                       return_data = FALSE,
                       verbose=TRUE){

  # Validate the plot type up front so a typo fails with a clear message.
  # `type[1]` keeps the historical behavior of using only the first element.
  plot_type <- match.arg(type[1], c('coughs','sessions','sounds','rate'))

  # Stage safe copies of datasets
  hoi <- ho

  # Test to see if `ho` is user-separated
  this_by_user <- 'user_summaries' %in% names(hoi)

  # If so, pool data (unless the plot inputs say otherwise)
  if(this_by_user){
    hoi <- pool_user_data(hoi,
                          group_users = !by_user,
                          verbose=verbose)
  }

  # A per-user plot is only possible when the object is user-separated
  split_users <- this_by_user && by_user

  # Source hourly dataset ======================================================

  df <- hoi$hours

  # Handle date filters ========================================================
  # Filtering must happen on the hourly records, before they are collapsed to
  # hour of day; afterwards no date information is left to filter on.
  # NOTE(review): assumes `hours` carries a `timestamp` column in epoch seconds
  # -- confirm against `process_hyfe_data()`.

  if(!is.null(date_min) || !is.null(date_max)){
    if(!'timestamp' %in% names(df)){
      stop('Cannot apply `date_min`/`date_max`: the hourly table has no `timestamp` column.',
           call. = FALSE)
    }
    if(!is.null(date_min)){
      dmin <- as.numeric(lubridate::as_datetime(date_min))
      df <- df %>% dplyr::filter(as.numeric(timestamp) >= dmin)
    }
    if(!is.null(date_max)){
      dmax <- as.numeric(lubridate::as_datetime(date_max))
      df <- df %>% dplyr::filter(as.numeric(timestamp) <= dmax)
    }
  }

  # Sum by hour of day (and by user, if requested) =============================

  if(split_users){
    df <- df %>% dplyr::group_by(uid, hour)
  }else{
    df <- df %>% dplyr::group_by(hour)
  }

  df <-
    df %>%
    dplyr::summarize(dplyr::across(session_seconds:coughs, sum),
                     .groups = 'drop') %>%
    dplyr::mutate(session_weeks = session_days / 7) %>%
    dplyr::mutate(cough_rate = coughs / session_hours) %>%
    dplyr::arrange(hour)

  # Hour of day is treated as a discrete axis
  df$x <- as.factor(df$hour)

  # Pick the plotted variable and its axis label ===============================

  df$y <- switch(plot_type,
                 sessions = df$session_hours,
                 sounds   = df$peaks,
                 coughs   = df$coughs,
                 rate     = df$cough_rate)

  ylabel <- switch(plot_type,
                   sessions = 'Monitoring (person-hours)',
                   sounds   = 'Explosive sounds (n)',
                   coughs   = 'Cough detections (n)',
                   rate     = 'Coughs per person-hour')

  # Keep uid, if ho was aggregated by user =====================================

  if('uid' %in% names(df)){
    df <- df %>% dplyr::select(x,y,uid)
  }else{
    df <- df %>% dplyr::select(x,y)
  }

  # Build plot =================================================================

  if(split_users){
    p <- ggplot2::ggplot(df, ggplot2::aes(x=x, y=y, color=uid)) +
      ggplot2::theme(legend.text = ggplot2::element_text(size=4)) +
      ggplot2::geom_point()
  }else{
    # Pool all users
    if(by_user){message('Sorry, cannot plot by user -- `hyfe` object is an aggregation.')}
    p <- ggplot2::ggplot(df, ggplot2::aes(x=x, y=y)) +
      ggplot2::geom_col(alpha=.5,fill='sienna3')
  }

  # add labels
  p <- p +
    ggplot2::xlab('Hour of day') +
    ggplot2::ylab(ylabel)

  # Return
  if(print_plot){print(p)}

  return_list <- list()
  if(return_plot){return_list$plot <- p}
  if(return_data){return_list$data <- df}

  # Nothing requested: plot was (optionally) printed, so return quietly
  if(length(return_list) == 0){
    return(invisible(NULL))
  }
  return_list
}
# hyfe-ai/hyfer documentation built on Dec. 20, 2021, 5:53 p.m.