R/plot_trajectory.R

Defines functions plot_trajectory

Documented in plot_trajectory

#' Plot Hyfe trajectory
#'
#' In this plot, all users are plotted with the start of their monitoring period beginning at the plot's origin.
#' This type of plot could be useful if you want to examine patterns across users, such as retention in using the app
#' or the evolution of cough during a COVID-19 diagnosis.
#'
#' A user's monitoring period is taken to start at their first record with more
#' than 0.1 session-hours of monitoring. Records before that point are dropped,
#' and users who never reach that threshold are left out of the plot.
#'
#' @param ho A `hyfe` object, which is generated by `process_hyfe_data()`.
#' This function only accepts `hyfe` objects that have been processed with `by_user = TRUE`.
#' See full details and examples in the [package vignette](https://hyfe-ai.github.io/hyfer/#hyfe_object).
#' @param type The variable to plot: `'coughs'` (the default), `'sounds'`, `'sessions'`, or `'rate'`.
#' @param unit The time unit by which to plot it: `'days'` (the default), `'hours'`, or `'weeks'`.
#' @param pool_users If `TRUE`, all user data will be pooled together into a single cumulative line.
#' @param day_max Option to control the extent of the X axis, expressed in the chosen `unit`.
#' If `NULL` (the default), the largest value in the data is used.
#' In a trajectory plot, the minimum of the X axis will always be zero.
#' @param print_plot If `TRUE` (the default), the plot will be printed for you.
#' @param return_plot If `TRUE` (*not* the default), the `ggplot` plot object will be returned.
#' This can be useful if you want to modify/add to the plot (e.g., change axis titles, add a plot title, etc.).
#' @param return_data If `TRUE` (*not* the default), a simple dataframe will be returned
#' that provides you with the exact values used to produce the plot.
#' @param verbose Print status updates?
#'
#' @return If `return_plot` and/or `return_data` is `TRUE`, a named list holding
#' the requested elements (`plot` and/or `data`). Otherwise `NULL`, invisibly.
#'
#' @examples
#' \dontrun{
#' data(hyfe_data)
#' ho <- process_hyfe_data(hyfe_data, by_user = TRUE)
#'
#' plot_trajectory(ho)
#' plot_trajectory(ho, type = 'sessions', unit = 'weeks')
#' plot_trajectory(ho, type = 'rate', unit = 'days', pool_users = TRUE)
#' }
#'
#' @export
#'
plot_trajectory <- function(ho,
                            type = c('coughs','sounds','sessions', 'rate'),
                            unit = c('days','hours','weeks'),
                            pool_users = FALSE,
                            day_max = NULL,
                            print_plot = TRUE,
                            return_plot = FALSE,
                            return_data = FALSE,
                            verbose=TRUE){

  # Validate the choices up front so a typo fails with a clear message instead
  # of a later "object 'df' not found" / "object 'ylabel' not found".
  # Only the first element is used, as before.
  plot_type <- match.arg(type[1], c('coughs', 'sounds', 'sessions', 'rate'))
  time_unit <- match.arg(unit[1], c('days', 'hours', 'weeks'))

  # Work on the output of pool_user_data() so the caller's object is untouched
  hoi <- pool_user_data(ho,
                        group_users = FALSE,
                        verbose = verbose)

  # Source dataset from correct time unit and variable type ====================

  df <- switch(time_unit,
               hours = hoi$hours,
               days  = hoi$days,
               weeks = hoi$weeks)

  df$x <- switch(time_unit,
                 hours = df$study_hour,
                 days  = df$study_day,
                 weeks = df$study_week)

  xlabel <- switch(time_unit,
                   hours = 'Hours since enrollment',
                   days  = 'Days since enrollment',
                   weeks = 'Weeks since enrollment')

  if (plot_type == 'sessions') {
    # Monitoring effort is expressed in the same unit as the X axis
    df$y <- switch(time_unit,
                   hours = df$session_hours,
                   days  = df$session_days,
                   weeks = df$session_days / 7)
    ylabel <- switch(time_unit,
                     hours = 'Monitoring (person-hours)',
                     days  = 'Monitoring (person-days)',
                     weeks = 'Monitoring (person-weeks)')
  } else {
    df$y <- switch(plot_type,
                   sounds = df$peaks,
                   coughs = df$coughs,
                   rate   = df$cough_rate)
    ylabel <- switch(plot_type,
                     sounds = 'Explosive sounds (n)',
                     coughs = 'Cough detections (n)',
                     rate   = 'Coughs per person-hour (n)')
  }

  # Simplify
  df <- dplyr::select(df, x, y, session_hours, uid)

  # Handle trajectory component ================================================
  # Shift each user's timeline so that their first active record sits at x = 0.

  per_user <- lapply(unique(df$uid), function(uidi) {
    # which() drops NA comparisons, so NA uids cannot inject all-NA rows
    dfi <- df[which(df$uid == uidi), , drop = FALSE]
    dfi <- dfi[order(dfi$x), , drop = FALSE]

    first_active <- which(dfi$session_hours > 0.1)[1]
    if (is.na(first_active)) {
      # This user never passed the activity threshold: nothing to plot
      return(NULL)
    }

    # Offset by the time value of the first active record, not its row index;
    # the two only coincide when x happens to equal the 1-based row position.
    dfi$x <- dfi$x - dfi$x[first_active]
    dfi[which(dfi$x >= 0), , drop = FALSE]
  })

  # Bind once rather than growing a data frame inside a loop (NULLs are dropped)
  df <- dplyr::bind_rows(per_user)

  if (nrow(df) == 0) {
    stop("No user has a record with more than 0.1 session-hours of monitoring, ",
         "so there is nothing to plot.",
         call. = FALSE)
  }

  # Handle date filters
  if (is.null(day_max)) {
    day_max <- max(df$x, na.rm = TRUE)
  }

  # Pool user data if specified
  # NOTE(review): for type = 'rate' this sums the per-user rates rather than
  # recomputing a pooled rate -- confirm that this is the intended behaviour.
  if (pool_users) {
    df <- dplyr::summarize(dplyr::group_by(df, x), y = sum(y))
  }

  # Build plot =================================================================

  if (pool_users) {
    # NOTE(review): no colour is mapped in this branch, so the legend.text
    # theme setting looks like it has no visible effect here -- confirm.
    p <- ggplot2::ggplot(df, ggplot2::aes(x=x, y=y)) +
      ggplot2::theme(legend.text = ggplot2::element_text(size=4)) +
      ggplot2::geom_area(alpha=.3,col='seagreen4',fill='seagreen4') +
      ggplot2::geom_line(alpha=.5,lwd=.5,col='seagreen4')
  } else {
    p <- ggplot2::ggplot(df, ggplot2::aes(x=x, y=y, color=uid)) +
      ggplot2::geom_line(alpha=.5)
  }

  # add labels & xlim
  p <- p +
    ggplot2::xlim(0, day_max) +
    ggplot2::xlab(xlabel) +
    ggplot2::ylab(ylabel)

  # Return =====================================================================

  return_list <- list()
  if (return_plot) { return_list$plot <- p }
  if (return_data) { return_list$data <- df }
  if (print_plot) { print(p) }

  if (length(return_list) > 0) {
    return(return_list)
  }
  invisible(NULL)
}
hyfe-ai/hyfer documentation built on Dec. 20, 2021, 5:53 p.m.