#' Summarize Hyfe data
#'
#' This function produces summary tables for your `hyfe` object.
#'
#' @param ho A `hyfe` object, which is generated by `process_hyfe_data()`.
#' See full details and examples in the [package vignette](https://hyfe-ai.github.io/hyfer/#hyfe_object).
#' @param cutoff_hourly This function uses sample size cutoffs to ensure that rates are
#' not swung to extremes due to insufficient monitoring. For example,
#' an hour of day with 1 cough detection but only 1 minute of monitoring would
#' produce an hourly cough rate estimate of 60 coughs per hour. Those scenarios should be avoided.
#' The first cutoff used is `cutoff_hourly`: by default, at least 30 minutes of monitoring must occur within a hour-long window of the day
#' in order for that hour to contribute to the estimation of the hourly cough rate.
#' @param cutoff_daily The second cutoff used; the default is that at least 4 hours of monitoring must occur within a day in order
#' for that day to count toward the daily cough rate.
#' @details If your `hyfe` object was processed by aggregating all users together
#' (i.e., your call `process_hyfe_data()` included the argument `by_user = FALSE`,
#' which is the function's default), the cough rates reported should be treated with caution:
#' these rates are going to be biased by users with
#' (1) a lot of monitoring time and (2) a lot of coughs.
#' To summarize Hyfe data from multiple users in a way that is truly balanced,
#' in which each user is weighted equally, you should use a `hyfe` object processed `with by_user = TRUE`.
#'
#' @details Note that the cumulative counts are unaffected by these cutoffs, only the rates.
#'
#' @return A list with two slots: `overall`, which summarizes the entire dataset by pooling users together,
#' and `users`, which summarizes each user individually.
#' The `users` slot is `NULL` when the `hyfe` object provided was processed with `by_user = FALSE`.
#' That `users` table, if available, is used to build the `overall` slot in which the mean rates (i.e., `hourly_rate` and `daily_rate`) are the average of each user’s mean rates,
#' and – importantly – the variability metrics (`hourly_var`, `hourly_sd`, `daily_var`, `daily_sd`)
#' now pertain to the variability *among users*.
#'
#' @export
#'
hyfe_summarize <- function(ho,
cutoff_hourly = 30,
cutoff_daily = 4){
if(FALSE){
library(hyfer)
library(dplyr)
data(hyfe_data)
ho <- process_hyfe_data(hyfe_data)
ho <- process_hyfe_data(hyfe_data, by_user = TRUE)
cutoff_hourly = 30
cutoff_daily = 4
}
summarize_core <- function(ho, cutoff_hourly, cutoff_daily){
hoi <- ho
#hoi <- useri
hours <- hoi$hours
days <- hoi$days
if('sessions' %in% names(hoi)){
users <- hoi$sessions$uid %>% unique %>% length
}else{
users <- 1
}
# Basic summaries
summi <-
data.frame(users = users,
seconds = hours$session_seconds %>% as.numeric %>% sum)
summi$hours <- summi$seconds / 3600
summi$days <- summi$hours / 24
summi$years <- summi$days / 365
summi$sounds = hours$peaks %>% sum
summi$coughs = hours$coughs %>% sum
# Rates
hours <- hours %>% filter(session_seconds/n_uid >= cutoff_hourly*60)
hours %>% nrow
days <- days %>% filter(session_hours/n_uid >= cutoff_daily)
days %>% nrow
summi$hourly_n = hours %>% nrow
summi$hourly_rate = hours$cough_rate %>% mean(na.rm=TRUE)
summi$hourly_var = hours$cough_rate %>% var(na.rm=TRUE)
summi$hourly_sd = hours$cough_rate %>% sd(na.rm=TRUE)
summi$hourly_max = hours$cough_rate %>% max(na.rm=TRUE)
summi$daily_n = days %>% nrow
summi$daily_rate = days$cough_rate %>% mean(na.rm=TRUE)
summi$daily_var = days$cough_rate %>% var(na.rm=TRUE)
summi$daily_sd = days$cough_rate %>% sd(na.rm=TRUE)
summi$daily_max = days$cough_rate %>% max(na.rm=TRUE)
summi
return(summi)
}
if('user_summaries' %in% names(ho)){
# ho by user
users <- data.frame()
i=147
for(i in 1:length(ho$user_summaries)){
useri <- ho$user_summaries[[i]]
names(useri)
summi <- summarize_core(useri,
cutoff_hourly = cutoff_hourly,
cutoff_daily = cutoff_daily)
summi
idkey <- useri$id_key
if(nrow(idkey)==0){
idkey <- data.frame(uid=NA,name=NA,email=NA,alias=NA,cohort_id=NA)
}
summi <- data.frame(idkey, summi)
users <- rbind(users, summi)
}
users
users <- users[!is.na(users$uid),]
nrow(users)
overall <- data.frame(users = nrow(users),
seconds = sum(users$seconds),
hours = sum(users$hours),
days = sum(users$days),
years = sum(users$years),
sounds = sum(users$sounds),
coughs = sum(users$coughs),
hourly_n = mean(users$hourly_n,na.rm=TRUE),
hourly_rate = mean(users$hourly_rate, na.rm=TRUE),
hourly_var = var(users$hourly_rate, na.rm=TRUE),
hourly_sd = sd(users$hourly_rate, na.rm=TRUE),
hourly_max = max(users$hourly_rate, na.rm=TRUE),
daily_n = mean(users$daily_n, na.rm=TRUE),
daily_rate = mean(users$daily_rate, na.rm=TRUE),
daily_var = var(users$daily_rate, na.rm=TRUE),
daily_sd = sd(users$daily_rate, na.rm=TRUE),
daily_max = max(users$daily_rate, na.rm=TRUE))
overall
}else{
# Simply ho object
users <- NULL
overall <- summarize_core(ho,
cutoff_hourly = cutoff_hourly,
cutoff_daily = cutoff_daily)
}
return(list(overall = overall,
users = users
))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.