#' Cough rate distribution
#'
#' Summarize cough rates and retrieve hourly cough counts for use in
#' histogram-production and cough distribution modeling.
#'
#' @param ho A `hyfe` object, which is generated by `process_hyfe_data()`.
#' See full details and examples in the [package vignette](https://hyfe-ai.github.io/hyfer/#hyfe_object).
#' @param min_session The minimum amount of monitoring (in hours) required
#' during a single hour in order for that hour to be included in the cough rate
#' estimation. For example, sometimes an hour of day contains only a few
#' minutes of monitoring for a user; that makes for a pretty poor estimate of
#' that hour's cough rate. The default `min_session` is `0.5` hours, or
#' 30 minutes of monitoring within an hour.
#'
#' @details This function can take both aggregated data (`ho`) and
#' user-separated data (`ho_by_user`), but it does best with the latter.
#' It returns metrics about hourly cough rates based on an hour-by-hour
#' analysis. Similar to the inputs in `hyfe_summarize()`, the argument
#' `min_session` sets the monitoring time an hour must reach before it is used.
#'
#' An object is treated as user-separated when it has a `user_summaries` slot;
#' each hourly row is then labelled with that user's `uid`. Otherwise the
#' object's `hours` table is used and every row is labelled with the `uid`
#' `"aggregate"`.
#'
#' @return A list with four named slots:
#'
#' * `overall`: a one-row dataframe with the mean and SD of hourly cough rate
#'   for the entire dataset. These metrics are based on the mean/variance for
#'   each individual user, i.e., `mean_of_mean` is the average of mean cough
#'   rates across users. When using a `hyfe` object prepared with
#'   `by_user=TRUE`, this means that each user is weighted equally in the
#'   summary statistics. When using a `hyfe` object in which all user data are
#'   aggregated together, users will be weighted according to their session
#'   time.
#' * `users`: a dataframe with the mean and variance of the hourly cough rate,
#'   and the number of qualifying hours, for each user.
#' * `rates`: a numeric vector of the hourly cough rates that satisfy the
#'   minimum monitoring threshold.
#' * `details`: a dataframe with all details you might need to analyze these
#'   rates (essentially the hours table from a hyfe object, restricted to the
#'   hours that satisfy the minimum monitoring threshold).
#'
#' @examples
#' \dontrun{
#' data(hyfe_data)
#' ho <- process_hyfe_data(hyfe_data)
#' ho_by_user <- process_hyfe_data(hyfe_data, by_user = TRUE)
#' cough_rate_distribution(ho)
#' head(cough_rate_distribution(ho_by_user)$users, 20)
#' }
#'
#' @export
#'
cough_rate_distribution <- function(ho,
                                    min_session = 0.5) {
  stopifnot(
    is.numeric(min_session),
    length(min_session) == 1L,
    !is.na(min_session)
  )

  # Shared by both branches: drop under-monitored hours, then keep `uid`
  # followed by the column ranges that the summaries below rely on.
  keep_monitored_hours <- function(hours) {
    hours %>%
      dplyr::filter(session_hours >= min_session) %>%
      dplyr::select(uid, timestamp:n_uid, session_hours:cough_rate)
  }

  if ("user_summaries" %in% names(ho)) {
    # Data were processed separately for each user.
    if (length(ho$user_summaries) == 0L) {
      stop("`ho$user_summaries` is empty; there are no users to summarize.",
           call. = FALSE)
    }
    per_user <- lapply(ho$user_summaries, function(user_i) {
      uid_i <- user_i$id_key$uid[1]
      # mutate() (rather than `$<-`) also copes with a zero-row hours table.
      keep_monitored_hours(dplyr::mutate(user_i$hours, uid = uid_i))
    })
    # Bind once at the end instead of growing a data frame inside a loop.
    hourlies <- dplyr::bind_rows(per_user)
  } else {
    # Data were processed in aggregate.
    if (!"hours" %in% names(ho)) {
      stop("`ho` has neither a `user_summaries` nor an `hours` slot.",
           call. = FALSE)
    }
    hourlies <- keep_monitored_hours(dplyr::mutate(ho$hours, uid = "aggregate"))
  }

  # Per-user summaries of the hourly cough rate.
  user_summaries <-
    hourlies %>%
    dplyr::group_by(uid) %>%
    dplyr::summarize(rate_mean = mean(cough_rate, na.rm = TRUE),
                     rate_variance = var(cough_rate, na.rm = TRUE),
                     n_hours = dplyr::n(),
                     n_uid = length(unique(uid)))

  # Overall summary: statistics of the per-user statistics, so each row of
  # `user_summaries` carries equal weight.
  overall_summary <-
    user_summaries %>%
    dplyr::summarize(mean_of_mean = mean(rate_mean, na.rm = TRUE),
                     sd_of_mean = sd(rate_mean, na.rm = TRUE),
                     mean_of_variance = mean(rate_variance, na.rm = TRUE),
                     sd_of_variance = sd(rate_variance, na.rm = TRUE),
                     n_hours_tot = sum(n_hours),
                     n_hours_mean = mean(n_hours, na.rm = TRUE),
                     n_uid = dplyr::n())

  list(overall = overall_summary,
       users = user_summaries,
       rates = as.numeric(hourlies$cough_rate),
       details = hourlies)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.