#' Fit mean-variance regression model to cough
#'
#' Fit a regression model of the relationship between mean cough rate and its variance,
#' in which each data point is the mean-variance from a user with sufficient monitoring time.
#'
#' @param ho A `hyfe` object, which is generated by `process_hyfe_data()`.
#' See full details and examples in the [package vignette](https://hyfe-ai.github.io/hyfer/#hyfe_object).
#' @param cutoff_hours Minimum number of qualifying hours a user must have to be included in the model.
#' @param cutoff_hourly Minimum monitoring time within an hour for that hour to be included in the model.
#' @param toplot If `TRUE`, a regression scatterplot plot will be displayed.
#'
#' @return A list with several named elements: `p` is the regression F statistic;
#' `r2` is the adjusted R-squared value for the regression; `n` is the number of users included in the
#' analysis (i.e., the sample size of the model); `coefficients` provide the coefficient estimates from
#' the model (b1 and intercept); `model` provids the model object; `data` provide the data used to fit the model.
#'
#' @export
#'
fit_cough_model <- function(ho,
cutoff_hours = 50,
cutoff_hourly = 30,
toplot=TRUE){
#=============================================================================
# for debugging only -- not run!
if(FALSE){
library(hyferdrive)
library(hyfer)
hd <- get_cohort_data('jhu', fast=TRUE, verbose=TRUE)
ho <- process_hyfe_data(hd,by_user=TRUE,verbose=TRUE)
ho_lm <- fit_cough_model(ho)
names(ho_lm)
cutoff_hours = 30
cutoff_hourly = 30
cutoff_daily = 4
verbose=TRUE
# Try it
ho_lm <- fit_cough_model(ho)
names(ho_lm)
ho_lm$p
ho_lm$r2
ho_lm$coefficients
ho_lm$model
}
#=============================================================================
hos <- hyfe_summarize(ho, cutoff_hourly = cutoff_hourly, cutoff_daily = cutoff_daily)
hos$users %>% head
# Only keep users with valid sample sizes and rates
fitdata <- users %>%
dplyr::filter(hourly_n >= cutoff_hours) %>%
dplyr::filter(is.finite(hourly_rate)) %>%
dplyr::filter(hourly_rate > 0) %>%
select(uid:cohort_id,
n=hourly_n,
rate_mean=hourly_rate, rate_variance=hourly_var)
head(fitdata)
# Plot process
if(toplot){
par(mfrow=c(1,1))
par(mar=c(4.2,4.2,.5,.5))
plot(log(rate_variance) ~ log(rate_mean), data=fitdata,
xlab='log Mean Hourly Cough Rate',
ylab='log Variance of Cough Rate',
pch=16,col=adjustcolor('black',alpha.f=.3))
var_mean_fit <- lm( log(rate_variance) ~ log(rate_mean), data=fitdata)
abline(var_mean_fit, col='firebrick')
}
# Harvest statistics
summary(var_mean_fit)
r2 <- summary(var_mean_fit)$adj.r.squared
f <- summary(var_mean_fit)$fstatistic
p <- pf(f[1],f[2],f[3],lower.tail=F)
attributes(p) <- NULL
# Coefficients
b1 <- var_mean_fit$coefficients[2] ; b1
intercept <- var_mean_fit$coefficients[1] ; intercept
cat('variance = exp(',b1,'*log(mean) + ',intercept,')')
cat('\n')
return_list <- list(p=p,
r2=r2,
n=nrow(fitdata),
coefficients=data.frame(b1,intercept),
model = var_mean_fit,
data=fitdata)
return(return_list)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.