R/fit_model_to_cough.R

Defines functions fit_cough_model

Documented in fit_cough_model

#' Fit mean-variance regression model to cough
#'
#' Fit a regression model of the relationship between mean cough rate and its variance,
#' in which each data point is the mean-variance from a user with sufficient monitoring time.
#'
#' @param ho A `hyfe` object, which is generated by `process_hyfe_data()`.
#' See full details and examples in the [package vignette](https://hyfe-ai.github.io/hyfer/#hyfe_object).
#' @param cutoff_hours Minimum number of qualifying hours a user must have to be included in the model.
#' @param cutoff_hourly Minimum monitoring time within an hour for that hour to be included in the model.
#' @param toplot If `TRUE`, a regression scatterplot plot will be displayed.
#'
#' @return A list with several named elements: `p` is the regression F statistic;
#' `r2` is the adjusted R-squared value for the regression; `n` is the number of users included in the
#' analysis (i.e., the sample size of the model); `coefficients` provide the coefficient estimates from
#' the model (b1 and intercept); `model` provids the model object; `data` provide the data used to fit the model.
#'
#' @export
#'
fit_cough_model <- function(ho,
                            cutoff_hours = 50,
                            cutoff_hourly = 30,
                            toplot=TRUE){

  #=============================================================================
  # for debugging only -- not run!
  if(FALSE){
    library(hyferdrive)
    library(hyfer)
    hd <- get_cohort_data('jhu', fast=TRUE, verbose=TRUE)
    ho <- process_hyfe_data(hd,by_user=TRUE,verbose=TRUE)
    ho_lm <- fit_cough_model(ho)
    names(ho_lm)

    cutoff_hours = 30
    cutoff_hourly = 30
    cutoff_daily = 4
    verbose=TRUE

    # Try it
    ho_lm <- fit_cough_model(ho)
    names(ho_lm)
    ho_lm$p
    ho_lm$r2
    ho_lm$coefficients
    ho_lm$model
  }
  #=============================================================================

  hos <- hyfe_summarize(ho, cutoff_hourly = cutoff_hourly, cutoff_daily = cutoff_daily)
  hos$users %>% head

  # Only keep users with valid sample sizes and rates
  fitdata <- users %>%
    dplyr::filter(hourly_n >= cutoff_hours) %>%
    dplyr::filter(is.finite(hourly_rate)) %>%
    dplyr::filter(hourly_rate > 0) %>%
    select(uid:cohort_id,
           n=hourly_n,
           rate_mean=hourly_rate, rate_variance=hourly_var)
  head(fitdata)

  # Plot process
  if(toplot){
    par(mfrow=c(1,1))
    par(mar=c(4.2,4.2,.5,.5))
    plot(log(rate_variance) ~ log(rate_mean), data=fitdata,
         xlab='log Mean Hourly Cough Rate',
         ylab='log Variance of Cough Rate',
         pch=16,col=adjustcolor('black',alpha.f=.3))
    var_mean_fit <- lm( log(rate_variance) ~ log(rate_mean), data=fitdata)
    abline(var_mean_fit, col='firebrick')
  }

  # Harvest statistics
  summary(var_mean_fit)
  r2 <- summary(var_mean_fit)$adj.r.squared
  f <- summary(var_mean_fit)$fstatistic
  p <- pf(f[1],f[2],f[3],lower.tail=F)
  attributes(p) <- NULL

  # Coefficients
  b1  <- var_mean_fit$coefficients[2] ; b1
  intercept  <- var_mean_fit$coefficients[1] ; intercept


  cat('variance = exp(',b1,'*log(mean) + ',intercept,')')
  cat('\n')
  return_list <- list(p=p,
                      r2=r2,
                      n=nrow(fitdata),
                      coefficients=data.frame(b1,intercept),
                      model = var_mean_fit,
                      data=fitdata)

  return(return_list)
}
hyfe-ai/hyfer documentation built on Dec. 20, 2021, 5:53 p.m.