R/mixture_model.R

Defines functions estimate_from_mixture

Documented in estimate_from_mixture

#' Fit a mixture model to classify serostatus
#'
#' Refers to section 11.1 - 11.4
#'
#' @param antibody_level - vector of the corresponding raw antibody level
#' @param breaks - number of intervals which the antibody_level are grouped into
#' @param pi - proportion of susceptible, infected
#' @param mu - a vector of means of component distributions (vector of 2 numbers in ascending order)
#' @param sigma -  a vector of standard deviations of component distributions (vector of 2 number)
#'
#' @importFrom mixdist mix mixgroup mixparam
#' @importFrom stats fitted
#'
#' @export
#' @return a list of class mixture_model with the following items
#'   \item{df}{the dataframe used for fitting the model}
#'   \item{info}{list of 3 items parameters, distribution and constraints for the fitted model}
#'   \item{susceptible}{fitted distribution for susceptible}
#'   \item{infected}{fitted distribution for infected}
#'
#' @examples
#' df <- vzv_be_2001_2003[vzv_be_2001_2003$age < 40.5,]
#' data <- df$VZVmIUml[order(df$age)]
#' model <- mixture_model(antibody_level = data)
#' model$info
#' plot(model)
mixture_model <- function (antibody_level, breaks=40, pi=c(0.2, 0.8), mu=c(2,6), sigma=c(0.5, 1)) {
  model <- list()

  # add 1 to avoid 0 when computing logs
  log_antibody <- log(antibody_level + 1)
  data <- mixgroup(log_antibody, breaks = breaks)
  starting_values <- mixparam(pi=pi,mu=mu,sigma=sigma)

  model$info <- mix(data,starting_values,dist="norm")
  model$df <- data.frame(antibody_level = data$X, count = data$count)
  model$susceptible <- fitted(model$info)$joint[,1]
  model$infected <- fitted(model$info)$joint[,2]

  class(model) <- "mixture_model"
  model
}

#' Estimate seroprevalence and foi by combining mixture model and regression
#'
#' Refers to section 11.2 - 11.4
#'
#' @param age - vector of age
#' @param antibody_level - vector of the corresponding raw antibody level
#' @param mixture_model - mixture_model object generated by serosv::mixture_model()
#' @param s - smoothing basis used to fit antibody level
#' @param sp - smoothing parameter
#' @param threshold_status - sero status using threshold approach in line listing (optional, for visualization and comparison only)
#' @param monotonize - whether to monotonize seroprevalence (default to TRUE)
#'
#' @import mgcv
#' @importFrom stats approx gaussian
#'
#' @return  a list of class estimated_from_mixture with the following items
#'   \item{df}{the dataframe used for fitting the model}
#'   \item{info}{a fitted "gam" model for mu(a)}
#'   \item{sp}{seroprevalence}
#'   \item{foi}{force of infection}
#'   \item{threshold_status}{serostatus using threshold method only if provided}
#' @seealso
#'  [mgcv::gam()] for more information about the fitted gam object
#'
#' @export
estimate_from_mixture <- function(age, antibody_level, threshold_status = NULL, mixture_model, s="ps", sp=83, monotonize=TRUE){
  # Helper funciton to compute derivative of mu(a) aka. mu'(a)
  differentiate_mu<-function(x,mu)
  {
    grid<-sort(unique(x))
    mugrid<-(mu[order(x)])[duplicated(sort(x))==F]
    dmu<-diff(mugrid)/diff(grid)
    dermu<-approx((grid[-1]+grid[-length(grid)])/2,dmu,grid[c(-1,-length(grid))])$y
    return(list(grid=grid[c(-1,-length(grid))],mu=mugrid[c(-1,-length(grid))],dermu=dermu))
  }


  model <- list()

  # sort, just in case
  age <- age[order(age)]
  antibody_level <- antibody_level[order(age)]
  log_antibody <- log(antibody_level + 1)

  # get mu_s and mu_i from mixture model
  mu_s <- mixture_model$info$parameters$mu[1]
  mu_i <- mixture_model$info$parameters$mu[2]

  # Fit mu(a)
  model$info <- gam(log_antibody ~ s(age, bs = "ps", sp=83), family = gaussian())

  # making sure age range is enough for a smooth estimation
  threshold <- 20
  pred_age <- age
  if (length(age) < threshold){
    pred_age <- seq(min(age, na.rm = TRUE), max(age, na.rm = TRUE), 0.1)
  }

  # compute sp = [mu(a) - mu_s] / mu_i - mu_s
  # call pava to monotonize sp
  fitted_mu_a <- predict(
    model$info,
    data.frame(age = pred_age)
  )
  model$sp <- data.frame(
    age = pred_age,
    sp = (fitted_mu_a - mu_s)/(mu_i - mu_s)
  )

  if (monotonize){
    model$sp$sp <- pava(model$sp$sp)$pai2
  }

  # compute foi = mu'(a)/(mu_I - mu(a))
  compute_dermu <- differentiate_mu(model$sp$age, model$sp$sp)
  model$foi <- compute_dermu$dermu/(mu_i - compute_dermu$mu)
  model$foi <- data.frame(
    foi_x = compute_dermu$grid,
    foi = model$foi/2 #not sure y the /2, but it works
  )

  # save fitted df
  model$df <- data.frame(age = age, antibody_level = antibody_level)

  if (!is.null(threshold_status)){
    model$df[["threshold_status"]] <- threshold_status
  }
  class(model) <- "estimate_from_mixture"

  model
}

Try the serosv package in your browser

Any scripts or data that you put into this service are public.

serosv documentation built on Oct. 18, 2024, 5:07 p.m.