R/getIncidence.R

Defines functions getIncidence MIaggregate MIpredict MIdata getAgeYear calcPoisExact AggFunc getIncData

Documented in AggFunc calcPoisExact getAgeYear getIncData getIncidence MIaggregate MIdata MIpredict

#' @title Function to impute serodate, split at censoring date, and set age.
#' @description Function that imputes sero date, splits at censoring date and set the age.
#' @param rtdat dataset from \code{\link{getRTData}}. 
#' @param Args takes list from \code{\link{setArgs}}.
#' @param bdat dataset from \code{\link{getBirthDate}}. 
#' @param func Function to perform additional operation. 
#' @return data.frame
#' @export
#' @examples
#' Args <- setArgs(imputeMethod = imputeRandomPoint)
#' rtdat <- getRTData(dat=getHIV())
#' getIncData(rtdat, Args = Args)
getIncData <- function(rtdat, bdat = NULL, Args, func = identity) {
  if (is.null(bdat)) bdat = getBirthDate()
  dat <- Args$imputeMethod(rtdat)
  edat <- splitAtSeroDate(dat) 
  edat <- setData(edat, Args, time2 = "obs_end", birthdate = bdat)
  edat <- mutate(edat, tscale = .data$Time/365.25)
  func(edat)
}

#' @title Function for making the AggByYear and AggByAge functions.
#' @description  A function factory for creating specific AggByFuncs, see for
#' example \code{\link{AggByYear}}. This function is a closure and so returns
#' another function.
#' @param RHS The variable name as string that you want to aggregate by.
#' @return function
#' @export
#' @examples
#' AggByYear <- AggFunc("Year")

AggFunc <- function(RHS) {
  function(dat) {
    F1 <- stats::as.formula(paste(
      "cbind(sero_event, pyears=Time/365.25) ~ ", RHS))
    out <- stats::aggregate(F1, data=dat, FUN=sum, drop=FALSE)
    out
  }
}

#' @title Aggregates sero events and person years by year.
#' @description Aggregates data to get sero events and pyears by year.
#' @param dat A dataframe generated from \code{\link{getIncData}}. 
#' @return data.frame
#' @export 
#' @examples
#' Args <- setArgs(imputeMethod = imputeMidPoint)
#' rtdat <- getRTData(dat = getHIV())
#' idat <- getIncData(rtdat, Args = Args)
#' inc <- AggByYear(idat)
AggByYear <- AggFunc("Year")

#' @title Aggregates sero events and person years by age category.
#' @description Aggregates data to get sero events and pyears by age category.
#' @param dat A dataframe generated from \code{\link{getIncData}}. 
#' @return data.frame
#' @export 
#' @examples
#' # Show for one imputation 
#' getFiles <- setFiles('/home/alain/Seafile/AHRI_Data/2020')
#' Args <- setArgs(imputeMethod = imputeMidPoint)
#' rtdat <- getRTData(dat = getHIV())
#' idat <- getIncData(rtdat, Args = Args)
#' inc <- AggByAge(idat)
AggByAge <- AggFunc("AgeCat")

#' @title  Calculate crude incidence rates using the Poisson exact method. 
#' @description  Calculate crude incidence rates using the Poisson exact method. 
#' @param dat A dataset from \code{\link{AggByYear}} or \code{\link{AggByAge}}.
#' @param byVar Either "AgeCat" or "Year".
#' @param fmt If TRUE, format by 100 person-years and round off to three decimal places. 
#' @return data.frame
#' @export 
#' @examples
#' Args <- setArgs(imputeMethod = imputeMidPoint)
#' rtdat <- getRTData(getHIV())
#' idat <- getIncData(rtdat, Args = Args)
#' idat_yr <- AggByYear(idat)
#' calcPoisExact(idat_yr, byVar="Year")

calcPoisExact <- function(dat, byVar="Year", fmt=TRUE) {
  dat <- split(dat, dat[, byVar])
  dat <- do.call(rbind, lapply(dat, function(x)
    epitools::pois.exact(x$sero_event, x$pyears)))
  if (fmt==TRUE) {
    vars <- c("rate", "lower", "upper")
    dat[vars] <- lapply(dat[vars], function(x) round(x*100, 3))
  }
  dat <- dplyr::rename(dat, sero_event=.data$x, 
    pyears=.data$pt, lci=.data$lower, uci=.data$upper)
  dat
}



#' @title Calculate mean age by year. 
#' @description  Calculate mean age by year. This is mostly used in \code{MIpredict}
#' @param dat A dataset with an Age and Year variable. 
#' @return data.frame
#' @export 
#' @examples
#' age_dat <- getAgeYear(dat=setHIV(setArgs()))

getAgeYear <- function(dat) {
  group_by(dat, .data$Year) %>% 
  summarize(Age = mean(.data$Age)) %>% 
  mutate(Year = factor(.data$Year), tscale=1)
}


#' @title  Function to generate multiple datasets with imputed serodate.
#' @param rtdat Dataframe from \code{\link{getRTData}}.
#' @param Args Takes list from \code{\link{setArgs}}.
#' @param f Function to perform additional operations. 
#' @param bdat Dataframe of birthdates, otherwise it use \code{\link{getBirthDate}}
#' @return list
#' @export 
#' @examples
#' Args <- setArgs(nSim=2, mcores=1)
#' rtdat <- getRTData(dat=getHIV())
#' MIdata(rtdat, Args)
MIdata <- function(rtdat, Args, f = identity,  bdat = NULL) {
  if (is.null(bdat)) bdat = getBirthDate()
  parallel::mclapply(seq(Args$nSim),
    function(i) { 
      cat(i, "")
      getIncData(rtdat, bdat, Args, func = f)},
      mc.cores = Args$mcores)
}


#' @title Predict incidence rate estimates after multiple imputation. 
#' @description Used with \code{mitools} to get incidence rate estimates per 100
#' person-years after multiple imputation.
#' @param object Results from the object generated by \code{MIcombine}.
#' @param newdata Dataframe of variables for prediction.
#' @return data.frame
#' @export 
#' @examples
#' rtdat <- getRTData(dat=getHIV())
#' Args <- setArgs(nSim=2, Years=c(2008:2018))
#' mdat <- MIdata(rtdat, Args)
#' mdat <- mitools::imputationList(mdat)
#' F1 <- "sero_event ~ -1 + as.factor(Year) + Age +
#'   as.factor(Year):Age + offset(log(tscale))" 
#' mods <- with(mdat, glm(as.formula(F1), family=poisson))
#' betas <- mitools::MIextract(mods,fun=coef)
#' var <- mitools::MIextract(mods, fun=vcov)
#' res <-  mitools::MIcombine(betas, var) 
#' MIpredict(object=mods, newdata=getAgeYear(setHIV(Args)))

MIpredict <- function(object,  newdata)  {
  if ("list" %in% class(object)) {
    res <- mitools::MIcombine(object)
    object <- object[[1]]
  } else {
    res = object
  }
  Terms <- stats::delete.response(stats::terms(object))
  m <- stats::model.frame(Terms, newdata, xlev = object$xlevels)
  mat <- stats::model.matrix(Terms, m, contrasts.arg = object$contrasts)
  pred <- mat %*% stats::coef(res)
  se <-  sqrt(diag(mat %*% stats::vcov(res) %*% t(mat)))
  fit <- exp(pred)
  se.fit <- se * abs(exp(pred))
  Qt <- c(-1, 1) * stats::qnorm((1 - 0.95)/2, lower.tail = FALSE)
  CI <- sapply(Qt, "*", se.fit)
  out <- data.frame(fit=fit, se.fit=se.fit, 
    lci=fit+CI[, 1], uci=fit+CI[, 2])
  out <- data.frame(lapply(out, "*", 100))
  rownames(out) <- object$xlevels[[1]]
  out
}

#' @title  Aggregates the HIV incidence results after mutiple imputation.
#' @description  Aggregates results after mutiple imputation.
#' @param dat List of results.
#' @param col_names Names used to collect columns into a data.frame.
#' @return data.frame
#' @export 
#' @examples
#' Args <- setArgs(nSim=2, mcores=1)
#' rtdat <- getRTData(dat=getHIV())
#' mdat <- MIdata(rtdat, Args)
#' mdat <- mitools::imputationList(mdat)
#' inc <- with(mdat, fun=AggByYear)
#' MIaggregate(inc)

MIaggregate <-  function(dat, col_names=c("sero_event", "pyears")) {
  if (!(class(dat) %in% c("imputationList", "list")))
    stop("Data must be a list of >= 1 dataset")
  same <- sapply(dat, function(x) nrow(x))
  if (abs(max(same) - min(same)) != 0)
    stop("Your datasets have different nrows")
  getEst <- function(dat, col_name) {
    out <- as.matrix(sapply(dat, "[[", col_name))
    rowMeans(out, na.rm=TRUE)
  }
  dat0 <- dat[[1]]
  dat0 <- dat0[, !(colnames(dat0) %in% col_names), drop=FALSE]
  out <- data.frame(lapply(col_names, function(x) getEst(dat, x)))
  colnames(out) <- col_names
  cbind(dat0, out)
}


#' @title  Calculate annual HIV incidence rates using the single random-point
#' imputation. 
#' @description  Calculate annual HIV incidence rates using the single random-point
#' approach and multiple imputation. 
#' @param Args takes list from \code{\link{setArgs}}.
#' @param sformula A list of formulas for the poisson regression models. Default is: "sero_event ~ -1 + as.factor(Year) + Age + as.factor(Year):Age + offset(log(tscale))"
#' @param aggFun Function to aggregate sero events and person-time by, see \code{\link{AggFunc}}. Default is \code{\link{AggByYear}}.
#' @param newdata Dataset of variables to predict incidence, see \code{\link{MIpredict}}.  Default is \code{getAgeYear(dat=setHIV(Args))}.
#' @return list
#' @export 
#' @examples
#' Args <- setArgs(Age=list(All=c(15, 49)), Years=c(2004:2018), nSim=2)
#' age_dat <- getAgeYear(dat=setHIV(Args))
#' sformula = "sero_event ~ -1 + as.factor(Year) + Age + as.factor(Year):Age + offset(log(tscale))"
#' getIncidence(Args, sformula, AggByYear, age_dat)
getIncidence <- function(
  Args=setArgs(), sformula="", aggFun=NULL, newdata=NULL) {
  if (Args$nSim < 2) stop('nSim in setArgs() must be > 1')
  if (sformula == "")
    sformula = "sero_event ~ -1 + as.factor(Year) + Age + as.factor(Year):Age + offset(log(tscale))"
  if (is.null(aggFun)) aggFun = AggByYear
  if (is.null(newdata)) newdata <- getAgeYear(dat=setHIV(Args))
  rtdat <- getRTData()
  mdat <- MIdata(rtdat, Args)
  agg_inc <- lapply(mdat, aggFun)
  agg_inc <- MIaggregate(agg_inc)
  mdat <- mitools::imputationList(mdat)
  mods <- with(mdat, stats::glm(as.formula(sformula), family=poisson))
  pois_inc <- MIpredict(mods, newdata)
  list(agg=agg_inc, pois_inc=pois_inc)
}
vando026/ahri documentation built on Aug. 10, 2024, 3:20 p.m.