R/GenEpisode.R

Defines functions GenEpisode

Documented in GenEpisode

#' @title Calculate pseudo admission episodes
#'
#' @description Read in glucose data and assign an admission episode to every
#'   reading, calculate the time interval (in hours) between consecutive
#'   readings of each admission and the accumulated time within each episode,
#'   and convert the time of day of every reading into numeric hours on the
#'   24-hour clock. Rows whose glucose reading cannot be converted to a number
#'   are removed (a message reports how many).
#'
#'
#' @param dat A \emph{data.frame} that has at least the following variables:
#' \itemize{
#'  \item{RESULT.DATE} a date and time column of classes \dQuote{POSIXlt} and
#'    \dQuote{POSIXct} (can be generated by \code{\link{FormatDate}});
#'  \item{ADMISSION.ID} a character or numeric column for the ID of each
#'    hospital stay;
#'  \item{RESULT} a column of Blood Glucose readings (numeric, or text/factor
#'    that can be converted to numeric);
#'  \item{LOCATION} a column identifying the ward of each reading (used for
#'    grouping and for the key of the returned table).
#' }
#' @param epiMethod This indicates the method used to compute episodes
#'   (case-insensitive). If \code{Admininfo} (the default), the admission id is
#'   assumed to be the case id; if \code{Pseudo}, the episode number is
#'   generated using 48 hours as a cutoff.
#'
#' @return A \emph{data.table} copy of the input, keyed by \code{LOCATION},
#'   \code{ADMISSION.ID}, \code{EPISODE.ID} and \code{RESULT.DATE}, without the
#'   rows that have a non-numeric \code{RESULT}, and with the following
#'   additional variables:
#' \item{LOS.EACH}{time difference (hours) between a reading and the previous
#'   reading of the same admission; 0 for the first reading}
#' \item{EPISODE.ID}{In the case of pseudo, the episode number within each
#'   combination of \code{LOCATION} and \code{ADMISSION.ID}; it increases by 1
#'   whenever \code{LOS.EACH} is greater than 48 hours. Otherwise, it is always
#'   1, corresponding to each admission id identifying one hospital stay.}
#' \item{LOS.PSUM}{cumulative time (hours) since the earliest reading of the
#'   episode}
#' \item{hour}{time of day in numeric hours, measured from midnight in the time
#'   zone of \code{RESULT.DATE}}
#' \item{RESULT.MEAN}{\code{RESULT} converted to numeric}
#' @examples
#' # Load example data
#' data("gluDat")
#' # Focus on data from Ward A. Process date-time variable and blood glucose readings:
#' gluDat2 <- FormatDate(dat = gluDat[gluDat$LOCATION == "A", ], yy = 2020, mm = 7)
#' # Specify admission episodes based on admission ID:
#' gluDat3 <- GenEpisode(dat = gluDat2, epiMethod = "Admininfo")
#' @author Chuen Seng Tan, Ying Chen
#' @import data.table
#' @export
GenEpisode <- function(dat, epiMethod = "Admininfo") {
  epiMethod <- tolower(epiMethod)
  epiMethod <- match.arg(epiMethod, c("admininfo", "pseudo"))

  # Fail early with a readable message instead of a cryptic setkey() error.
  required_cols <- c("RESULT.DATE", "ADMISSION.ID", "RESULT", "LOCATION")
  missing_cols <- setdiff(required_cols, names(dat))
  if (length(missing_cols) > 0) {
    stop(
      "`dat` is missing required column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # Work on a data.table copy so the caller's object is never modified.
  dat <- data.table(dat)

  # Hours elapsed since the previous reading of the same admission
  # (0 for the first reading). Sorting by time first makes diff() meaningful.
  setkey(dat, ADMISSION.ID, RESULT.DATE)
  dat[, LOS.EACH := c(0, diff(as.double(RESULT.DATE))) / 3600,
      by = list(ADMISSION.ID)]

  if (epiMethod == "pseudo") {
    # A gap of more than 48 hours opens a new episode. The running count of
    # such gaps is the episode number; unlike a rep()/diff() construction it
    # stays well-defined when the first row of a group already exceeds the
    # cutoff (possible because LOS.EACH is computed per admission, while the
    # grouping here is per ward and admission).
    # NOTE(review): LOS.EACH is grouped by ADMISSION.ID, EPISODE.ID by
    # (LOCATION, ADMISSION.ID) and LOS.PSUM by (ADMISSION.ID, EPISODE.ID);
    # confirm the intended semantics for admissions that span several wards.
    dat[, EPISODE.ID := cumsum(LOS.EACH > 48) + 1L,
        by = list(LOCATION, ADMISSION.ID)]
  } else {
    # Treat each admission id as exactly one patient-stay.
    dat[, EPISODE.ID := 1]
  }

  # Cumulative hours since the earliest reading of the episode: subtracting the
  # first reading's LOS.EACH makes every episode start at 0, hence the maximum
  # within an episode is its length of stay.
  setkey(dat, LOCATION, ADMISSION.ID, EPISODE.ID, RESULT.DATE)
  dat[, LOS.PSUM := cumsum(LOS.EACH) - LOS.EACH[which.min(RESULT.DATE)],
      by = list(ADMISSION.ID, EPISODE.ID)]

  # Time of day in numeric hours. trunc() takes midnight in the time zone
  # carried by RESULT.DATE; building midnight from a character string would be
  # parsed in the session's local time zone and shift the result whenever the
  # two zones differ.
  dat[, hour := as.numeric(difftime(
    RESULT.DATE,
    as.POSIXct(trunc(RESULT.DATE, units = "days")),
    units = "hours"
  ))]

  # Create RESULT.MEAN column for GenGluM, and remove rows where RESULT.MEAN
  # is NA. Factors go through character first: as.numeric() on a factor
  # returns level codes, not the readings.
  glucose <- dat[["RESULT"]]
  if (is.factor(glucose)) {
    glucose <- as.character(glucose)
  }
  set(dat, j = "RESULT.MEAN", value = suppressWarnings(as.numeric(glucose)))

  n_na <- sum(is.na(dat[["RESULT.MEAN"]]))
  if (n_na != 0) {
    message(simpleMessage(sprintf(
      "%d rows with non-numeric glucose readings are removed.\n", n_na
    )))
  }
  dat[!is.na(RESULT.MEAN)]
}
nyilin/QcDM documentation built on June 29, 2021, 1:14 a.m.