#' @title Calculate pseudo admission episodes
#'
#' @description Read in glucose data and assign an episode number to every
#' reading, calculate the time interval between consecutive readings and the
#' accumulated time within each episode, and convert the time of each reading
#' into numeric hours on the standard 24-hour clock.
#'
#' @param dat A \emph{data.frame} (or \emph{data.table}) with at least the
#'   following columns:
#' \describe{
#'   \item{RESULT.DATE}{date and time of the reading, of class
#'   \dQuote{POSIXct} (can be generated by \code{\link{FormatDate}});}
#'   \item{ADMISSION.ID}{a character or numeric column for the ID of each
#'   hospital stay;}
#'   \item{LOCATION}{the location of the reading; used for
#'   sorting and, when \code{epiMethod = "Pseudo"}, for grouping;}
#'   \item{RESULT}{Blood Glucose readings. Values that cannot be converted to
#'   numeric are dropped (see Value).}
#' }
#' @param epiMethod The method used to compute episodes (case-insensitive). If
#'   \code{Admininfo} (the default), each admission ID is assumed to identify
#'   one episode; if \code{Pseudo}, the episode number is generated using a
#'   gap of more than 48 hours between consecutive readings as a cutoff.
#'
#' @return A \emph{data.table} holding the rows of \code{dat} whose
#'   \code{RESULT} is numeric, sorted by \code{LOCATION}, \code{ADMISSION.ID},
#'   \code{EPISODE.ID} and \code{RESULT.DATE}, with the following additional
#'   variables:
#' \item{LOS.EACH}{time difference (in hours) to the previous reading of the
#'   same \code{ADMISSION.ID}; 0 for the first reading}
#' \item{EPISODE.ID}{In the case of pseudo, the episode number within each
#'   \code{LOCATION} and \code{ADMISSION.ID}; it increases by 1 whenever
#'   \code{LOS.EACH} is greater than 48 hours. Otherwise, it is always 1,
#'   corresponding to that each admission id identifies one hospital stay.}
#' \item{LOS.PSUM}{cumulative time (in hours) since the earliest reading of the
#'   same \code{ADMISSION.ID} and \code{EPISODE.ID}}
#' \item{hour}{time of day of the reading as numerical hours (e.g. 13.5 for
#'   13:30), in the time zone of \code{RESULT.DATE}}
#' \item{RESULT.MEAN}{\code{RESULT} converted to numeric}
#' A message reports how many rows were removed because \code{RESULT} was not
#' numeric.
#' @examples
#' # Load example data
#' data("gluDat")
#' # Focus on data from Ward A. Process date-time variable and blood glucose readings:
#' gluDat2 <- FormatDate(dat = gluDat[gluDat$LOCATION == "A", ], yy = 2020, mm = 7)
#' # Specify admission episodes based on admission ID:
#' gluDat3 <- GenEpisode(dat = gluDat2, epiMethod = "Admininfo")
#' @author Chuen Seng Tan, Ying Chen
#' @import data.table
#' @export
GenEpisode <- function(dat, epiMethod = "Admininfo") {
  epiMethod <- tolower(epiMethod)
  epiMethod <- match.arg(epiMethod, c("admininfo", "pseudo"))

  # Fail early with a readable message instead of an obscure setkey() error.
  required_cols <- c("RESULT.DATE", "ADMISSION.ID", "LOCATION", "RESULT")
  missing_cols <- setdiff(required_cols, names(dat))
  if (length(missing_cols) > 0) {
    stop(
      "`dat` is missing required column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # data.table() copies, so the caller's object is never modified by reference.
  dat <- data.table(dat)

  # Hours elapsed since the previous reading of the same admission (0 for the
  # first reading). Sorting by time first makes diff() meaningful.
  setkey(dat, ADMISSION.ID, RESULT.DATE)
  dat[, LOS.EACH := c(0, diff(as.double(RESULT.DATE))) / 3600,
      by = list(ADMISSION.ID)]

  if (epiMethod == "pseudo") {
    # A gap of more than 48 hours starts a new episode. The first reading of a
    # group always belongs to episode 1, even if its LOS.EACH is large (which
    # can happen because LOS.EACH is computed per ADMISSION.ID only).
    dat[, EPISODE.ID := {
      new_episode <- LOS.EACH > 48
      new_episode[1L] <- FALSE
      cumsum(new_episode) + 1L
    }, by = list(LOCATION, ADMISSION.ID)]
  } else {
    # Treat each admission as a single patient-stay.
    dat[, EPISODE.ID := 1]
  }

  setkey(dat, LOCATION, ADMISSION.ID, EPISODE.ID, RESULT.DATE)

  # Cumulative duration within an episode, re-based so that the earliest
  # reading of the episode is at 0; its maximum is the length of stay.
  # NOTE(review): grouping here omits LOCATION while EPISODE.ID above is
  # numbered within LOCATION; results coincide only when an admission has
  # readings from a single location - confirm the intended unit.
  dat[, LOS.PSUM := cumsum(LOS.EACH) - LOS.EACH[which.min(RESULT.DATE)],
      by = list(ADMISSION.ID, EPISODE.ID)]

  # Clock time as numeric hours. The components are read from POSIXlt so they
  # are expressed in RESULT.DATE's own time zone; building a midnight string
  # and re-parsing it would use the session time zone instead.
  dat[, hour := {
    clock <- as.POSIXlt(RESULT.DATE)
    clock$hour + clock$min / 60 + clock$sec / 3600
  }]

  # Create RESULT.MEAN column for GenGluM, and remove rows where RESULT.MEAN
  # is NA. Factors are converted through their labels, because as.numeric()
  # on a factor returns the internal level codes.
  dat[, RESULT.MEAN := {
    raw_result <- if (is.factor(RESULT)) as.character(RESULT) else RESULT
    suppressWarnings(as.numeric(raw_result))
  }]
  n_na <- sum(is.na(dat$RESULT.MEAN))
  if (n_na > 0) {
    message(sprintf(
      "%d rows with non-numeric glucose readings are removed.", n_na
    ))
  }

  dat[!is.na(RESULT.MEAN)]
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.