R/getAdmLimits.R

Defines functions getAdmLimits

Documented in getAdmLimits

### getAdmLimits.R ---
#----------------------------------------------------------------------
## author: Thomas Alexander Gerds
## created: Aug  4 2016 (19:43)
## Version:
## last-updated: Jan 17 2020 (07:37) 
##           By: Thomas Alexander Gerds
##     Update #: 83
#----------------------------------------------------------------------
##
### Commentary:
##
### Change Log:
#----------------------------------------------------------------------
##
### Code:
##' Adds first admission date and last discharge date to a sequence of admissions. The default input data.table is the current standard
##' by which admissions are available in Statistics Denmark. 
##'
##' @title Get Admission time
##' @param dt data.table containing the variables
##' @param pnr Variable with ID for each subject/group (default is \code{pnr}).
##' @param inddto Variable with entry times. Must be date or numeric (default is \code{inddto}). 
##' @param uddto Variable with exit times. Must be date or numeric (default is \code{uddto}).
##' @param keep Vector of names of variables of \code{dt} to keep in the output 
##' @param collapse if \code{TRUE} return only the lines with non-overlapping admissions
##' @param error Character. One of \code{"warn"}, \code{"remove"} remove lines with errors,
##' \code{"flag"} add a new variable called error. An error is defined as either a missing value in
##' either inddto or uddto or when inddto>uddto.
##' @return data.table with two new variables first.indate and last.outdate.
##' @examples
##' set.seed(8)
##' lpr <- simAdmissionData(10)
##' ## Variables have default names
##' adm <- getAdmLimits(lpr)
##'
##' ## with errors
##' lpr1 <- lpr
##' lpr1 <- lpr1[uddto>as.Date("2010-01-01"),uddto:=NA]
##' adm1 <- getAdmLimits(lpr1)
##' adm1 <- getAdmLimits(lpr1,error="remove")
##' adm1 <- getAdmLimits(lpr1,error="flag")
##' 
##' ## case where Variables have custom names
##' data.table::setnames(lpr, c('pnr','inddto','uddto'), c('personid', 'Entrydate','Outdate'))
##' adm2 <- getAdmLimits(lpr,pnr='personid',inddto='Entrydate',uddto='Outdate')
##'
##' ## duplicated and overlap
##' set.seed(8)
##' lpr3 <- simAdmissionData(10)
##' lpr3 <- lpr3[sample(1:NROW(lpr3),replace=TRUE,size=round(NROW(lpr3)*1.5))]
##' adm3 <- getAdmLimits(lpr3,collapse=TRUE)
##' 
##' @export
##' @author Thomas A. Gerds <tag@@biostat.ku.dk>
getAdmLimits <- function(dt,
                         pnr="pnr",
                         inddto="inddto",
                         uddto="uddto",
                         keep=NULL,
                         collapse=FALSE,
                         error="warn"){
    .SD=.I=NULL
    first.indate <- .N <- last.outdate <- NULL
    ## select relevant variables
    wdt=dt[,c(pnr,inddto,uddto,keep),with=FALSE]
    if (!is.data.table(wdt)){
        setDT(wdt)
    }
    orig.names <- copy(names(wdt))
    ## Check variable types
    datecheckerInd=FALSE
    if (class(wdt[[inddto]])=="Date"){
        datecheckerInd=TRUE
    } else if (!is.numeric(wdt[[inddto]])){
        stop("Error: inddto must be numeric or Date")
    }  
    datecheckerUd=FALSE
    if (class(wdt[[uddto]])=="Date"){
        datecheckerUd=TRUE
    } else if (!is.numeric(wdt[[uddto]])){
        stop("Error: uddto must be numeric or Date")
    }    
    ## flag rows with inddto>uddto and NA in inddto
    wdt[,error:=is.na(.SD[[1]])|is.na(.SD[[2]])|.SD[[1]]>.SD[[2]],.SDcols=c(inddto,uddto)]
    switch(error,"flag"={
        ## done this already
    },"remove"={
        wdt <- wdt[wdt[["error"]]==FALSE]
        wdt[,error:=NULL]
    },"warn"={
        if (any(wdt[["error"]])){
            warning("Data have errors: ",immediate.=TRUE)
            print(wdt[wdt[["error"]]])
        }else{
            wdt[,error:=NULL]
            message("No data errors.")
        }
    })
    ## Convert Dates to numeric
    if (datecheckerInd==TRUE){
        set(wdt,j=inddto,value=as.integer(wdt[[inddto]]))
    }
    if (datecheckerUd==TRUE){
        set(wdt,j=uddto,value=as.integer(wdt[[uddto]]))
    }
    ## Sort by pnr and admission dates
    setkeyv(wdt,c(pnr,inddto))
    ## Latest admission date by pnr
    wdt[,`:=`(last.outdate=cummax(.SD[[1]])),.SDcols=uddto,by=pnr] 
    ## Start of each admission
    wdt[,first.indate:= cumsum(c(1L,(1L*(.SD[[1]]>shift(.SD[[2]],n=1)))[-1])),.SDcols=c(inddto,"last.outdate"),by=pnr]
    ## Start and end date for the hospitalization
    wdt[,':='(first.indate=.SD[[1]][1] , last.outdate=.SD[[2]][.N]),.SDcols=c(inddto,"last.outdate"),by=c(pnr,"first.indate")]
    ## Change columns around
    if ("error"%in%names(wdt))
        setcolorder(wdt, c(orig.names,"first.indate","last.outdate","error"))
    else
        setcolorder(wdt, c(orig.names,"first.indate","last.outdate"))
    ## Convert back in case input was format Date
    if(datecheckerInd==TRUE){
        set(wdt,j=inddto,value=as.Date(wdt[[inddto]],origin="1970-01-01"))
    }
    if(datecheckerUd==TRUE){
        set(wdt,j=uddto,value=as.Date(wdt[[uddto]],origin="1970-01-01"))
    }
    wdt[,first.indate:=as.Date(first.indate,origin="1970-01-01")]
    wdt[,last.outdate:=as.Date(last.outdate,origin="1970-01-01")]
    if (collapse){
        # remove now meaningless input columns
        set(wdt,j=inddto,value=NULL)
        set(wdt,j=uddto,value=NULL)
        n.before <- NROW(wdt)
        collapse.cols <- c(pnr,"first.indate","last.outdate")
        wdt <- wdt[wdt[, .I[1],by=collapse.cols]$V1]
        if (n.before==NROW(wdt))
            message("Nothing to collapse.")
        else
            message("Collapsed ",n.before," rows with potentially multiple and overlapping periods into ",NROW(wdt)," rows.")
    }
    ## Create list and return
    class(wdt) <- c("admlimits",class(wdt))
    wdt[]
}

#----------------------------------------------------------------------
### getAdmLimits.R ends here
tagteam/heaven documentation built on March 24, 2024, 7:58 a.m.