R/NMcreateDoses.R
In NMsim: Seamless 'Nonmem' Simulation Platform

Documented in NMcreateDoses

##' Easily and flexibly generate dosing records
##'
##' Columns will be extended by repeating last value of the column if
##' needed in order to match length of other columns. Combinations of
##' different columns can be generated by specifying covariates on the
##' columns where the regimens differ. 
##' @param TIME The time of the dosing events. Required.
##' @param AMT vector or data.frame with amounts amount. Required.
##' @param EVID The event ID to use for doses. Default is to use
##'     EVID=1, but EVID might also be wanted.
##' @param CMT Compartment number. Default is to dose into CMT=1. Use
##'     `CMT=NA` to omit in result.
##' @param ADDL Number of additional dose events. Must be in
##'     combination with and consistent with II. Notice if of length
##'     1, only applied to last event in each regimen.
##' @param II Dosing frequency of additional events specified in
##'     `ADDL`. See `ADDL` too.
##' @param RATE Infusion rate. Optional.
##' @param SS steady-state flag. Optional.
##' @param addl A list of ADDL and II that will be applied to last
##'     dose. This may be prefered if II and ADDL depend on covariates
##'     - see examples. Optional.
##' @param addl.lastonly If ADDL and II are of length 1, apply only
##'     to last event of a regimen? The default is `TRUE`.
##' @param col.id Default is to denote the dosing regimens by an ID
##'     column. The name of the column can be modified using this
##'     argument. Use `col.id=NA` to omit the column altogether. The
##'     latter may be wanted if repeating the regimen for a number of
##'     subjects after running `NMcreateDoses()`.
##' @param as.fun The default is to return data as a data.frame. Pass
##'     a function (say `tibble::as_tibble`) in as.fun to convert to
##'     something else. If data.tables are wanted, use
##'     as.fun="data.table". The default can be configured using
##'     NMdataConf.
##' @details Only TIME and AMT are required. AMT, RATE, SS, II, ADDL,
##'     CMT are of length 1 or longer. Those not of max length 1 are
##'     repeated.  If TIME is longer than those, they are extended to
##'     match length of TIME. All these arguments can be data.frames
##'     with additional columns that define distinct dosing regimens -
##'     with distinct subject ids. However, if covariates are applied
##'     to ADDL+II, see the addl argument and see examples.
##'
##' Allowed combinations of
##'     AMT, RATE, SS, II here:
##'     \url{https://ascpt.onlinelibrary.wiley.com/doi/10.1002/psp4.12404}
##' @return A data.frame with dosing events
##' @examples
##' library(data.table)
##' ## Users should not use setDTthreads. This is for CRAN to only use 1 core.
##' data.table::setDTthreads(1) 
##' ## arguments are expanded - makes loading easy
##' NMcreateDoses(TIME=c(0,12,24,36),AMT=c(2,1))
##' ## Different doses by covariate
##' NMcreateDoses(TIME=c(0,12,24),AMT=data.table(AMT=c(2,1,4,2),DOSE=c(1,2)))
##' ## Make Nonmem repeat the last dose. This is a total of 20 dosing events.
##' ## The default, addl.lastonly=TRUE means if ADDL and II are of
##' ## length 1, they only apply to last event.
##' NMcreateDoses(TIME=c(0,12),AMT=c(2,1),ADDL=9*2,II=12)
##' dt.amt <- data.table(DOSE=c(100,400))
##' ## multiple dose regimens. 
##' ## Specifying the time points explicitly
##' dt.amt <- data.table(AMT=c(200,100,800,400)*1000,DOSE=c(100,100,400,400))
##' doses.md.1 <- NMcreateDoses(TIME=seq(0,by=24,length.out=7),AMT=dt.amt)
##' doses.md.1$dose <- paste(doses.md.1$DOSE,"mg")
##' doses.md.1$regimen <- "QD"
##' doses.md.1
##' ## or using ADDL+II
##' dt.amt <- data.table(AMT=c(200,100,800,400)*1000,DOSE=c(100,100,400,400))
##' doses.md.2 <- NMcreateDoses(TIME=c(0,24),AMT=dt.amt,addl=data.table(ADDL=c(0,5),II=c(0,24)))
##' doses.md.2$dose <- paste(doses.md.2$DOSE,"mg")
##' doses.md.2$regimen <- "QD"
##' doses.md.2
##' ## ADDL and II can be wrapped in a data.frame. This allows including covariates
##' NMcreateDoses(TIME=c(0,12),AMT=c(2,1),addl=data.frame(ADDL=c(NA,9*2),II=c(NA,12),trt=c("A","B")))
##' @import data.table
##' @import NMdata
##' @export

NMcreateDoses <- function(TIME, AMT=NULL, EVID=1, CMT=1, ADDL=NULL, II=NULL, RATE=NULL, SS=NULL, addl=NULL, addl.lastonly=TRUE, col.id="ID", as.fun){
    

#### Section start: Dummy variables, only not to get NOTE's in pacakge checks ####

    . <- NULL
    all.na <- NULL
    as.formula <- NULL
    ID <- NULL
    MDV <- NULL
    Nna <- NULL
    length.time <- NULL
    max.length <- NULL
    value <- NULL
    variable <- NULL
    ROW <- NULL

    
### Section end: Dummy variables, only not to get NOTE's in pacakge checks

    if(missing(as.fun)) as.fun <- NULL
    as.fun <- NMdata:::NMdataDecideOption("as.fun",as.fun)
    
    list.doses <- list(TIME=TIME, EVID=EVID, CMT=CMT, AMT=AMT, RATE=RATE, SS=SS , ADDL=ADDL, II=II
                       )
    if(!is.null(addl)){
        if(!is.null(ADDL) || !is.null(II) ) {
            stop("Provide either ADDL+II or addl.")
        }
        if(length(addl$ADDL) != length(addl$II)){
            stop("ADDL and II must be of equal length")
        }
        addl <- as.data.table(addl)
        ## dropping II for ADDL in order to keep covariates.
        list.doses$ADDL <- addl[,setdiff(colnames(addl),"II"),with=FALSE]
        list.doses$II <- addl[,setdiff(colnames(addl),"ADDL"),with=FALSE]
    }
    
    ## disregard the ones that were not supplied
    list.doses <- list.doses[!sapply(list.doses,is.null)]


    ## convert to dt's    
    names.doses <- names(list.doses)

    list.doses <- lapply(names.doses,function(x){
        dt <- list.doses[[x]]
        if(is.data.frame(dt)){
            if(!is.data.table(dt)){
                dt <- as.data.table(dt)
            }
            return(dt)
        } else {
            DT <- data.table(x1=dt)
            setnames(DT,"x1",x)
            return(DT)
        }
    })

#### check that TIME is long enough
    dt.lengths <- data.table(name=names.doses,
                             length=sapply(list.doses,nrow))

    
    
### make use of merge.data.frame to get outer merges where if no
### common columns found.
    df.doses <- lapply(list.doses,as.data.frame)
    res <- Reduce(merge,df.doses)
    dt.doses1 <- as.data.table(res)
    
    names(list.doses) <- names.doses
    
    ## stack all dt's, fill=T
    ##  dt.doses1 <- rbindlist(list.doses,fill=T)

    ## identify covs
    ##     covs <- setdiff(colnames(dt.doses1),c(names.doses,"II","ADDL"))
    covs <- setdiff(colnames(dt.doses1),c(names.doses))
    if("ID" %in% covs) stop("ID is currently not allowed as a covariate. Please use a different name and adjust the result accordingly.")
    combs <- unique(dt.doses1[,covs,with=F])

    ## get rid of all combs that contain NA
    col.row <- tmpcol(combs)
    combs[,(col.row):=.I]
    combs[,Nna:=sum(is.na(.SD)),by=col.row]
    
### This used to drop rows. That seems too risky. Give warning if there are any.
    nrows.na <- combs[Nna>0][,.N]
    if(nrows.na){
        warning("NA values among covariates. This may give unintended results.")
    }
    ## combs <- combs[Nna==0]
    ## combs <- combs[Nna<length(covs)]

    combs[,(col.row):=NULL]
    combs[,Nna:=NULL]
    ## expand all to include all combs
### the name (EVID, AMT, etc) col must be renamed to value. For dcast.
    list.doses.exp <- lapply(names.doses,function(name){
        
###### egdt with the unique combs of those we dont have already.
        ## name <- names.doses[1]
        elem <- list.doses[[name]]
        ## egdt(transform(elem,elem=name),combs[,setdiff(names(combs),names(elem)),with=FALSE] ,quiet=T)
        if("variable"%in%colnames(elem)) stop("a column called variable is not allowed in provided data.")
        if(!name%in%colnames(elem)) stop("If data is given as a data.table, the argument name has to be a column in data too.")
        egdt(
            melt(elem,measure.vars=name)
           ,
            unique(combs[,setdiff(names(combs),names(elem)),with=FALSE])
           ,quiet=T)
        
    })
    ## dt.doses1 <- unique(rbindlist(list.doses.exp,fill=T))
    dt.doses1 <- rbindlist(list.doses.exp,fill=T)
    ## assign ID counter
    if(!"ID"%in%covs){
        dt.doses1[,ID:=.GRP,by=covs]
    }
    ## calc max length within ID
    dt.doses1[,length:=.N,by=.(ID,variable)]
    dt.doses1[,max.length:=max(length),by=.(ID)]

########  checks of lengths of TIME and of II+ADDL should go here.
    
    dt.doses1[,length.time:=max(length[variable=="TIME"]),by=c("ID")]
    if(dt.doses1[,any(length>length.time)]){
        stop("TIME must be at least as long as the other provided variables.")
    }

    dt.doses1[,all.na:=all(is.na(value)),by=c("ID","variable")]
    dt.doses1 <- dt.doses1[all.na==FALSE]
    
### ADDL+II checks
    if(sum(c("ADDL","II")%in%dt.doses1$variable)==1){
        stop("ADDL and II must either be omitted or both provided.")
    }
    if("ADDL"%in%dt.doses1$variable){
        dt.tmp <- dt.doses1[variable%in%c("ADDL","II"),.(length.var=.N),by=c("ID","variable")]
        dt.tmp <- dcast(dt.tmp,ID~variable,value.var="length.var")
        if(!all(dt.tmp[,ADDL==II])){
            stop("ADDL and II must be of equal length. If constructing multiple dosing schemes, they must be eqully long for all schmes.")
        }
    }

    if(addl.lastonly){
        if(any(dt.doses1[variable=="ADDL",length==1&max.length>1])){
            
            dt.doses1 <- rbind(
                dt.doses1[!variable%in%c("ADDL","II")]
               ,
                dt.doses1[variable%in%c("ADDL","II")][,.SD[rep(1:(length.time-1))],by=c("ID","variable",covs)][,value:=NA]
               ,
                dt.doses1[variable%in%c("ADDL","II")][,.SD[.N],by=c("ID","variable",covs)]
            )
        }
    }

    ## expand to max length for each elem within each comb of covs
    dt.doses2 <-
        rbindlist(lapply(split(dt.doses1,by=c("variable","ID")),function(sub) {
            if(nrow(sub)==0) return(NULL)
            sub[c(1:.N,rep(.N,unique(sub$max.length)-.N))]}))

    ## assign row counter within comb
    dt.doses2[,ROW:=1:.N,by=c("ID","variable")]

    
    ## dcast
    res <- dcast(dt.doses2,as.formula(paste(paste0(c("ID","ROW",covs),collapse="+"),"~variable")),value.var="value")
    res[,ROW:=NULL]
    res[,MDV:=1]

    ## because covariates can be a mix of types/classes, at least the
    ## value column may be a list at this point. Recoding that.
    res <- lapply(res,unlist)
    res <- as.data.table(res)
    ## order rows and columns. 
    
    setorderv(res,intersect(c("ID","TIME","CMT"),colnames(res)))
    res <- NMorderColumns(res)
    
    if(is.na(col.id)){
        res[,ID:=NULL]
    } else {
        setnames(res,"ID",col.id)
    }
    
    
    ## done
    as.fun(res)
}