# R/puzzle.R
# In puzzle: Assembling Data Sets for Non-Linear Mixed Effects Modeling
#
# Documented in puzzle

#' @title puzzle
#'
#' @description Build pharmacometric data sets from basic tabulated files
#'
#' @author Olivier Barriere and Mario Gonzalez Sales
#'
#' @param directory path to your directory
#' @param order define the absorption order, can be 0, 1, c(0,0), c(0,1), c(1,0), or c(1,1)
#' @param pk define the required file containing the pk information. It can be a .csv or an .xlsx file
#' @param dose define the required file containing the dose information. It can be a .csv, an .xlsx file or an R object.
#' @param cov define the optional file containing the covariate information. It can be a .csv, an .xlsx file or an R object.
#' @param pd define the optional file containing the pd information. It can be a .csv, or a .xlsx file.
#' @param extratimes define the optional file containing the additional times. It can be a .csv, or a .xlsx file.
#' @param nm name of output file generated by puzzle
#' @param timeunits define time units if needed
#' @param arrange define how the columns should be arranged
#' @param optionalcolumns define optional columns
#' @param initialindex define the lower category of categorical covariates
#' @param coercion define name for coercion file
#' @param fillcolumns define columns to be filled
#' @param nocoercioncolumns define columns to be dropped from the coercion file
#' @param norepeatcolumns define columns not to be repeated
#' @param na.strings define value for na
#' @param datetimeformat define format for date times
#' @param timezone define timezone
#' @param ignore define ignore value
#' @param missingvalues define missing value
#' @param verbose define verbose
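#' @param parallel logical; TRUE (default) for parallel zero + first order absorption, FALSE for sequential zero + first order absorption (FALSE requires order=c(0,1))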
#' @param username define person performing the assembling
#' 
#' @return a pharmacometrics ready data set
#' @examples 
#' \dontrun{
#' nm = list(pk = list(parent=as.data.frame(puzzle::df_pk_start)),
#'           dose=as.data.frame(puzzle::df_dose_start), 
#'           cov=as.data.frame(puzzle::df_cov_start))
#' puzzle(directory=file.path(tempdir()), 
#'        order=c(0), 
#'        pk=list(data=nm$pk), 
#'        dose=list(data=nm$dose), 
#'        cov=list(data=nm$cov))
#' }
#' @export
#' 
#'
puzzle = function(directory=NULL,
                  order,
                  coercion=list(name=NULL,sep=","),
                  optionalcolumns=NULL,
                  pk=list(name=NULL,data=NULL),
                  dose=list(name=NULL,data=NULL),
                  cov=list(name=NULL,data=NULL),
                  pd=list(name=NULL,data=NULL),
                  extratimes=list(name=NULL,data=NULL),
                  nm=list(name=NULL),
                  fillcolumns=NULL,
                  nocoercioncolumns=NULL,
                  norepeatcolumns=NULL,
                  initialindex=0,
                  na.strings="N/A",
                  arrange="ID,TIME,CMT,desc(EVID)",
                  datetimeformat="%Y-%m-%d %H:%M:%S",
                  timeunits="hours",
                  timezone=Sys.timezone(),
                  ignore="C",
                  missingvalues=".",
                  parallel=TRUE,
                  verbose=FALSE,
                  username=NULL
) {
  
  # Warnings stay silenced while the data set is assembled, but the caller's
  # setting is restored when the function exits (normally or through an error).
  old.options = options(warn = -1)
  on.exit(options(old.options), add = TRUE)
  
  if (missing(pk) && missing(pd) && missing(cov) && missing(dose)) {
    stop("Have you forgotten to define the arguments for puzzle()?. 
  Do you need help? Please visit: https://syneoshealth.github.io/puzzle")
  }
  
  # Scalar flags for the absorption set-up. The previous test compared `order`
  # against c(0,1) directly inside `if`, which yields a length-2 condition (an
  # error since R 4.2). The sequential request is validated up front so that an
  # invalid combination fails before any work is done.
  zero.first = length(order)==2 && all(order==c(0,1))
  sequential = length(parallel)==1 && !is.na(parallel) && parallel==FALSE
  if (sequential && !zero.first) {
    stop("Would you like to use a sequential zero + first order absorption model? Please set order=c(0,1). Otherwise, please set parallel = T")
  }
  
  # ---- local helpers --------------------------------------------------------
  
  # Carry the last non-missing value forward; leading NAs are kept as NA.
  repeat.before = function(x) {
    ind = which(!is.na(x))
    if(is.na(x[1]))
      ind = c(1,ind)
    rep(x[ind], times = diff(
      c(ind, length(x) + 1) ))
  }
  
  # Apply repeat.before() within each ID. `df` must have ID as a column and the
  # value to fill as its SECOND column; the filled second column is returned.
  repeat.before.id = function(df) {
    for (id in unique(df$ID)) {
      df[df$ID==id,2]=repeat.before(df[df$ID==id,2])
    }
    return(df[,2])
  }
  
  # Row-bind two data frames whose columns differ; columns missing on one side
  # are created as NA on that side first.
  rbinddiff2 = function(a,b) {
    notina=setdiff(names(b),names(a))
    notinb=setdiff(names(a),names(b))
    if (nrow(a)>0) a[,notina]=NA
    if (nrow(b)>0) b[,notinb]=NA
    
    rbind(a,b)
  }
  
  # Row-bind any number of data frames with rbinddiff2(); NULL when called
  # without arguments.
  rbinddiff = function(...) {
    dots=list(...)
    if (length(dots)==0) return(NULL)
    Reduce(rbinddiff2,dots)
  }
  
  # Parse DATETIME with `datetimeformat`/`timezone` when the column exists;
  # otherwise make sure TIME is numeric.
  convert.datetime = function(df) {
    if (!"DATETIME" %in% names(df)) {
      df$TIME=as.numeric(df$TIME)
    } else {
      df$DATETIME=as.POSIXct(strptime(df$DATETIME,format=datetimeformat,tz=timezone),tz=timezone)
    }
    return(df)
  }
  
  # Derive TIME (in `timeunits`) relative to each subject's first dose when both
  # `df` and `dose` carry DATETIME; existing non-missing TIME values are kept.
  # Without DATETIME on both sides, DATETIME is set to NA.
  compute.time = function (df,dose) {
    if ("DATETIME" %in% names(df) && "DATETIME" %in% names(dose)) {
      df=plyr::join(df,plyr::ddply(dose,~ID,plyr::summarise,FIRSTDOSEDATETIME=min(DATETIME)),by="ID")
      if ("TIME" %in% names(df)) {
        notime=is.na(df$TIME)
        df$TIME[notime]=as.numeric(difftime(df$DATETIME[notime],df$FIRSTDOSEDATETIME[notime],units=timeunits,tz=timezone))
      } else {
        df$TIME=as.numeric(difftime(df$DATETIME,df$FIRSTDOSEDATETIME,units=timeunits,tz=timezone))
      }
      df$FIRSTDOSEDATETIME=NULL
    } else {
      df$DATETIME=NA
    }
    return(df)
  }
  
  # Add extra records (EVID=2, EXTRATIME=1) from `times` to `df`. `times` is
  # replicated for every ID of `df` unless it already has an ID column; rows
  # already present in `df` are not added again.
  add.times = function(df,times) {
    if (!"EVID" %in% names(df)) df$EVID=0
    df$EXTRATIME=0
    if (!is.null(times)) {
      ids=unique(df$ID)
      df2=stats::setNames(as.data.frame(times[rep(seq_len(nrow(times)),times=length(ids)),]),names(times))
      if (!"ID" %in% names(times)) df2$ID=rep(ids,each=nrow(times))
      # duplicated() is evaluated on df followed by df2, so only df2 rows that
      # repeat an earlier row are flagged; keep the flags belonging to df2.
      isdup=duplicated(rbind(df[,names(df2),drop=FALSE],df2))[nrow(df)+seq_len(nrow(df2))]
      df2=df2[!isdup,,drop=FALSE]
      
      if (nrow(df2)>0) {
        if (!"EVID" %in% names(df2)) df2$EVID=2
        df2$EXTRATIME=1
        df=rbinddiff(df,df2)
      }
      # Namespace-qualified: the bare name `arrange` is also an argument of
      # puzzle() and only resolved when plyr/dplyr happened to be attached.
      df=dplyr::arrange(df,ID,TIME)
    }
    return(df)
  }
  
  # Convert every column of `df` to numeric. Values listed in `na.strings`
  # become NA. Columns holding non-numeric text are coded as integers starting
  # at `initialindex` (alphabetical factor levels) and the mapping is reported.
  convert.to.numeric = function (df,initialindex,na.strings) {
    for (name in names(df)) {
      values=df[[name]]
      if (!is.character(values)) values=as.character(values)
      values[values %in% na.strings]=NA
      unparsable=!is.na(values) & is.na(suppressWarnings(as.numeric(values)))
      if (!any(unparsable)) {
        df[[name]]=as.numeric(values)
      } else {
        values=as.factor(values)
        lvl=data.frame(seq_along(levels(values))+initialindex-1,levels(values))
        message((paste0("Automatic coercion to numeric for ", name, "\n",
                        paste(paste(lvl[,1],lvl[,2],sep="="),collapse="\n"))))
        df[[name]]=as.numeric(values)+initialindex-1
        # The coercion table lives in puzzle()'s frame; `<<-` appends to it there.
        if (!is.null(coercion$data)) coercion$data <<- rbind(coercion$data,data.frame(VAR=name,stats::setNames(lvl,c("NUM","CHAR"))))
      }
    }
    return(df)
  }
  
  # Write one "VAR: num=char<sep> num=char ..." line per coerced variable.
  write.coercion.comments = function(df,file,sep=if ("sep" %in% names(coercion)) coercion$sep else ",") {
    df$NUMCHAR=paste0(df$NUM,"=",df$CHAR)
    df=reshape2::dcast(df,VAR~NUM,value.var="NUMCHAR")
    lines=vapply(seq_len(nrow(df)), function(i) {
      values=df[i,-1]
      paste0(df[i,1], ": ", paste(values[!is.na(values)],collapse=paste0(sep," ")))
    }, character(1))
    # writeLines() opens and closes the file itself, so no connection can leak.
    writeLines(lines, file)
  }
  
  # Extension of a file name without the dot ("" when there is none).
  file.ext = function(x) {
    ext=regmatches(x, regexec("\\.([^\\.]+$)",x))[[1]][2]
    if (is.na(ext)) ext=""
    return(ext)
  }
  
  # File name without its extension (the input itself when there is none).
  file.name = function(x) {
    name=regmatches(x, regexec("(.*)\\.[^\\.]+$",x))[[1]][2]
    if (is.na(name)) name=x
    return(name)
  }
  
  # TRUE when `name` is a non-empty file name that exists under `directory`.
  has.file = function(name) {
    !is.null(name) && nchar(name)>0 && file.exists(file.path(directory,name))
  }
  
  # Read `name` from `directory`: a .csv gives a single data frame, anything
  # else is read as an Excel workbook with one data frame per sheet. Returns
  # list(sheetnames=, data=) where `data` is an unnamed list of data frames.
  read.sheets = function(name) {
    path=file.path(directory,name)
    if (tolower(file.ext(name))=="csv") {
      list(sheetnames=file.name(name),
           data=list(utils::read.csv(file=path, na.strings=missingvalues)))
    } else {
      sheets=readxl::excel_sheets(path=path)
      list(sheetnames=sheets,
           data=lapply(seq_along(sheets), function(i) as.data.frame(readxl::read_excel(path=path,sheet=i))))
    }
  }
  
  # Shared preparation of PK/PD sheets: time handling, optional extra times
  # (sheet "<prefix><i>" or "pkpd" of `extra`), numeric DV and DATASOURCE tag.
  prepare.observations = function(sheets,prefix,source,dosedata,extra) {
    for (i in seq_along(sheets)) {
      sheet=convert.datetime(sheets[[i]])
      sheet=compute.time(sheet,dosedata)
      if (!is.null(extra)) {
        sheetname=names(extra)[names(extra) %in% c(paste0(prefix,i), "pkpd")]
        sheet=add.times(sheet,if (length(sheetname)!=0) extra[[sheetname]] else NULL)
      }
      sheet$DV=as.numeric(sheet$DV)
      sheet$DATASOURCE=source
      sheets[[i]]=sheet
    }
    sheets
  }
  
  # ---- load inputs ----------------------------------------------------------
  
  # NOTE(review): with directory="" file.path() yields "/<name>", i.e. a path at
  # the filesystem root rather than the working directory - confirm the intent.
  if (is.null(directory)) directory="" 
  
  if (is.null(pk$data)) {
    if (is.null(pk$name)) stop("puzzle(): `pk` needs either `data` or a file `name`")
    loaded=read.sheets(pk$name)
    pk$data=loaded$data
    pk$sheetnames=loaded$sheetnames
  } else {
    pk$sheetnames=names(pk$data)
  }
  
  if (is.null(dose$data)) {
    if (is.null(dose$name)) stop("puzzle(): `dose` needs either `data` or a file `name`")
    dose$data=utils::read.csv(file=file.path(directory,dose$name), na.strings=missingvalues)
  }
  
  if (is.null(cov$data) && has.file(cov$name)) {
    cov$data=utils::read.csv(file=file.path(directory,cov$name), na.strings=missingvalues)
  }
  
  if (is.null(pd$data)) {
    if (has.file(pd$name)) {
      loaded=read.sheets(pd$name)
      pd$data=loaded$data
      pd$sheetnames=loaded$sheetnames
    }
  } else {
    pd$sheetnames=names(pd$data)
  }
  
  if (is.null(extratimes$data) && has.file(extratimes$name)) {
    loaded=read.sheets(extratimes$name)
    extratimes$data=loaded$data
    # A csv applies to both PK and PD ("pkpd"); workbook sheets keep their names.
    names(extratimes$data)=if (tolower(file.ext(extratimes$name))=="csv") "pkpd" else loaded$sheetnames
  }
  
  # ---- harmonise times and tag the data sources -----------------------------
  
  dose$data=convert.datetime(dose$data)
  dose$data=compute.time(dose$data,dose$data)
  dose$data$DATASOURCE="DOSE"
  
  if (!is.null(extratimes$data)) {
    EXTRATIME="EXTRATIME"
    names(extratimes$data)=tolower(names(extratimes$data))
    # Unnumbered "pk"/"pd" sheets refer to the first PK/PD sheet.
    names(extratimes$data)[names(extratimes$data)=="pk"]="pk1"
    names(extratimes$data)[names(extratimes$data)=="pd"]="pd1"
    
    for (i in seq_along(extratimes$data)) {
      extratimes$data[[i]]=convert.datetime(extratimes$data[[i]])
      extratimes$data[[i]]=compute.time(extratimes$data[[i]],dose$data)
    }
    dose$data$EXTRATIME=0
  } else {
    EXTRATIME=NULL
  }
  
  pk$data=prepare.observations(pk$data,"pk","PK",dose$data,extratimes$data)
  
  if (!is.null(cov$data)) {
    keycolumns=c("ID","TIME",optionalcolumns)
    if (!"TIME" %in% names(cov$data)) cov$data$TIME=NA
    # Wide covariate tables are melted to long VARIABLE/VALUE form.
    if (!"VARIABLE" %in% names(cov$data)) {
      cov$data=plyr::rename(reshape2::melt(cov$data,id.vars=keycolumns[keycolumns %in% names(cov$data)]),c("variable"="VARIABLE","value"="VALUE"))
    }
    covcolumns=names(cov$data)[names(cov$data) %in% keycolumns & !names(cov$data) %in% c("VARIABLE","VALUE")]
    cov$data=convert.datetime(cov$data)
    cov$data=compute.time(cov$data,dose$data)
    cov$data$DATASOURCE="COV"
  }
  
  if (!is.null(pd$data)) {
    pd$data=prepare.observations(pd$data,"pd","PD",dose$data,extratimes$data)
  }
  
  # ---- optional output columns (NULL entries drop out of the final select) ---
  
  pkcolumns=unlist(lapply(pk$data,names))
  RATE=if ("RATE" %in% names(dose$data)) "RATE" else NULL
  ADDL=if ("ADDL" %in% names(dose$data)) "ADDL" else NULL
  II=if ("II" %in% names(dose$data)) "II" else NULL
  SS=if ("SS" %in% names(dose$data)) "SS" else NULL
  BLQ=if ("BLQ" %in% pkcolumns) "BLQ" else NULL
  LLOQ=if ("LLOQ" %in% pkcolumns) "LLOQ" else NULL
  COVS=if (!is.null(cov$data)) as.character(sort(unique(cov$data$VARIABLE))) else NULL
  TYPE=if (!is.null(pd$data)) "TYPE" else NULL
  C=ignore
  
  # TYPE: 0 = dose, 1 = PK, 2 = PD. CMT numbers sheets consecutively, PK first.
  dose$data$TYPE=0
  for (i in seq_along(pk$data)) {
    pk$data[[i]]$TYPE=1
    pk$data[[i]]$CMT=i
  }
  if (!is.null(pd$data)) {
    for (i in seq_along(pd$data)) {
      pd$data[[i]]$TYPE=2
      pd$data[[i]]$CMT=length(pk$data)+i
    }
  }
  
  nm$data=if (!is.null(pd$data)) rbinddiff(do.call(rbinddiff,pk$data),do.call(rbinddiff,pd$data)) else do.call(rbinddiff,pk$data)
  
  # Doses and covariates are kept only for subjects that have observations.
  uids=unique(nm$data$ID)
  dose$data=subset(dose$data,ID %in% uids)
  if (!is.null(cov$data)) cov$data=subset(cov$data,ID %in% uids)
  
  nm$data$LDV=ifelse(nm$data$DV>0,log(nm$data$DV),NA)
  
  if (!"EVID" %in% names(nm$data)) nm$data$EVID=0
  if (!"MDV" %in% names(nm$data)) nm$data$MDV=ifelse(!is.na(nm$data$DV),0,1)
  if (!"EVID" %in% names(dose$data)) dose$data$EVID=1
  if (!"MDV" %in% names(dose$data)) dose$data$MDV=1
  
  # Dosing compartments per absorption order; observation compartments are
  # shifted past the depot compartment(s).
  # NOTE(review): any other `order` value falls through silently and leaves the
  # dose records without CMT - confirm whether that should be an error.
  if (length(order)==1 && order==0) {
    dose$data$CMT=1
  } else if (length(order)==1 && order==1) {
    dose$data$CMT=1
    nm$data$CMT=nm$data$CMT+1
  } else if (length(order)==2 && all(order==c(0,0))) {
    dose$data=reshape::expand.grid.df(dose$data,data.frame(CMT=c(1,1)))
  } else if (length(order)==2 && (all(order==c(0,1)) || all(order==c(1,0)))) {
    dose$data=reshape::expand.grid.df(dose$data,data.frame(CMT=c(1,2)))
    if (is.null(RATE)) {
      RATE="RATE"
      dose$data$RATE[dose$data$CMT==1]=0
    }
    dose$data$RATE[dose$data$CMT==2]=-2
    nm$data$CMT=nm$data$CMT+1
  } else if (length(order)==2 && all(order==c(1,1))) {
    dose$data=reshape::expand.grid.df(dose$data,data.frame(CMT=c(1,2)))
    nm$data$CMT=nm$data$CMT+2
  }
  
  # Report (and optionally record) which CMT number stands for which sheet.
  sheetnames=c(pk$sheetnames,pd$sheetnames)
  cmts=unique(nm$data$CMT)
  lvl=data.frame(utils::tail(cmts,length(sheetnames)),sheetnames)
  message((paste0("Automatic coercion to numeric for CMT\n",
                  paste(paste(lvl[,1],lvl[,2],sep="="),collapse="\n"))))
  if (!is.null(coercion$name)) coercion$data=data.frame(VAR="CMT",stats::setNames(lvl,c("NUM","CHAR")))
  
  nm$data=rbinddiff(nm$data,dose$data)
  
  # NOTE(review): `arrange` is spliced into code and evaluated, so it must only
  # ever come from a trusted caller; kept because callers pass expressions such
  # as "desc(EVID)".
  if (nchar(arrange)>0) eval(parse(text=paste0("nm$data=dplyr::arrange(nm$data,",arrange,")")))
  # SORTINDEX remembers this order; it is restored after every per-ID ddply().
  nm$data$SORTINDEX=seq_len(nrow(nm$data))
  
  # ---- BLQ handling (only when both BLQ and LLOQ are available) --------------
  
  if (!is.null(BLQ) && !is.null(LLOQ)) {
    blqcolumns=c("DV0","LDV0","MDV0","DV1","LDV1","MDV1","DVLLOQ","LDVLLOQ","MDVLLOQ")
    
    # DV0: every BLQ observation set to 0. DV1: only the BLQ records before the
    # subject's first quantifiable observation set to 0. DVLLOQ: BLQ
    # observations with a known LLOQ set to LLOQ/2.
    nm$data=dplyr::arrange(plyr::ddply(nm$data,~ID,plyr::mutate,
                                       BLQ0=as.logical(BLQ) & DATASOURCE!="DOSE",
                                       DV0=ifelse(BLQ0,0,DV),
                                       BLQ1=seq_len(length(BLQ0))<which(!BLQ0 & DATASOURCE!="DOSE")[1] & DATASOURCE!="DOSE",
                                       DV1=ifelse(BLQ1,0,DV),
                                       BLQLLOQ=BLQ0 & !is.na(LLOQ),
                                       DVLLOQ=ifelse(BLQLLOQ,LLOQ/2,DV),
                                       SORTINDEX=SORTINDEX),SORTINDEX)
    
    nm$data$LDV0=ifelse(nm$data$DV0>0,log(nm$data$DV0),NA)
    nm$data$MDV0=ifelse(is.na(nm$data$DV0),1,0)
    nm$data$LDV1=ifelse(nm$data$DV1>0,log(nm$data$DV1),NA)
    nm$data$MDV1=ifelse(is.na(nm$data$DV1),1,0)
    nm$data$LDVLLOQ=ifelse(nm$data$DVLLOQ>0,log(nm$data$DVLLOQ),NA)
    nm$data$MDVLLOQ=ifelse(is.na(nm$data$DVLLOQ),1,0)
  } else {
    blqcolumns=NULL
  }
  
  # ---- dose-relative times ---------------------------------------------------
  
  isdose=!is.na(nm$data$AMT)
  nm$data$DOSETIME[isdose]=nm$data$TIME[isdose]
  nm$data$DOSETIME=repeat.before.id(nm$data[,c("ID","DOSETIME")])
  nm$data$TAD=nm$data$TIME-nm$data$DOSETIME
  
  # PDOSETIME: DOSETIME of the previous record within the subject; reset at
  # EVID==4 records and defaulted to DOSETIME where it is missing.
  nm$data=dplyr::arrange(plyr::ddply(nm$data,~ID,plyr::mutate,PDOSETIME=c(NA,utils::head(DOSETIME,-1)),SORTINDEX=SORTINDEX),SORTINDEX)
  nm$data$PDOSETIME[nm$data$EVID==4]=NA
  nm$data$PDOSETIME[is.na(nm$data$PDOSETIME)]=nm$data$DOSETIME[is.na(nm$data$PDOSETIME)]
  
  # With negative times, also provide TIME0 (negatives set to 0) and TIME1
  # (shifted so each subject's first record is at 0).
  if (any(nm$data$TIME<0,na.rm=TRUE)) {
    negtimecolumns=c("TIME0","TIME1")
    
    nm$data$TIME0=ifelse(nm$data$TIME<0,0,nm$data$TIME)
    nm$data=dplyr::arrange(plyr::ddply(nm$data,~ID,plyr::mutate,TIME1=TIME-TIME[1],SORTINDEX=SORTINDEX),SORTINDEX)
  } else {
    negtimecolumns=NULL
  }
  
  # Number the doses given into compartment 1 within each subject; the counter
  # is only exported when some subject has more than one dose.
  for (id in unique(nm$data$ID)) {
    idx=nm$data$ID==id & !is.na(nm$data$AMT) & nm$data$CMT==1
    nm$data$NUMDOSE[idx]=seq_len(sum(idx))
  }
  if (max(nm$data$NUMDOSE, na.rm=TRUE)>1) {
    NUMDOSE="NUMDOSE"
    nm$data$NUMDOSE=repeat.before.id(nm$data[,c("ID","NUMDOSE")])
  } else {
    NUMDOSE=NULL
  }
  
  for (column in fillcolumns) {
    nm$data[,column]=repeat.before.id(nm$data[,c("ID",column)])
  }
  
  # ---- merge covariates ------------------------------------------------------
  
  if (!is.null(cov$data)) {
    # Character copies of the key columns (SQL_*) are used for matching; a
    # missing key on the covariate side becomes "%", the SQL LIKE wildcard.
    sqlcovcolumns=paste0("SQL_",covcolumns)
    cov$data[,sqlcovcolumns]=unname(lapply(cov$data[,covcolumns,drop=FALSE],as.character))
    cov$data[,sqlcovcolumns][is.na(cov$data[,sqlcovcolumns])]="%"
    nm$data[,sqlcovcolumns]=unname(lapply(nm$data[,covcolumns,drop=FALSE],as.character))
    
    # `nmdata` and `covdata` are referenced BY NAME inside the SQL text below,
    # so these two local names must not be changed.
    nmdata=nm$data 
    for (variable in unique(cov$data$VARIABLE)) {
      if (verbose) cat(paste0(variable," ",nrow(nmdata), " -> "))
      if (variable %in% names(nmdata)) nmdata[,variable]=NULL
      covdata=reshape2::dcast(stats::as.formula(paste0(paste(c(sqlcovcolumns,"TIME"),collapse=" + ")," ~ VARIABLE")), data=subset(cov$data,VARIABLE==variable), value.var = "VALUE")
      if (all(is.na(covdata$TIME))) {
        # Time-invariant covariate: plain join on the keys that are specified.
        nmdata=plyr::join(nmdata,covdata,by=sqlcovcolumns[apply(covdata[sqlcovcolumns]!="%",2,any)])
      } else {
        # Time-varying covariate: match on the keys and attach each value to the
        # first record of the subject at or after the covariate time.
        nontimekeys=sqlcovcolumns[covcolumns!="TIME"]
        nmdata[,nontimekeys][is.na(nmdata[,nontimekeys])]="NA"
        nmdata=sqldf::sqldf(paste0("SELECT * FROM nmdata LEFT JOIN covdata ON ",
                                   paste(paste0("(nmdata.", nontimekeys, " LIKE covdata.", nontimekeys, ")"),collapse=" AND "),
                                   " AND (nmdata.SQL_TIME LIKE covdata.SQL_TIME OR nmdata.TIME = (select min(TIME) from nmdata where (nmdata.SQL_ID == covdata.SQL_ID and TIME >= covdata.TIME)))"))
      }
      if (verbose) cat(paste0(nrow(nmdata)," -> "))
      # A record matched by several covariate rows keeps only its last match.
      nmdata=nmdata[!duplicated(nmdata$SORTINDEX,fromLast=TRUE),]
      if (!variable %in% norepeatcolumns) nmdata[,variable]=repeat.before.id(nmdata[,c("ID",variable)])
      nmdata=nmdata[,!duplicated(names(nmdata))]
      if (verbose) cat(paste0(nrow(nmdata),"\n"))
    }
    nm$data=nmdata
    
  }
  
  # ---- final layout ----------------------------------------------------------
  
  if (!is.null(C) && !C %in% names(nm$data)) nm$data[,C]=""
  
  nm$data=nm$data[,c(C,"ID","TIME",negtimecolumns,"TAD","DOSETIME","PDOSETIME",EXTRATIME,NUMDOSE,"AMT",RATE,ADDL,II,SS,TYPE,"CMT","EVID","DV","LDV","MDV",blqcolumns,BLQ,LLOQ,optionalcolumns,COVS)]
  
  if (sequential) {
    # Sequential zero + first order absorption (order is c(0,1) here, checked at
    # the top): swap the RATE codes 0 and -2, then drop the dose records whose
    # RATE became 0.
    rate=nm$data[,"RATE"]
    nm$data[,"RATE"]=ifelse(rate==-2,0,ifelse(rate==0,-2,rate))
    nm$data = dplyr::filter(nm$data,is.na(RATE) | RATE!=0)
  }
  
  # Remaining character/logical columns are coded numerically, except the
  # literal column "C" and anything listed in `nocoercioncolumns`.
  istext=vapply(nm$data, function(column) is.character(column) || is.logical(column), logical(1))
  namestoconvert=names(nm$data)[!(names(nm$data) %in% c("C",nocoercioncolumns)) & istext]
  if (length(namestoconvert)>0) {
    nm$data[,namestoconvert]=convert.to.numeric(nm$data[,namestoconvert,drop=FALSE],initialindex,na.strings)
  }
  if (!is.null(coercion$name)) write.coercion.comments(coercion$data, file=file.path(directory,coercion$name))
  
  # ---- summary messages ------------------------------------------------------
  
  message("Assembling date and time: ",lubridate::now())
  message("Time zone: ", Sys.timezone())
  message("Number of individuals: ", length(unique(nm$data$ID)))
  df_test = as.data.frame(nm$data)
  df_obs = dplyr::filter(df_test,MDV==0) 
  message("Number of observations: ", nrow(df_obs))
  df_doses = dplyr::filter(df_test, EVID==1) 
  doses = sort(as.vector(unique(df_doses$AMT)))
  message("Dose levels: ", paste(shQuote(doses), collapse=", "))
  if(!is.null(username)){
    message("This data set was assembled by ", paste(username))
  }
  
  # Either write the data set to `nm$name` under `directory`, or return it.
  if (!is.null(nm$name)) {
    utils::write.csv(nm$data, file=file.path(directory,nm$name),row.names=FALSE,quote=FALSE,na=missingvalues)
  } else {
    return(nm$data)
  }
}