R/and.R

Defines functions and

Documented in and

#' Assemble NONMEM Datasets
#'
#' Build a dataset in the specific format required for the NONMEM analysis from basic tabulated files
#'
#' @authors Olivier Barriere and Mario Gonzalez Sales
#'
#' @param directory path to your directory
#' @param order define the absorption order, can be 0, 1, c(0,1), or c(1,1)
#' @param pkname name for the required file containing the pk information
#' @param pk R object containing the pk information
#' @param dosename name for the required file containing the dose information
#' @param dose R object containing dose information
#' @param covname name for the optional file containing the covariate information
#' @param cov R object containing the covariate information
#' @param pdname name for the optional file containing the pd information
#' @param pd R object containing the pk information
#' @param extratimesname name for the optional file containing the additional times to be added
#' @param extratimes R object containing the additional times to be added
#' @param nmname name of output file generated by and
#' @param dateformat define date format if needed
#' @param timeformat define time format if needed
#' @param timeunits define time units if needed
#' @param arrange define how the columns should be arranged
#' @param optionalcolumns define optional columns
#' @param initialindex define the lower category of categorical covariates
#' @param simultaneous define simultaneous zer + first order absorption 
#' @return a .csv file
#' @export
#' @examples
#'
#' and(directory=file.path(inDirectory,"Theophylline"),order=1,
#'              pk=list(name="pk.csv"), dose=list(name="dose.csv"), cov=list(name="cov.csv"), extratimes=list(name="addltimes.csv"),
#'              nm=list(name="nm1.csv"),
#'              optionalcolumns="TIMEPOINT")
#'
#' nm=and(directory=file.path(inDirectory,"Theophylline"),order=1,
#'              pk=list(name="pk.csv"), dose=list(name="dose.csv"), cov=list(name="cov.csv"), extratimes=list(name="addltimes.csv"),
#'              optionalcolumns="TIMEPOINT")
#' write.csv(nm, file=file.path(inDirectory,"Theophylline","nm1b.csv"),row.names=F,quote=F,na=".")
#'
#' and(directory=file.path(inDirectory,"inAssembling/0. Poster"),order=c(1,1),
#'              pk=list(name="pk.csv"), pd=list(name="pd.csv"), dose=list(name="dose.csv"), cov=list(name="cov.csv"), extratimes=list(name="addltimes.csv"),
#'              coercion=list(name="nm11.txt",sep=";"),
#'              nm=list(name="nm11.csv"),
#'              optionalcolumns="OCC")
#'
#' and(directory=file.path(inDirectory,"inAssembling/1. Basic"), order=0,
#'              pk=list(name="pk.csv"), pd=list(name="pd.csv"), dose=list(name="dose.csv"), cov=list(name="cov.csv"),
#'              nm=list(name="nm_ZeroOrder.csv"))
#'
#' and(directory=file.path(inDirectory,"inAssembling/1. Basic"), order=1,
#'              pk=list(name="pk.csv"), pd=list(name="pd.csv"), dose=list(name="dose.csv"), cov=list(name="cov.csv"),
#'              nm=list(name="nm_FirstOrder.csv"))
#'
#' and(directory=file.path(inDirectory,"inAssembling/1. Basic"), order=c(0,1),
#'              pk=list(name="pk.csv"), pd=list(name="pd.csv"), dose=list(name="dose.csv"), cov=list(name="cov.csv"),
#'              nm=list(name="nm_ParallelZeroAndFirstOrder.csv"))
#'
#' and(directory=file.path(inDirectory,"inAssembling/2. Multiple analytes and effects"), order=1,
#'              pk=list(name="pk2.xlsx"), pd=list(name="pd3.xlsx"), dose=list(name="dose.csv"), cov=list(name="cov.csv"),
#'              nm=list(name="nm_MultipleAnalytesandEffects.csv"))
#'
#' and(directory=file.path(inDirectory,"inAssembling/3. Optional columns"),order=1,
#'              pk=list(name="pk_OptionalColumns.csv"), dose=list(name="dose_OptionalColumns.csv"), cov=list(name="cov.csv"),
#'              coercion=list(name="nm_OptionalColumns.txt"),
#'              nm=list(name="nm_OptionalColumns.csv"),
#'              optionalcolumns=c("OCC","TIMEPOINT","TRT"), fillcolumns="TRT")
#'
#' and(directory=file.path(inDirectory,"inAssembling/4. EVID4"),order=1,
#'              pk=list(name="pk.xlsx"), dose=list(name="dose.csv"), cov=list(name="cov.csv"),
#'              extratimes=list(name="addltimes.xlsx"),
#'              nm=list(name="nm_1_full.csv"),
#'              coercion=list(name="nm_1_full.txt"),
#'              optionalcolumns=c("PERIOD","TIMEPOINT","TRT"), fillcolumns="TRT", arrange="ID,PERIOD,TIME,desc(MDV),CMT")
#'
#' and(directory=file.path(inDirectory,"inAssembling/5. extratimes mixte"),order=1,
#'              pk=list(name="pk_2017-11-23.csv"), dose=list(name="doses_2017-11-23.csv"),
#'              nm=list(name="nm.csv"),
#'              optionalcolumns=c("TIMEPOINT"), datetimeformat="%Y-%m-%d %H:%M")
#'
#' and(directory=file.path(inDirectory,"inAssembling/5. extratimes mixte"),order=1,
#'              pk=list(name="pk_2017-11-23.csv"), dose=list(name="doses_2017-11-23.csv"), extratimes=list(name="extratimes_2017-11-23_TIME.csv"),
#'              nm=list(name="nmt.csv"),
#'              optionalcolumns=c("TIMEPOINT"), datetimeformat="%Y-%m-%d %H:%M")
#'
#' and(directory=file.path(inDirectory,"inAssembling/5. extratimes mixte"),order=1,
#'              pk=list(name="pk_2017-11-23.csv"), dose=list(name="doses_2017-11-23.csv"), extratimes=list(name="extratimes_2017-11-23_DATETIME.csv"),
#'              nm=list(name="nmdt.csv"),
#'              optionalcolumns=c("TIMEPOINT"), datetimeformat="%Y-%m-%d %H:%M")
#'
#' and(directory=file.path(inDirectory,"inAssembling/5. extratimes mixte"),order=1,
#'              pk=list(name="pk_2017-11-23.csv"), dose=list(name="doses_2017-11-23.csv"), extratimes=list(name="extratimes_2017-11-23.csv"),
#'              nm=list(name="nmtdt.csv"),
#'              optionalcolumns=c("TIMEPOINT"), datetimeformat="%Y-%m-%d %H:%M")


and = function(directory=NULL,
               order,
               pk=list(name=NULL,data=NULL),
               dose=list(name=NULL,data=NULL),
               cov=list(name=NULL,data=NULL),
               pd=list(name=NULL,data=NULL),
               extratimes=list(name=NULL,data=NULL),
               nm=list(name=NULL),
               coercion=list(name=NULL,sep=","),
               optionalcolumns=NULL, 
               fillcolumns=NULL, 
               nocoercioncolumns=NULL, 
               norepeatcolumns=NULL,
               initialindex=0, 
               na.strings="N/A",
               arrange="ID,TIME,CMT,desc(EVID)",
               datetimeformat="%Y-%m-%d %H:%M:%S",
               timeunits="hours",
               timezone=Sys.timezone(),
               ignore="C", 
               missingvalues=".",
               simultaneous=F,
               verbose=F) {  
  
  ## options
  #options(stringsAsFactors=F)
  packages=c("lubridate", "readxl", "reshape" ,"reshape2", "sqldf","plyr")
  if (length(setdiff(packages, rownames(installed.packages()))) > 0) {
    install.packages(setdiff(packages, rownames(installed.packages())))
  }
  lapply(packages, require, character.only=T)
  
  ## functions
  repeat.before = function(x) {
    ind = which(!is.na(x))
    if(is.na(x[1]))
      ind = c(1,ind)
    rep(x[ind], times = diff(
      c(ind, length(x) + 1) ))
  }
  
  repeat.before.id = function(df) {
    for (id in unique(df$ID)) {
      df[df$ID==id,2]=repeat.before(df[df$ID==id,2])
    }
    return(df[,2])
  }
  
  rbinddiff = function(...) {
    dots=list(...)
    
    if (length(dots)==0) return(NULL)
    
    df=dots[[1]]
    for (i in 1+seq_len(length(dots)-1)) {
      df=rbinddiff2(df,dots[[i]])
    }
    return(df)
  }
  
  rbinddiff2 = function(a,b) {
    notina=setdiff(names(b),names(a))
    notinb=setdiff(names(a),names(b))
    if (nrow(a)>0) a[,notina]=NA
    if (nrow(b)>0) b[,notinb]=NA
    
    rbind(a,b)
  }
  
  convert.datetime = function(df) {
    if (!"DATETIME" %in% names(df)) {
      df$TIME=as.numeric(df$TIME)
    } else {
      df$DATETIME=as.POSIXct(strptime(df$DATETIME,format=datetimeformat,tz=timezone),tz=timezone)
    }
    return(df)
  }
  
  compute.time = function (df,dose) {
    if ("DATETIME" %in% names(df) & "DATETIME" %in% names(dose)) {
      df=join(df,ddply(dose,~ID,summarise,FIRSTDOSEDATETIME=min(DATETIME)),by="ID")
      if ("TIME" %in% names(df)) {
        df$TIME[is.na(df$TIME)]=as.numeric(difftime(df$DATETIME[is.na(df$TIME)],df$FIRSTDOSEDATETIME[is.na(df$TIME)],units=timeunits,tz=timezone))
      } else {
        df$TIME=as.numeric(difftime(df$DATETIME,df$FIRSTDOSEDATETIME,units=timeunits,tz=timezone))
      }
      df$FIRSTDOSEDATETIME=NULL
    } else {
      df$DATETIME=NA
    }
    return(df)
  }
  
  add.times = function(df,times) {
    if (!"EVID" %in% names(df)) df$EVID=0
    df$EXTRATIME=0
    if (!is.null(times)) {
      df2=setNames(as.data.frame(times[rep(1:nrow(times),times=length(unique(df$ID))),]),names(times))
      if (!"ID" %in% names(times)) df2$ID=rep(unique(df$ID),each=nrow(times))
      df2=df2[!duplicated(rbind(df[,names(df2)],df2))[(nrow(df)+1):(nrow(df)+nrow(df2))],]
      
      if (nrow(df2)>0) {
        if (!"EVID" %in% names(df2)) df2$EVID=2
        df2$EXTRATIME=1
        df=rbinddiff(df,df2)
      }
      df=arrange(df,ID,TIME)  
    }
    return(df)
  }
  
  convert.to.numeric = function (df,initialindex,na.strings) {
    for (name in names(df)) {
      if (class(df[,name])!="character") df[,name]=as.character(df[,name])
      df[,name][df[,name] %in% na.strings]=NA
      if (all(!(ifelse(is.na(df[,name]), NA, TRUE) & suppressWarnings(is.na(as.numeric(df[,name])))), na.rm=T)) {
        df[,name]=as.numeric(df[,name])
      } else {
        df[,name]=as.factor(df[,name])
        lvl=data.frame(seq_along(levels(df[,name]))+initialindex-1,levels(df[,name]))
        message((paste0("Automatic coercion to numeric for ", name, "\n",
                        paste(paste(lvl[,1],lvl[,2],sep="="),collapse="\n"))))
        df[,name]=as.numeric(df[,name])+initialindex-1
        if (!is.null(coercion$data)) coercion$data <<- rbind(coercion$data,data.frame(VAR=name,setNames(lvl,c("NUM","CHAR"))))
      }
    }
    return(df)
  }
  
  write.coercion.comments = function(df,file,sep=if ("sep" %in% names(coercion)) coercion$sep else ",") {
    fileConn=file(file)
    df$NUMCHAR=paste0(df$NUM,"=",df$CHAR)
    df=dcast(df,VAR~NUM,value.var="NUMCHAR")    
    lines=c()
    for (i in seq_len(nrow(df))) {
      lines=c(lines,paste0(df[i,1], ": ", paste(df[i,-1][!is.na(df[i,-1])],collapse=paste0(sep," "))))
    }
    writeLines(lines, fileConn)
    close(fileConn)
  }
  
  file.ext = function(x) {
    ext=regmatches(x, regexec("\\.([^\\.]+$)",x))[[1]][2]
    if (is.na(ext)) ext=""
    return(ext)
  }
  
  file.name = function(x) {
    name=regmatches(x, regexec("(.*)\\.[^\\.]+$",x))[[1]][2]
    if (is.na(name)) name=x
    return(name)
  }
  
  ## constants
  
  ## parameters
  
  ## import data
  # pk
  if (is.null(directory)) directory="" #NULL not compatible with file.path
  
  if (is.null(pk$data)) {
    pk$data=list()
    if (tolower(file.ext(pk$name))=="csv") {
      pk$sheetnames=file.name(pk$name)
      pk$data[[1]]=read.csv(file=file.path(directory,pk$name), na.strings=missingvalues)
    } else {
      pk$sheetnames = excel_sheets(path=file.path(directory,pk$name))
      for (i in seq_along(pk$sheetnames)) {
        pk$data[[i]]=as.data.frame(read_excel(path=file.path(directory,pk$name),sheet=i))
      }
    } 
  } else {
    pk$sheetnames=names(pk$data)
  }
  # dose
  if (is.null(dose$data)) {
    dose$data=read.csv(file=file.path(directory,dose$name), na.strings=missingvalues)
  }
  # cov
  if (is.null(cov$data)) {
    if (!is.null(cov$name) && nchar(cov$name)>0 && file.exists(file.path(directory,cov$name))) cov$data=read.csv(file=file.path(directory,cov$name), na.strings=missingvalues)
  }
  # pd
  if (is.null(pd$data)) {
    if (!is.null(pd$name) && nchar(pd$name)>0 && file.exists(file.path(directory,pd$name))) {
      pd$data=list()
      if (tolower(file.ext(pd$name))=="csv") {
        pd$sheetnames=file.name(pd$name)
        pd$data[[1]]=read.csv(file=file.path(directory,pd$name), na.strings=missingvalues)
      } else {
        pd$sheetnames = excel_sheets(path=file.path(directory,pd$name))
        for (i in seq_along(pd$sheetnames)) {
          pd$data[[i]]=as.data.frame(read_excel(path=file.path(directory,pd$name),sheet=i))
        }
      }
    }
  } else {
    pd$sheetnames=names(pd$data)
  }
  # extratimes
  if (is.null(extratimes$data)) {
    if (!is.null(extratimes$name) && nchar(extratimes$name)>0 && file.exists(file.path(directory,extratimes$name))) {
      extratimes$data=list()
      if (tolower(file.ext(extratimes$name))=="csv") {
        extratimes$data[[1]]=read.csv(file=file.path(directory,extratimes$name), na.strings=missingvalues)
        names(extratimes$data)="pkpd"
      } else {
        extratimes$sheetnames = excel_sheets(path=file.path(directory,extratimes$name))
        for (i in seq_along(extratimes$sheetnames)) {
          extratimes$data[[i]]=as.data.frame(read_excel(path=file.path(directory,extratimes$name),sheet=i))
        }
        names(extratimes$data)=excel_sheets(path=file.path(directory,extratimes$name))
      }
    }
  }
  
  ## Check/change type
  # dose
  dose$data=convert.datetime(dose$data)
  dose$data=compute.time(dose$data,dose$data)
  dose$data$DATASOURCE="DOSE"
  # extratimes
  if (!is.null(extratimes$data)) {
    EXTRATIME="EXTRATIME"
    names(extratimes$data)=tolower(names(extratimes$data))
    if ("pk" %in% names(extratimes$data)) names(extratimes$data)[names(extratimes$data)=="pk"]="pk1"
    if ("pd" %in% names(extratimes$data)) names(extratimes$data)[names(extratimes$data)=="pd"]="pd1"
    
    for (i in seq_along(extratimes$data)) {
      extratimes$data[[i]]=convert.datetime(extratimes$data[[i]])
      extratimes$data[[i]]=compute.time(extratimes$data[[i]],dose$data)
    }
    dose$data$EXTRATIME=0
  } else {
    EXTRATIME=NULL
  }
  # pk
  for (i in seq_along(pk$data)) {
    pk$data[[i]]=convert.datetime(pk$data[[i]])
    pk$data[[i]]=compute.time(pk$data[[i]],dose$data)
    if (!is.null(extratimes$data)) {
      sheetname=names(extratimes$data)[names(extratimes$data) %in% c(paste0("pk",i), "pkpd")]
      pk$data[[i]]=add.times(pk$data[[i]],if (length(sheetname)!=0) extratimes$data[[sheetname]] else NULL)
    }
    pk$data[[i]]$DV=as.numeric(pk$data[[i]]$DV)  
    pk$data[[i]]$DATASOURCE="PK"
  }
  # cov
  if (!is.null(cov$data)) {
    if (!"TIME" %in% names(cov$data)) cov$data$TIME=NA
    if (!"VARIABLE" %in% names(cov$data)) cov$data=rename(melt(cov$data,id.vars=c("ID","TIME",optionalcolumns)[c("ID","TIME",optionalcolumns) %in% names(cov$data)]),c("variable"="VARIABLE","value"="VALUE"))
    covcolumns=names(cov$data)[names(cov$data) %in% c("ID","TIME",optionalcolumns) & !names(cov$data) %in% c("VARIABLE","VALUE")]
    cov$data=convert.datetime(cov$data)
    cov$data=compute.time(cov$data,dose$data)
    cov$data$DATASOURCE="COV"
  }
  # pd
  if (!is.null(pd$data)) {
    for (i in seq_along(pd$data)) {
      pd$data[[i]]=convert.datetime(pd$data[[i]])
      pd$data[[i]]=compute.time(pd$data[[i]],dose$data)
      if (!is.null(extratimes$data)) {
        sheetname=names(extratimes$data)[names(extratimes$data) %in% c(paste0("pd",i), "pkpd")]
        pd$data[[i]]=add.times(pd$data[[i]],if (length(sheetname)!=0) extratimes$data[[sheetname]] else NULL)
      }
      pd$data[[i]]$DV=as.numeric(pd$data[[i]]$DV)  
      pd$data[[i]]$DATASOURCE="PD"
    }
  }
  
  ## NONMEM columns
  RATE=if ("RATE" %in% names(dose$data)) "RATE" else NULL
  ADDL=if ("ADDL" %in% names(dose$data)) "ADDL" else NULL
  II=if ("II" %in% names(dose$data)) "II" else NULL
  SS=if ("SS" %in% names(dose$data)) "SS" else NULL
  BLQ=if ("BLQ" %in% unlist(lapply(pk$data,names))) "BLQ" else NULL
  LLOQ=if ("LLOQ" %in% unlist(lapply(pk$data,names))) "LLOQ" else NULL
  COVS=if (!is.null(cov$data)) as.character(sort(unique(cov$data$VARIABLE))) else NULL
  TYPE=if (!is.null(pd$data)) "TYPE" else NULL
  C=if (!is.null(ignore)) ignore else NULL
  #KEY=if (!is.null(key)) key else NULL
  
  # TYPE and CMT
  dose$data$TYPE=0
  for (i in seq_along(pk$data)) {
    pk$data[[i]]$TYPE=1
    pk$data[[i]]$CMT=i
  }
  if (!is.null(pd$data)) {
    for (i in seq_along(pd$data)) {
      pd$data[[i]]$TYPE=2
      pd$data[[i]]$CMT=length(pk$data)+i
    }
  }
  
  ## PK/PD
  nm$data=if (!is.null(pd$data)) rbinddiff(do.call(rbinddiff,pk$data),do.call(rbinddiff,pd$data)) else do.call(rbinddiff,pk$data)
  
  # Unique PK/PD IDs
  uids=unique(nm$data$ID)
  dose$data=subset(dose$data,ID %in% uids)
  if (!is.null(cov$data)) cov$data=subset(cov$data,ID %in% uids)
  
  # LogDV
  nm$data$LDV=ifelse(nm$data$DV>0,log(nm$data$DV),NA)
  
  # EVID and MDV
  if (!"EVID" %in% names(nm$data)) nm$data$EVID=0
  if (!"MDV" %in% names(nm$data)) nm$data$MDV=ifelse(!is.na(nm$data$DV),0,1)
  if (!"EVID" %in% names(dose$data)) dose$data$EVID=1
  if (!"MDV" %in% names(dose$data)) dose$data$MDV=1
  
  # CMT and RATE
  if (length(order)==1 && order==0) {
    dose$data$CMT=1
  } else if (length(order)==1 && order==1) {
    dose$data$CMT=1
    nm$data$CMT=nm$data$CMT+1
  } else if (length(order)==2 && all(order==c(0,0))) {
    dose$data=expand.grid.df(dose$data,data.frame(CMT=c(1,1)))
  } else if (length(order)==2 && (all(order==c(0,1)) | all(order==c(1,0)))) {
    dose$data=expand.grid.df(dose$data,data.frame(CMT=c(1,2)))
    if (is.null(RATE)) {
      RATE="RATE"
      dose$data$RATE[dose$data$CMT==1]=0
    }
    dose$data$RATE[dose$data$CMT==2]=-2
    nm$data$CMT=nm$data$CMT+1
  } else if (length(order)==2 && all(order==c(1,1))) {
    dose$data=expand.grid.df(dose$data,data.frame(CMT=c(1,2)))
    nm$data$CMT=nm$data$CMT+2
  }
  
  # Coercion
  nm$sheetnames=c(pk$sheetnames,pd$sheetnames)
  nm$cmts=unique(nm$data$CMT)
  lvl=data.frame(tail(nm$cmts,length(nm$sheetnames)),nm$sheetnames)
  message((paste0("Automatic coercion to numeric for CMT\n",
                  paste(paste(lvl[,1],lvl[,2],sep="="),collapse="\n"))))
  if (!is.null(coercion$name)) coercion$data=data.frame(VAR="CMT",setNames(lvl,c("NUM","CHAR")))
  
  # NM+DOSE
  nm$data=rbinddiff(nm$data,dose$data)
  
  # Sort
  if (nchar(arrange)>0) eval(parse(text=paste0("nm$data=arrange(nm$data,",arrange,")")))
  nm$data$SORTINDEX=1:nrow(nm$data)
  
  # BLQ and LLOQ
  if (!is.null(BLQ) & !is.null(LLOQ)) {
    DV0="DV0"
    LDV0="LDV0"
    MDV0="MDV0"
    DV1="DV1"
    LDV1="LDV1"
    MDV1="MDV1"
    DVLLOQ="DVLLOQ"
    LDVLLOQ="LDVLLOQ"
    MDVLLOQ="MDVLLOQ"
    
    nm$data=arrange(ddply(nm$data,~ID,mutate,
                          BLQ0=as.logical(BLQ) & DATASOURCE!="DOSE", 
                          DV0=ifelse(BLQ0,0,DV),
                          BLQ1=seq_len(length(BLQ0))<which(!BLQ0 & DATASOURCE!="DOSE")[1] & DATASOURCE!="DOSE",
                          DV1=ifelse(BLQ1,0,DV),
                          BLQLLOQ=BLQ0 & !is.na(LLOQ),
                          DVLLOQ=ifelse(BLQLLOQ,LLOQ/2,DV),
                          SORTINDEX=SORTINDEX),SORTINDEX)
    
    nm$data$LDV0=ifelse(nm$data$DV0>0,log(nm$data$DV0),NA)
    nm$data$MDV0=ifelse(is.na(nm$data$DV0),1,0)
    nm$data$LDV1=ifelse(nm$data$DV1>0,log(nm$data$DV1),NA)
    nm$data$MDV1=ifelse(is.na(nm$data$DV1),1,0)
    nm$data$LDVLLOQ=ifelse(nm$data$DVLLOQ>0,log(nm$data$DVLLOQ),NA)
    nm$data$MDVLLOQ=ifelse(is.na(nm$data$DVLLOQ),1,0)
    
    dose$data$MDV0=1
    dose$data$MDV1=1
    dose$data$MDVLLOQ=1
  } else {
    DV0=NULL
    LDV0=NULL
    MDV0=NULL
    DV1=NULL
    LDV1=NULL
    MDV1=NULL
    DVLLOQ=NULL
    LDVLLOQ=NULL
    MDVLLOQ=NULL
  }
  
  # TAD
  nm$data$DOSETIME[!is.na(nm$data$AMT)]=nm$data$TIME[!is.na(nm$data$AMT)]
  nm$data$DOSETIME=repeat.before.id(nm$data[,c("ID","DOSETIME")])
  nm$data$TAD=nm$data$TIME-nm$data$DOSETIME
  
  # DOSETIME and PDOSETIME http://www.cognigencorp.com/nonmem/nm/99dec212004.html
  nm$data=arrange(ddply(nm$data,~ID,mutate,PDOSETIME=c(NA,head(DOSETIME,-1)),SORTINDEX=SORTINDEX),SORTINDEX)
  nm$data$PDOSETIME[nm$data$EVID==4]=NA
  nm$data$PDOSETIME[is.na(nm$data$PDOSETIME)]=nm$data$DOSETIME[is.na(nm$data$PDOSETIME)]
  
  # TIME0 and TIME1
  if (any(nm$data$TIME<0,na.rm=T)) {
    TIME0="TIME0"
    TIME1="TIME1"
    
    nm$data$TIME0=ifelse(nm$data$TIME<0,0,nm$data$TIME)
    nm$data=arrange(ddply(nm$data,~ID,mutate,TIME1=TIME-TIME[1],SORTINDEX=SORTINDEX),SORTINDEX)
  } else {
    TIME0=NULL
    TIME1=NULL
  }
  
  # NUMDOSE
  for (id in unique(nm$data$ID)) {
    idx=nm$data$ID==id & !is.na(nm$data$AMT) & nm$data$CMT==1
    nm$data$NUMDOSE[idx]=1:sum(idx)
  }
  if (max(nm$data$NUMDOSE, na.rm=T)>1) {
    NUMDOSE="NUMDOSE"
    nm$data$NUMDOSE=repeat.before.id(nm$data[,c("ID","NUMDOSE")])
  } else {
    NUMDOSE=NULL
  }
  
  # fillcolumns columns
  for (column in fillcolumns) {
    nm$data[,column]=repeat.before.id(nm$data[,c("ID",column)])
  }
  
  ## COV
  if (!is.null(cov$data)) {
    sqlcovcolumns=paste0("SQL_",covcolumns)
    cov$data[,sqlcovcolumns]=sapply(cov$data[,covcolumns],as.character) 
    cov$data[,sqlcovcolumns][is.na(cov$data[,paste0("SQL_",covcolumns)])]="%"
    nm$data[,sqlcovcolumns]=sapply(nm$data[,covcolumns],as.character)
    
    nmdata=nm$data #sqldf not compatible with nm$data
    for (variable in unique(cov$data$VARIABLE)) {
      if (verbose) cat(paste0(variable," ",nrow(nmdata), " -> "))
      if (variable %in% names(nmdata)) nmdata[,variable]=NULL
      covdata=dcast(as.formula(paste0(paste(c(sqlcovcolumns,"TIME"),collapse=" + ")," ~ VARIABLE")), data=subset(cov$data,VARIABLE==variable), value.var = "VALUE")
      if (all(is.na(covdata$TIME))) {
        nmdata=join(nmdata,covdata,by=sqlcovcolumns[apply(covdata[sqlcovcolumns]!="%",2,any)])
        #nmdata=sqldf(paste0("SELECT * FROM nmdata LEFT JOIN covdata ON ",paste(paste0("(nmdata.", sqlcovcolumns[covcolumns!="TIME"], " LIKE covdata.", sqlcovcolumns[covcolumns!="TIME"], ")"),collapse=" AND ")))
      } else {
        nmdata[,sqlcovcolumns[covcolumns!="TIME"]][is.na(nmdata[,sqlcovcolumns[covcolumns!="TIME"]])]="NA"
        #nmdata=join(nmdata,covdata,by=sqlcovcolumns[apply(covdata[sqlcovcolumns]!="%",2,any)])
        nmdata=sqldf(paste0("SELECT * FROM nmdata LEFT JOIN covdata ON ",
                            paste(paste0("(nmdata.", sqlcovcolumns[covcolumns!="TIME"], " LIKE covdata.", sqlcovcolumns[covcolumns!="TIME"], ")"),collapse=" AND "),
                            " AND (nmdata.SQL_TIME LIKE covdata.SQL_TIME OR nmdata.TIME = (select min(TIME) from nmdata where (nmdata.SQL_ID == covdata.SQL_ID and TIME >= covdata.TIME)))"))
      }
      if (verbose) cat(paste0(nrow(nmdata)," -> "))
      nmdata=nmdata[!duplicated(nmdata$SORTINDEX,fromLast=T),]
      if (!variable %in% norepeatcolumns) nmdata[,variable]=repeat.before.id(nmdata[,c("ID",variable)])
      nmdata=nmdata[,!duplicated(names(nmdata))]
      if (verbose) cat(paste0(nrow(nmdata),"\n"))
    }
    nm$data=nmdata
    
  }
  
  ## C
  if (!is.null(C) && !C %in% names(nm$data)) nm$data[,C]=""
  
  ## Key
  #if (!is.null(KEY) && !KEY %in% names(nm$data)) nm$data[,KEY]=seq_len(nrow(nm$data))
  
  ## Output
  nm$data=nm$data[,c(C,"ID","TIME",TIME0,TIME1,"TAD","DOSETIME","PDOSETIME",EXTRATIME,NUMDOSE,"AMT",RATE,ADDL,II,SS,TYPE,"CMT","EVID","DV","LDV","MDV",DV0,LDV0,MDV0,DV1,LDV1,MDV1,DVLLOQ,LDVLLOQ,MDVLLOQ,BLQ,LLOQ,optionalcolumns,COVS)]
  
  if(simultaneous & order==c(0,1)){
    nm$data[,"RATE"] = ifelse(nm$data[,"RATE"]==-2,"F",nm$data[,"RATE"])
    nm$data[,"RATE"] = ifelse(nm$data[,"RATE"]==0,-2,nm$data[,"RATE"])
    nm$data[,"RATE"] = ifelse(nm$data[,"RATE"]=="F",0,nm$data[,"RATE"])
    nm$data = dplyr::filter(nm$data,is.na(RATE) | RATE!=0)
  }

  ## Convert to numeric
  namestoconvert=names(nm$data)[!(names(nm$data) %in% c("C",nocoercioncolumns)) & sapply(nm$data,class) %in% c("character","logical")]
  nm$data[,namestoconvert]=convert.to.numeric(nm$data[,namestoconvert,drop=F],initialindex,na.strings)
  if (!is.null(coercion$name)) write.coercion.comments(coercion$data, file=file.path(directory,coercion$name))
  
  if (!is.null(nm$name)) {
    ## Write csv
    write.csv(nm$data, file=file.path(directory,nm$name),row.names=F,quote=F,na=missingvalues)
  } else {
    ## Return
    return(nm$data)
  }
  
  
}
ModelingGreatSolutions/andir documentation built on Aug. 12, 2018, 9:07 a.m.