R/bootstrap.R

Defines functions .check_double .check_in_vector is.string .check_in_range .check_strict_pos_integer .check_string .check_bool .catchSimulxBug .getBootData .initBootstrapSettings runBootstrapProject cleanbootstrap generateBootstrapProject generateDataSetParametricSimulx generateDataSetResample bootmlx

Documented in bootmlx

#' Bootstrapping - case resampling
#'
#' Generate replicates of the original data using random sampling with replacement.
#' Population parameters are then estimated from each replicate.
#' 
#' Bootstrap functionality is now available directly in the lixoftConnectors package using the function
#' \code{runBootstrap}. Please migrate, as this function will be deprecated in the future.
#'  
#' @param project Monolix project 
#' @param nboot [optional] number of bootstrap replicates (default=100)
#' @param dataFolder [optional] folder where already generated datasets are stored, e.g dataFolder="./dummy_project/boot/" (default: data set are generated by bootmlx)
#' @param parametric [optional] boolean to define if parametric bootstrap is performed (new data is drawn from the model), (default: false)
#' @param tasks [optional] vector of booleans defining the list of tasks to perform (default: estimation of the population parameters)
#' available tasks: populationParameterEstimation, conditionalDistributionSampling,
#' conditionalModeEstimation, standardErrorEstimation, logLikelihoodEstimation, plots  
#' @param settings [optional] a list of settings for the resampling and the results:
#' \itemize{
#' \item \code{N} the number of individuals in each bootstrap data set 
#' (default value is the  number of individuals in the original data set).
#' \item \code{newResampling} boolean to generate the data sets again if they already exist (default=FALSE).
#' \item \code{covStrat} a categorical covariate of the project. The original distribution of this covariate
#' is maintained in each resampled data set if covStrat is defined (default=NULL). Notice that if the categorical covariate is varying 
#' within the subject (in case of IOV), it will not be taken into account.
#' \item \code{plot} boolean to choose if the distribution of the bootstraped esimates is displayed
#' (default = FALSE)
#' \item \code{level} level of the bootstrap confidence intervals of the population parameters
#' (default = 0.90)
#' \item \code{seed} a positive integer < 2147483647, seed for the generation of the data sets (default = NA)
#' \item \code{deleteData} delete created data set files after estimation (default = FALSE)
#' \item \code{deleteProjects} delete created Monolix projects after estimation (default = FALSE)
#' }
#' @return a data frame with the bootstrap estimates
#' @seealso \code{getBootstrapSettings} settings for bootstrap with lixoftConnectors \cr
#' \code{runBootstrap} run the bootstrap with lixoftConnectors \cr
#' \code{getBootstrapResults} results for bootstrap with lixoftConnectors
#' @examples
#' \dontrun{
#' # RsmlxDemo1.mlxtran is a Monolix project for modelling the PK of warfarin using a PK model 
#' # with parameters ka, V, Cl.
#' 
#' # In this example, bootmlx will generate 100 random replicates of the original data and will
#' # use Monolix to estimate the population parameters from each of these 100 replicates:
#' r1 <- bootmlx(project="RsmlxDemo1.mlxtran")
#'   
#' # 5 replicates will now be generated, with 50 individuals in each replicate:
#' r2 <- bootmlx(project="RsmlxDemo1.mlxtran",  nboot = 5, settings = list(N = 50))
#' 
#' # Proportions of males and females in the original dataset will be preserved   
#' # in each replicate:
#' r3 <- bootmlx(project="RsmlxDemo1.mlxtran",  settings = list(covStrat = "sex"))
#' }
#' 
#' # See http://monolix.lixoft.com/rsmlx/bootmlx/ for detailed examples of use of bootmlx
#' # Download the demo examples here: http://monolix.lixoft.com/rsmlx/installation
#' 
#' 
#' @importFrom graphics boxplot lines par plot
#' @importFrom stats quantile
#' @export
bootmlx <- function(project, nboot = 100, dataFolder = NULL, parametric = FALSE,
                    tasks=c(populationParameterEstimation=TRUE), 
                    settings = NULL){
  warning("This function will be deprecated. Please migrate to lixoftConnectors::runBootstrap().", 
          call. = FALSE, immediate. = TRUE)
  
  initRsmlx()
  
  params <- as.list(match.call(expand.dots=T))[-1]
  
  r <- prcheck(project, f="boot", settings=settings)
  if (r$demo)
    return(r$res)
  project <- r$project

  mlx.loadProject(project)
  exportDir <- mlx.getProjectSettings()$directory
  projectName <- basename(tools::file_path_sans_ext(project))
  
  # Check and initialize inputs ------------------------------------------------
  if (is.element("dataFolder", names(params)) & (is.element("nboot", names(params)))) {
    stop("DataFolder and nBoot can not be used both at the same time.", call.=F)
  }

  if (is.element("dataFolder", names(params)) & !is.null(settings$deleteData)) {
    warning("Data sets will not be deleted when using dataFolder argument.")
  }

  .check_strict_pos_integer(nboot, "nboot")
  if (is.null(nboot)) nboot <- 100
  .check_bool(parametric, "parametric")
  .check_in_vector(names(tasks), "tasks names", names(mlx.getScenario()$tasks))
  .check_bool(tasks, "tasks")
  tasks['populationParameterEstimation'] <- TRUE
  
  # check and initialize the settings
  settings <- .initBootstrapSettings(settings, parametric)
  settings$tasks <- tasks
  settings$nboot <- nboot
  plot.res <- settings$plot
  settings$plot<- NULL
  level <- settings$level
  
  settings$level<- NULL
  
  # check if results exist if parametric
  if (parametric && length(mlx.getEstimatedPopulationParameters()) == 0) {
    stop("[ERROR] No valid results present in the project.")
  }
  # check if filters are used
  if (!parametric && !mlx.getAvailableData()[[1]]$current) {
    stop("[ERROR] Nonparametric bootstrap is not available for projects that use data filtering.")
  }
  
  if (!is.null(dataFolder)) {
    dataFiles <- list.files(path = dataFolder, pattern = '*.txt|*.csv')
    if (length(dataFiles) > 0) {
      settings$nboot <- length(dataFiles)
      settings$newResampling <- FALSE
    } else {
      stop("Folder ", dataFolder, " does not exist or does not contain any data set.", call.=F)
    }
    
    dataFolderToUse = dataFolder
    boot.folder = '/bootstrap/'
    if (!dir.exists(file.path(exportDir, boot.folder))) {
      dir.create(file.path(exportDir, boot.folder))
    }
    paramResults <- NULL
    indexSample <- 1
    for (dataFile in dataFiles) {
      cat(paste0("Bootstrap replicate ", indexSample, "/", length(dataFiles), "\n"))
      fullDataPath <- file.path(dataFolder, dataFile)
      projectName <- generateBootstrapProject(project, boot.folder, indexSample, fullDataPath)
      results <- runBootstrapProject(projectName, indexSample, settings)
      paramResults <- rbind(paramResults, results)
      indexSample <- indexSample + 1
    }
  } else {
    # Prepare all the output folders
    
    # generate data sets from the initial data set
    if (parametric) {
      boot.folder = '/bootstrap/parametric/'
    } else {
      boot.folder = '/bootstrap/nonParametric/'
    }

    if (settings$newResampling) {
      cleanbootstrap(project, boot.folder)
    }
    dataFolderToUse = file.path(exportDir, boot.folder, 'data')

    # Convert a project with data formatting to one with no data formatting
    if (!is.null(mlx.getFormatting()) && !parametric) {
      dir.create(file.path(exportDir, boot.folder), showWarnings = FALSE, recursive = TRUE)
      formattedData <- NULL
      formattedData$dataFile <- file.path(exportDir, boot.folder, "noFormatting.csv")
      mlx.saveFormattedFile(formattedData$dataFile)
      formattedData$headerTypes <- mlx.getData()$headerTypes
      formattedData$observationTypes <- mlx.getData()$observationTypes
      mapping <- mlx.getMapping()
      mlx.setData(formattedData)
      mlx.setMapping(mapping)
      project <- file.path(exportDir, boot.folder, paste0(projectName, ".mlxtran"))
      mlx.saveProject(project)
      mlx.setProjectSettings(directory = exportDir)
      mlx.saveProject()
    }

    if (!parametric) {
      # Check if header names match between data and Monolix
      originalData <- read.res(mlx.getData()$dataFile)
      if (!all(mlx.getData()$header == names(originalData))) {
        stop('[ERROR] Monolix headers do not match headers in the data set. Special characters (such as space " ", slash "/", parenthesis "(", or symbols e. "%") as well as several columns with the same header are not supported by bootmlx. Please modify the headers of the original dataset, load the modified dataset in Monolix and retry. If you are using an R version prior to 4.2, the cause of this error could be the encoding of the data set file (UTF-8-BOM instead of UTF-8).')
      }
    }
    
    if (parametric) {
      paramResults <- generateDataSetParametricSimulx(project, settings, boot.folder)
    } else {
      paramResults <- generateDataSetResample(project, settings, boot.folder, dataFolderToUse)
    }
    useLL = F
  }
  
  colnames(paramResults) <- names(mlx.getEstimatedPopulationParameters())
  row.names(paramResults) <- 1:settings$nboot
  paramResults <- as.data.frame(paramResults)
  
  res.file <- file.path(exportDir,boot.folder,"populationParameters.txt")
  write.table(x = paramResults, file = res.file,
              eol = "\n", sep = ",", col.names = TRUE, quote = FALSE, row.names = FALSE)
  cat("Estimated population parameters have been saved: ", normalizePath(res.file), ".\n")
  
  # Plot the results
  if (plot.res) {
    nbFig <- ncol(paramResults)
    x_NbFig <- ceiling(max(sqrt(nbFig),1)); y_NbFig <- ceiling(nbFig/x_NbFig)
    par(mfrow = c(x_NbFig, y_NbFig), oma = c(0, 3, 1, 1), mar = c(3, 1, 0, 3), mgp = c(1, 1, 0), xpd = NA)
    for(indexFigure in 1:nbFig){
      res <- paramResults[,indexFigure]
      resQ <- quantile(res,c((1-level)/2,(1+level)/2))
      bxp <- boxplot(res, xlab = paste0(colnames(paramResults)[indexFigure],'\n',level*100,'% CI: [',toString(round(resQ[1],3)),', ',toString(round(resQ[2],3)),']'))
    }
  }
  return(paramResults)
}

##############################################################################
# Generate data sets by resampling the original data set
##############################################################################
generateDataSetResample = function(project, settings, boot.folder, dataFolder){
  
  if (!file.exists(project)) {
    stop("project '", project, "' does not exist", call.=F)
  }
  
  mlx.loadProject(project)   

  if(is.null(settings)){
    settings$nboot <- 100 
    settings$N <- NA
    settings$covStrat <- NA
    settings$seed <- NA
  }
  if(!is.na(settings$seed)){set.seed(settings$seed)}
  
  # Prepare all the output folders
  exportDir <- mlx.getProjectSettings()$directory
  projectName <- basename(tools::file_path_sans_ext(project))
  dir.create(file.path(exportDir, boot.folder), showWarnings = FALSE, recursive = TRUE)
  
  # Get the data set information
  referenceDataset <- mlx.getData()
  cov <- mlx.getAllCovariateInformation()
  datasetFile <- referenceDataset$dataFile
  refCovInfo  <- mlx.getCovariateInformation()
  # Get the index in mlx.getCovariateInformation()$ of the covariates used in the statistical model
  indexUsedCat <- NULL
  for(indexCov in seq_len(length(refCovInfo$name))){
    if(grepl("categorical", refCovInfo$type[indexCov], fixed=TRUE)){
      # Is the covariate used in the covariate model
      isUsed <- F
      idCov <- which(names(mlx.getIndividualParameterModel()$covariateModel[[1]])==refCovInfo$name[indexCov])
      for(indexParam in 1:length(mlx.getIndividualParameterModel()$covariateModel)){
        isUsed <- isUsed||mlx.getIndividualParameterModel()$covariateModel[[indexParam]][idCov] 
      }
      if(isUsed){
        indexUsedCat <- c(indexUsedCat, indexCov)
      }
    }
  }

  dir.create(file.path(exportDir, boot.folder, 'data'), showWarnings = FALSE)
  
  # Load the data set
  sepBoot <- .getDelimiter(datasetFile)
  dataset <- read.table(file=datasetFile, sep=sepBoot, header=T, dec=".", 
                        check.names=FALSE, comment.char="")
  names(dataset) <- gsub(" ", "_", names(dataset))
  
  indexID <- which(referenceDataset$headerTypes=="id")
  nameID <- unique(dataset[, indexID])
  nbIndiv <- length(nameID)
  if(is.na(settings[['N']])){settings[['N']] = nbIndiv}
  
  validID <- list()
  
  if(is.na(settings$covStrat)){
    nbCAT = 1
    indexPropCAT <- 1
    propCAT <- rep(settings[['N']], nbCAT)
    validID[[indexPropCAT]] <- nameID
  }else{
    indexCAT <- which(names(cov$covariate) == settings$covStrat)
    
    isCatVaryID <- F
    for(indexIDtestCAT in 1:length(nameID)){
      cat <- cov$covariate[which(cov$covariate[,1]==nameID[indexIDtestCAT]),indexCAT]
      if(length(unique(cat))>1){
        isCatVaryID <- T
      }
    }
    
    if(isCatVaryID){# The covariate vary within the subject occasion, 
      cat(paste0("The generated data set can not preserve proportions of ", settings$covStrat," as the covariate vary in within the subject.\n"))
      nbCAT = 1
      indexPropCAT <- 1
      propCAT <- rep(settings[['N']], nbCAT)
      validID[[indexPropCAT]] <- nameID
    }else{
      catValues <- cov$covariate[,indexCAT]
      nameCAT <- unique(catValues)
      nbCAT <- length(nameCAT)
      propCAT <- rep(settings[['N']], nbCAT)
      validID <- list()
      for(indexPropCAT in 1:nbCAT){
        indexIdCat <- which(catValues==nameCAT[indexPropCAT])
        propCAT[indexPropCAT] <- max(1,floor(settings[['N']]*length(indexIdCat)/nbIndiv))
        validID[[indexPropCAT]] <- as.character(cov$covariate[indexIdCat,1])
      }
    }
  }
  warningAlreadyDisplayed <- F
  
  paramResults <- NULL
  for(indexSample in 1:settings$nboot){
    cat(paste0("Bootstrap replicate ", indexSample, "/", settings$nboot, "\n"))
    datasetFileName <- file.path(exportDir,boot.folder,'data', paste0('dataset_',toString(indexSample),'.csv'))
    if(!file.exists(datasetFileName)){
      ##################################################################################################################
      # Generate the data set
      ##################################################################################################################
      cat(" => Generating a data set\n")
      # Sample the IDs
      areAllModalitiesdrawn <- F
      while(!areAllModalitiesdrawn){
        sampleIDs <- NULL
        for(indexValidID in 1:length(validID)){
          samples <- NULL
          if(length(validID[[indexValidID]])==1){
            sampleIDs <- c(sampleIDs,  rep(x = validID[[indexValidID]], times = propCAT[indexValidID]) )
          }else{
            samples <- sample(x = validID[[indexValidID]], size = propCAT[indexValidID], replace = TRUE)
          }
          sampleIDs <- c(sampleIDs, as.character(samples))
        }
        areAllModalitiesdrawn <- T
        if(length(indexUsedCat)>0){
          # Check if all used modalities are in the data set
          for(iUsedCat in 1:length(indexUsedCat)){
            catModalities <- unique(refCovInfo$covariate[, which(names(refCovInfo$covariate)==refCovInfo$name[indexUsedCat[iUsedCat]])])
            catSamplesModalities <- NULL
            for(indexSamples in 1:length(sampleIDs)){
              idIndex = which(refCovInfo$covariate[,1]==sampleIDs[indexSamples])
              catSamplesModalities <- c(catSamplesModalities, refCovInfo$covariate[idIndex,which(names(refCovInfo$covariate)==refCovInfo$name[indexUsedCat[iUsedCat]])])
            }
            areAllModalitiesdrawn <- areAllModalitiesdrawn&(length(catModalities)==length(unique(catSamplesModalities)))
          }
        }
      }
      
      if(!(length(sampleIDs)==settings[['N']])){
        if(!warningAlreadyDisplayed){
          cat(paste0("The generated data set contains only ",length(sampleIDs)," individuals because otherwise categorical proportions of ",settings$covStrat," cannot be kept.\n"))
          warningAlreadyDisplayed = TRUE
        }
      }
      # Get the datas
      data <- NULL
      dataID <- NULL
      indexLineFull <- NULL
      for(indexSampleSize in 1:length(sampleIDs)){
        indexLine <- which(dataset[,indexID]==sampleIDs[indexSampleSize])
        indexLineFull <- c(indexLineFull,indexLine)
        dataID <- c(dataID, rep(indexSampleSize,length(indexLine)))
      }
      data <- dataset[indexLineFull,]
      data[,indexID] <- dataID
      write.table(x = data, file = datasetFileName, sep = sepBoot,
                  eol = '\n', quote = FALSE, dec = '.',  row.names = FALSE, col.names = TRUE )
    } else {
      cat(" => Reusing an existing data set\n")
    }

    # Generate a Monolix project and run the tasks
    projectName <- generateBootstrapProject(project, boot.folder, indexSample, datasetFileName)
    results <- runBootstrapProject(projectName, indexSample, settings)
    paramResults <- rbind(paramResults, results)
    if (settings$deleteData) {
      cat(" => Deleting the data set\n")
      unlink(datasetFileName)
    }
    if (settings$deleteProjects) {
      cat(" => Deleting the project\n")
      unlink(projectName)
      unlink(paste0(tools::file_path_sans_ext(projectName), ".mlxproperties"))
      unlink(tools::file_path_sans_ext(projectName), recursive = TRUE)
    }
  }
  return(paramResults)
}

generateDataSetParametricSimulx = function(project, settings=NULL, boot.folder=NULL){

  if(!file.exists(project)){
    stop("Project ", project, ", does not exist.", call.=F)
  }
  suppressMessages(mlx.initializeLixoftConnectors())
  mlx.loadProject(project)
  obsInfo <- mlx.getObservationInformation()
  mlxHeaders <- mlx.getData()$headerTypes
  if (length(obsInfo$mapping) == 1 && "obsid" %in% mlxHeaders) {
    obsID <- unname(obsInfo$mapping)
    columnName <- mlx.getData()$header[mlx.getData()$headerTypes == "obsid"]
  } else {
    obsID <- NULL
  }
  mapObservation <- c(obsInfo$mapping)

  # return a warning in case of censoring data
  if (is.element("cens", mlx.getData()$headerTypes)) {
    warning("Note that censoring is not performed on boostrap simulations.", call. = FALSE)
  }
  # Prepare all the output folders
  exportDir <- mlx.getProjectSettings()$directory
  projectName <- basename(tools::file_path_sans_ext(project))
  dir.create(file.path(exportDir, boot.folder), showWarnings = FALSE, recursive = TRUE)
  dir.create(file.path(exportDir, boot.folder, 'data'), showWarnings = FALSE, recursive = TRUE)
  
  if (!is.na(settings$seed)) smlx.setProjectSettings(seed = settings$seed)
  
  monolixPath <- mlx.getLixoftConnectorsState()$path
  
  paramResults <- NULL
  for (indexSample in 1:settings$nboot) {
    cat(paste0("Bootstrap replicate ", indexSample, "/", settings$nboot, "\n"))
    datasetFileName <- paste0(exportDir,boot.folder, 'data/dataset_',toString(indexSample),'.csv')
    if(!file.exists(datasetFileName)) {
      cat(" => Generating a bootstrap data set\n")
      suppressMessages(mlx.initializeLixoftConnectors(software="simulx", path=monolixPath, force=TRUE))
      tryCatch(
        {
          # activate only lixoft errors
          op <- get_lixoft_options()
          set_options(errors = TRUE, warnings = FALSE, info = FALSE)
          
          smlx.importMonolixProject(project)
          incrementSeed <- smlx.getGroups()[[1]]$size

          if (!is.na(settings$seed)) {
            smlx.setProjectSettings(seed = settings$seed + indexSample * incrementSeed)
          } else {
            smlx.setProjectSettings(seed = 123456 + indexSample * incrementSeed)
          }
          smlx.runSimulation()
          smlx.exportSimulatedData(path = datasetFileName)

          # add observation ID column
          data <- utils::read.csv(datasetFileName)
          if (!is.null(obsID)) {
            data$obsid <- obsID
          } else {
            if ("obsid" %in% mlxHeaders) {
              for (obs in names(mapObservation)) {
                data$obsid[data$obsid == obs] <- mapObservation[obs]
              }
            }
          }
          utils::write.csv(x = data, file = datasetFileName,
                           row.names = FALSE, quote = FALSE)
          
          set_options(errors = op$errors, warnings = op$warnings, info = op$info)
        },
        message = function(m) {
          m <- as.character(m)
          set_options(errors = op$errors, warnings = op$warnings, info = op$info)
          if (grepl("[ERROR]", as.character(m))) {
            message <- gsub(": ", "", regmatches(m, regexpr(": .*", m, perl=TRUE)))
            stop("[Simulx Error] ", message, call. = FALSE)
          }
        }
      )
      # warning for simulx bug
      .catchSimulxBug()
    } else {
      cat(" => Reusing an existing data set\n")
    }

    # Generate a Monolix project and run the tasks
    suppressMessages(mlx.initializeLixoftConnectors(software="monolix", path=monolixPath, force=TRUE))
    projectName <- generateBootstrapProject(project, boot.folder, indexSample, datasetFileName)

    results <- runBootstrapProject(projectName, indexSample, settings)
    paramResults <- rbind(paramResults, results)
    if (settings$deleteData) {
      cat(" => Deleting the data set\n")
      unlink(datasetFileName)
    }
    if (settings$deleteProjects) {
      cat(" => Deleting the project\n")
      unlink(projectName)
      unlink(paste0(tools::file_path_sans_ext(projectName), ".mlxproperties"))
      unlink(tools::file_path_sans_ext(projectName), recursive = TRUE)
    }
  }
  
  suppressMessages(mlx.initializeLixoftConnectors(software = "monolix"))
  return(paramResults)
}

##############################################################################
# Generate projects based on dataFolder
##############################################################################
generateBootstrapProject = function(project, boot.folder, indexSample, dataFile){

  # Get the data set information
  op <- get_lixoft_options()
  set_options(info = FALSE)
  mlx.loadProject(project)
  set_options(info = op$info)
  
  exportDir <- mlx.getProjectSettings()$directory
  projectName <- basename(tools::file_path_sans_ext(project))
  
  # 
  scenario = mlx.getScenario()
  scenario$tasks = c(populationParameterEstimation=TRUE)
  mlx.setScenario(scenario)
  
  # generate boot headers
  referenceDataset <- mlx.getData()
  bootData <- .getBootData(referenceDataset, dataFile)

  projectBootFileName =  file.path(exportDir, boot.folder, paste0(projectName, '_bootstrap_', toString(indexSample), '.mlxtran'))
  if(!file.exists(projectBootFileName)){
    cat(" => Generating a project with the bootstrap data set\n")

    # deactivate lixoft warnings
    op <- get_lixoft_options()
    set_options(warnings = FALSE, info = FALSE)
    
    mapping <- mlx.getMapping()

    bootData$dataFile <- dataFile
    mlx.setData(bootData)
    
    mlx.setMapping(mapping)

    #      mlx.setStructuralModel(modelFile=mlx.getStructuralModel())
    mlx.setProjectSettings(dataandmodelnexttoproject = FALSE)
    mlx.saveProject(projectFile =projectBootFileName)

    # reactivate lixoft warnings
    set_options(warnings = op$warnings, info = op$info)
  } else {
    cat(" => Reusing an existing project\n")
  }
  return(projectBootFileName)
}


##################################################################################################################
# Clean the bootstrap folder
##################################################################################################################
cleanbootstrap <- function(project,boot.folder){
  # Prepare all the output folders
  cat('Clearing all previous results and projects \n')
  mlx.loadProject(project)
  exportDir <- mlx.getProjectSettings()$directory
  listProjectsToDelete <- list.files(path = paste0(exportDir,boot.folder), pattern = '*.mlxtran')
  
  if(length(listProjectsToDelete)>0){
    for(indexProject in 1:length(listProjectsToDelete)){
      projectBoot <-  paste0(exportDir,boot.folder,listProjectsToDelete[indexProject])
      exportDirToClean <- gsub(x = projectBoot, pattern = '.mlxtran', '')
      unlink(exportDirToClean, recursive = TRUE)
      unlink(projectBoot, recursive = FALSE)
    }
  }
  unlink(file.path(exportDir, boot.folder,'data'), recursive = TRUE, force = TRUE)
}

runBootstrapProject <- function(projectBoot, indexSample, settings) {
    # deactivate lixoft warnings
    op <- get_lixoft_options()
    set_options(warnings = FALSE, info = FALSE)
    
    mlx.loadProject(projectBoot)
    
    # reactivate lixoft warnings
    set_options(warnings = op$warnings, info = op$info)

    # Check if the run was done
    # if(!file.exists(paste0(mlx.getProjectSettings()$directory,'/populationParameters.txt'))){
    launched.tasks <- mlx.getLaunchedTasks()
    g <- mlx.getScenario()
    g$tasks <- settings$tasks
    mlx.setScenario(g)
    scenario.tasks <- mlx.getScenario()$tasks
    mlx.saveProject(projectBoot)
    if (!launched.tasks[["populationParameterEstimation"]]) {
      if (sum(scenario.tasks)==1)
        cat(' => Estimating the population parameters \n')
      else
        cat(' => Running the scenario \n')
      mlx.runScenario()
    } else {
      missing.tasks <- 0
      if (scenario.tasks[['conditionalDistributionSampling']] && !launched.tasks[['conditionalDistributionSampling']]) {
        missing.tasks <- missing.tasks + 1
        if (missing.tasks==1) cat(' => Running the missing tasks \n')
        mlx.runConditionalDistributionSampling()
      }
      if (scenario.tasks[['conditionalModeEstimation']] && !launched.tasks[['conditionalModeEstimation']]) {
        missing.tasks <- missing.tasks + 1
        if (missing.tasks==1) cat(' => Running the missing tasks \n')
        mlx.runConditionalModeEstimation()
      }
      if (scenario.tasks[['standardErrorEstimation']] && launched.tasks[['standardErrorEstimation']]==FALSE) {
        missing.tasks <- missing.tasks + 1
        if (missing.tasks==1) cat(' => Running the missing tasks \n')
        mlx.runStandardErrorEstimation(linearization=g$linearization)
      }
      if (scenario.tasks[['logLikelihoodEstimation']] && !launched.tasks[['logLikelihoodEstimation']]) {
        missing.tasks <- missing.tasks + 1
        if (missing.tasks==1) cat(' => Running the missing tasks \n')
        mlx.runLogLikelihoodEstimation()
      }
      if (missing.tasks==0)  {
        if (sum(scenario.tasks)==1)
          cat(' => Population parameters already estimated \n')
        else
          cat(' => Tasks already performed \n')
      }
    }
    return(mlx.getEstimatedPopulationParameters())
}


###################################################################################
.initBootstrapSettings = function(settings, parametric){
  if (is.null(settings)) settings <- list()
  
  invalidParametricSettings <- intersect(c("N", "covStrat"), names(settings))
  if (parametric == T & length(invalidParametricSettings)) {
    if (length(invalidParametricSettings) > 1) {
      message <- paste0(paste(invalidParametricSettings, collapse = ", "), " arguments are ignored.")
    } else {
      message <- paste0(invalidParametricSettings, " argument is ignored.")
    }
    warning("In case of parametric boostrap, ", message, call. = FALSE)
    settings <- settings[! names(settings) %in% c("N", "covStrat")]
  }
  
  .check_in_vector(
    names(settings), "settings",
    c("plot", "level", "N", "newResampling", "covStrat", "seed", "deleteData", "deleteProjects")
  )
  
  if (!is.element("plot", names(settings))) settings$plot <- F
  if (!is.element("level", names(settings))) settings$level <- 0.90
  if (!is.element("N", names(settings))) settings$N <- NA
  if (!is.element("newResampling", names(settings))) settings$newResampling <- F
  if (!is.element("covStrat", names(settings))) settings$covStrat <- NA
  if (!is.element("seed", names(settings))) settings$seed <- NA
  if (!is.element("deleteData", names(settings))) settings$deleteData <- F
  if (!is.element("deleteProjects", names(settings))) settings$deleteProjects <- F
  
  if (!is.na(settings$N)) .check_strict_pos_integer(settings$N, "settings N")
  .check_double(settings$level, "settings level")
  .check_in_range(settings$level, "settings level", 0, 1)
  .check_bool(settings$newResampling, "settings newResampling")
  .check_bool(settings$plot, "settings plot")
  .check_bool(settings$deleteData, "settings deleteData")
  .check_bool(settings$deleteProjects, "settings deleteProjects")
  if (!is.na(settings$covStrat)) {
    .check_string(settings$covStrat, "settings covStrat")
    covariates <- mlx.getAllCovariateInformation()
    catcovariates <- names(covariates$type[covariates$type %in% c("categorical", "categoricaltransformed", "stratificationcategorical")])
    .check_in_vector(settings$covStrat, "settings covStrat", catcovariates)
  }
  if (!is.na(settings$seed)) .check_strict_pos_integer(settings$seed, "settings seed")
  
  if (parametric) settings <- settings[! names(settings) %in% c("N", "covStrat")]
  return(settings)
}

.getBootData <- function(refData, bootFile, smlxHeaders = NULL) {
  bootData <- refData
  mlxHeaders <- refData$header
  mlxHeadersType <- refData$headerTypes
  df <- utils::read.table(file = bootFile, nrow = 0, sep = .getDelimiter(bootFile), 
                          header = T, comment.char="", check.names = FALSE)
  
  if (is.null(smlxHeaders)) {
    smlxHeaders <- names(df)
  }

  jid <- which(mlxHeadersType=="id")
  if (gsub("_","",mlxHeaders[jid]) == gsub("#","",smlxHeaders[jid]))
    smlxHeaders[jid] <- mlxHeaders[jid]
  intersectHeaders <- intersect(smlxHeaders, mlxHeaders)
  smlxHeadersType <- rep(NA, length(smlxHeaders))
  for (h in intersectHeaders) {
    smlxHeadersType[smlxHeaders == h] <- mlxHeadersType[mlxHeaders == h]
  }
  
  matchSmlx <- c(ID = "id", AMOUNT = "amount", TIME = "time", INFUSION.DURATION = "tinf",
                 ADMINISTRATION.ID = "admid", obsid = "obsid", EVENT.ID = "evid", obs = "observation")
  smlxHeadersType[smlxHeaders %in% setdiff(smlxHeaders, intersectHeaders)] <- unname(matchSmlx[setdiff(smlxHeaders, intersectHeaders)])
  
  obsInfo <- mlx.getObservationInformation()
  if (any(is.na(smlxHeadersType))) {
    smlxHeadersType[is.na(smlxHeadersType) & smlxHeaders %in% obsInfo$name] <- "observation"
  }
  obsNames <- smlxHeaders[smlxHeadersType == "observation"]
  bootData$dataFile <- bootFile
  bootData$headerTypes <- smlxHeadersType
  
  if ("obsid" %in% mlxHeadersType) {
    obsids <- sort(unique(df[[smlxHeaders[!is.na(smlxHeadersType) & smlxHeadersType == "obsid"]]]))
    bootData$observationTypes <- refData$observationTypes[intersect(sort(obsids), names(refData$observationTypes))]
  } else {
    bootData$observationTypes <- unname(refData$observationTypes)
  }
  bootData <- bootData[c("dataFile", "headerTypes", "observationTypes", "nbSSDoses")]
  
  return(bootData)
}

.catchSimulxBug <- function() {
  # warning for simulx bug
  group <- smlx.getGroups()[[1]]
  ids <- list()
  if (length(grep("^mlx_", group$output)) >= 1) {
    outputelements <- smlx.getOutputElements()[grep("^mlx_", group$output, value = TRUE)]
    outputelements <- outputelements[sapply(outputelements, function(o) is.element("ID", names(o$data)))]
    for (element in names(outputelements)) {
      ids[[element]] <- unique(outputelements[[element]]$data$ID)
    }
  }
  if (any(grepl("^mlx_", group$treatment))) {
    treatelements <- smlx.getTreatmentElements()[grep("^mlx_", group$treatment, value = TRUE)]
    treatelements <- treatelements[sapply(treatelements, function(o) is.element("ID", names(o$data)))]
    for (element in names(treatelements)) {
      ids[[element]] <- unique(treatelements[[element]]$data$ID)
    }
  }
  missingIds  <- c()
  if (length(ids) > 1) {
    missingIds <- setdiff(Reduce(union, unname(ids)), Reduce(intersect, unname(ids)))
  }
  if (length(missingIds) > 0) {
    # BUG in Simulx 2020R1 & 2021R1
    warning("Due to a limitation in Simulx, the ids ", paste(missingIds, collapse = ", "),
            " will be excluded from the simulation because they do not appear in some of the treatment and output tables.",
            call. = FALSE)
  }
}

# CHeck functions --------------------------------------------------------------
.check_bool <- function(bool, argname) {
  if (!is.logical(bool))
    stop("Invalid ", argname, ". It must be logical.", call. = F)
  return(bool)
}

.check_string <- function(str, argname) {
  if (!is.string(str))
    stop("Invalid ", argname, ". It must be a string.", call. = F)
  return(str)
}

.check_strict_pos_integer <- function(int, argname) {
  if(!(is.double(int)||is.integer(int)))
    stop("Invalid ", argname, ". It must be a strictly positive integer.", call. = F)
  if ((int <= 0) || (!as.integer(int) == int))
    stop("Invalid ", argname, ". It must be a strictly positive integer.", call. = F)
  return(int)
}

.check_in_range <- function(arg, argname, lowerbound = -Inf, upperbound = Inf, includeBound = TRUE) {
  if (includeBound) {
    if (! all(arg >= lowerbound) | ! all(arg <= upperbound)) {
      stop(argname, " must be in [", lowerbound, ", ", upperbound, "].", call. = F)
    }
  } else {
    if (! all(arg > lowerbound) | ! all(arg < upperbound)) {
      stop(argname, " must be in ]", lowerbound, ", ", upperbound, "[.", call. = F)
    }
  }
  return(invisible(TRUE))
}

is.string <- function(str) {
  isStr <- TRUE
  if (!is.null(names(str))) {
    isStr <- FALSE
  } else if (length(str) > 1) {
    isStr <- FALSE
  } else if (! is.character(str)) {
    isStr <- FALSE
  }
  return(isStr)
}

.check_in_vector <- function(arg, argname, vector) {
  if (is.null(arg)) return(invisible(TRUE))
  if (! all(is.element(arg, vector))) {
    stop(argname, " must be in {'", paste(vector, collapse="', '"), "'}.", call. = F)
  }
  return(invisible(TRUE))
}

.check_double <- function(d, argname) {
  if(!(is.double(d)||is.integer(d)))
    stop("Invalid ", argname, ". It must be a double.", call. = F)
  return(d)
}

Try the Rsmlx package in your browser

Any scripts or data that you put into this service are public.

Rsmlx documentation built on June 22, 2024, 9:29 a.m.