R/emulation_sensitivity_analysis.R

Defines functions emulate_efast_sampled_parameters emulate_lhc_sampled_parameters

Documented in emulate_efast_sampled_parameters emulate_lhc_sampled_parameters

#' Emulate simulations for a set of eFAST generated parameter values
#'
#' This method runs an ensemble for all parameter value sets specified in a CSV
#' file generated from spartan eFAST sampling techniques. The output is a set
#' of  CSV files that can then be analysed using the spartan analysis methods
#' detailed in Technique 4.
#'
#' @param filepath Path to the folder containing the set of CSV files for eFAST
#' analysis
#' @param surrogateModel Ensemble or emulator to use to emulate the simulation
#' responses for those sets
#' @param parameters Simulation parameter names
#' @param measures Simulation output response names
#' @param num_curves Number of resample curves used in eFAST sampling
#' @param ensemble_set Boolean stating whether this analysis is being run using
#' an ensemble of machine learning methods (TRUE), or a single emulator (FALSE)
#' @param normalise Whether the parameters in the sampling CSV files need to be
#' normalised before input to the emulator (which must take values between 0
#' and 1)
#' @param timepoint Simulation timepoint being analysed, if more than one. See
#' the vignette for analysing more than one timepoint
#' @param csv_file_input Whether parameters to emulate are being supplied in a CSV
#' file. Default is TRUE, with spartanDB this is false
#' @param spartan_sample_obj Where spartanDB is used, or another application that
#' supplied parameter values as an R object created by spartan's sampling method,
#' the name of that R object
#' @param write_csv_file_out Whether results for each curve should be output to a
#' CSV file. SpartanDB instead requires an R object containing this data, which is
#' returned
#' @param normalise_result Whether the resultant predictions should be
#' normalised
#'
#' @export
emulate_efast_sampled_parameters <- function(filepath, surrogateModel,
                                               parameters,  measures,
                                               num_curves,
                                               ensemble_set = TRUE,
                                               normalise = FALSE,
                                               timepoint = NULL,
                                               csv_file_input=TRUE,
                                               spartan_sample_obj=NULL,
                                              write_csv_file_out=TRUE,
                                             normalise_result = FALSE) {

  all_curve_results<-vector("list", length = num_curves)
  for (c in 1:num_curves) {
    curve_results <- NULL

    for (p in 1:length(parameters)) {
      curve_param_result <- NULL

      if(csv_file_input)
      {
        spartan_sample <- read_from_csv(file.path(filepath, paste("Curve", c, "_",
                                        parameters[p], ".csv", sep = "")))
      }
      else
      {
        # Using spartanDB, can read from the spartan sample object
        spartan_sample <- data.frame(spartan_sample_obj[,,p,c])
        colnames(spartan_sample)<-parameters
      }

      if("dummy" %in% parameters)
        parameters_minus_dummy <- setdiff(parameters, "dummy")
      else if("Dummy" %in% parameters)
        parameters_minus_dummy <- setdiff(parameters, "Dummy")
      else
        parameters_minus_dummy <- parameters


      spartan_sample <- spartan_sample[, parameters_minus_dummy]

      for (r in 1:nrow(spartan_sample)) {
        param_sample <- spartan_sample[r, ]

        # Now we can make a prediction for this set of parameters
        # generate the prediction
        if(ensemble_set == TRUE)
        {
          prediction  <-  use_ensemble_to_generate_predictions(
            surrogateModel, param_sample[, parameters_minus_dummy],
            parameters_minus_dummy, measures, normalise,  normalise_result)
        } else {
          prediction <- emulator_predictions(
            surrogateModel, parameters_minus_dummy, measures,
            param_sample[,parameters_minus_dummy], normalise,  normalise_result)
        }


        curve_param_result <- rbind(curve_param_result, prediction)

      }
      header <- NULL
      for (m in 1:length(measures)) {
        header <-  c(header, paste(parameters[p], "_", measures[m], sep = ""))
      }
      colnames(curve_param_result) <- header

      curve_results <- cbind(curve_results, curve_param_result)
    }

    # Now output the curve results as spartan would
    if(write_csv_file_out)
    {
      if (is.null(timepoint))
        curveoutputfile <- paste(filepath, "/Curve", c, "_Results_Summary.csv",
                                 sep = "")
      else
        curveoutputfile <- paste(filepath, "/Curve", c, "_", timepoint,
                                 "_Results_Summary.csv", sep = "")

      write.csv(curve_results, curveoutputfile, quote = FALSE, row.names = FALSE)
    }
    else
    {
      all_curve_results[[c]]<-curve_results
    }
  }

  if(!write_csv_file_out)
    return(all_curve_results)


}

#' Emulate simulations for a set of latin-hypercube generated parameter values
#'
#' This method runs an ensemble for all parameter value sets specified in a CSV
#' file generated from spartan latin-hypercube sampling techniques. The output
#' is a CSV file that can then be analysed using the spartan analysis methods
#' detailed in Technique 3.
#'
#' @param filepath Path to where the analysis output should be stored
#' @param surrogateModel Ensemble or emulator to use to emulate the simulation
#' responses for those sets
#' @param parameters Simulation parameter names
#' @param measures Simulation output response names
#' @param measure_scale Scale of each of the simulation responses
#' @param param_file Name of the CSV file generated by spartan (or of)
#' parameter values, separated in columns - if reading these in from a file
#' @param dataset Name of the R dataset in the environment that contains the
#' parameter sets (the tutorial one is emulated_lhc_values)
#' @param ensemble_set Boolean stating whether this analysis is being run using
#' an ensemble of machine learning methods (TRUE), or a single emulator (FALSE)
#' @param normalise Whether the parameters in the sampling CSV file need to be
#' normalised before input to the emulator (which must take values between 0
#' and 1)
#' @param timepoint Simulation timepoint being analysed, if more than one. See
#' the vignette for analysing more than one timepoint
#' @param timepointscale Scale of the timepoints, if being used
#' @param write_csv_files Whether output should be written to CSV file. Used with spartanDB
#' @param graph_results Whether plots should be produced for this analysis
#'
#' @export
emulate_lhc_sampled_parameters  <-  function(filepath, surrogateModel,
                                             parameters, measures,
                                             measure_scale, param_file = NULL,
                                             dataset = NULL, ensemble_set = TRUE,
                                             normalise = FALSE,
                                             timepoint = NULL,
                                             timepointscale = NULL,
                                             write_csv_files = TRUE,
                                             graph_results=TRUE) {

  #emulate_lhc_sampled_parameters(filepath, built_ensemble, parameters, measures, measure_scale, dataset = emulated_lhc_values)

  if(!is.null(param_file) | !is.null(dataset)) {
    if(!is.null(param_file)) {
      # Read in the parameter value file generated in sampling
      spartan_sample <- read.csv(param_file, header = T)
    } else {
      spartan_sample <- dataset
    }

    param_values_with_predictions <- NULL

    for (samp in 1:nrow(spartan_sample)) {
      # Retrieve the parameter row:
      params <- spartan_sample[samp, ]

      # generate the prediction - need to be careful here whether we are
      # using an ensemble or emulator
      if(ensemble_set == FALSE) {
        prediction <- emulator_predictions(surrogateModel, parameters, measures,
                                           params, normalise)
      } else {
        prediction <- use_ensemble_to_generate_predictions(surrogateModel,
                                                           params, parameters,
                                                           measures,
                                                           normalise_values = normalise,
                                                           normalise_result = normalise)
      }

      # Bind to the list of parameters
      param_values_with_predictions <- rbind(param_values_with_predictions,
                                              cbind(params, prediction))
    }

    # Now write this out as if it was the spartan LHC summary
    colnames(param_values_with_predictions) <- c(parameters, measures)


    if(write_csv_files)
    {
      if (is.null(timepoint)) {
        lhcsummaryfilename <- file.path(filepath, "Emulated_LHC_Summary.csv")
        correlation_coeffs <- file.path(filepath, "CorrelationCoefficients.csv")

      } else {
        lhcsummaryfilename <- file.path(filepath, paste("Emulated_LHC_Summary_",timepoint,".csv",sep=""))
        correlation_coeffs <- file.path(filepath, paste("CorrelationCoefficients_",timepoint,".csv",sep=""))
      }

      write.csv(param_values_with_predictions, lhcsummaryfilename,
                quote = FALSE, row.names = FALSE)
    }


    # Analyse:
    # Method takes care of adding timepoint to the file names
    prccs<-lhc_generatePRCoEffs(FILEPATH=filepath, parameters,  measures,
                                LHCSUMMARYFILENAME=NULL,
                         "CorrelationCoefficients.csv",
                         c(timepoint), timepointscale, check_done=TRUE,
                         write_csv_files=write_csv_files,
                         lhc_summary_object=param_values_with_predictions)

    if(graph_results)
    {
      lhc_graphMeasuresForParameterChange(
        filepath, parameters, measures, measure_scale, CORCOEFFSOUTPUTFILE=NULL,
        LHCSUMMARYFILENAME=NULL, OUTPUT_TYPE = c("PNG"), TIMEPOINTS = c(timepoint),
        TIMEPOINTSCALE = timepointscale, check_done=TRUE, corcoeffs_output_object=prccs,
        lhc_summary_object=param_values_with_predictions)
    }

    # return the PRCC values, spartan DB uses these to add to a DB
    if(!write_csv_files)
    {
      message("Object returned contains ensemble generated predictions and PRCC values for this experiment")
      return(list("predictions"=param_values_with_predictions,"prccs"=prccs))
    }

  } else {
    message("You must specify either an R object containing parameter values, or the name of a CSV file in the filepath containing parameter values")
  }
}

Try the spartan package in your browser

Any scripts or data that you put into this service are public.

spartan documentation built on May 2, 2019, 9:39 a.m.