R/efast_sampling.R

Defines functions efast_generate_sample generate_efast_parameter_sets output_param_sets_per_curve

Documented in efast_generate_sample generate_efast_parameter_sets output_param_sets_per_curve

#' Generates parameter sets for variance-based eFAST Sensitivity Analysis
#'
#' This technique analyses simulation results generated through sampling
#' using the eFAST approach (extended Fourier Amplitude Sampling Test).
#' This perturbs the value of all parameters at the same time, with the
#' aim of partitioning the variance in simulation output between input
#' parameters. Values for each parameter are chosen using fourier frequency
#' curves through a parameters potential range of values. A selected
#' number of values are selected from points along the curve. Though all
#' parameters are perturbed simultaneously, the method does focus on
#' one parameter of interest in turn, by giving this a very different
#' sampling frequency to that assigned to the other parameters.
#' Thus for each parameter of interest in turn, a sampling frequency is
#' assigned to each parameter and values chosen at points along the curve. So
#' a set of simulation parameters then exists for each parameter of interest.
#' As this is the case, this method can be computationally expensive,
#' especially if a large number of samples is taken on the parameter search
#' curve, or there are a large number of parameters. On top of this, to ensure
#' adequate sampling each curve is also resampled with a small adjustment to the
#' frequency, creating more parameter sets on which the simulation should be run.
#' This attempts to limit any correlations and limit the effect of repeated
#' parameter value sets being chosen. Samples are output to CSV file, one per
#' parameter/curve pairing
#'
#' @param FILEPATH Directory where the parameter samples should be output to
#' @param NUMCURVES The number of 'resamples' to perform (see eFAST
#' documentation) - recommend using at least 3
#' @param NUMSAMPLES The number of parameter subsets to generate - should be at
#'  least 65 for eFAST
#' @param PARAMETERS Array containing the names of the parameters of which
#' parameter samples will be generated.  For eFAST, remember to add a
#' parameter named 'Dummy'
#' @param PMIN Array containing the minimum value that should be used for each
#' parameter and the dummy. Sets a lower bound on sampling space
#' @param PMAX Array containing the maximum value that should be used for each
#' parameter and the dummy. Sets an upper bound on sampling space
#' @param write_csv Whether the sample should be output to CSV file. Should be
#' true unless using spartan-db
#' @param return_sample Whether the complete sample should be returned as an
#' R object. Used by spartan db and added to the database
#'
#' @export
efast_generate_sample <- function(FILEPATH, NUMCURVES, NUMSAMPLES,
                                  PARAMETERS, PMIN, PMAX, write_csv=TRUE,
                                  return_sample=FALSE) {

  # Version 3.1 adds pre-execution check functions as part of refactoring:
  # Get the provided function arguments
  input_check <- list("arguments"=as.list(match.call()),"names"=names(match.call())[-1])
  # Run if all checks pass:

  if(check_input_args(input_check$names, input_check$arguments)) {

    parameter_vals <- generate_efast_parameter_sets(FILEPATH, NUMCURVES,
                                                    NUMSAMPLES, PARAMETERS,
                                                    PMIN, PMAX)

    # NOW OUTPUT THE RESULTS - SPLIT BY CURVE FILE, IF REQUESTED
    # SO, WILL HAVE ONE FILE FOR EACH PARAMETER OF INTEREST, FOR EACH CURVE
    if(write_csv==TRUE)
    {
      output_param_sets_per_curve(FILEPATH, NUMCURVES, PARAMETERS, parameter_vals)
    }

    # For spartan DB, we need the object so we can add these to the database
    if(return_sample==TRUE)
      return(parameter_vals)


  }
}


#' Use the eFAST approach to generate parameter sets
#'
#' @param FILEPATH Directory where the parameter samples should be output to
#' @param NUMCURVES The number of 'resamples' to perform (see eFAST
#' documentation) - recommend using at least 3
#' @param NUMSAMPLES The number of parameter subsets to generate - should be at
#'  least 65 for eFAST
#' @param PARAMETERS Array containing the names of the parameters of which
#' parameter samples will be generated.  For eFAST, remember to add a
#' parameter named 'Dummy'
#' @param PMIN Array containing the minimum value that should be used for each
#' parameter and the dummy. Sets a lower bound on sampling space
#' @param PMAX Array containing the maximum value that should be used for each
#' parameter and the dummy. Sets an upper bound on sampling space
generate_efast_parameter_sets <- function(FILEPATH, NUMCURVES, NUMSAMPLES,
                                          PARAMETERS, PMIN, PMAX)
{
  wanted_n <- NUMSAMPLES * length(PARAMETERS) * NUMCURVES # wanted no. of sample points

  # OUTPUT
  # SI[] : first order sensitivity indices
  # STI[] : total effect sensitivity indices
  # Other used variables/constants:
  # OM[] : vector of k frequencies
  # omi : frequency for the group of interest
  # omci[] : set of freq. used for the compl. group
  # X[] : parameter combination rank matrix
  # AC[],BC[]: fourier coefficients
  # FI[] : random phase shift
  # V : total output variance (for each curve)
  # VI : partial var. of par. i (for each curve)
  # VCI : part. var. of the compl. set of par...
  # AV : total variance in the time domain
  # AVI : partial variance of par. i
  # AVCI : part. var. of the compl. set of par.
  # Y[] : model output

  MI <- 4  # maximum number of fourier coefficients
  # that may be retained in calculating the partial
  # variances without interferences between the
  # assigned frequencies

  # Computation of the frequency for the group
  # of interest omi and the # of sample points NUMSAMPLES (here N=NUMSAMPLES)
  omi <- floor(((wanted_n / NUMCURVES) - 1) / (2 * MI) / length(PARAMETERS))

  # This was the fix I put in for Paul's 30 parameters, but has not worked as desired
  # omi <- floor(((wanted_n / NUMCURVES) - 1) / (2 * MI) / (length(PARAMETERS)/3))

  NUMSAMPLES <- 2 * MI * omi + 1
  if (NUMSAMPLES * NUMCURVES < 65)
    message("Error: sample size must be >= 65 per factor")

  PARAMETERVALS <- array(0, dim = c(NUMSAMPLES, length(PARAMETERS),
                                    length(PARAMETERS), NUMCURVES))

  for (PARAMNUM in 1:length(PARAMETERS)) {

    # Algorithm for selecting the set of frequencies.
    # omci(i), i=1:k-1, contains the set of frequencies
    # to be used by the complementary group.

    omci <- efast_setfreq(length(PARAMETERS), omi / 2 / MI, PARAMNUM)

    # Line used to try ti increase number of frequencies for large number of parameters, but has caused undesirable results
    #omci <- efast_setfreq(length(PARAMETERS), length(PARAMETERS), PARAMNUM)
    OM <- array(0, dim = c(1, length(PARAMETERS), 1))

    # Loop over the NUMCURVES search curves.
    for (CURVENUM in 1:NUMCURVES) {
      # Setting the vector of frequencies OM
      # for the k parameters
      cj <- 1
      for (j in 1:length(PARAMETERS)) {
        if (j == PARAMNUM) {
          # For the parameter (factor) of interest
          # RECODE WHEN WORKED OUT OM ARRAY
          OM[PARAMNUM] <- omi
        } else {
          # For the complementary group.
          # RECODE WHEN WORKED OUT ARRAY
          OM[j] <- omci[cj]
          cj <- cj + 1
        }
      }

      # Setting the relation between the scalar
      # variable S and the coordinates
      # {X(1),X(2),...X(k)} of each sample point.
      FI <- array(runif(length(PARAMETERS), min = 0, max = 1),
                  dim = c(length(PARAMETERS), 1, 1))
      FI <- FI * 2 * pi

      S_VEC <- pi * (2 * (1:NUMSAMPLES) - NUMSAMPLES - 1) / NUMSAMPLES
      OM_VEC <- OM[1:length(PARAMETERS)]

      FI_MAT <- array(0, dim = c(length(PARAMETERS), NUMSAMPLES, 1))

      for (i in 1:NUMSAMPLES) {
        FI_MAT[, i, 1] <- FI
      }

      # FORMULA IN ORIGINAL MATLAB CODE:
      #ANGLE = OM_VEC'*S_VEC+FI_MAT;
      # CONVERSION TO R:
      om_vec_svec <- array(OM_VEC %*% t(S_VEC),
                           dim = c(length(PARAMETERS), NUMSAMPLES, 1))
      ANGLE <- om_vec_svec + FI_MAT

      # TRANSPOSE ARRAY
      ANGLET <- array(0, dim = c(NUMSAMPLES, length(PARAMETERS), 1))
      for (i in 1:NUMSAMPLES) {
        ANGLET[i, , 1] <- ANGLE[, i, 1]
      }

      # NOW CALCULATE THE PARAMETER VALUES - THESE ARE STORED IN A
      # MULTIDIMENSIONAL ARRAY, AS EACH CURVE HAS SEVEN SETS OF PARAMETER
      # VALUES
      PARAMETERVALS[, , PARAMNUM, CURVENUM] <- 0.5 + asin(sin(ANGLET)) / pi

      # AS THESE VALUES WILL CURRENTLY BE BETWEEN 0 AND 1, TRANSFORM THE
      # DISTRIBUTION TO GIVE TRUE PARAMETER VALUES
      PARAMETERVALS[, , PARAMNUM, CURVENUM] <- efast_parameterdist(
        PARAMETERVALS[, , PARAMNUM, CURVENUM], PMAX, PMIN, NUMSAMPLES,
        length(PARAMETERS))
    }
  }

  return(PARAMETERVALS)
}

#' Output the generated parameter sets for each curve
#'
#' @param FILEPATH Directory where the parameter samples should be output to
#' @param NUMCURVES The number of 'resamples' to perform (see eFAST
#' documentation) - recommend using at least 3
#' @param PARAMETERS Array containing the names of the parameters of which
#' parameter samples will be generated.  For eFAST, remember to add a
#' parameter named 'Dummy'
#' @param PARAMETERVALS The parameter sets generated by the eFAST sampling method
output_param_sets_per_curve <- function(FILEPATH, NUMCURVES, PARAMETERS, PARAMETERVALS)
{
  for (CURVENUM in 1:NUMCURVES) {
    for (PARAMNUM in 1:length(PARAMETERS)) {
      parameter_file <- paste(FILEPATH, "/Curve", CURVENUM, "_",
                              PARAMETERS[PARAMNUM], ".csv", sep = "")
      output_params <- PARAMETERVALS[, , PARAMNUM, CURVENUM]
      colnames(output_params) <- c(PARAMETERS)

      write.csv(output_params, parameter_file, quote = FALSE,
                row.names = FALSE)

      message(paste("Parameter Set for ", CURVENUM,
                  " Generated and Output to ", FILEPATH, "/Curve", CURVENUM,
                  "_", PARAMETERS[PARAMNUM], ".csv", sep = ""))
    }
  }
}

Try the spartan package in your browser

Any scripts or data that you put into this service are public.

spartan documentation built on May 2, 2019, 9:39 a.m.