mkin: Kinetic Evaluation of Chemical Degradation Data

Documented in add_err

#' Add normally distributed errors to simulated kinetic degradation data
#' 
#' Normally distributed errors are added to data predicted for a specific
#' degradation model using \code{\link{mkinpredict}}. The variance of the error
#' may depend on the predicted value and is specified as a standard deviation.
#' 
#' @param prediction A prediction from a kinetic model as produced by
#'   \code{\link{mkinpredict}}.
#' @param sdfunc A function taking the predicted value as its only argument and
#'   returning a standard deviation that should be used for generating the
#'   random error terms for this value.
#' @param secondary The names of state variables that should have an initial
#'   value of zero
#' @param n The number of datasets to be generated.
#' @param LOD The limit of detection (LOD). Values that are below the LOD after
#'   adding the random error will be set to NA.
#' @param reps The number of replicates to be generated within the datasets.
#' @param digits The number of digits to which the values will be rounded.
#' @param seed The seed used for the generation of random numbers. If NA, the
#'   seed is not set.
#' @importFrom stats rnorm
#' @return A list of datasets compatible with \code{\link{mmkin}}, i.e. the
#'   components of the list are datasets compatible with \code{\link{mkinfit}}.
#' @author Johannes Ranke
#' @references Ranke J and Lehmann R (2015) To t-test or not to t-test, that is
#' the question. XV Symposium on Pesticide Chemistry 2-4 September 2015,
#' Piacenza, Italy
#' https://jrwb.de/posters/piacenza_2015.pdf
#' @examples
#' 
#' # The kinetic model
#' m_SFO_SFO <- mkinmod(parent = mkinsub("SFO", "M1"),
#'                      M1 = mkinsub("SFO"), use_of_ff = "max")
#' 
#' # Generate a prediction for a specific set of parameters
#' sampling_times = c(0, 1, 3, 7, 14, 28, 60, 90, 120)
#' 
#' # This is the prediction used for the "Type 2 datasets" on the Piacenza poster
#' # from 2015
#' d_SFO_SFO <- mkinpredict(m_SFO_SFO,
#'                          c(k_parent = 0.1, f_parent_to_M1 = 0.5,
#'                            k_M1 = log(2)/1000),
#'                          c(parent = 100, M1 = 0),
#'                          sampling_times)
#' 
#' # Add an error term with a constant (independent of the value) standard deviation
#' # of 10, and generate three datasets
#' d_SFO_SFO_err <- add_err(d_SFO_SFO, function(x) 10, n = 3, seed = 123456789 )
#' 
#' # Name the datasets for nicer plotting
#' names(d_SFO_SFO_err) <- paste("Dataset", 1:3)
#' 
#' # Name the model in the list of models (with only one member in this case) for
#' # nicer plotting later on.  Be quiet and use only one core not to offend CRAN
#' # checks
#' \dontrun{
#' f_SFO_SFO <- mmkin(list("SFO-SFO" = m_SFO_SFO),
#'                    d_SFO_SFO_err, cores = 1,
#'                    quiet = TRUE)
#' 
#' plot(f_SFO_SFO)
#' 
#' # We would like to inspect the fit for dataset 3 more closely
#' # Using double brackets makes the returned object an mkinfit object
#' # instead of a list of mkinfit objects, so plot.mkinfit is used
#' plot(f_SFO_SFO[[3]], show_residuals = TRUE)
#' 
#' # If we use single brackets, we should give two indices (model and dataset),
#' # and plot.mmkin is used
#' plot(f_SFO_SFO[1, 3])
#' }
#' 
#' @export
add_err <- function(prediction, sdfunc, secondary = c("M1", "M2"),
  n = 10, LOD = 0.1, reps = 2, digits = 1, seed = NA)
{
  if (!is.na(seed)) set.seed(seed)

  prediction <- as.data.frame(prediction)

  # The output of mkinpredict is in wide format
  d_long = mkin_wide_to_long(prediction, time = "time")

  # Set up the list to be returned
  d_return = list()

  # Generate datasets one by one in a loop
  for (i in 1:n) {
    d_rep = data.frame(lapply(d_long, rep, each = reps))
    d_rep$value = rnorm(length(d_rep$value), d_rep$value, sdfunc(d_rep$value))

    d_rep[d_rep$time == 0 & d_rep$name %in% secondary, "value"] <- 0

    # Set values below the LOD to NA
    d_NA <- transform(d_rep, value = ifelse(value < LOD, NA, value))

    # Round the values for convenience
    d_NA$value <- round(d_NA$value, digits)

    d_return[[i]] <- d_NA
  }

  return(d_return)
}