R/generate_data.R

Defines functions GenerateData GenerateData.character GenerateData.ecicModel GenerateData.norm GenerateData.rwalk GenerateData.lmECIC GenerateData.spanos1 GenerateData.spanos2 GenerateData.spanos3 GenerateDataMulti GenerateDataMulti.character GenerateDataMulti.ecicModel GenerateDataMulti.norm GenerateDataMulti.rwalk GenerateDataMulti.lmECIC GenerateDataMultiOld GenerateDataBest

Documented in GenerateData GenerateDataBest GenerateDataMulti

library(magrittr)


# GenerateData -----------------------------------------------------------------
#' Generate a sample from a fitted model
#'
#' @param n: Sample size.
#' @param true: A string giving the name of the model to generate data from.
#' @param param: A vector of parameters specifying a fitted true model.
#' @param best: A string giving the name of the model to be given as best.
#' @param models: A string vector of model names for selecting between.
#' @param N: Number of bootstrap samples.
#' @param ic: String giving which information criterion to use (e.g. 'AICc', 'BIC')
#
#' @return: A list containing the following:.
#'
#' @examples
#' GenerateData(25, "norm", c(mu = 0.3, sd = 1.2))
#'
#' @export
GenerateData <- function(n, model, parameters = list()){

  UseMethod("GenerateData", model)

}
#' @export
GenerateData.character <- function(n, model, parameters = list()){
  model2 = tryCatch(ecicModel(model),
                    error = function(e){
                      stop(paste(model, "is not a valid ecicModel type."), call. = FALSE)
                    })
  GenerateData(n, model2, parameters)
}
#' @export
GenerateData.ecicModel = function(n, model, parameters){
  parameters = as.list(parameters)
  if(model$k > 0){
    # Check for extraneous parameters.
    for (parameter in names(parameters) ){
      if ( ! ( parameter %in% model$parameter.names ) ) {
        message(paste("Model", model$name, "has parameters",
                      paste(model$parameter.names, collapse = ", "),
                      "so the argument", parameter, "was ignored."))
        parameters[[parameter]] <- NULL
      }
    }
    # Check for duplicate parameters.
    for (parameter in names(parameters)){
      if (parameter %in% names(model$fixed.parameters)){
        message(paste("The given model has a fixed value of", parameter, "=",
                      model$fixed.parameters[[parameter]], "so the argument",
                      parameter,"=", parameters[parameter], "was ignored."))
        parameters[[parameter]] <- NULL
      }
    }
  }
    # Check if all parameters are supplied
    parameters <- c(parameters, model$fixed.parameters)

  for (parameter in model$parameter.names){
    if (!(parameter %in% names(parameters))){
      stop(paste("Missing value for parameter ", parameter, ".", sep = ""))
    }
  }
  NextMethod("GenerateData", model)
}

#' @export
GenerateData.norm <- function(n, model, parameters){
  # Check values of mean and sd.
  assert_that(is.numeric(parameters$sd),
              length(parameters$sd) == 1, parameters$sd > 0)
  assert_that(is.numeric(parameters$mean), length(parameters$mean) == 1)

  rnorm(n, parameters$mean, parameters$sd)
}
#' @export
GenerateData.rwalk <- function(n, model, parameters){
  assert_that(is.numeric(parameters$step.sd),
              length(parameters$step.sd) == 1, parameters$step.sd > 0)
  assert_that(is.numeric(parameters$step.mean), length(parameters$step.mean) == 1)

  cumsum(rnorm(n, parameters$step.mean, parameters$step.sd))
}
#' @export
GenerateData.lmECIC <- function(n, model, parameters){
  k = length(parameters) - 1
  assert_that(is.numeric(parameters$sd),
              length(parameters$sd) == 1, parameters$sd > 0)
  if(k != model$k) {
    stop("Incorrect number of parameters.")
  }
  if(model$n != n){
    stop(paste("Incorrect n value, expecting n = ", model$n, ".", sep =""))
  }
  fit = model$model.matrix %*% as.matrix(unlist(parameters[1:k]))
  fit + rnorm(n, 0, parameters$sd)
}
#' @export
GenerateData.spanos1 <- function(n, model, param){
  # Computes the log-likelihood and fitted parameters for a dataset and a model.
  #
  # Args:
  #   data:  compatible with the specified model.
  #   n: Sample size.
  #   model: A string specifying the model to compute the log-likehood for.
  #   compress: A boolean specifying if output should be of list or vector type
  #
  # Returns:
  #   A vector/matrix data sample generated by the given model.
  a0 <- parameters[1]
  a1 <- parameters[2]
  sd <- parameters[3]
  x <- spanos.x
  return(a0+(a1*x) + rnorm(n, 0, sd))
}
#' @export
GenerateData.spanos2 <- function(n, model, param){
  # Computes the log-likelihood and fitted parameters for a dataset and a model.
  #
  # Args:
  #   data:  compatible with the specified model.
  #   n: Sample size.
  #   model: A string specifying the model to compute the log-likehood for.
  #   compress: A boolean specifying if output should be of list or vector type.
  #
  # Returns:
  #   A vector/matrix data sample generated by the given model.
  b0 <- parameters[1]
  b1 <- parameters[2]
  b2 <- parameters[3]
  b3 <- parameters[4]
  sd <- parameters[5]
  x <- spanos.x
  return(b0 + (b1*x) + (b2 * x^2) + (b3 * x^3) + rnorm(n, 0, sd))
}
#' @export
GenerateData.spanos3 <- function(n, model, param){
  # Computes the log-likelihood and fitted parameters for a dataset and a model.
  #
  # Args:
  #   data:  compatible with the specified model.
  #   n: Sample size.
  #   model: A string specifying the model to compute the log-likehood for.
  #   compress: A boolean specifying if output should be of list or vector type
  #
  # Returns:
  #   A vector/matrix data sample generated by the given model.
  b0 <- parameters[1]
  b1 <- parameters[2]
  b2 <- parameters[3]
  sd <- parameters[4]
  x <- spanos.x
  return(b0 + (b1*x) + (b2 * x^2) + rnorm(n, 0, sd))
}



# Generate multiple data samples -----------------------------------------------
#' Generate multiple samples from a fitted model
#'
#' @param n: Sample size.
#' @param true: A string giving the name of the model to generate data from.
#' @param param: A vector of parameters specifying a fitted true model.
#' @param best: A string giving the name of the model to be given as best.
#' @param models: A string vector of model names for selecting between.
#' @param N: Number of bootstrap samples.
#' @param ic: String giving which information criterion to use (e.g. 'AICc', 'BIC')
#
#' @return: A list containing the following:.
#'
#' @examples
#' GenerateData(25, "norm", c(mu = 0.3, sd = 1.2))
#' @export
GenerateDataMulti <- function(n, model, parameters, N){
  UseMethod("GenerateDataMulti", model)
}
#' @export
GenerateDataMulti.character <- function(n, model, parameters = list(), N){
  model2 = tryCatch(ecicModel(model),
                    error = function(e){
                      stop(paste(model, "is not a valid ecicModel type."), call. = FALSE)
                    })
  GenerateDataMulti(n, model2, parameters, N)
}
#' @export
GenerateDataMulti.ecicModel <- function(n, model, parameters= list(), N){
  parameters = as.list(parameters)
  if(model$k > 0){
    # Check for extraneous parameters.
    for (parameter in names(parameters) ){
      if ( ! ( parameter %in% model$parameter.names ) ) {
        message(paste("Model", model$name, "has parameters",
                      paste(model$parameter.names, collapse = ", "),
                      "so the argument", parameter, "was ignored."))
        parameters[[parameter]] <- NULL
      }
    }
    # Check for duplicate parameters.
    for (parameter in names(parameters)){
      if (parameter %in% names(model$fixed.parameters)){
        message(paste("The given model has a fixed value of", parameter, "=",
                      model$fixed.parameters[[parameter]], "so the argument",
                      parameter,"=", parameters[parameter], "was ignored."))
        parameters[[parameter]] <- NULL
      }
    }
    # Check if all parameters are supplied
    parameters <- c(parameters, model$fixed.parameters)
  }
  for (parameter in model$parameter.names){
    if (!(parameter %in% names(parameters))){
      stop(paste("Missing value for parameter ", parameter, ".", sep = ""))
    }
  }
  NextMethod("GenerateDataMulti", model)
}
#' @export
GenerateDataMulti.norm <- function(n, model, parameters, N){
  rnorm(n * N, parameters$mean, parameters$sd) %>%
    matrix(ncol = N)
}
#' @export
GenerateDataMulti.rwalk <- function(n, model, parameters, N){
  rnorm(n * N, parameters$step.mean, parameters$step.sd) %>%
    matrix(ncol <- N) %>%
    apply(1, cumsum)
}
#' @export
GenerateDataMulti.lmECIC <- function(n, model, parameters, N){
  k = length(parameters) - 1
  assert_that(is.numeric(parameters$sd),
              length(parameters$sd) == 1, parameters$sd > 0)
  if(k != model$k) {
    stop("Incorrect number of parameters.")
  }
  if(model$n != n){
    stop(paste("Incorrect n value, expecting n = ", model$n, ".", sep =""))
  }
  fit = model$model.matrix %*% as.matrix(unlist(parameters[1:k]))
  rep(fit, N) + rnorm(n*N, 0, parameters$sd) %>% matrix(ncol = N)
}


GenerateDataMultiOld <- function(n, model, parameters, N){
  if (model == "norm2")
    return(rnorm(n*N, parameters[1], parameters[2]) %>% matrix(ncol <- N))
  if (model == "norm0")
    return(rnorm(n*N, 0, 1) %>% matrix(ncol <- N))
  if (model == "norm1")
    return(rnorm(n*N, parameters[1], 1) %>% matrix(ncol <- N))
  if(model == "uwalk"){
    steps <- rnorm(n*N, 0, parameters[1]) %>% matrix(ncol <- N)
    return(apply(steps, 2, cumsum))
  }
  if(model == "gwalk"){
    steps <- rnorm(n*N, parameters[1], parameters[2]) %>% matrix(ncol <- N)
    return(apply(steps, 2, cumsum))}
  if (model == "GRW"){
    anc <- param$Parameters[[1]]
    ms  <- param$Parameters[[2]]
    vs  <- param$Parameters[[3]]
    vv  <- param$vv[1]
    nn  <- param$nn
    return(lapply(1:N, function(x) sim.GRW(n, ms, vs, vv, nn)))
  }
  if (model == "URW") {
    anc <- param$Parameters[[1]]
    vs  <- param$Parameters[[2]]
    vv  <- param$vv[1]
    nn  <- param$nn
    return(lapply(1:N, function(x) sim.GRW(n, ms, vs, vv, nn)))
  }
  if (model == "Stasis"){
    theta  <- param$Parameters[[1]]
    omega  <- param$Parameters[[2]]
    vv  <- param$vv[1]
    nn  <- param$nn
    return(lapply(1:N, function(x) sim.Stasis(n, theta, omega, vv, nn)))
  }
  if (model == "seg") return(sapply(1:N, function(x) GenerateData(n, "seg", param)))
  if (model == "seg2") return(sapply(1:N, function(x) GenerateData(n, "seg2", param)))
  if (model == "smooth") return(sapply(1:N, function(x) GenerateData(n, "smooth", param)))
  if (model == "smooth2") return(sapply(1:N, function(x) GenerateData(n, "smooth2", param)))
  if (model == "lm1") return(sapply(1:N, function(x) GenerateData(n, "lm1", param)))
  if (model == "lm2") return(sapply(1:N, function(x) GenerateData(n, "lm2", param)))
  if (model == "lm3") return(sapply(1:N, function(x) GenerateData(n, "lm3", param)))

}

# Generate data samples with a given best performing model ---------------------
#' Generate multiple samples from a fitted model with specified best performing
#' model
#'
#' @param n: Sample size.
#' @param true: A string giving the name of the model to generate data from.
#' @param param: A vector of parameters specifying a fitted true model.
#' @param best: A string giving the name of the model to be given as best.
#' @param models: A string vector of model names for selecting between.
#' @param N: Number of bootstrap samples.
#' @param ic: String giving which information criterion to use (e.g. 'AICc', 'BIC')
#
#' @return: A list containing the following:.
#'
#' @examples
#' GenerateData(25, "norm", c(mu = 0.3, sd = 1.2))
#' @export
GenerateDataBest <- function(n, true, parameters, best, models, N, ic = 'AIC', ...){
  models = ecicModelList(models)
  best.id = best$ID
  true.id = true$ID
  if (models[[1]]$data.type==1){
    check <- suppressMessages(GenerateData(n, models[[true.id]], parameters))

  if(is.numeric(check)){
    mins <- 0
    best.ix <- which(names(models)==best.id)
    it1 <- 0
    nonzero <- FALSE
    while(!(best.ix %in% mins) &  it1 < 1000){
      data <- suppressMessages(GenerateDataMulti(n, models[[true.id]], parameters, N*3))
      scores <- ICMultiMulti(models, data, ic)$ic
      mins <- apply(scores,1,which.min)
      data.best2 <- as.matrix(data[,mins==best.ix])
      if(best.ix %in% mins) nonzero <- TRUE
      it1 <- it1 + N
    }
    if(!nonzero) return(NA)
    it <- 0
    while(dim(data.best2)[2] < N & it < 4){
      data <- suppressMessages(GenerateDataMulti(n, models[[true.id]], parameters, N*3))
      scores <- ICMultiMulti(models, data, ic)$ic
      mins <- apply(scores,1,which.min)
      data.best <- data[,mins==best.ix]
      data.best2 <- cbind(data.best2, data.best)
      it <- it + 1
    }
    if(dim(data.best2)[2] > N) out <- as.matrix(data.best2[,1:N])
    if(dim(data.best2)[2] <= N) out <- as.matrix(data.best2)
    return(out)}
  }
  if (models[[1]]$data.type=="paleoTS"){
    mins <- 0
    best.ix <- which(names(models)==best.id)
    it1 <- 0
    nonzero <- FALSE
    while(!(best.ix %in% mins) &  it1 < 1000){
      data <- suppressMessages(GenerateDataMulti(n, models[[true.id]], parameters, N*3))
      fits <- ICMultiMulti(models, data, ic)
      ics <- sapply(fits, function(x) sapply(x, function(y) y$ic))
      mins <- apply(ics,1,which.min)
      data.best2 <- as.matrix(data[mins==best.ix])
      if(best.ix %in% mins) nonzero <- TRUE
      it1 <- it1 + N
    }
    if(!nonzero)return(0)
    it <- 0
    while(length(data.best2) < N & it < 4){
      data <- suppressMessages(GenerateDataMulti(n, models[[true.id]], parameters, N*3))
      scores <- ICMultiMulti(models, data, ic)
      ics <- sapply(scores, function(x) sapply(x, function(y) y$ic))
      mins <- apply(ics,1,which.min)
      data.best <- data[mins==best.ix]
      data.best2 <- c(data.best2, data.best)
      it <- it + 1
    }
    if(length(data.best2) > N) out <- data.best2[1:N]
    if(length(data.best2) <= N) out <- data.best2
    return(out)}
  }
mcullan/ecic documentation built on Sept. 3, 2019, 9:57 a.m.