R/gap_seibert.R

Defines functions seibert.breed breed.use1 breed.use2 breed.mutate breed.rinterpolate breed.rinterpolate.param.real breed.rinterpolate.param.discrete breed.rinterpolate.param.distributed breed.rinterpolate.param.distr.int breed.rinterpolate.param.fixed seibert.evolve seibert.gapo

Documented in breed.mutate breed.rinterpolate breed.rinterpolate.param.discrete breed.rinterpolate.param.distributed breed.rinterpolate.param.distr.int breed.rinterpolate.param.fixed breed.rinterpolate.param.real breed.use1 breed.use2 seibert.breed seibert.evolve seibert.gapo

#' Combine two values of a parameter type using weighted probabilities
#'
#' @param param A 'param' object
#' @param value1 The first value
#' @param value2 The second value
#' @param prob A named \code{list} in the form \code{functionname=probability}.
#'   The function described by \code{functionname} should take the parameter as
#'   its first argument and (at least) two vector parameters of arbitrary length
#'   and return a vector of identical length. Examples include
#'   \link{breed.use1}, \link{breed.use2}, \link{breed.rinterpolate}, and
#'   \link{breed.mutate}.
#' @param seed A random number seed for replicability.
#' @param cols A combination of 'result', 'func', 'value1', and/or 'value2'. Passing multiple column
#'   names will result in a data.frame; passing a single column will result in a vector. This
#'   is helpful for debugging.
#' @param validate TRUE to validate arguments, FALSE otherwise. In most cases this function gets
#'   called hundreds to thousands of times using values that were generated by the function
#'   itself. Using validation may help track down bugs when unexpected output occurs.
#'
#' @return A vector of new parameter values, or a data.frame if multiple \code{cols} are
#'   passed.
#' @export
#'
#' @references
#' Seibert, J. 2000. Multi-criteria calibration of a conceptual runoff model using a
#' generic algorithm. Hydrology and Earth System Sciences 4(2): 215–224.
#'
#' @examples
#' p <- param.discrete(c('heads', 'tails', 'edge of coin'), weights=c(0.45, 0.45, 0.1))
#' v1 <- random.value(p, n=10)
#' v2 <- random.value(p, n=10)
#' seibert.breed(p, v1, v2, cols=c('value1', 'value2', 'func', 'result'))
#'
seibert.breed <- function(param, value1, value2,
                          prob=list(breed.use1=0.41, breed.use2=0.41,
                                    breed.rinterpolate=0.16, breed.mutate=0.02),
                          seed=NULL, cols="result", validate=TRUE) {
  if(validate) {
    if(!is.param(param)) stop("Argument 'param' must be of type 'param'")
    # check columns
    if(length(cols) == 0) stop("Argument 'cols' must have length > 0")
    badcols <- cols[!(cols %in% c('value1', 'value2', 'func', 'result'))]
    if(length(badcols) > 0) stop("Invalid columns passed as 'cols': ", paste(badcols, collapse=", "))

    # check function names
    if(!is.list(prob)) stop("Argument 'prob' must be of type 'list'")
    if(length(prob) == 0) stop("Argument 'prob' must have length > 0")
    funs <- names(prob)
    if(is.null(funs) || any(nchar(funs) == 0)) stop("Argument 'prob' must be a named list")
    lapply(names(prob), match.fun)

    # check probabilities
    if(any(!is.finite(unlist(prob)))) stop("Function probabilities must be finite")

    # check for bad values
    badvals <- c(value1[!is.param.valid(param, value1)], value2[!is.param.valid(param, value2)])
    if(length(badvals) > 0) stop("Invalid values passed: ", paste0("'", badvals, "'", collapse=", "))
  }

  # if zero length values are passed
  if((length(value1) == 0) && (length(value2) == 0)) {
    if(length(cols) > 1) {
      # return emtpy data frame with requested columns
      return(data.frame(sapply(cols, function(col) NA, simplify=FALSE))[FALSE, ])
    } else {
      return(NULL)
    }
  }

  # making a data frame ensures identical length for value1 and value2
  df <- data.frame(value1=value1, value2=value2, stringsAsFactors = FALSE)

  # param args can override function probabilities
  prob <- sapply(names(prob), function(n) {
    if(!is.null(param[[n]])) {
      param[[n]]
    } else {
      prob[[n]]
    }
  })

  # seed for replicability (before anything random happens)
  if(!is.null(seed)) set.seed(seed)

  df$func <- names(prob)[sample(length(prob), size=nrow(df), prob=unlist(prob), replace=TRUE)]
  df$result <- NA
  # probably a more efficient way to do this
  for(fun in names(prob)) {
    filter <- df$func == fun
    if(sum(filter) == 0) next
    df$result[filter] <- do.call(fun, list(param, df$value1[filter], df$value2[filter]))
  }

  if(length(cols) > 1) {
    df[, cols, drop=FALSE]
  } else {
    df[[cols]]
  }
}

#' Parameter breeding functions
#'
#' @param param An object of type 'param'
#' @param value1 The first parameter value
#' @param value2 The second parameter value
#'
#' @return A vector of valid parameter values
#' @export
#'
#' @rdname breeding
breed.use1 <- function(param, value1, value2) value1

#' @rdname breeding
#' @export
breed.use2 <- function(param, value1, value2) value2

#' @rdname breeding
#' @export
breed.mutate <- function(param, value1, value2) random.value(param, n=length(value1))

#' @rdname breeding
#' @export
breed.rinterpolate <- function(param, value1, value2) UseMethod("breed.rinterpolate")

#' @rdname breeding
#' @export
breed.rinterpolate.param.real <- function(param, value1, value2) {
  runif(n=length(value1), 0, 1) * (value2 - value1) + value1
}

#' @rdname breeding
#' @export
breed.rinterpolate.param.discrete <- function(param, value1, value2) {
  if(param$ordered) {
    int1 <- match(value1, param$choices)
    int2 <- match(value2, param$choices)
    param$choices[round(breed.rinterpolate.param.real(NULL, int1, int2))]
  } else {
    c(value1, value2)[sample(2, size = length(value1), replace = TRUE)]
  }
}

#' @rdname breeding
#' @export
breed.rinterpolate.param.distributed <- function(param, value1, value2) {
  breed.rinterpolate.param.real(param, value1, value2)
}

#' @rdname breeding
#' @export
breed.rinterpolate.param.distr.int <- function(param, value1, value2) {
  round(breed.rinterpolate.param.real(param, value1, value2))
}

#' @rdname breeding
#' @export
breed.rinterpolate.param.fixed <- function(param, value1, value2) {
  # mutation generates a random value, that, for a fixed parameter,
  # will always be the initial value
  breed.mutate(param, value1, value2)
}

#' Evolve a population of parameter values
#'
#' @param params A \code{list} of parameters with identical names to \code{values}
#' @param values A \code{list} of parameter values with identical names to \code{params}.
#'   If all parameters are named parameters, this can be a data.frame.
#' @param probs A vector of probabilities with which to weight sampling of the breeding couples.
#'   Can be NULL, indicating there is no preference.
#' @param rescaler Pass TRUE to rescale \code{probs} to the range (0, 1). Or optionally a function
#'   that transorms probs to valid probability weights. This is useful if
#'   the value of probs used is sometimes negative from some objective function.
#' @param seed Seed for random operations
#' @param validate Pass FALSE to skip validation (may be faster).
#' @param breedargs Passed to \link{seibert.breed}; may be overridden by parameters.
#'
#' @return A \code{list} of parameters with identical names to \code{params} and \code{values}
#' @export
#'
#' @examples
#' params <- list(param.discrete(c('heads', 'tails')),
#'                param.real(c(0, 1)),
#'                beersdrank=param.normal.int(5, 1),
#'                taxes=param.fixed(TRUE))
#' values <- lapply(params, initial.value, n=10)
#'
#' seibert.evolve(params, values)
#'
#' # 'select' for heads
#' for(i in 1:10) {
#'   values <- seibert.evolve(params, values,
#'                            ifelse(values[[1]]=="heads", 0.6, 0.4))
#' }
#'
seibert.evolve <- function(params, values, probs=NULL, rescaler=c("rescale_none", "rescale", "squish"),
                           seed=NULL, validate=TRUE,
                           breedargs=list(breed.use1=0.41, breed.use2=0.41,
                                          breed.rinterpolate=0.16, breed.mutate=0.02)) {
  rescaler <- match.arg(rescaler)
  if(validate) {
    if(!is.list(params)) stop("Argument 'params' must be a list")
    if(any(!sapply(params, is.param))) stop("Argument 'params' must be a list of objects of type 'param'")
    if(!is.list(values)) stop("Argument 'values' must be a list or data frame")
    if(!is.null(probs) && (nrow(data.frame(values)) != length(probs))) {
      stop("Rows in values and length of probs must be equal")
    }
  }

  if(is.null(probs)) {
    probs <- rep(1, unique(sapply(values, length)))
  } else {
    probs <- get(rescaler)(probs)
  }

  # seed for replicability (before anything random happens)
  if(!is.null(seed)) set.seed(seed)

  # first parameter set from the breeding pair
  rows1 <- sample(length(probs), size = length(probs), replace = TRUE, prob=probs)
  # second parameter set from the breeding pair
  rows2 <- sample(length(probs), size = length(probs), replace = TRUE, prob=probs)

  # apply by param and values, which should have identical names (or no names)
  mapply(seibert.breed, params,
         lapply(values, "[", rows1),
         lapply(values, "[", rows2),
         validate=validate, prob=list(breedargs), SIMPLIFY = FALSE)
}

#' Seibert GAP function optimisation
#'
#' @param .fun The function to be called
#' @param ... Arguments to be passed to .fun expressed as param objects.
#' @param .objective An optional function to be called on the result of .fun to produce an
#'   objective value. If this is not passed, the result of .fun is used to maximize.
#' @param .validate Pass FALSE to skip validation. May be faster.
#' @param .seed Seed for random operations.
#' @param .rescaler An optional rescaling method used to rescale the output of .objective.
#' @param .generations The number of generations to evolve the population.
#' @param .pop The population size to use.
#' @param .keepbest Pass TRUE to discard the results of a next generation if no individuals have
#'   an improved objective value.
#' @param .breedargs Passed to \link{seibert.breed}: default values for parameter breeding. These
#'   may be overridden by additional arguments to each parameter.
#' @param .progress A plyr progress bar to use, or 'none'.
#' @param .output Use to output the best parameters, the last generation, or all parameters.
#'
#' @return A data.frame with columns for each named parameter
#' @export
#'
seibert.gapo <- function(.fun, ..., .objective=function(x) x, .validate=TRUE, .seed=NULL,
                         .rescaler=c("rescale_none", "rescale", "squish"),
                         .generations=100, .pop=50, .keepbest=TRUE,
                         .breedargs=list(breed.use1=0.41, breed.use2=0.41,
                                         breed.rinterpolate=0.16, breed.mutate=0.02),
                         .progress=c("none", "text", "tk"), .output=c("best", "last", "all")) {
  .fun <- match.fun(.fun)
  .objective <- match.fun(.objective)
  .output <- match.arg(.output)
  .rescaler <- match.arg(.rescaler)

  if(.output == "all") {
    ply <- plyr::adply
  } else {
    ply <- plyr::a_ply
  }

  # seed for replicability (before anything random happens)
  if(!is.null(.seed)) set.seed(.seed)

  # generate param list
  params <- lapply(list(...), as.param)
  # generate population
  values <- lapply(params, initial.value, n=.pop)
  # generate initial objective values
  if(.progress != "none") message("Generating initial population")
  objects <- do.call(mapply, c(list(.fun), values, list(SIMPLIFY=FALSE, USE.NAMES=FALSE)))
  objectives <- sapply(objects, .objective)

  # use environment to keep track of objecives and values
  valenv <- new.env(parent=emptyenv())
  valenv$values <- values
  valenv$objectives <- objectives

  # loop through generations
  out <- ply(expand.grid(generation=1:.generations), .margins=1, .fun=function(row) {
    newvalues <- seibert.evolve(params, valenv$values, objectives, rescaler = .rescaler,
                                breedargs=.breedargs, validate = .validate)
    newobjects <- do.call(mapply, c(list(.fun), newvalues, list(SIMPLIFY=FALSE, USE.NAMES=FALSE)))
    newobjectives <- sapply(newobjects, .objective)
    if(.keepbest && (max(newobjectives) < max(objectives))) {
      return(data.frame(newvalues, .objective=newobjectives, stringsAsFactors = FALSE))
    } else {
      assign("objectives", newobjectives, envir = valenv)
      assign("values", newvalues, envir = valenv)
      return(data.frame(newvalues, .objective=newobjectives, stringsAsFactors = FALSE))
    }
  }, .progress=.progress)

  if(.output == "all") {
    return(out)
  } else if(.output == "last") {
    valenv$values$.objective <- valenv$objectives
    return(valenv$values)
  } else {
    values$.objective <- objectives
    return(lapply(valenv$values, "[", which.max(valenv$objectives)))
  }
}
paleolimbot/easyoptim documentation built on May 24, 2019, 6:12 p.m.