# R/adaptive_simulated_annealing.R

#' @title Adaptive simulated annealing
#' 
#' @export
#' 
#' @description Structure learning using adaptive simulated annealing.
#' 
#' @param poset a matrix containing the cover relations
#' @param obs a matrix of observations, where each row corresponds to a
#' vector indicating whether an event has been observed (\code{1}) or not
#' (\code{0})
#' @param times an optional vector of sampling times per observation
#' @param lambda.s rate of the sampling process. Defaults to \code{1.0}
#' @param weights a vector containing observation weights
#' @param L number of samples to be drawn from the proposal in the E-step
#' @param sampling sampling scheme used to generate hidden genotypes, \code{X}.
#' OPTIONS:
#' \itemize{
#' \item \code{"forward"} - generate occurrence times according to the current
#' rate parameters and, from them, generate the hidden genotypes, \code{X}.
#' \item \code{"add-remove"} - generate genotypes, \code{X}, from the observed
#' genotypes, \code{Y}, using a two-step proposal. First, pick a move uniformly
#' at random: either add or remove an event. Events are chosen for removal
#' with probability proportional to their rates, and for addition with
#' probability inversely proportional to their rates. Second, make the
#' genotypes compatible with the poset by adding or removing all events that
#' are incompatible with it.
#' \item \code{"backward"} - enumerate all genotypes within Hamming distance
#' \code{neighborhood.dist} of the observation.
#' \item \code{"bernoulli"} - generate genotypes from a Bernoulli distribution
#' with success probability \eqn{p = \epsilon}.
#' \item \code{"pool"} - generate a pool of compatible genotypes according to
#' the current rate parameters and sample \code{K} observations with
#' probability proportional to their Hamming distance.
#' }
#' @param max.iter the maximum number of EM iterations. Defaults to \code{100}
#' iterations
#' @param update.step.size number of EM steps after which the number of
#' samples, \code{L}, is doubled. \code{L} is increased if the difference in
#' the parameter estimates between consecutive batches is greater than the
#' tolerance level, \code{tol}. Defaults to \code{20}
#' @param tol convergence tolerance for the error rate and the rate parameters.
#' The EM algorithm runs until the difference between the average estimates in
#' the last two batches is smaller than \code{tol}, or until \code{max.iter} is
#' reached.
#' @param max.lambda.val an optional upper bound on the value of the rate
#' parameters. Defaults to \code{1e6}
#' @param T0 initial value of the temperature. Defaults to \code{50}
#' @param adap.rate an optional argument specifying the constant adaptation
#' rate. Defaults to \code{0.3}
#' @param acceptance.rate an optional argument specifying the desired
#' acceptance rate. Defaults to 1/\code{p}, where \code{p} is the number of
#' events or mutations
#' @param step.size an optional argument indicating the number of iterations
#' after which the temperature is updated. Defaults to
#' \code{max.iter.asa / 10}
#' @param max.iter.asa the maximum number of simulated annealing iterations.
#' Defaults to \code{10000} iterations
#' @param neighborhood.dist an integer value indicating the Hamming distance
#' between the observation and the samples generated by \code{"backward"}
#' sampling. This option is used if \code{sampling} is set to \code{"backward"}.
#' Defaults to \code{1}
#' @param adaptive a logical value indicating whether to use an adaptive
#' annealing schedule. Defaults to \code{TRUE}
#' @param outdir an optional argument indicating the path to the output
#' directory
#' @param thrds number of threads for parallel execution
#' @param verbose an optional argument indicating whether to output logging
#' information
#' @param seed seed for reproducibility
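#' 
#' @examples
#' # Minimal usage sketch: the poset, the observations and all argument values
#' # below are illustrative only.
#' \dontrun{
#' p <- 4
#' # Toy poset on four events: event 1 must occur before event 2
#' poset <- matrix(0L, nrow=p, ncol=p)
#' poset[1, 2] <- 1L
#' # Random binary observations, one row per sample
#' obs <- matrix(sample(0:1, 100 * p, replace=TRUE), nrow=100, ncol=p)
#' fit <- adaptive.simulated.annealing(poset, obs, L=100,
#'                                     sampling="add-remove",
#'                                     max.iter.asa=1000L, seed=10L)
#' }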
adaptive.simulated.annealing <- function(
  poset, obs, times=NULL, lambda.s=1.0, weights=NULL, L,
  sampling=c('forward', 'add-remove', 'backward', 'bernoulli', 'pool'),
  max.iter=100L, update.step.size=20L, tol=0.001, max.lambda.val=1e6, T0=50,
  adap.rate=0.3, acceptance.rate=NULL, step.size=NULL, max.iter.asa=10000L,
  neighborhood.dist=1L, adaptive=TRUE, outdir=NULL, thrds=1L, verbose=FALSE,
  seed=NULL) {
  
  sampling <- match.arg(sampling)
  N <- nrow(obs)
  
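  # Coerce the poset and observation matrices to integer storage before
  # passing them to the compiled routine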
  if (!is.integer(poset))
    poset <- matrix(as.integer(poset), nrow=nrow(poset), ncol=ncol(poset))
  
  if (!is.integer(obs))
    obs <- matrix(as.integer(obs), nrow=N, ncol=ncol(obs))
  
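  # If no sampling times are provided, pass a zero vector and record that
  # sampling times are unavailable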
  if (is.null(times)) {
    times <- numeric(N)
    sampling.times.available <- FALSE
  } else {
    sampling.times.available <- TRUE
  }
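  # By default, all observations receive equal weight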
  if (is.null(weights))
    weights <- rep(1, N)

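  # If the batch size exceeds the number of EM iterations, shrink it to a
  # fifth of max.iter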
  if (update.step.size > max.iter)
    update.step.size <- as.integer(max.iter / 5)

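  # Default target acceptance rate: one over the number of events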
  if (is.null(acceptance.rate)) {
    p <- ncol(obs)
    acceptance.rate <- 1/p
  }
  
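  # By default, update the temperature every max.iter.asa / 10 iterations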
  if (is.null(step.size))
    step.size <- floor(max.iter.asa/10)

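  # Default to the current working directory for output files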
  if (is.null(outdir))
    outdir <- file.path(getwd(), "")
  # else if (!dir.exists(outdir))
  #   outdir <- file.path(getwd(), "")
  
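  # Draw a random seed when none is provided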
  if (is.null(seed))
    seed <- sample.int(3e4, 1)

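  # Hand all arguments to the compiled native routine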
  .Call('_adaptive_simulated_annealing', PACKAGE='mccbn', poset, obs, times,
        lambda.s, weights, as.integer(L), sampling, as.integer(max.iter),
        as.integer(update.step.size), tol, max.lambda.val, T0, adap.rate,
        acceptance.rate, as.integer(step.size), as.integer(max.iter.asa),
        as.integer(neighborhood.dist), adaptive, outdir,
        sampling.times.available, as.integer(thrds), verbose, as.integer(seed))
}
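
# Illustrative sketch only (not used by the package): one possible adaptive
# temperature update, applied every `step.size` iterations, that steers the
# observed acceptance rate towards the target `acceptance.rate` using the
# adaptation constant `adap.rate`. The exact rule implemented in the compiled
# backend may differ.
adapt.temperature.sketch <- function(temperature, observed.acceptance,
                                     target.acceptance, adap.rate) {
  # Negative feedback: cool down when moves are accepted more often than
  # desired, and warm up when they are accepted less often
  temperature * exp(adap.rate * (target.acceptance - observed.acceptance))
}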