R/sim9fast.R

#'Sim9 Co-occurrence Randomization Algorithm
#'@description An improved implementation of the sequential swap algorithm.
#'@param speciesData a dataframe in which rows are species, columns are sites,
#' and the entries indicate the absence (0) or presence (1) of a species in a 
#' site. Empty rows and empty columns should not be included in the matrix.
#'@param algo the algorithm to use, must be "sim1", "sim2", "sim3", "sim4", "sim5", "sim6", "sim7", "sim8", "sim9", "sim10"; default is "sim9".
#'@param metric the metric used to calculate the null model: choices are "species_combo", "checker", "c_score", "c_score_var", "c_score_skew", "v_ratio"; default is "c_score".
#'@param nReps the number of replicate null assemblages to create; default is 1000 replicates.
#'@param saveSeed TRUE or FALSE. If TRUE the current seed is saved so the simulation can be repeated; default is FALSE.
#'@param burn_in The number of burn_in iterations to use with the simFast algorithm; default is 500 burn-in replicates.
#'@param algoOpts a list containing all the options for the specific algorithm you want to use. Must match the algorithm given in the `algo` argument.
#'@param metricOpts a list containing all the options for the specific metric you want to use. Must match the metric given in the `metric` argument.
#'@param suppressProg TRUE or FALSE. If true, display of the progress bar in the console is suppressed; default is FALSE. This setting is useful for creating markdown documents with `knitr`.
#'@details Generating a set of random matrices with fixed row and column sums is a challenging computational problem. In the ecological literature, these matrices have been created by an MCMC "sequential swap" algorithm (Gotelli 2000). Two rows are two columns are chosen randomly ,and if the 4 cells form a 01/10 pattern, the cell values can be swapped to 10/01 and then replaced in the matrix. This generates a slightly different matrix with the same row and column totals. If the cells cannot be swapped, the trial is discarded. Because only 4 cells are reshuffled, it takes many successive swaps to eliminate transient effects as the matrix moves away from the original configuration and approaches a stationary distribution. A second disadvantage of the sequential swap is that all matrices are not sampled equiprobably because the failed swaps are discarded. This bias seems small for binary matrices that are typically generated by ecological studies (< 100 x 100), but could be important for "big data" applications.
#'
#'EcoSimR uses an unbiased and more efficient algorithm, which Strona et al. (2014) have recently dubbed the "curveball algorithm". In this algorithm, two rows from the matrix are randomly chosen to create a submatrix. Within the submatrix, columns in which the column sums are equal to zero are randomly swapped. The resulting submatrix is then returned to the full matrix, with modified values in two of the rows. If no swapping is possible (which is an improbable event for most ecological matrices), the unswapped matrix is still retained. The curveball algorithm is much more efficient than the sequential swap because most iterations reshuffle many elements in the matrix simultaneously. Strona et al. (2014) show empirically that this algorithm gives unbiased results. However, the resulting MCMC chains will still exhibit autocorrelation for consecutive matrices, especially if the matrix is very large. Future versions of EcoSimR will allow for a thinning parameter to avoid using every sequential matrix from the MCMC chain. The current version of EcoSimR allows for control over the burn-in period and generates a burn-in plot so the user can see whether stationarity has been achieved.
#'
#'@references
#' Chen, Y., P. Diaconis, S.P. Holmes, and J.S. Liu. 2005. Sequential Monte Carlo methods for statistical analysis of tables. JASA 100: 109-120.
#'
#'Cobb, G. W., and Chen, Y.-P. 2003. An Application of Markov Chain Monte Carlo to Community Ecology. American Mathematical Monthly 110: 265-288.
#'
#'Gotelli, N.J. 2000. Null model analysis of species co-occurrence patterns. Ecology 81: 2606-2621.
#'
#' Strona. G., D. Nappo, F. Boccacci, S. Fattorini, and J. San-Miguel-Ayanz. 2014. A fast and unbiased procedure to randomize ecological binary matrices with fixed row and column totals. Nature Communications 5:4114 | DOI: 10.1038/ncomms5114.

#'
#'@examples \dontrun{
#' 
#' ## Run the null model
#' finchMod <- cooc_null_model(dataWiFinches, algo="sim1",nReps=1000000,burn_in = 500)
#' ## Summary and plot info
#' summary(finchMod)
#' plot(finchMod,type="burn_in")
#' plot(finchMod,type="hist")
#' plot(finchMod,type="cooc")
#'}
#'
#'@export





sim9 <- function (speciesData,algo,metric, nReps = 1000 , saveSeed = FALSE,burn_in = 0,algoOpts = list(),metricOpts = list(),suppressProg = TRUE)
{
  
  if(saveSeed){
    randomSeed <- .Random.seed
  } else {
    randomSeed <- NULL
  }

  ## Convert to matrix for type consistency
  if(!is.matrix(speciesData)){ speciesData <- as.matrix(speciesData)}
  
  ### Check for row names hidden in the data frame and automagically strip them.
  
  if(suppressWarnings(is.na(as.numeric(speciesData[2,1])))){
    speciesData <- speciesData[,-1] 
    class(speciesData) <- "numeric"
  }
  
  
  Start.Time <- Sys.time()
  metricF <- get(metric)
  
  Obs <- metricF(speciesData)
  #Trim the matrix to be just rowssums > 0 
  msim <- speciesData[rowSums(speciesData) > 0, ]
  ifelse(burn_in == 0, burn_in <- max(1000,10*nrow(msim)),burn_in <- burn_in)
  burn.in.metric <- vector(mode="numeric",length = burn_in)
  simulated.metric <- vector(mode="numeric",length = nReps)
  # run sequential swap for burn in series
  if(suppressProg){
    bi_pb <- txtProgressBar(min = 0, max = nReps, style = 3, file = stderr())
  } else{
    cat("Burn-in Progress \n")
    bi_pb <- txtProgressBar(min = 0, max = nReps, style = 3)
  }
  for (i in 1:burn_in)
  {
    msim <-sim9_single(msim)
    burn.in.metric[i] <- metricF(msim)
    setTxtProgressBar(bi_pb, i)
  }
  close(bi_pb)
  # run sequential swap for simulated series
  if(suppressProg){
    stat_pb <- txtProgressBar(min = 0, max = nReps, style = 3, file = stderr())
  } else{
    cat("Swap Progress \n")
    
    stat_pb <- txtProgressBar(min = 0, max = nReps, style = 3)
  }
  for (i in 1: nReps)
  {
    msim <-sim9_single(msim)
    simulated.metric[i] <- metricF(msim)    
    setTxtProgressBar(stat_pb, i)
  }
  close(stat_pb)
  
  Sim <- simulated.metric
  End.Time <- Sys.time()
  Elapsed.Time <- format(End.Time-Start.Time,digits=2)
  Time.Stamp <- date()

  sim9.fast.out <- list(Obs=Obs,Sim=Sim, Elapsed.Time=Elapsed.Time, Time.Stamp=Time.Stamp,Metric = metric, Algorithm = algo, N.Reps = nReps, SaveSeed = saveSeed, RandomSeed = randomSeed,Randomized.Data = msim , Data = speciesData,burn.in = burn_in,burn.in.metric= burn.in.metric)
  # plot to screen the trace function for the burn in
  
  class(sim9.fast.out) <- "nullmod"
  return(sim9.fast.out)
}



#' sim9_single 
#' @description Function for a single iteration of the sequential swap.
#' @param speciesData binary presence-absence matrix.
#' @details See details for sim9.
#' @export
sim9_single <- function (speciesData = matrix(rbinom(100, 1, 0.5), nrow = 10)) 
{
  # select two random rows and create submatrix
  ran.rows <- sample.int(nrow(speciesData), 2)
  m.pair <- speciesData[ran.rows, ]
  
  # find columns if any in pair for which colsum =1; these can be swapped
  Sum.Is.One = colSums(m.pair) == 1
  
  # Only swap if there are two or more columns to swap
  if(sum(Sum.Is.One) > 1){
    columns <- which(Sum.Is.One)
    
    # return swap entries in the two rows
    speciesData[ran.rows, columns] <- m.pair[, sample(columns)]
  }
  
  return(speciesData)
}

Try the EcoSimR package in your browser

Any scripts or data that you put into this service are public.

EcoSimR documentation built on May 2, 2019, 7:26 a.m.