R/sim_communities_function.R

Defines functions sim_communities

Documented in sim_communities

#' Generating meta and local community samples for Neutral Theory analysis
#'
#' This function generates meta and local community samples to be used in neutral theory simulations
#' This method is an extension from the paper "Taxa-Abundance Distributions in Microbial Communities Using Environmental
#' Sequence Data" by Sloan et al.
#' @param n number of samples
#' @param m true migration parameter
#' @param Nmeta total number of reads in each meta community samples. Note that if the argument for Nmeta_sd != 0, then this value will be the mean number of reads across meta community samples.
#' @param r this parameter adjusts the amount of variation between samples. Values can range from 0 to 1. r = 0 will result in a uniformly distributed relative abundance distribution within a sample. Default is 1.
#' @param nOTU number of OTUs
#' @param ps this parameter adjusts the amount of variation between samples, with a range between 0 and 1. This value is the probability that a given OTU's relative abundance with be permuted in a sample. Values of 0 result in homologous taxa abundance distributions across all samples. Default in 1. 
#' @param Nlocal total number of reads in each local community samples. Note that if the argument for Nlocal_sd != 0, then this value will be the mean number of reads across meta community samples.
#' @param Nmeta_sd this parameter adjusts the amount of variation in the number of reads in meta community samples. Specifically, this value is the standard deviation used in a normal distribution with mean Nmeta from which samples sizes are generated
#' @param Nlocal_sd this parameter adjusts the amount of variation in the number of reads in local community samples. Specifically, this value is the standard deviation used in a normal distribution with mean Nlocal from which samples sizes are generated from.
#' @param pool_sim to pool meta/source community data during data simulation
#' @return a list of 4 elements. The first element is a matrix of local commmunity reads. The second element is a matrix of meta commmunity reads. The thrid element is the estimated average shannon diversity for each sample. The fourth element is the average shannon diversity after the multiple samples have been generated given the inputed value for ps and OTUs with no reads across any samples are removed (should be the same or close to the third element, this is mostly just a check).
#' @import vegan
#' @import gtools
#' @export


sim_communities <- function(n, m, Nmeta, r = 1, nOTU, ps = 1, Nlocal, Nmeta_sd = 0, Nlocal_sd = 0, pool_sim = FALSE) #note that H is using log2 here
{
  if(r == 0){r = 0.000001}
  r_vec <- r^(1:nOTU)
  c <- 1/sum((r^(1:nOTU)))
  pi1 <- c*r_vec
  H <- diversity(pi1, index = "shannon", MARGIN = 1, base = exp(1))
  
  meta <- matrix(0, nrow=nOTU,ncol=n+1)
  meta[,1] <- 1:nOTU
  
  #pi_mat <- matrix(pi1, nrow = nOTU, ncol = n, byrow = FALSE)
  perm01_mat <- replicate(n, rbinom(nOTU, 1, ps))
  
  meta[,2:ncol(meta)] <- sapply(1:n, function(col){
    pi_cur <- pi1
    perm01 <- perm01_mat[,col - 1]
    pi_cur[perm01 == 1] <- sample(pi_cur[perm01 == 1], sum(perm01), replace = FALSE)
    return(pi_cur) })
  
  NtMeta <- abs(ceiling(rnorm(n,mean=Nmeta,Nmeta*Nmeta_sd)))
  meta[,2:ncol(meta)] <- t(t(meta[,-1])*NtMeta)
  
  zero_row <- which(rowSums(meta[,-1]) == 0)
  for(i in 1:length(zero_row)){
    zero_col <- sample(1:length(meta[zero_row[i],-1]), 1) + 1
    meta[zero_row[i], zero_col] <- 1
  }
  
  H_new <- mean(diversity(meta[,-1], index = "shannon", MARGIN = 2, base = exp(1)))
  
  #simulate local data
  local <- matrix(0, nrow=nOTU,ncol=n+1)
  local[,1] <- 1:nOTU
  Ntl <- abs(ceiling(rnorm(n,mean=Nlocal,Nlocal*Nlocal_sd)))
  dl <- 1/mean(Ntl)
  pooled_OTU_total <- rowSums(meta[,2:ncol(meta)])
  pooled_rel_abund <- pooled_OTU_total/sum(pooled_OTU_total)
  
  if(pool_sim == TRUE){
    for (i in 2:(n+1))
    {
      pmeta <- pooled_rel_abund#apply(as.matrix(meta[,i]),1,FUN=function(t){t/sum(meta[,i])}) #ith sample relative abundance distribution from source community
      plocal0 <- rdirichlet(1,Ntl[i-1]*m*pmeta) #local rel abund. from dirichlet Nt*m*pmeta
      plocal <- ifelse(plocal0>dl, plocal0, 0)
      local[,i] <- ceiling(plocal*Ntl[i-1])
    }
  }
  else{
    for (i in 2:(n+1))
    {
      pmeta <- apply(as.matrix(meta[,i]),1,FUN=function(t){t/sum(meta[,i])}) #ith sample relative abundance distribution from source community
      plocal0 <- rdirichlet(1,Ntl[i-1]*m*pmeta) #local rel abund. from dirichlet Nt*m*pmeta
      plocal <- ifelse(plocal0>dl, plocal0, 0)
      local[,i] <- ceiling(plocal*Ntl[i-1])
    }  
  }
  
  return(list(local,meta, H, H_new))
}
kathalexknuts/NeutralTheorySimulations documentation built on May 22, 2019, 11:52 p.m.