#' Randomize community data matrix with dispersal null model
#'
#' Null model that maintains species richness and settles species in simulated plots
#' with a probability proportional to their abundance in and distance from the plot in
#' question.
#'
#' @param picante.cdm Picante-style community data matrix with
#' communities/plots/plots/etc as rows and species as columns
#' @param tree Phylo object
#' @param distances.among A symmetric distance matrix, summarizing the distances among all
#' plots from the cdm.
#' @param abundance.matters Default is TRUE. If FALSE, species are sampled from
#' neighboring grid cells with equal probability.
#' @param abundance.assigned Default is "directly". If set to "explore", rather than
#' assigning species abundances equal to those in neighboring cells, a normal distribution
#' of standard deviation 1 is created around that abundance, rounded to whole numbers,
#' negative numbers are converted to 1, and then abundance is assigned from this
#' distribution. Note, importantly, that if the cdm is a relative abundance matrix, as
#' opposed to an absolute abundance matrix, if "explore" is chosen, odd results are likely
#' (don't do it). If set to "overall", which may be preferable when abundance.matters is
#' set to FALSE, species' are assigned abundances by drawing from the vector of non-zero
#' abundances from the original cdm.
#'
#' @details This function can run quite slowly, as it employs a while loop to discard
#' selected species if they are already contained in the plot being simulated. The
#' function contains an internal check to ensure that it doesn't get stuck in an
#' indefinite while loop. Specifically, it checks that an observed plot does not
#' contain more species than the sum of unique species in the remaining plots. The
#' argument distances.among is flexible, and can relate to e.g., straight-line distances,
#' great-circle distances, and ecological distances. If the argument abundance.matters is
#' set to FALSE, then species' abundances in neighboring grid cells does not influence
#' their probability of settling (only their proximity influences the probability of
#' settling). If this is the case, it is recommended that the argument abundance.assigned
#' be set to "overall".
#'
#' @return A matrix with the same dimensions as the input cdm.
#'
#' @export
#'
#' @importFrom picante sample2matrix
#'
#' @references Miller, E. T. 2016. A new dispersal-informed null model for
#' community ecology shows strong performance. biorxiv.
#'
#' @examples
#' #set up a matrix to simulate lat/long
#' coordDF <- matrix(ncol=2, nrow=100)
#'
#' coordDF[,1] <- runif(n=100, min=40, max=50)
#' coordDF[,2] <- runif(n=100, min=-130, max=-120)
#'
#' #convert to data frame, give column names. also give row names such as if the cells had
#' #names (as they should or there'd be no way to track them)
#' coordDF <- as.data.frame(coordDF)
#'
#' row.names(coordDF) <- paste("cell", 1:100, sep="")
#'
#' names(coordDF) <- c("latitude","longitude")
#'
#' #calculate the distances among all of these points. in the real program you're going to
#' #want to calculate great arc distance or whatever it's called
#' distances <- dist(coordDF, diag=TRUE, upper=TRUE)
#'
#' #turn it into a symmetric distance matrix
#' distances <- as.matrix(distances)
#'
#' #simulate a regional phylogeny of 100 species
#' tree <- geiger::sim.bdtree(b=1, d=0, stop="taxa", n=100)
#'
#' #simulate a community data matrix of 100 cells by 100 species. do it 4 times so that
#' #you can use your simulateComm function and have it span a reasonable range of richness
#' sim.abundances <- round(rlnorm(5000, meanlog=2, sdlog=1)) + 1
#'
#' cdm1 <- simulateComm(tree, richness.vector=10:34, abundances=sim.abundances)
#' cdm2 <- simulateComm(tree, richness.vector=10:34, abundances=sim.abundances)
#' cdm3 <- simulateComm(tree, richness.vector=10:34, abundances=sim.abundances)
#' cdm4 <- simulateComm(tree, richness.vector=10:34, abundances=sim.abundances)
#'
#' #bind these into a list and use dplyr rbind_all to bind together. recast as data frame
#'
#' cdmList <- list(cdm1, cdm2, cdm3, cdm4)
#'
#' cdm <- dplyr::bind_rows(cdmList)
#'
#' cdm <- as.data.frame(cdm)
#'
#' #fix as necessary manually here (i.e. make sure dimensions are 100 x 100), seems to
#' #usually work. then give cell names
#'
#' row.names(cdm) <- paste("cell", 1:100, sep="")
#'
#' #fill NAs with 0s.
#'
#' cdm[is.na(cdm)] <- 0
#'
#' #not run
#' #newCDM <- dispersalNull(cdm, tree, distances)
dispersalNull <- function(picante.cdm, tree, distances.among, abundance.matters=TRUE,
abundance.assigned="directly")
{
#create a check to ensure that no cell in the CDM contains more species than are found
#in the sum of the remaining cells
tempCheck <- checkCDM(picante.cdm)
if(tempCheck == "fail")
{
stop("CDM incompatible with dispersalNull model. See 'checkCDM' for details")
}
#create a check to ensure that all species that occur in the cdm are also in the tree
if(length(setdiff(names(picante.cdm),tree$tip.label))!=0)
{
stop("You have included species in your cdm that are not in your phylogeny")
}
#create a check to ensure that cdm and distances.among contain the same exact cells
if(length(setdiff(row.names(picante.cdm), row.names(distances.among))) != 0 &
length(setdiff(row.names(distances.among), row.names(picante.cdm))) != 0)
{
stop("Your cdm plot names and distance matrix names do no match")
}
#create a check to ensure that cdm and distances.among are in the same order cell-wise
if(any(row.names(picante.cdm)!=row.names(distances.among)))
{
stop("Your cdm and distance matrix are not in the same plot order")
}
#calculate species richness of input cdm. for each plot, sample a grid cell based
#on its proximity to the plot in question (this is your sampleNear function), then
#sample a species based on its abundance in the sampled grid cell. if that species is
#already included in the replacement plot, run this over again. do this entire loop
#per plot while the length of replacement species is < the observed richness for
#that plot
richness <- apply(picante.cdm, 1, lengthNonZeros)
#generate a vector of all non-zero abundances from the input CDM
overallAbundance <- picante.cdm[picante.cdm!=0]
#create a list that you will use to save vectors of species into (one list element
#per plot)
replacementList <- list()
for(i in 1:dim(picante.cdm)[1])
{
#create a temporary empty data frame in phylocom format. make it just long enough
#for the plot in question
phylocom <- matrix(ncol=3, nrow=richness[i], 0)
phylocom <- as.data.frame(phylocom)
#give it appropriate names
names(phylocom) <- c("plot", "abund", "id")
#start the while loop. set j equal to 0. this will be like a row ID. each time
#an appropriate species is found, it will add one to the ID and move along the
#phylocom df. the while loop continues while the length of species for a given
#plot remains less than the richness of the observed plot
j <- 0
while(length(phylocom[phylocom$plot==row.names(picante.cdm)[i],]$id) < richness[i])
{
#select the plot you will sample from. give your selectNear a column from
#the distance matrix
selectedPlot <- selectNear(distances.among[,i])
#select a species from that plot. probability proportional to abundance
if(abundance.matters)
{
temp <- sample(x=picante.cdm[selectedPlot,], size=1,
prob=picante.cdm[selectedPlot,])
}
#or where probability is not proportional to abundance in neighboring cell.
#note that you need to restricted the species from the selected plot to be
#those with non-zero abundances! previously did not, this was a bug
else
{
#note that this means the selected species lose their names, so add them
#back in
possible <- picante.cdm[selectedPlot,][picante.cdm[selectedPlot,] !=0 ]
names(possible) <- names(picante.cdm[selectedPlot,])[picante.cdm[selectedPlot,]!=0]
temp <- sample(x=possible, size=1)
}
#if the species selected is not found in that plot in the growing phylocom
#data frame, add the relevant info to that frame
if(!(names(temp) %in% phylocom[phylocom$plot==row.names(picante.cdm)[i],]$id))
{
#move the row ID along
j <- j+1
#set the plot name to be correct
phylocom[j,1] <- row.names(picante.cdm)[i]
#assign the species the abundance it had in the original CDM
if(abundance.assigned=="directly")
{
phylocom[j,2] <- temp
}
#or else create a normal distribution and sample from it
else if(abundance.assigned=="explore")
{
distribution <- round(rnorm(n=100, mean=as.numeric(temp), sd=1))
distribution[distribution < 0] <- 1
chosen <- sample(distribution, 1)
phylocom[j,2] <- chosen
}
else if(abundance.assigned=="overall")
{
chosen <- sample(overallAbundance, 1)
phylocom[j,2] <- chosen
}
else
{
stop("abundance.assigned argument set to unrecognized value")
}
phylocom[j,3] <- names(temp)
}
}
#set the appropriate element in the list to the temporary dataframe
replacementList[[i]] <- phylocom
}
#reduce the list to a single data frame and convert to matrix
newCDM <- Reduce(rbind, replacementList)
newCDM <- sample2matrix(newCDM)
#it comes out in a weird order, so sort back to same plot order as input CDM
newCDM <- newCDM[row.names(picante.cdm),]
#add columns for species that were not recorded in any plots
notFound <- setdiff(tree$tip.label, names(newCDM))
if(length(notFound > 0))
{
toBind <- matrix(nrow=dim(newCDM)[[1]], ncol=length(notFound), 0)
colnames(toBind) <- notFound
newCDM <- cbind(newCDM, toBind)
}
#and sort into same species order as input cdm
newCDM <- newCDM[,colnames(picante.cdm)]
#for reasons that are not entirely clear to me, this is still returning as a DF
#convert to matrix
newCDM <- as.matrix(newCDM)
newCDM
}
selectNear <- function(distances.between)
{
#distances.between is a vector of distances between the focal cell and other cells
#first exclude distances to the focal cell (and any other with distance = 0)
distances.between <- distances.between[distances.between != 0]
#now sample a cell name with a probability proportional to the inverse of the distance
#from the focal cell
newCell <- sample(x=names(distances.between), size=1, prob=1/distances.between)
newCell
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.