# Hodgepodge of code from the Anne Chao group and Dushoff/Roswell for estimating Hill diversity and its uncertainty.
# Bt_prob_abu = function(x){
# x = x[x>0]
# n = sum(x)
# f1 = sum(x==1)
# f2 = sum(x==2)
# #compute the coverage here
# C = 1 - f1/n*ifelse(f2>0,(n-1)*f1/((n-1)*f1+2*f2),ifelse(f1>0,(n-1)*(f1-1)/((n-1)*(f1-1)+2),0))
# #use coverage to define a weighting for observed frequencies... this is lambda_hat in Chao et al. 2013 and 2014 appendices explaining this bootstrapping procedure. I haven't quite wrapped my head around this yet.
# #coverage deficit=p(next individual is a new species)=proportion of true community absent from sample
# # the denominator is the expected probability of observing all x if x/n=p
# W = max((1-C)/sum(x/n*(1-x/n)^n), 0, na.rm=T)
#
# #use that weighting here to get p.new for observed species in x
# p.new = x/n*(1-W*(1-x/n)^n)
# #then get number of species observed 0 times using chao1
# f0 = ceiling(ifelse(f2>0, (n-1)/n*f1^2/(2*f2),max((n-1)/n*f1*(f1-1)/2, 0, na.rm=T))) #edited to deal with case where f1, f2=0
# #assume that all unobserved have equal p, given by total coverage deficit divided by number of unobserved.
# p0 = max((1-C)/f0, 0, na.rm=T) #consider issues if p0==0
# #p.new includes estimated p's for the observed plus estimated p's for unobserved.
# p.new=c(p.new,rep(p0,f0))
# return(p.new)
# }
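## A minimal sanity-check sketch for Bt_prob_abu() (assuming the uncommented
## version defined elsewhere in the package): the returned vector should have
## length equal to observed richness plus the estimated f0, and should sum to
## roughly 1 (observed species carry total mass ~C, unobserved species ~1-C).
## Kept commented out, like the scratch code below, so nothing runs on load.
# toy <- c(10, 5, 3, 2, 1, 1, 1) # toy abundances with f1 = 3, f2 = 1
# p_hat <- Bt_prob_abu(toy)
# length(p_hat) - sum(toy > 0) # estimated number of unobserved species, f0
# sum(p_hat)                   # should be close to 1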
# ## Can we do a better job of making a fake_community.md?
# Bt_prob_abu_fiddle = function(x){
# x = x[x>0]
# n = sum(x)
# p <- x/n
#
# ## What is the smallest number of unobserved species consistent
# ## with a postulated coverage and target value of pair probability?
# umin <- function(p, C, pp){
# gap <- pp-C^2*sum(p^2)
# if (gap<=0) stop("C too large in umin (Bt_prob_abu_fiddle)")
# return((1-C)^2/gap)
# }
# ## What is the expected richness of a sample from a community?
# ## or pseudo-community?
# expRich <- function(n, alpha, X=NULL, u=1){
# return(
# sum(1-(1-alpha)^n)
# + ifelse(is.null(X), 0, u*(1-(1-X/u)^n))
# )
# }
# ## What is the expected richness of a hypothetical pseudo-community
# ## with u=umin (not an integer) and even distribution therein?
# ## Optionally subtract a postulated richness (for uniroot)
# uminRich <- function(C, n, p, pp, r=0){
# u <- umin(p, C, pp)
# return(expRich(n, C*p, 1-C, u)-r)
# }
# ## Our indirect coverage estimate is a coverage that produces a
# ## pseudo-community with expected richness equal to observed richness
# indCov <- function(x){
# eps <- 1e-3
# r <- length(x)
# n <- sum(x)
# pp <- sum(x*(x-1))/(n*(n-1)) # unbiased estimator of the pair probability sum(p^2)
# Cmax <- pp/sum((x/n)^2)
# print(r)
# print(uminRich(eps*Cmax, n=n, p=x/n, pp=pp))
# print(uminRich((1-eps)*Cmax, n=n, p=x/n, pp=pp))
# return(uniroot(uminRich, lower=eps*Cmax, upper=(1-eps)*Cmax
# , n=n, p=x/n, pp=pp, r=r
# ))
# }
# return(indCov(x))
# }
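## Quick illustration (not package code) of the expected-richness identity the
## commented expRich() helper above relies on: for a multinomial sample of size
## n from probabilities p, P(species i unobserved) = (1 - p_i)^n, so
## E[observed richness] = sum(1 - (1 - p_i)^n). Commented out so it does not
## run on package load.
# p <- (1:20) / sum(1:20)
# n <- 50
# sum(1 - (1 - p)^n)                                        # analytical expectation
# mean(replicate(1e4, sum(stats::rmultinom(1, n, p) > 0)))  # simulated check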
# ## Sampled instead of deterministic abundance resampling
# ## Various things tried, but does not help with conservative CIs
# Bt_prob_abu_samp = function(x){
# x = x[x>0]
# n = sum(x)
# f1 = rpois(1, sum(x==1))
# f2 = rpois(1, sum(x==2))
# #Coverage
# C = 1 - f1/n*ifelse(f2>0,(n-1)*f1/((n-1)*f1+2*f2),ifelse(f1>0,(n-1)*(f1-1)/((n-1)*(f1-1)+2),0))
# #use coverage to define a weighting for observed frequencies... this is lambda_hat in Chao et al. 2013 and 2014 appendices explaining this bootstrapping procedure.
# #coverage deficit=p(next individual is a new species)=proportion of true community absent from sample
# # the denominator is the expected probability of observing all x if x/n=p
# W = (1-C)/sum(x/n*(1-x/n)^n)
#
# #use that weighting here to get p.new for observed species in x
# p.new = x/n*(1-W*(1-x/n)^n)
# #then get number of species observed 0 times using chao1
# f0 = rpois(1, ifelse(f2>0,(n-1)/n*f1^2/(2*f2),(n-1)/n*f1*(f1-1)/2))
# #assume that all unobserved have equal p, given by total coverage deficit divided by number of unobserved.
# p0 = (1-C)/f0
# #p.new includes estimated p's for the observed plus estimated p's for unobserved.
# p.new=c(p.new,rep(p0,f0))
# return(p.new)
# }
################################################################
# x<-1:5
# B<-10
# l<-1
# truediv<-5
#' Assess Chao and Jost 2015 Hill diversity CI
#'
#' Given empirical abundances \code{x}, use the Chao and Jost 2015 MEE method to
#' estimate the sampling distribution of the Hill diversity (given exponent
#' \code{l}), and see where the known, true diversity \code{truediv} falls in
#' that distribution.
#'
#' This function could be extended to handle, e.g., incidence-based estimators.
#'
#' @param x Numeric vector of integer species abundances in a sample
#' @param B Scalar, number of replicate bootstrap draws
#' @template l_template
#' @template q_template
#' @param truediv Scalar, known true Hill diversity of the pool from which
#' sample is drawn, for comparison to estimated sampling distribution
#' @param conf Scalar, target coverage probability of estimated CI
#'
#' @return data.frame with estimated p-value for true diversity, the diversity
#' values of the upper and lower estimated confidence limits, the asymptotic
#' Hill diversity estimate, and the empirical diversity
#'
#' @noRd
checkchao <- function(x
, B
, l
, q = NULL
, truediv
, conf = 0.95){ #, truemu_n
if(!is.null(q)){
l = 1-q
warning("l has been set to 1-q")
}
n <- sum(x)
#columns of this matrix are replicate bootstrap samples
data.bt = stats::rmultinom(B, n, Bt_prob_abu(x))
#get estimator for each bootstrapped sample
pro = apply(data.bt
, 2
, function(boot) Chao_Hill_abu(boot, l))
#mean correction
pro <- pro - mean(pro) + Chao_Hill_abu(x, l)
#break ties: draw the p-value uniformly between P(pro < truediv) and P(pro <= truediv)
less <- sum(pro < truediv) / length(pro)
more <- (length(pro) - sum(pro > truediv)) / length(pro)
p <- stats::runif(1, min(less, more), max(less, more))
lower <- max(pro[which(dplyr::min_rank(pro) <= max(floor(B * (1 - conf) / 2), 1))])
upper <- min(pro[which(dplyr::min_rank(-pro) <= max(floor(B * (1 - conf) / 2) , 1))])
return(data.frame(p = p
, lower = lower
, upper = upper
, truediv = truediv
, "chaoest" = Chao_Hill_abu(x, l)
, "obsD" = dfun(x, l)
)
)}
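## Hedged usage sketch for checkchao(): draw a multinomial sample from a known
## relative-abundance vector, take the pool's Hill diversity (computed with
## rarity(), as in obscp_inf() below) as truediv, and see whether the bootstrap
## CI covers it. Assumes Bt_prob_abu() and Chao_Hill_abu() are defined
## elsewhere in the package. Commented out so it does not run on package load.
# true_p <- (1:20) / sum(1:20)
# truediv <- rarity(true_p, 1) # Hill diversity with l = 1 (species richness, here 20)
# samp <- as.vector(stats::rmultinom(1, 100, true_p))
# checkchao(samp, B = 200, l = 1, truediv = truediv)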
#' Approximate CI for sample Hill diversity
#'
#' Computes Chao and Jost 2015's suggested CI for empirical Hill diversity,
#' sampling from an infinite pool (i.e. with replacement).
#'
#' @template l_template
#' @template q_template
#' @param SAD List, output from function `fit_SAD`
#' @param size Scalar, integer number of individuals to sample.
#' @param B Scalar, integer number of bootstrap replicates.
#' @param truemun Scalar, true expected diversity for a sample of size `size`.
#' @param conf Scalar, nominal confidence level.
#' @param ... Arguments passed to other functions
#'
#' @return A data frame with upper and lower confidence limits and the p-value
#' for the true diversity, computed both with and without the mean correction
#' implemented in Chao and Jost 2015 MEE.
#'
#' @references
#' \insertRef{Chao2015}{MeanRarity}
#'
#'
#' @noRd
obscp_inf <- function(l
, q = NULL
, size
, SAD
, B = 2000
, truemun
, conf = 0.95
, ...){
if(!is.null(q)){
l = 1-q
warning("l has been set to 1-q")
}
sam <- sample_infinite(SAD$rel_abundances, size = size)
data.bt = stats::rmultinom(B
, size
, Bt_prob_abu(sam)) #this generates "bootstrapped" samples
obs <- rarity(sam, l) # observed diversity
pro = apply(data.bt, 2, function(boot) rarity(boot, l)) #sample diversity for bootstraps
pro_mc <- pro - mean(pro) + obs
less <- sum(pro_mc < truemun) / length(pro_mc)
more <- (length(pro_mc) - sum(pro_mc > truemun))/length(pro_mc)
p <- stats::runif(1, min(less, more), max(less, more)) #this is a way to simulate the theoretical distribution of p-values in spite of ties.
lower <- max(pro_mc[which(dplyr::min_rank(pro_mc) <= max(floor(B * (1 - conf) / 2), 1))])
upper <- min(pro_mc[which(dplyr::min_rank(-pro_mc) <= max(floor(B * (1 - conf)/2), 1))])
less_no_mc <- sum(pro < truemun) / length(pro)
more_no_mc <- (length(pro) - sum(pro > truemun)) / length(pro)
p_no_mc <- stats::runif(1, min(less_no_mc, more_no_mc), max(less_no_mc, more_no_mc))
lower_no_mc <- max(pro[which(dplyr::min_rank(pro) <= max(floor(B * (1 - conf) / 2) ,1))])
upper_no_mc <- min(pro[which(dplyr::min_rank(-pro) <= max(floor(B * (1 - conf)/2) ,1))])
return(data.frame("p" = p
, "p_no_mc" = p_no_mc
, upper = upper
, lower = lower
, upper_no_mc = upper_no_mc
, lower_no_mc = lower_no_mc
, "truemu" = truemun
, "obsD" = obs
, "l" = l
, "size" = size ))
}
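## Hedged sketch for obscp_inf(). The fit_SAD() call below is hypothetical
## (argument names are assumptions); what matters is that SAD$rel_abundances
## holds relative abundances, as used inside obscp_inf(). truemun is
## approximated by brute force as the mean sample diversity over repeated
## draws of the same size. Commented out so it does not run on package load.
# SAD <- fit_SAD(rich = 20, simpson = 10) # hypothetical arguments to fit_SAD()
# truemun <- mean(replicate(500, rarity(sample_infinite(SAD$rel_abundances, size = 100), 1)))
# obscp_inf(l = 1, size = 100, SAD = SAD, B = 200, truemun = truemun)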
#' Approximate CI for sample Hill diversity
#'
#' Computes Chao and Jost 2015's suggested CI for empirical Hill diversity,
#' sampling from a finite pool without replacement.
#'
#' @template q_template
#' @template l_template
#' @template ab_template
#' @param size Scalar, integer number of individuals to sample.
#' @param B Scalar, integer number of bootstrap replicates.
#' @param truemun Scalar, true expected diversity for a sample of size `size`.
#' @param conf Scalar, nominal confidence level.
#' @param ... Arguments passed to other functions
#'
#' @return A data frame with upper and lower confidence limits and the p-value
#' for the true diversity, computed both with and without the mean correction
#' implemented in Chao and Jost 2015 MEE.
#'
#' @seealso \url{https://doi.org/10.1111/2041-210X.12349}
#'
#' @noRd
obscp_obs <- function(l
, q = NULL
, size
, ab
, B = 2000
, truemun
, conf = 0.95
, ...){
if(!is.null(q)){
l = 1-q
warning("l has been set to 1-q")
}
sam <- sample_finite(ab, size = size)
data.bt = stats::rmultinom(B
, size
, Bt_prob_abu(sam)) #this generates "bootstrapped" samples
obs <- rarity(sam, l) # observed diversity
pro = apply(data.bt, 2, function(boot) rarity(boot, l)) #sample diversity for bootstraps
pro_mc <- pro - mean(pro) + obs
less <- sum(pro_mc < truemun) / length(pro_mc)
more <- (length(pro_mc) - sum(pro_mc > truemun))/length(pro_mc)
p <- stats::runif(1, min(less, more), max(less, more)) #this is a way to simulate the theoretical distribution of p-values in spite of ties.
lower <- max(pro_mc[which(dplyr::min_rank(pro_mc) <= max(floor(B * (1 - conf) / 2), 1))])
upper <- min(pro_mc[which(dplyr::min_rank(-pro_mc) <= max(floor(B * (1 - conf)/2), 1))])
less_no_mc <- sum(pro < truemun) / length(pro)
more_no_mc <- (length(pro) - sum(pro > truemun)) / length(pro)
p_no_mc <- stats::runif(1, min(less_no_mc, more_no_mc), max(less_no_mc, more_no_mc))
lower_no_mc <- max(pro[which(dplyr::min_rank(pro) <= max(floor(B * (1 - conf) / 2) ,1))])
upper_no_mc <- min(pro[which(dplyr::min_rank(-pro) <= max(floor(B * (1 - conf)/2) ,1))])
return(data.frame("p" = p
, "p_no_mc" = p_no_mc
, upper = upper
, lower = lower
, upper_no_mc = upper_no_mc
, lower_no_mc = lower_no_mc
, "truemu" = truemun
, "obsD" = obs
, "l" = l
, "size" = size ))
}
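## Companion sketch for the finite-pool case: obscp_obs() takes the whole
## community's abundance vector directly and subsamples it without replacement
## via sample_finite(). truemun is again approximated by simulation. Commented
## out so it does not run on package load.
# ab <- 20:1 # a small, fully censused community (20 species, 210 individuals)
# truemun <- mean(replicate(500, rarity(sample_finite(ab, size = 100), 1)))
# obscp_obs(l = 1, size = 100, ab = ab, B = 200, truemun = truemun)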
#' God's unbiased estimator
#'
#' The mean rarity perspective suggests a novel approach to estimating true
#' diversity from a sample, using the fact that the sample mean is an unbiased
#' estimate of the population mean. Although how to estimate a species’ rarity
#' from sample data remains unsolved, we test the promise of this idea using
#' what we call God’s estimator: All-knowing, God can plug in the true
#' population parameters for each species. Nevertheless, maybe for fun, God
#' chooses to compute the sample mean, weighting species’ true rarities by
#' their sampled frequency.
#'
#'
#' @param freqs Non-negative numeric vector of observed species frequencies
#' @param true_p Non-negative numeric vector of true, population-level species
#' frequencies
#' @template l_template
#' @template q_template
#' @examples
#' GUE(freqs = 1:10/sum(1:10), true_p = 1:10/sum(1:10), l = 1) # true richness 10
#' GUE(freqs = rep(1:5,2)/sum(1:10), true_p = 1:10/sum(1:10), l = 1) # true richness 10
#' @export
GUE <- function(freqs, true_p, l, q = NULL) {
if(!is.null(q)){
l = 1-q
warning("l has been set to 1-q")
}
refreq = freqs/sum(freqs)
retrue = true_p/sum(true_p)
wts = refreq[freqs*true_p > 0]
rabs = retrue[freqs*true_p > 0]
ifelse((l == 0)
, exp(sum(wts * log(1/rabs)))
, (sum(wts * (1/rabs)^l))^(1/l))
}
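## A hedged simulation sketch of the unbiasedness idea above, for l = 1
## (arithmetic mean rarity, i.e. species richness): averaging God's estimator
## over many multinomial samples should recover the true richness even though
## single small samples miss species. Commented out so it does not run on load.
# true_p <- (1:10) / sum(1:10)
# gue_reps <- replicate(1e4, GUE(freqs = as.vector(stats::rmultinom(1, 20, true_p))
#                                , true_p = true_p, l = 1))
# mean(gue_reps) # should be close to 10, the true richness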