#' Three-component geometric mixture model for estimating species richness
#'
#' This function implements the species richness estimation with a three-component mixture
#' exponential-mixed Poisson model (or three-component mixture exponential-mixed geometric model)
#' for estimating species richness
#'
#' @param input_data An input type that can be processed by convert()
#' @param cutoff Maximal frequency count of the data used to estimating the species richness. Default 10.
#'
#' @return An object of class \code{alpha_estimate}
#'
#' @importFrom breakaway convert alpha_estimate
#' @export
#'
#' @examples
#' library(breakaway)
#' data(apples)
#' three_geometric_model(apples, cutoff = 50)
#' @export
three_geometric_model <- function(input_data, cutoff = 10, ...) {
input_data <- convert(input_data)
## convert the data to frequency table
included <- input_data[input_data$index <= cutoff,]
excluded <- input_data[input_data$index > cutoff,]
ii <- included$index
ff <- included$frequency
if (nrow(included) == 0) {
included <- input_data
excluded <- list(index = Inf, frequency = 0)
}
c_tau <- sum(included$frequency)
c_excluded <- sum(excluded$frequency)
if (length(ii) <= 4) {
warning("Not enough different frequency counts. We recommend increasing the cutoff.")
return(alpha_estimate(estimate = NA,
error = NA,
estimand = "richness",
name = "Three-component Geometric mixture Model",
type = "parametric",
model = "Three-component Geometric mixture Model",
frequentist = TRUE,
parametric = TRUE,
reasonable = TRUE,
interval = NA,
interval_type = "Approximate: log-normal",
GOF0 = NA,
GOF5 = NA,
AICc = NA,
other = list(t1 = NA,
t2 = NA,
t3 = NA,
u = NA,
cutoff = cutoff)))
}
## EM algorithm for obtaining the parameter estimates
em_params <- three_mixed_exp_EM(ii, ff)
u1 <- em_params$u1
u2 <- em_params$u2
t1 <- em_params$t1
t2 <- em_params$t2
t3 <- em_params$t3
t4 <- (1 + t1) * u1 * t2 * t3 / (t1 * t2 * t3 + u2 * t1 * t3 + u1 * t2 * t3 -
u2 * t1 * t2 + t1 * t2 - u1 * t1 * t2)
t5 <- (1 + t2) * u2 * t1 * t3 / (t1 * t2 * t3 + u2 * t1 * t3 + u1 * t2 * t3 -
u2 * t1 * t2 + t1 * t2 - u1 * t1 * t2)
C_tau <- c_tau / (1 - t4 / (1 + t1) - t5 / (1 + t2) - (1 - t4 - t5) / (1 + t3))
f0 <- C_tau - c_tau
C_hat <- C_tau + c_excluded
## se
se_hat <- three_mixed_exp_se(C_tau, t1, t2, t3, t4, t5)
## confidence interval
dd <- ifelse(f0 == 0, 1, exp(1.96 * sqrt(log(1 + se_hat^2/f0))))
C_confint <- c(c_tau + c_excluded + f0 / dd, c_tau + c_excluded + f0 * dd)
## AICc
AICc <- 10 * c_tau / (c_tau - 6) -2 *three_mixed_exp_lld(ii, ff, t1, t2, t3, u1, u2, full = T)
## GOF0 and GOF5
O_c <- c(ff, 0)
pp_E <- t4 / t1 * (t1 / (1 + t1))^ii + t5 / t2 * (t2 / (1 + t2))^ii +
(1 - t4 - t5) / t3 * (t3 / (1 + t3))^ii
E_c <- c_tau * c(pp_E, 1-sum(pp_E))
GOF0_threemixedexp <- GOF(O_c = O_c, E_c = E_c, param = 3, bin.tol = 0)
GOF5_threemixedexp <- GOF(O_c = O_c, E_c = E_c, param = 3, bin.tol = 5)
alpha_estimate(estimate = C_hat,
error = se_hat,
estimand = "richness",
name = "Three-component Geometric mixture Model",
type = "parametric",
model = "Three-component Geometric mixture Model",
frequentist = TRUE,
parametric = TRUE,
reasonable = TRUE,
interval = C_confint,
interval_type = "Approximate: log-normal",
GOF0 = signif(pchisq(GOF0_threemixedexp$GOF, GOF0_threemixedexp$df, lower.tail = F), 4),
GOF5 = signif(pchisq(GOF5_threemixedexp$GOF, GOF5_threemixedexp$df, lower.tail = F), 4),
AICc = AICc,
other = list(t1 = t1,
t2 = t2,
t3 = t3,
t4 = t4,
t5 = t5,
cutoff = cutoff))
}
# u <- 0.5
# t1 <- 3
# t2 <- 6
## compute the likelihood of the parameters
## if (full == T) return the complete likelihood, with an additional constant term
#' Internal functions for estimating species richness with a three-component geomic mixture model
#'
#' @export
three_mixed_exp_lld <- function(ii, ff, t1, t2, t3, u1, u2, full = F) {
lld <- sum(ff * log(u1 / t1 * (t1 / (1 + t1))^ii + u2 / t2 * (t2 / (1 + t2))^ii +
(1 - u1 - u2) / t3 * (t3 / (1 + t3))^ii))
if (full == T) {
c_tau <- sum(ff)
lld <- lld + sum(log(1:c_tau)) -
sum(sapply(ff, function(fi) {
sum(log(1:fi))
}))
}
return(lld)
}
## Initial values of the EM algorithm
#' @export
three_mixed_exp_init <- function(ii, ff) {
init_values <- data.frame(t1 = 0, t2 = 0, t3 = 0, u1 = 0, u2 = 0)
ss <- length(ii)
c_tau <- sum(ff)
for (ss1 in 2:(ss-2)) {
for (ss2 in (ss1+1):(ss-1)) {
init_values <-
rbind(init_values,
c(t1 = sum(ii[1:ss1] * ff[1:ss1]) / sum(ff[1:ss1]) - 1,
t2 = sum(ii[(ss1 + 1):ss2] * ff[(ss1 + 1):ss2]) / sum(ff[(ss1 + 1):ss2]) - 1,
t3 = sum(ii[(ss2 + 1):ss] * ff[(ss2 + 1):ss]) / sum(ff[(ss2 + 1):ss]) - 1,
u1 = sum(ff[1:ss1]) / c_tau,
u2 = sum(ff[(ss1 + 1):ss2]) / c_tau))
}
}
init_values <- init_values[-1, ]
init_values$lld <- apply(init_values, 1, function(rr) {
three_mixed_exp_lld(ii, ff, rr[1], rr[2], rr[3], rr[4], rr[5])
})
init_values_mle <- init_values[init_values$lld == max(init_values$lld),]
return(list(t1 = init_values_mle$t1,
t2 = init_values_mle$t2,
t3 = init_values_mle$t3,
u1 = init_values_mle$u1,
u2 = init_values_mle$u2,
lld = init_values_mle$lld))
}
#' @export
three_mixed_exp_EM <- function(ii, ff, MaxIter = 1000, tol = 1e-7) {
c_tau <- sum(ff)
## initial values for EM algorithm
inits <- three_mixed_exp_init(ii, ff)
t1 <- inits$t1
t2 <- inits$t2
t3 <- inits$t3
u1 <- inits$u1
u2 <- inits$u2
## begin updating
k <- 1
flag <- 0
while (k <= MaxIter) {
#print(paste("k = ", k, ", t1 = ", t1, ", t2 = ", t2,
# ", u1 = ", u1, ", lld = ", lld, sep = ""))
#print(lld)
## update parameters
z1 <- (u1 / t1 * (t1 / (1 + t1))^ii) /
(u1 / t1 * (t1 / (1 + t1))^ii +
u2 / t2 * (t2 / (1 + t2))^ii +
(1- u1 - u2) / t3 * (t3 / (1 + t3))^ii)
z2 <- (u2 / t2 * (t2 / (1 + t2))^ii) /
(u1 / t1 * (t1 / (1 + t1))^ii +
u2 / t2 * (t2 / (1 + t2))^ii +
(1- u1 - u2) / t3 * (t3 / (1 + t3))^ii)
u1_new <- sum(ff * z1) / c_tau
u2_new <- sum(ff * z2) / c_tau
t1_new <- sum(ff * ii * z1) / sum(ff * z1) - 1
t2_new <- sum(ff * ii * z2) / sum(ff * z2) - 1
t3_new <- sum(ff * ii * (1 - z1 - z2)) / sum(ff * (1 - z1 - z2)) - 1
lld_new <- three_mixed_exp_lld(ii, ff, t1_new, t2_new, t3_new, u1_new, u2_new)
## exit iteration if convergent
if(u1_new / u1 > 1 - tol & u1_new / u1 < 1 + tol &
u2_new / u2 > 1 - tol & u2_new / u2 < 1 + tol &
t1_new / t1 > 1 - tol & t1_new / t1 < 1 + tol &
t2_new / t2 > 1 - tol & t2_new / t2 < 1 + tol &
t3_new / t3 > 1 - tol & t3_new / t3 < 1 + tol) {
u1 <- u1_new
u2 <- u2_new
t1 <- t1_new
t2 <- t2_new
t3 <- t3_new
lld <- lld_new
flag <- 1
break()
}
u1 <- u1_new
u2 <- u2_new
t1 <- t1_new
t2 <- t2_new
t3 <- t3_new
lld <- lld_new
k <- k + 1
}
return(list(u1 = u1, u2 = u2, t1 = t1, t2 = t2, t3 = t3, iteration = min(k, MaxIter),
flag = flag))
}
#' @export
three_mixed_exp_se <- function(cc, t1, t2, t3, t4, t5, MaxIter = 500, tol = 1e-7) {
a00 <- (1 - t4 / (1 + t1) - t5 / (1 + t2) - (1 - t4 - t5) / (1 + t3)) /
(t4 / (1 + t1) + t5 / (1 + t2) + (1 - t4 - t5) / (1 + t3))
a0 <- - matrix(c(-t4 / (1 + t1) ^ 2, -t5 / (1 + t2) ^ 2, -(1 - t4 - t5) / (1 + t3) ^ 2,
1 / (1 + t1) - 1 / (1 + t3), 1 / (1 + t2) - 1 / (1 + t3)), nrow = 5) /
(t4 / (1 + t1) + t5 / (1 + t2) + (1 - t4 - t5) / (1 + t3))
A <- 0
k <- 0
while(k < MaxIter) {
p_k <- t4 / (1 + t1) * (t1 / (1 + t1)) ^ k + t5 / (1 + t2) * (t2 / (1 + t2)) ^ k +
(1 - t4 - t5) / (1 + t3) * (t3 / (1 + t3)) ^ k
p_k1 <- (t1 / (1 + t1)) ^ k * (k - t1) * t4 / (t1 * (1 + t1) ^ 2)
p_k2 <- (t2 / (1 + t2)) ^ k * (k - t2) * t5 / (t2 * (1 + t2) ^ 2)
p_k3 <- (t3 / (1 + t3)) ^ k * (k - t3) * (1 - t4 - t5) / (t3 * (1 + t3) ^ 2)
p_k4 <- 1 / (1 + t1) * (t1 / (1 + t1)) ^ k - 1 / (1 + t3) * (t3 / (1 + t3)) ^ k
p_k5 <- 1 / (1 + t2) * (t2 / (1 + t2)) ^ k - 1 / (1 + t3) * (t3 / (1 + t3)) ^ k
p_kj <- matrix(c(p_k1, p_k2, p_k3, p_k4, p_k5), ncol = 1)
A_k <- p_kj %*% t(p_kj) / p_k
A <- A + A_k
if (max(abs(A_k)) < tol) break()
k <- k + 1
}
if (any(svd(A)$d < 1e-15)) {
warning("stardard error unattainable due to matrix singularity")
return(NA)
} else {
return(as.numeric(sqrt(cc) / sqrt(a00 - t(a0) %*% solve(A) %*% a0)))
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.