Nothing
#RSE
#rare species estimation package
#####################################
################################################
#general functions
X.to.f = function(X){
f = factor(X, levels = 0:max(X))
f = table(f, exclude = 0) ## frequency counts
dimnames(f) = NULL
return(f)
}
f.to.X = function(f){
X = c()
k=1
while(k<=length(f)){
if(k>1) X = c(X,rep(k,f[k])) else {
X = rep(k,f[k])
}
k = k+1;
}
return(X);
}
################################################
#For abundance-based data
### Chao et al. (2015) Ecology paper: species-Rank abundance distribution
DetAbu <- function(x, zero=FALSE){
x <- unlist(x)
# print(x)
n <- sum(x)
f1 <- sum(x==1)
f2 <- sum(x==2)
f3 <- sum(x==3)
if(f2==0){
f1 <- max(f1 - 1, 0)
f2 <- 1
}
A1 <- f1 / n * ((n-1)*f1 / ((n-1)*f1 + 2*max(f2,1)))
A2 <- f2 / choose(n, 2) * ((n-2)*f2 / ((n-2)*f2 + 3*max(f3,1)))^2
if(zero==FALSE) x <- x[x>0]
q.solve <- function(q){
e <- A1 / sum(x/n*exp(-q*x))
out <- sum((x/n * (1 - e * exp(-q*x)))^2) - sum(choose(x,2)/choose(n,2)) + A2
abs(out)
}
q <- tryCatch(optimize(q.solve, c(0,1))$min, error = function(e) {1})
e <- A1 / sum(x/n*exp(-q*x))
o <- x/n * (1 - e * exp(-q*x))
return(c(e,q))
# o
}
### individual-based Chao et al. (2013):
### Generating species counts by the bootstrapping method
boot.abundance.fun = function(S.hat,f,b) {
D = sum(f)
ind = 1:length(f)
kmax = length(f)
d = rep(0,kmax)
n = sum(ind*f)
S.hat = ceiling(S.hat)
p0=0;f0=0;
if(f[2]==0){
f1 <- max(f[1] - 1, 0)
f2 <- 1
} else {
f1 = f[1]
f2 = f[2]
}
C = 1-f1 / n * ((n-1)*f1 / ((n-1)*f1 + 2*max(f2,1)))
d = ind/n*(1-b[1]*exp(-b[2]*ind))
cell.p = rep(d, times=f)
label.X = 1:length(cell.p)
if(S.hat>D) {
f0=S.hat-D;
p0=(1-C)/f0
sp.count = sample(c(length(label.X)+(1:f0),label.X), n,
replace = T, prob = c(rep(p0,f0),cell.p))
} else {
sp.count = sample(label.X, n, replace = T, prob = c(cell.p))
}
sp.count = factor(sp.count, levels = 1:(length(label.X)+f0))
sp.count =table(sp.count)
f.count = X.to.f(sp.count)
return(f.count)
}
##### The Bayesian-weight method
Pred.Fk.BW = function(f,m,b,k.show=3){
kmax = length(f)
cut.pts = max(10,kmax)
n = sum(f*(1:kmax))
## delta k
d = rep(0,cut.pts)
## estimated fk
est.f = rep(0,k.show)
### relative abundance estimation by Chao et al. (2015)
ind = 1:kmax
d = ind/n*(1-b[1]*exp(-b[2]*ind))
for(i in 1:k.show){## i for k
tmp = prod(1+n/(m-(0:(i-1))))
for(j in 1:min(i,kmax)){## j for q
# est.f[i] = est.f[i] +
# f[j]*choose(m,i)/(choose(m+n,i)-choose(m,i))*
# dbinom(i-j,m,d[j]);
est.f[i] = est.f[i]+f[j]*1/(tmp-1)*dbinom(i-j,m,d[j]);
}
}
return(est.f)
}
### Chao1 estimator
SpEst.Chao1.abun = function(f) {
est = sum(f)
n = sum(f * (1:length(f)))
if (length(f) > 1) {
if (f[2] > 0) {
A = f[1]/f[2]
} else {
if (f[1] > 0) {
A = f[1] - 1
} else {
A = 0
}
}
} else A = f[1] - 1
est = est + (n - 1)/n * A * f[1]/2
return(est)
}
##### the naive method
Pred.Fk.Naive = function(f,m,k.show=3){
kmax = length(f)
est.fk = rep(0,k.show)
n = sum(f*(1:kmax))
est.p=1:length(f)
est.p = est.p/n
for(j in 1:k.show){
# est.fk[j] = choose(m,j)*sum(f*est.p^j*(1-est.p)^(n+m-j))
est.fk[j] = sum(f*dbinom(j,m,est.p)*(1-est.p)^n)
}
est.fk
}
##### the unweighted method
Pred.Fk.unweighted = function(f,m,b,f0, k.show=3){
kmax = length(f)
cut.pts = max(10,kmax)
n = sum(f*(1:kmax))
d = rep(0,kmax)
## estimated fk
est.f = rep(0,k.show)
if(f[2]==0){
f1 <- max(f[1] - 1, 0)
f2 <- 1
} else {
f1 = f[1]
f2 = f[2]
}
if(f0>0) {d0 = f1 / n * ((n-1)*f1 / ((n-1)*f1 + 2*max(f2,1)))/f0
} else {
d0=0
}
### relative abundance estimation by Chao et al. (2015)
ind = 1:kmax
d = ind/n*(1-b[1]*exp(-b[2]*ind))
for(k in 1:k.show){
# est.f[k] = choose(m,k)*sum(
# f*d^k*(1-d)^(n+m-k)
# );
# est.f[k] = est.f[k] + choose(m,k)*f0*d0^k*(1-d0)^(n+m-k)
est.f[k] = sum(f*dbinom(k,m,d)*(1-d)^n)
if(f0>0) est.f[k] = est.f[k]+f0*dbinom(k,m,d0)*(1-d0)^n;
}# loop: k
return(est.f)
}
##### main function
## m: the total number of individuals in an additional sample
## f: species frequency counts data
## xi: species abundance data
Pred.abundance.rare = function(boot.rep = 100,f=NULL,xi=NULL,m,k.show = 3){
if(is.null(f)*is.null(xi)) {
print("Please input either species frequency counts data or species abundance data!!")
return(NULL);
}
if(is.null(f)) {
Xi = xi
f = X.to.f(Xi)
}
if(is.null(xi)) {
# f=Q
Xi = f.to.X(f)
}
# #### Point estimates
n=sum(Xi)
a.p.1 = DetAbu(Xi, zero=FALSE)
est.R = Pred.Fk.BW(f=f, m=m, b=a.p.1, k.show)
est.f0 = SpEst.Chao1.abun(f)-sum(f)
est.R2 = Pred.Fk.unweighted(f=f, m=m, b=a.p.1, est.f0,k.show)
est.Naive = Pred.Fk.Naive(f=f,m=m,k.show)
#### Calculating bootstrap SEs and CIs
#### Create a space for storing bootstrap samples
boot.output = NULL
for(i in 1:boot.rep){
b.f = boot.abundance.fun(S.hat=est.f0+sum(f), f, b=a.p.1)
b.Xi = f.to.X(b.f)
b.a.p.1 = DetAbu(b.Xi, zero=FALSE)
b.n = sum((1:length(b.f))*b.f)
## Naive estimator
b.est.p=1:length(b.f)
b.est.p = b.est.p/b.n
b.naive = NULL
for(j in 1:k.show) {
b.naive[j] = choose(m,j)*sum(b.f*b.est.p^j*(1-b.est.p)^(b.n+m-j))
}## loop: j
b.est.f0 = SpEst.Chao1.abun(b.f)-sum(b.f)
b.pred.fk.BW = Pred.Fk.BW(b.f, m, b=b.a.p.1)
b.pred.fk.unweighted = Pred.Fk.unweighted(b.f, m, b=b.a.p.1, b.est.f0)
boot.output = rbind(boot.output,c(proposed=b.pred.fk.BW[1:k.show], naive=b.naive[1:k.show], unweighted=b.pred.fk.unweighted[1:k.show]))
}### loop: boot.rep
point.est = cbind(proposed=est.R,naive=est.Naive,unweighted=est.R2)
boot.sd = apply(boot.output, 2, sd, na.rm=T)
boot.sd = matrix(boot.sd, ncol=3)
boot.ci = apply(boot.output, 2, quantile, probs=c(0.025,0.975))
Normal.ci = cbind(point.est-1.96*boot.sd,point.est+1.96*boot.sd)
for(i in 1:nrow(Normal.ci)){
for(j in 1:ncol(Normal.ci)){
if(Normal.ci[i,j]<0) Normal.ci[i,j]=0;
}
}
output = list()
output[["Data information"]]=c("Original sample size (n)"=n,"Additional sample size (m)"=m)
output[["Naive method"]] = round(cbind(1:k.show,point.est[,2],boot.sd[,2],Normal.ci[,c(2,5)]),1)
output[["Bayesian-weight method"]] = round(cbind(1:k.show,point.est[,1],boot.sd[,1],Normal.ci[,c(1,4)]),1)
output[["Unweighted method"]] = round(cbind(1:k.show,point.est[,3],boot.sd[,3],Normal.ci[,c(3,6)]),1)
colnames(output[["Naive method"]])=
colnames(output[["Bayesian-weight method"]])=
colnames(output[["Unweighted method"]])=
c("k","Estimate","Estimated SE","95% lower limit","95% upper limit")
output
} ### end of R function "Pred.incidence.rare"
################################################
#For incidence-based data
### Chao et al. (2015)
#' DetInc(y, zero=FALSE) is a function of estimating detected species incidence probability.
#' @param y a vector of species incidence frequency
#' @param zero reserves zero frequency or not. Default is FALSE.
#' @return a numerical vector
DetInc <- function(y,nT, zero=FALSE){
# y <- unlist(y)
# nT <- max(y)
# y <- y[-1]
Q1 <- sum(y==1)
Q2 <- sum(y==2)
Q3 <- sum(y==3)
if(Q2==0){
Q1 <- max(Q1 - 1, 0)
Q2 <- 1
}
A1 <- Q1 / nT * ((nT-1)*Q1 / ((nT-1)*Q1 + 2*max(Q2,1)))
A2 <- Q2 / choose(nT, 2) * ((nT-2)*Q2/((nT-2)*Q2 + 3*max(Q3,1)))^2
if(zero==FALSE) y <- y[y>0]
q.solve <- function(q){
e <- A1 / sum(y/nT*exp(-q*y))
out <- sum((y/nT * (1 - e * exp(-q*y)))^2) - sum(choose(y,2)/choose(nT,2)) + A2
abs(out)
}
q <- tryCatch(optimize(q.solve, c(0,1))$min, error = function(e) {1})
e <- A1 / sum(y/nT*exp(-q*y))
o <- y/nT * (1 - e * exp(-q*y))
Q0.hat <- ceiling(ifelse(Q2 == 0, (nT - 1) / nT * Q1 * (Q1 - 1) / 2, (nT - 1) / nT * Q1 ^ 2/ 2 / Q2)) #Chao2 estimator
return(c(e,q,Q0.hat))
# o
}
##### Unweighted
Pred.Qk.unweighted = function(Q,nT,u,b,Q0,k.show = 3){
f0=Q0
n = nT
f=Q
m=u
kmax = length(f)
cut.pts = max(10,kmax)
## modified f
a.f = rep(0,cut.pts)
## delta i
d = rep(0,cut.pts)
## estimated fk
est.f = rep(0,k.show)
if(f[2]==0){
f1 <- max(f[1] - 1, 0)
f2 <- 1
} else {
f1 = f[1]
f2 = f[2]
}
if(Q0>0) d0 = f1 / n * ((n-1)*f1 / ((n-1)*f1 + 2*max(f2,1)))/f0
### relative abundance estimation by Chao et al. (2015)
ind = 1:kmax
d = ind/nT*(1-b[1]*exp(-b[2]*ind))
for(k in 1:k.show){
# est.f[k] = choose(m,k)*sum(f*d^k*(1-d)^(n+m-k));
# est.f[k] = est.f[k] + choose(m,k)*f0*d0^k*(1-d0)^(n+m-k)
est.f[k] = sum(f*dbinom(k,m,d)*(1-d)^n)
if(f0>0) est.f[k] = est.f[k]+f0*dbinom(k,m,d0)*(1-d0)^n;
}# loop: k
return(est.f)
}
##### Proposed Bayesian-weight method
# Pred.Qk.BW = function(Q,nT,u,b){
Pred.Qk.BW = function(Q,nT,u,b,k.show = 3){
n = nT
f=Q
m=u
kmax = length(f)
cut.pts = max(10,kmax)
## delta i
d = rep(0,cut.pts)
## estimated fk
est.f = rep(0,k.show)
### relative abundance estimation by Chao et al. (2015)
ind = 1:kmax
d = ind/nT*(1-b[1]*exp(-b[2]*ind))
for(i in 1:k.show){## i for k
for(j in 1:min(i,kmax)){## j for q
# est.f[i] = est.f[i] +
# f[j]*choose(m,i)/(choose(m+n,i)-choose(m,i))*
# dbinom(i-j,m,d[j]);
tmp = prod(1+n/(m-(0:(i-1))))
est.f[i] = est.f[i] +
f[j]*1/(tmp-1)*dbinom(i-j,m,d[j]);
}
}
return(est.f)
}
### Naive method
Pred.Qk.Naive = function(nT,u,f,k.show = 3){
m.i=u
n.i=nT
est.fk = c()
for(j in 1:k.show) {
est.p=1:length(f)
est.p = est.p/nT
# est.fk[j] = sum(f*(1-est.p)^n.i*choose(m.i,j)*est.p^j*(1-est.p)^(m.i-j))
est.fk[j] = sum(f*dbinom(j,m.i,est.p)*(1-est.p)^n.i)
}
est.fk
}
### incidence-based Chao et al. (2013):
#Generating species counts by the bootstrapping method
boot.incidence.fun = function(S.hat, nT, Q, b) {
f = Q
D = sum(f)
kmax = length(f)
ind = 1:kmax
d = rep(0,kmax)
### the total number of quadrats in the original sample
n = nT
### the estimated species richness in the original sample
S.hat = ceiling(S.hat)
p0=0;f0=0;
if(f[2]==0){
f1 <- max(f[1] - 1, 0)
f2 <- 1
} else {
f1 = f[1]
f2 = f[2]
}
#### the estimated sample coverage
C = 1-f1 / n * ((n-1)*f1 / ((n-1)*f1 + 2*max(f2,1)))
d = ind/nT*(1-b[1]*exp(-b[2]*ind))
cell.p = rep(d, times=f)
if(S.hat>D) {
f0 = S.hat - D
p0=(1-C)/f0
det.p = c(rep(p0,f0),cell.p)
} else {
det.p = cell.p
}
sp.count = rep(0,length(det.p))
for(i in 1:length(det.p)){
sp.count[i] = rbinom(1,n,det.p[i])
}
### bootstrapping sample represented by f1, f2, ....
f.count = X.to.f(sp.count)
return(f.count)
}
##### main function
Pred.incidence.rare = function(boot.rep = 100,Q=NULL,xi=NULL,nT,u,k.show = 3){
### the number of quadrats in an additional sample
m = u
if(is.null(Q)*is.null(xi)) {
print("Please input either frequency counts data or incidence counts data!!")
return(NULL);
}
if(is.null(Q)) {
Xi = xi
f = X.to.f(Xi)
}
if(is.null(xi)) {
f=Q
Xi = f.to.X(f)
}
#### Point estimates
out.chao.pi.q = DetInc(Xi,nT, zero=FALSE)
a.p.1 = out.chao.pi.q[1:2]
Q0.est = out.chao.pi.q[3]
est.R = Pred.Qk.BW(f,nT,m,b=a.p.1,k.show)
est.R2 = Pred.Qk.unweighted(f,nT,m,b=a.p.1,Q0.est,k.show)
est.Naive = Pred.Qk.Naive(nT,m,f,k.show)
#### Calculating bootstrap SEs and CIs
#### Create a space for storing bootstrap samples
boot.output = NULL
for(i in 1:boot.rep){
b.f = boot.incidence.fun(S.hat = sum(f)+Q0.est, nT, f, b=a.p.1)
b.Xi = f.to.X(b.f)
b.out.chao.pi.q = DetInc(Xi,nT, zero=FALSE)
b.a.p.1 = b.out.chao.pi.q[1:2]
b.Q0.est = b.out.chao.pi.q[3]
b.a.p.1 = DetInc(b.Xi, nT, zero=FALSE)
# b.naive = Pred.Qk.Naive(nT, m, b.f, k.show)
b.est.R = Pred.Qk.BW(b.f,nT,m,b=b.a.p.1, k.show)
b.est.R2 = Pred.Qk.unweighted(b.f, nT, m, b=b.a.p.1, b.Q0.est, k.show)
b.est.Naive = Pred.Qk.Naive(nT, m, b.f, k.show)
boot.output = rbind(boot.output,
c(proposed=b.est.R[1:k.show],
naive=b.est.Naive[1:k.show],
unweighted=b.est.R2[1:k.show]))
}### loop: boot.rep
point.est = cbind(proposed=est.R,naive=est.Naive,unweighted=est.R2)
boot.sd = apply(boot.output, 2, sd, na.rm=T)
boot.sd = matrix(boot.sd, ncol=3)
boot.ci = apply(boot.output, 2, quantile, probs=c(0.025,0.975),na.rm=T)
Normal.ci = cbind(point.est-1.96*boot.sd, point.est+1.96*boot.sd)
# Normal.ci = mapply(max,cbind(point.est-1.96*boot.sd, point.est+1.96*boot.sd),0)
for(i in 1:nrow(Normal.ci)){
for(j in 1:ncol(Normal.ci)){
if(Normal.ci[i,j]<0) Normal.ci[i,j]=0;
}
}
output = list()
output[["Data information"]]=c("Original sample size (t)"=nT,"Additional sample size (u)"=u)
output[["Naive method"]] = round(cbind(1:k.show,point.est[,2],boot.sd[,2],Normal.ci[,c(2,5)]),1)
output[["Bayesian-weight method"]] = round(cbind(1:k.show,point.est[,1],boot.sd[,1],Normal.ci[,c(1,4)]),1)
output[["Unweighted method"]] = round(cbind(1:k.show,point.est[,3],boot.sd[,3],Normal.ci[,c(3,6)]),1)
colnames(output[["Naive method"]])=
colnames(output[["Bayesian-weight method"]])=
colnames(output[["Unweighted method"]])=
c("k","Estimate","Estimated SE","95% lower limit","95% upper limit")
output
} ### end of R function "Pred.incidence.rare"
#
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.