# R/Diversity_subroutine.R

# Estimated coefficient of variation (CV) of the species detection
# probabilities for individual (abundance) data, based on the
# coverage-adjusted richness estimate.
CV.Ind <- function(x)
{
  counts <- x[x > 0]
  total <- sum(counts)
  singletons <- sum(counts == 1)
  coverage <- 1 - singletons/total       # sample coverage estimate
  observed <- sum(counts > 0)            # observed species richness
  adj_richness <- observed/coverage      # coverage-adjusted richness
  cv_square <- adj_richness*sum(counts*(counts - 1))/total/(total - 1) - 1
  sqrt(max(cv_square, 0))                # truncate negative estimates at zero
}
# Chao1 species richness estimator with a log-transformed confidence
# interval (Chao 1984, 1987).
# x:    vector of species abundances.
# conf: confidence level.
# Returns c(estimate, s.e., CI lower, CI upper) rounded to 1 decimal,
# followed by conf.
Chao1 <- function(x, conf = 0.95)
{
  z <- -qnorm((1 - conf)/2)
  x <- x[x > 0]
  D <- sum(x > 0)
  f1 <- sum(x == 1)
  f2 <- sum(x == 2)
  n <- sum(x)

  if (f1 > 0 && f2 > 0) {
    # Standard Chao1: singletons and doubletons both present.
    S_Chao1 <- D + (n - 1)/n*f1^2/(2*f2)
    var_Chao1 <- f2*((n - 1)/n*(f1/f2)^2/2 +
                       ((n - 1)/n)^2*(f1/f2)^3 + ((n - 1)/n)^2*(f1/f2)^4/4)
    t <- S_Chao1 - D
    # Log-transformed CI keeps the lower bound above the observed richness.
    K <- exp(z*sqrt(log(1 + var_Chao1/t^2)))
    CI_Chao1 <- c(D + t/K, D + t*K)
  } else if (f1 > 1 && f2 == 0) {
    # No doubletons: bias-corrected point estimate and matching variance.
    S_Chao1 <- D + (n - 1)/n*f1*(f1 - 1)/(2*(f2 + 1))
    var_Chao1 <- (n - 1)/n*f1*(f1 - 1)/2 +
      ((n - 1)/n)^2*f1*(2*f1 - 1)^2/4 - ((n - 1)/n)^2*f1^4/4/S_Chao1
    t <- S_Chao1 - D
    K <- exp(z*sqrt(log(1 + var_Chao1/t^2)))
    CI_Chao1 <- c(D + t/K, D + t*K)
  } else {
    # Estimator collapses to the observed richness: use the variance of D.
    S_Chao1 <- D
    i <- (1:max(x))[unique(x)]
    var_obs <- sum(sapply(i, function(i) sum(x == i)*(exp(-i) - exp(-2*i)))) -
      (sum(sapply(i, function(i) i*exp(-i)*sum(x == i))))^2/n
    var_Chao1 <- var_obs
    P <- sum(sapply(i, function(i) sum(x == i)*exp(-i)/D))
    CI_Chao1 <- c(max(D, D/(1 - P) - z*sqrt(var_obs)/(1 - P)),
                  D/(1 - P) + z*sqrt(var_obs)/(1 - P))
  }
  return(c(round(c(S_Chao1, var_Chao1^0.5, CI_Chao1[1], CI_Chao1[2]), 1), conf))
}

# Bias-corrected Chao1 richness estimator (Chao 2005) with a
# log-transformed confidence interval.
# Returns c(estimate, s.e., CI lower, CI upper) rounded to 1 decimal.
Chao1_bc <- function(x, conf = 0.95)
{
  z <- -qnorm((1 - conf)/2)
  x <- x[x > 0]
  D <- sum(x > 0)
  f1 <- sum(x == 1)
  f2 <- sum(x == 2)
  n <- sum(x)

  # Bias-corrected point estimate and its analytic variance.
  S_Chao1_bc <- D + (n - 1)/n*f1*(f1 - 1)/(2*(f2 + 1))
  var_Chao1_bc <- (n - 1)/n*f1*(f1 - 1)/2/(f2 + 1) +
    ((n - 1)/n)^2*f1*(2*f1 - 1)^2/4/(f2 + 1)^2 +
    ((n - 1)/n)^2*f1^2*f2*(f1 - 1)^2/4/(f2 + 1)^4

  t <- round(S_Chao1_bc - D, 5)
  if (t != 0) {
    # Log-transformed CI so the lower bound stays above D.
    K <- exp(z*sqrt(log(1 + var_Chao1_bc/t^2)))
    CI_Chao1_bc <- c(D + t/K, D + t*K)
  } else {
    # Estimate equals observed richness: fall back to the variance of D.
    i <- (1:max(x))[unique(x)]
    var_obs <- sum(sapply(i, function(i) sum(x == i)*(exp(-i) - exp(-2*i)))) -
      (sum(sapply(i, function(i) i*exp(-i)*sum(x == i))))^2/n
    var_Chao1_bc <- var_obs
    P <- sum(sapply(i, function(i) sum(x == i)*exp(-i)/D))
    CI_Chao1_bc <- c(max(D, D/(1 - P) - z*sqrt(var_obs)/(1 - P)),
                     D/(1 - P) + z*sqrt(var_obs)/(1 - P))
  }
  round(c(S_Chao1_bc, var_Chao1_bc^0.5, CI_Chao1_bc[1], CI_Chao1_bc[2]), 1)
}

# ACE (Abundance-based Coverage Estimator) of species richness;
# Chao & Lee (1992), Eq. (2.14), with a delta-method variance and a
# log-transformed confidence interval.
# data: vector of species abundances.
# k:    cut-off separating "rare" (<= k) from "abundant" (> k) species.
# conf: confidence level.
# Returns c(point estimate, s.e., CI lower, CI upper).
SpecAbunAce <- function(data, k=10, conf=0.95)
{
  data <- as.numeric(data)  
  # f(i, data): number of species observed exactly i times.
  f <- function(i, data){length(data[which(data == i)])}
  # Summary statistics of the sample, split at the rare/abundant cut-off k.
  basicAbun <- function(data, k)
  {
    x <- data[which(data != 0)]
    n <- sum(x)                         # sample size
    D <- length(x)                      # observed richness
    n_rare <- sum(x[which(x <= k)])     # individuals in rare species
    D_rare <- length(x[which(x <= k)])  # number of rare species
    # Sample coverage of the rare group (1 when there are no rare species).
    if (n_rare != 0)
    {
      C_rare <- 1 - f(1, x)/n_rare
    } 
    else 
    {
      C_rare = 1
    } 
    n_abun <- n - n_rare
    D_abun <- length(x[which(x > k)])   # number of abundant species
    
    # Squared CV of the rare-species abundances (ACE and ACE-1 variants).
    j <- c(1:k)
    a1 <- sum(sapply(j, function(j)j*(j - 1)*f(j, x)))
    a2 <- sum(sapply(j, function(j)j*f(j, x)))
    if (C_rare != 0)
    {
      gamma_rare_hat_square <- max(D_rare/C_rare*a1/a2/(a2 - 1) - 1, 0)
      gamma_rare_1_square <- max(gamma_rare_hat_square*(1 + (1 - C_rare)/C_rare*a1/(a2 - 1)), 0)
    }
    else
    {
      gamma_rare_hat_square <- 0
      gamma_rare_1_square <- 0
    }
    CV_rare <- sqrt(gamma_rare_hat_square)
    CV1_rare <- sqrt(gamma_rare_1_square)
    
    return(c( n, D, n_rare, D_rare, C_rare, CV_rare, CV1_rare, n_abun, D_abun))
  }
  
  z <- -qnorm((1 - conf)/2)
  # Compute the summary statistics once: the original code re-ran basicAbun
  # for each of the nine elements it extracted.
  BA <- basicAbun(data, k)
  n <- BA[1]
  D <- BA[2]
  n_rare <- BA[3]
  D_rare <- BA[4]
  C_rare <- BA[5]
  CV_rare <- BA[6]
  CV1_rare <- BA[7]
  n_abun <- BA[8]
  D_abun <- BA[9]
  x <- data[which(data != 0)]
  #############################
  # ACE point estimate together with the squared CV it is based on.
  S_ACE <- function(x, k)
  {
    j <- c(1:k)
    a1 <- sum(sapply(j, function(j)j*(j - 1)*f(j, x)))
    a2 <- sum(sapply(j, function(j)j*f(j, x)))
    if (C_rare != 0)
    {
      gamma_rare_hat_square <- max(D_rare/C_rare*a1/a2/(a2 - 1) - 1, 0)
    }
    else
    {
      gamma_rare_hat_square <- 0
    }
    S_ace <- D_abun + D_rare/C_rare + f(1, x)/C_rare*gamma_rare_hat_square
    return(c(S_ace, gamma_rare_hat_square))
  }
  ace <- S_ACE(x, k)   # evaluate once; the original called it twice
  s_ace <- ace[1]
  gamma_rare_hat_square <- ace[2]
  #### differential ####
  # diff(q): partial derivative of the ACE estimate with respect to the
  # frequency count f(q, x); the g1..g5 tags label the derivative terms of
  # the corresponding pieces of the estimator.
  u <- c(1:k)    
  diff <- function(q){
    if (gamma_rare_hat_square != 0){
      si <- sum(sapply(u, function(u)u*(u - 1)*f(u, x)))
      if ( q == 1){
        d <- (1 - f(1, x)/n_rare + D_rare*(n_rare - f(1, x))/n_rare^2)/(1 - f(1, x)/n_rare)^2 + #g1 
          ((1 - f(1, x)/n_rare)^2*n_rare*(n_rare - 1)*(D_rare*si + f(1, x)*si) - 
             f(1, x)*D_rare*si*(-2*(1 - f(1, x)/n_rare)*(n_rare - f(1, x))/n_rare^2*n_rare*(n_rare - 1) + (1 - f(1, x)/n_rare)^2*(2*n_rare - 1))
          )/(1 - f(1, x)/n_rare)^4/n_rare^2/(n_rare - 1)^2 - #g2
          (1 - f(1, x)/n_rare + f(1, x)*(n_rare - f(1, x))/n_rare^2)/(1 - f(1, x)/n_rare)^2 #g3
      } else if(q > k){
        d <- 1
      } else {
        d <- (1 - f(1, x)/n_rare - D_rare*q*f(1, x)/n_rare^2)/(1 - f(1, x)/n_rare)^2 + #g1
          ((1 - f(1, x)/n_rare)^2*n_rare*(n_rare - 1)*f(1, x)*(si + D_rare*q*(q - 1)) - 
             f(1, x)*D_rare*si*(2*(1 - f(1, x)/n_rare)*f(1, x)*q/n_rare^2*n_rare*(n_rare - 1) + 
                                  (1 - f(1, x)/n_rare)^2*q*(n_rare - 1) + (1 - f(1, x)/n_rare)^2*n_rare*q)
          )/(1 - f(1, x)/n_rare)^4/(n_rare)^2/(n_rare - 1)^2 + #g2
          (q*(f(1, x))^2/n_rare^2)/(1 - f(1, x)/n_rare)^2 #g3
      }
      return(d)
    } else {
      if ( q == 1){
        d <- (1 - f(1, x)/n_rare + D_rare*(n_rare - f(1, x))/n_rare^2)/(1 - f(1, x)/n_rare)^2 #g1 
      } else if(q > k){
        d <- 1
      } else {
        d <- (1 - f(1, x)/n_rare - D_rare*q*f(1, x)/n_rare^2)/(1 - f(1, x)/n_rare)^2 #g1
      }
      return(d)  
    }
  }
  # Asymptotic covariance of the frequency counts f(i) and f(j).
  COV.f <- function(i,j){
    if (i == j){
      cov.f <- f(i, x)*(1 - f(i, x)/s_ace)
    } else {
      cov.f <- -f(i, x)*f(j, x)/s_ace
    }     
    return(cov.f)
  }
  
  i <- rep(sort(unique(x)), each = length(unique(x)))
  j <- rep(sort(unique(x)), length(unique(x)))       # all (i, j) pairs
  
  # Delta-method variance; a non-positive value signals numerical failure.
  var_ace <- sum(mapply(function(i, j)diff(i)*diff(j)*COV.f(i, j), i, j))
  if (!(var_ace > 0)) {
    var_ace <- NA
  }
  ######################
  t <- round(s_ace - D, 5)
  if (!is.nan(t)){
    if (t != 0){
      # Log-transformed CI (Chao 1987): lower bound never drops below D.
      C <- exp(z*sqrt(log(1 + var_ace/(s_ace - D)^2)))
      CI_ACE <- c(D + (s_ace - D)/C, D + (s_ace - D)*C)
    } else {
      # Estimate equals observed richness: use the variance of D instead.
      i <- c(1:max(x))
      i <- i[unique(x)]
      var_obs <- sum(sapply(i, function(i)f(i, x)*(exp(-i) - exp(-2*i)))) - 
        (sum(sapply(i, function(i)i*exp(-i)*f(i, x))))^2/n
      var_ace <- var_obs
      P <- sum(sapply(i, function(i)f(i, x)*exp(-i)/D))
      CI_ACE <- c(max(D, D/(1 - P) - z*sqrt(var_obs)/(1 - P)), D/(1 - P) + z*sqrt(var_obs)/(1 - P))  
    }
  } else {
    CI_ACE <- c(NaN, NaN)
  }
  table <- c(s_ace, sqrt(var_ace), CI_ACE)
  return(table)
}

# ACE-1 species richness estimator for highly heterogeneous communities;
# Chao & Lee (1992), Eq. (2.15).  Same structure as SpecAbunAce but built on
# the inflated squared CV (gamma_1).
# data: vector of species abundances.
# k:    rare/abundant cut-off; conf: confidence level.
# Returns c(point estimate, s.e., CI lower, CI upper).
SpecAbunAce1 <- function(data, k=10, conf=0.95)
{
  data <- as.numeric(data)
  # f(i, data): number of species observed exactly i times.
  f <- function(i, data){length(data[which(data == i)])}
  # Summary statistics of the sample, split at the rare/abundant cut-off k.
  basicAbun <- function(data, k){
    x <- data[which(data != 0)]
    n <- sum(x)                         # sample size
    D <- length(x)                      # observed richness
    n_rare <- sum(x[which(x <= k)])     # individuals in rare species
    D_rare <- length(x[which(x <= k)])  # number of rare species
    # Sample coverage of the rare group (1 when there are no rare species).
    if (n_rare != 0){
      C_rare <- 1 - f(1, x)/n_rare
    } else {
      C_rare = 1
    } 
    n_abun <- n - n_rare
    D_abun <- length(x[which(x > k)])   # number of abundant species
    
    # Squared CV of the rare-species abundances (ACE and ACE-1 variants).
    j <- c(1:k)
    a1 <- sum(sapply(j, function(j)j*(j - 1)*f(j, x)))
    a2 <- sum(sapply(j, function(j)j*f(j, x)))
    if (C_rare != 0){
      gamma_rare_hat_square <- max(D_rare/C_rare*a1/a2/(a2 - 1) - 1, 0)
      gamma_rare_1_square <- max(gamma_rare_hat_square*(1 + (1 - C_rare)/C_rare*a1/(a2 - 1)), 0)
    }else{
      gamma_rare_hat_square <- 0
      gamma_rare_1_square <- 0
    }
    CV_rare <- sqrt(gamma_rare_hat_square)
    CV1_rare <- sqrt(gamma_rare_1_square)
    return(c( n, D, n_rare, D_rare, C_rare, CV_rare, CV1_rare, n_abun, D_abun))
  }
  
  z <- -qnorm((1 - conf)/2)
  # Compute the summary statistics once: the original code re-ran basicAbun
  # for each of the nine elements it extracted.
  BA <- basicAbun(data, k)
  n <- BA[1]
  D <- BA[2]
  n_rare <- BA[3]
  D_rare <- BA[4]
  C_rare <- BA[5]
  CV_rare <- BA[6]
  CV1_rare <- BA[7]
  n_abun <- BA[8]
  D_abun <- BA[9]
  x <- data[which(data != 0)]
  #############################
  # ACE-1 point estimate together with the inflated squared CV it uses.
  S_ACE1 <- function(x, k){
    j <- c(1:k)
    a1 <- sum(sapply(j, function(j)j*(j - 1)*f(j, x)))
    a2 <- sum(sapply(j, function(j)j*f(j, x)))
    if (C_rare != 0){
      gamma_rare_hat_square <- max(D_rare/C_rare*a1/a2/(a2 - 1) - 1, 0)
      gamma_rare_1_square <- max(gamma_rare_hat_square*(1 + (1 - C_rare)/C_rare*a1/(a2 - 1)), 0)
    }else{
      gamma_rare_hat_square <- 0
      gamma_rare_1_square <- 0
    }
    s_ace1 <- D_abun + D_rare/C_rare + f(1, x)/C_rare*gamma_rare_1_square
    
    return(c(s_ace1, gamma_rare_1_square))
  }
  ace1 <- S_ACE1(x, k)   # evaluate once; the original called it twice
  s_ace1 <- ace1[1]
  gamma_rare_1_square <- ace1[2]
  #### differential ####
  # diff(q): partial derivative of the ACE-1 estimate with respect to the
  # frequency count f(q, x); g1..g5 label the derivative terms of the
  # corresponding pieces of the estimator.
  u <- c(1:k)    
  diff <- function(q){
    if (gamma_rare_1_square != 0){
      u <- c(1:k)
      si <- sum(sapply(u, function(u)u*(u-1)*f(u, x)))
      if ( q == 1){
        d <- (1 - f(1, x)/n_rare + D_rare*(n_rare - f(1, x))/n_rare^2)/(1 - f(1, x)/n_rare)^2 + #g1 
          ((1 - f(1, x)/n_rare)^2*n_rare*(n_rare - 1)*(D_rare*si + f(1, x)*si) - 
             f(1, x)*D_rare*si*(-2*(1 - f(1, x)/n_rare)*(n_rare - f(1, x))/n_rare^2*n_rare*(n_rare - 1) + (1 - f(1, x)/n_rare)^2*(2*n_rare - 1))
          )/(1 - f(1, x)/n_rare)^4/n_rare^2/(n_rare - 1)^2 - #g2
          (1 - f(1, x)/n_rare + f(1, x)*(n_rare - f(1, x))/n_rare^2)/(1 - f(1, x)/n_rare)^2 + #g3
          ((1 - f(1, x)/n_rare)^3*(n_rare*(n_rare - 1))^2*(2*f(1, x)*D_rare*si^2 + f(1, x)^2*si^2) - #g4
             f(1, x)^2*D_rare*si^2*(3*(1 - f(1, x)/n_rare)^2*(f(1, x) - n_rare)/(n_rare)^2*(n_rare*(n_rare - 1))^2 + 
                                      (1 - f(1, x)/n_rare)^3*2*n_rare*(n_rare - 1)^2 + (1 - f(1, x)/n_rare)^3*n_rare^2*2*(n_rare - 1)) 
          )/(1 - f(1, x)/n_rare)^6/n_rare^4/(n_rare - 1)^4 - 
          ((1 - f(1, x)/n_rare)^2*n_rare*(n_rare - 1)*(2*f(1, x)*si) - #g5
             f(1, x)^2*si*(2*(1 - f(1, x)/n_rare)*(f(1, x) - n_rare)/n_rare^2*n_rare*(n_rare - 1) + 
                             (1 - f(1, x)/n_rare)^2*(n_rare - 1) + (1 - f(1, x)/n_rare)^2*n_rare) 
          )/(1 - f(1, x)/n_rare)^4/n_rare^2/(n_rare - 1)^2
      } else if(q > k){
        d <- 1
      } else {
        d <- (1 - f(1, x)/n_rare - D_rare*q*f(1, x)/n_rare^2)/(1 - f(1, x)/n_rare)^2 + #g1
          ((1 - f(1, x)/n_rare)^2*n_rare*(n_rare - 1)*f(1, x)*(si + D_rare*q*(q - 1)) - 
             f(1, x)*D_rare*si*(2*(1 - f(1, x)/n_rare)*f(1, x)*q/n_rare^2*n_rare*(n_rare - 1) + 
                                  (1 - f(1, x)/n_rare)^2*q*(n_rare - 1) + (1 - f(1, x)/n_rare)^2*n_rare*q)
          )/(1 - f(1, x)/n_rare)^4/(n_rare)^2/(n_rare - 1)^2 + #g2
          (q*(f(1, x))^2/n_rare^2)/(1 - f(1, x)/n_rare)^2 + #g3
          ((1 - f(1, x)/n_rare)^3*n_rare^2*(n_rare - 1)^2*f(1, x)^2*(si^2 + 2*D_rare*si*q*(q - 1)) - #g4
             f(1, x)^2*D_rare*si^2*(3*(1 - f(1, x)/n_rare)^2*(f(1, x)*q/n_rare^2)*(n_rare*(n_rare - 1))^2 + 
                                      2*(1 - f(1, x)/n_rare)^3*n_rare*q*(n_rare - 1)^2 + 2*(1 - f(1, x)/n_rare)^3*n_rare^2*(n_rare - 1)*q)   
          )/(1 - f(1, x)/n_rare)^6/(n_rare)^4/(n_rare - 1)^4 - 
          ((1 - f(1, x)/n_rare)^2*n_rare*(n_rare - 1)*f(1, x)^2*q*(q - 1) - #g5
             f(1, x)^2*si*(2*(1 - f(1, x)/n_rare)*f(1, x)*q/n_rare^2*n_rare*(n_rare - 1) + 
                             (1 - f(1, x)/n_rare)^2*q*(n_rare - 1) + (1 - f(1, x)/n_rare)^2*n_rare*q)
          )/(1 - f(1, x)/n_rare)^4/(n_rare)^2/(n_rare - 1)^2
      }
      return(d)
    } else {
      u <- c(1:k)
      si <- sum(sapply(u, function(u)u*(u-1)*f(u, x)))
      if ( q == 1){
        d <- (1 - f(1, x)/n_rare + D_rare*(n_rare - f(1, x))/n_rare^2)/(1 - f(1, x)/n_rare)^2 #g1 
      } else if(q > k){
        d <- 1
      } else {
        d <- (1 - f(1, x)/n_rare - D_rare*q*f(1, x)/n_rare^2)/(1 - f(1, x)/n_rare)^2 #g1
      }
      return(d)
    }
  }
  
  # Asymptotic covariance of the frequency counts f(i) and f(j).
  COV.f <- function(i,j){
    if (i == j){
      cov.f <- f(i, x)*(1 - f(i, x)/s_ace1)
    } else {
      cov.f <- -f(i, x)*f(j, x)/s_ace1
    }     
    return(cov.f)
  }
  
  i <- rep(sort(unique(x)), each = length(unique(x)))
  j <- rep(sort(unique(x)), length(unique(x)))       # all (i, j) pairs
  
  # Delta-method variance; a non-positive value signals numerical failure.
  var_ace1 <- sum(mapply(function(i, j)diff(i)*diff(j)*COV.f(i, j), i, j))
  if (!(var_ace1 > 0)) {
    var_ace1 <- NA
  }
  ######################
  t <- round(s_ace1 - D, 5)
  if (!is.nan(t)){
    if (t != 0){
      # Log-transformed CI (Chao 1987): lower bound never drops below D.
      C <- exp(z*sqrt(log(1 + var_ace1/(s_ace1 - D)^2)))
      CI_ACE1 <- c(D + (s_ace1 - D)/C, D + (s_ace1 - D)*C)
    } else {
      # Estimate equals observed richness: use the variance of D instead.
      i <- c(1:max(x))
      i <- i[unique(x)]
      var_obs <- sum(sapply(i, function(i)f(i, x)*(exp(-i) - exp(-2*i)))) - 
        (sum(sapply(i, function(i)i*exp(-i)*f(i, x))))^2/n
      var_ace1 <- var_obs
      P <- sum(sapply(i, function(i)f(i, x)*exp(-i)/D))
      CI_ACE1 <- c(max(D, D/(1 - P) - z*sqrt(var_obs)/(1 - P)), D/(1 - P) + z*sqrt(var_obs)/(1 - P))  
    }
  } else {
    CI_ACE1 <- c(NaN, NaN)
  }
  table <- c(s_ace1, sqrt(var_ace1), CI_ACE1)
  return(table)
}


# Estimate the community's relative-abundance distribution for bootstrapping
# (abundance data): adjusted relative abundances for the observed species
# followed by equal shares for the estimated unseen species.
# Spec: vector of species abundances.
EstiBootComm.Ind <- function(Spec)
{
  n <- sum(Spec)        # sample size
  f1 <- sum(Spec == 1)  # singletons
  f2 <- sum(Spec == 2)  # doubletons
  # Coverage deficit attributable to rare species and its adjustment factor.
  a <- ifelse(f1 == 0, 0, (n - 1)*f1/((n - 1)*f1 + 2*f2)*f1/n)
  b <- sum(Spec/n*(1 - Spec/n)^n)
  w <- a/b
  # Number of unseen species via the Chao1 estimator.
  f0.hat <- ceiling(ifelse(f2 == 0, (n - 1)/n*f1*(f1 - 1)/2,
                           (n - 1)/n*f1^2/2/f2))
  # Adjusted relative abundances of the observed species.
  seen <- Spec/n*(1 - w*(1 - Spec/n)^n)
  # Each unseen species gets the same estimated relative abundance.
  unseen <- rep(2*f2/((n - 1)*f1 + 2*f2), f0.hat)
  c(seen, unseen)
}

# Chao et al. (2013) nearly unbiased estimator of Shannon entropy.
# X: vector of species abundances.
entropy_MEE_equ <- function(X)
{
  x <- X[X > 0]
  n <- sum(x)
  # Horvitz-Thompson-type component from the observed frequencies.
  UE <- sum(x/n*(digamma(n) - digamma(x)))
  f1 <- sum(x == 1)
  f2 <- sum(x == 2)
  if (f1 == 0) {
    B <- 0
  } else {
    # Correction term driven by the estimated coverage deficit 1 - A.
    A <- 1 - ifelse(f2 > 0, (n - 1)*f1/((n - 1)*f1 + 2*f2),
                    (n - 1)*f1/((n - 1)*f1 + 2))
    B <- f1/n*(1 - A)^(-n + 1)*
      (-log(A) - sum(sapply(1:(n - 1), function(k) 1/k*(1 - A)^k)))
  }
  UE + B
}
# Chao & Shen (2003) coverage-adjusted Horvitz-Thompson estimator of
# Shannon entropy.  X: vector of species abundances.
entropy_HT_equ <- function(X)
{
  x <- X[X > 0]
  n <- sum(x)
  C_head <- 1 - sum(x == 1)/n     # sample coverage estimate
  p_adj <- C_head*(x/n)           # coverage-adjusted relative abundances
  # Each term is weighted by the species' probability of being detected.
  -sum(p_adj*log(p_adj)/(1 - (1 - p_adj)^n))
}
# Jackknife estimator of Shannon entropy (Zahl 1977).
# X: vector of species abundances.
entropy_J1_equ <- function(X)
{
  X <- X[X > 0]
  Y <- X[X > 1]   # species still present after removing one individual
  n <- sum(X)
  mle <- -sum(X/n*log(X/n))   # plug-in entropy of the full sample
  # Leave-one-out contributions: removing an individual of another species
  # vs. removing an individual of the species itself.
  drop_other <- sum((n - X)*(-X/(n - 1)*log(X/(n - 1))))
  drop_self <- sum(-Y*(Y - 1)/(n - 1)*log((Y - 1)/(n - 1)))
  n*mle - (n - 1)/n*drop_other - (n - 1)/n*drop_self
}
# Maximum likelihood (plug-in) estimator of Shannon entropy.
entropy_MLE_equ <- function(X)
{
  X <- X[X > 0]
  p <- X/sum(X)
  -sum(p*log(p))
}
# Bias-corrected MLE of Shannon entropy: the plug-in estimate plus a
# Miller-type correction (S_hat - 1)/(2n), where S_hat is the ACE richness
# estimate from SpecAbunAce().
entropy_MLE_bc_equ <- function(X)
{
  correction <- (SpecAbunAce(X)[1] - 1)/(2*sum(X))
  entropy_MLE_equ(X) + correction
}
# Shannon diversity by five entropy estimators (and their exponentials),
# with bootstrap standard errors and normal-approximation 95% intervals.
# x: abundance vector; boot: number of bootstrap replicates.
# Returns a 10 x 4 matrix: estimate, s.e., lower bound, upper bound.
# Rows 1-5: MLE, bias-corrected MLE, jackknife, Chao-Shen (HT), Chao (2013);
# rows 6-10: the exponentials of the same estimators.
Shannon_index <- function(x, boot = 50)
{
  x <- x[x > 0]
  n <- sum(x)
  # Estimators applied to both the observed data and each bootstrap sample.
  estimators <- list(entropy_MLE_equ, entropy_MLE_bc_equ, entropy_J1_equ,
                     entropy_HT_equ, entropy_MEE_equ)
  est <- sapply(estimators, function(fn) fn(x))
  # Bootstrap from the estimated community (Chao et al. 2013).
  p_hat <- EstiBootComm.Ind(x)
  Boot.X <- rmultinom(boot, n, p_hat)
  boot.est <- lapply(estimators, function(fn) apply(Boot.X, 2, fn))
  sd.ent <- sapply(boot.est, sd)                       # s.e. on entropy scale
  sd.exp <- sapply(boot.est, function(v) sd(exp(v)))   # s.e. on exp scale
  a <- matrix(0, 10, 4)
  for (r in 1:5) {
    a[r, ] <- c(est[r], sd.ent[r],
                est[r] - 1.96*sd.ent[r], est[r] + 1.96*sd.ent[r])
    a[r + 5, ] <- c(exp(est[r]), sd.exp[r],
                    exp(est[r]) - 1.96*sd.exp[r], exp(est[r]) + 1.96*sd.exp[r])
  }
  return(a)
}

# Maximum likelihood estimator of the Simpson concentration index
# (Magurran 1988, Eq. 2.26).
simpson_MLE_equ <- function(X)
{
  X <- X[X > 0]
  p <- X/sum(X)
  sum(p^2)
}
# Minimum variance unbiased estimator of the Simpson index
# (Magurran 1988, Eq. 2.27).
simpson_MVUE_equ <- function(X)
{
  X <- X[X > 0]
  n <- sum(X)
  sum(X*(X - 1))/n/(n - 1)
}

# Simpson index (and its inverse) by two estimators, with analytic standard
# errors that use an ACE estimate of species richness.
# x: abundance vector.  boot: currently unused; kept for interface
# compatibility with the other index functions.
# Returns a 4 x 4 matrix (estimate, s.e., 95% lower, 95% upper) for
# MVUE, MLE, 1/MVUE and 1/MLE.
Simpson_index <- function(x, boot = 200)
{
  x <- x[x > 0]
  n <- sum(x)
  MVUE <- simpson_MVUE_equ(x)
  MLE <- simpson_MLE_equ(x)
  ACE <- SpecAbunAce(x)[1]   # richness estimate entering the variances

  # Per-species deviation terms for the MVUE variance.
  d_mvue <- x*(x - 1)/n/(n - 1) - x*(2*n - 1)/n/(n - 1)*MVUE
  MVUE_sd <- (sum(d_mvue^2) - sum(d_mvue)^2/ACE)^0.5

  # Per-species deviation terms for the MLE variance.
  d_mle <- (x/n)^2 - 2*x/n*MLE
  MLE_sd <- (sum(d_mle^2) - sum(d_mle)^2/ACE)^0.5

  # Standard errors attached to the inverse indices (as in the original
  # SPADE code: the s.e. scaled by the estimate).
  MVUE_recip_sd <- MVUE_sd/MVUE
  MLE_recip_sd <- MLE_sd/MLE

  ci <- function(est, se) c(est, se, est - 1.96*se, est + 1.96*se)
  a <- matrix(0, 4, 4)
  a[1, ] <- ci(MVUE, MVUE_sd)
  a[2, ] <- ci(MLE, MLE_sd)
  a[3, ] <- ci(1/MVUE, MVUE_recip_sd)
  a[4, ] <- ci(1/MLE, MLE_recip_sd)
  return(a)
}

#X=read.table("Data4a.txt")
#Y=read.table("Data4b1_t.txt")
#Diversity(datatype="Abundance",X)
#Diversity(datatype="Frequencies_of_Frequencies",Y)

# Print method for "spadeDiv" objects: basic data information, species
# richness estimates, Shannon and Simpson indices, and the Hill-number
# profile, each followed by explanatory notes.
# x: a "spadeDiv" list; digits/... accepted for print-method compatibility.
# (Fixes a typo in the displayed notes: "An nearly unbiasd" -> "A nearly
# unbiased".)
print.spadeDiv <- function(x, digits = max(3L, getOption("digits") - 3L), ...){
  
  cat("\n(1)  BASIC DATA INFORMATION:\n")
  print(x$BASIC.DATA)
  cat("\n(2)  ESTIMATION OF SPECIES RICHNESS (DIVERSITY OF ORDER 0):\n\n")
  print(x$SPECIES.RICHNESS)
  cat("
      95% Confidence interval: A log-transformation is used so that the lower bound 
      of the resulting interval is at least the number of observed species. 
      See Chao (1987).
      
      Chao1 (Chao, 1984): This approach uses the numbers of singletons and doubletons to
      estimate the number of missing species because missing species information is
      mostly concentrated on those low frequency counts; see Chao (1984), Shen, Chao and Lin (2003)
      and Chao, Shen and Hwang (2006).
      
      Chao1-bc: a bias-corrected form for the Chao1; see Chao (2005).
      
      ACE (Abundance-based Coverage Estimator): A non-parametric estimator proposed by Chao and Lee (1992)
      and Chao, Ma and Yang (1993).  The observed species are separated as rare and abundant groups;
      only the rare group is used to estimate the number of missing species.
      The estimated CV is used to characterize the degree of heterogeneity among species
      discovery probabilities.  See Eq.(2.14) in Chao and Lee (1992) or Eq.(2.2) of Chao et al. (2000).
      
      ACE-1: A modified ACE for highly heterogeneous communities; See Eq.(2.15) of Chao and Lee (1992).
      ")
  cat("\n(3a)  SHANNON INDEX:\n\n")
  print(x$SHANNON.INDEX)
  cat("\n")
  cat(" For a review of the four estimators, see Chao and Shen (2003).\n")
  cat("
      MLE: maximum likelihood estimator.
      MLE_bc: bias-corrected maximum likelihood estimator.
      Jackknife: see Zahl (1977).
      Chao & Shen: based on Horvitz-Thompson estimator and sample coverage method;
      see Chao and Shen (2003). 
	  Chao (2013): A nearly unbiased estimator of entropy, see Chao et al. (2013).
	  Estimated standard error is based on a bootstrap method.
      \n")
  
  cat("(3b)  EXPONENTIAL OF SHANNON INDEX (DIVERSITY OF ORDER 1):\n\n")
  print(x$EXPONENTIAL.OF.SHANNON.INDEX)
  
  cat("\n(4a)  SIMPSON INDEX:\n\n")
  print(x$SIMPSON.INDEX)
  
  cat("
     MVUE: minimum variance unbiased estimator; see Eq. (2.27) of Magurran (1988).
     MLE: maximum likelihood estimator; see Eq. (2.26) of Magurran (1988).
     ")
  
  cat("\n(4b)  INVERSE OF SIMPSON INDEX (DIVERSITY OF ORDER 2):\n\n")
  print(x$INVERSE.OF.SIMPSON.INDEX)
  
  cat("\n(5)  The estimates of Hill's number at order q from 0 to 3\n\n")
  print(x$HILL.NUMBERS)
  
  cat("
     Chao: see Chao and Jost (2015).
	 Empirical: maximum likelihood estimator.
     ")
	 
}



# Chao & Jost (2015) estimator of Hill numbers for abundance data.
# x: vector of species abundances; q: vector of diversity orders.
Chao_Hill_abu <- function(x, q){
  x <- x[x > 0]
  n <- sum(x)
  f1 <- sum(x == 1)
  f2 <- sum(x == 2)
  # Estimated mean detection probability of a singleton species.
  p1 <- ifelse(f2 > 0, 2*f2/((n - 1)*f1 + 2*f2),
               ifelse(f1 > 0, 2/((n - 1)*(f1 - 1) + 2), 1))
  
  Sub <- function(q){
    if (q == 0){
      # Order 0: Chao1 richness estimate.
      sum(x > 0) + (n - 1)/n*ifelse(f2 > 0, f1^2/2/f2, f1*(f1 - 1)/2)
    } else if (q == 1){
      # Order 1: exponential of the Chao et al. (2013) entropy estimator.
      r <- 1:(n - 1)
      A <- sum(x/n*(digamma(n) - digamma(x)))
      B <- ifelse(f1 == 0 | p1 == 1, 0,
                  f1/n*(1 - p1)^(1 - n)*(-log(p1) - sum((1 - p1)^r/r)))
      exp(A + B)
    } else if (abs(q - round(q)) == 0){
      # Other integer orders: unbiased estimator of sum(p_i^q).
      A <- sum(exp(lchoose(x, q) - lchoose(n, q)))
      ifelse(A == 0, NA, A^(1/(1 - q)))
    } else {
      # Non-integer orders: series expansion of Chao & Jost (2015).
      sort.data <- sort(unique(x))
      tab <- table(x)
      term <- sapply(sort.data, function(z){
        k <- 0:(n - z)
        sum(choose(k - q, k)*exp(lchoose(n - k - 1, z - 1) - lchoose(n, z)))
      })
      r <- 0:(n - 1)
      A <- sum(tab*term)
      B <- ifelse(f1 == 0 | p1 == 1, 0,
                  f1/n*(1 - p1)^(1 - n)*(p1^(q - 1) - sum(choose(q - 1, r)*(p1 - 1)^r)))
      (A + B)^(1/(1 - q))
    }
  }
  sapply(q, Sub)
}



# Chao & Jost (2015) estimator of Hill numbers for incidence data.
# x: vector whose first entry is the number of sampling units, followed by
#    the species incidence frequencies; q: vector of diversity orders.
Chao_Hill_inc <- function(x, q){
  n <- x[1]
  x <- x[-1]
  x <- x[x > 0]
  U <- sum(x)   # total number of incidences
  f1 <- sum(x == 1)
  f2 <- sum(x == 2)
  # Estimated mean detection probability of a unique (frequency-1) species.
  p1 <- ifelse(f2 > 0, 2*f2/((n - 1)*f1 + 2*f2),
               ifelse(f1 > 0, 2/((n - 1)*(f1 - 1) + 2), 1))
  r <- 0:(n - 1)
  Sub <- function(q){
    if (q == 0){
      # Order 0: Chao2 richness estimate.
      sum(x > 0) + (n - 1)/n*ifelse(f2 > 0, f1^2/2/f2, f1*(f1 - 1)/2)
    } else if (q == 1){
      A <- sum(x/U*(digamma(n) - digamma(x)))
      B <- ifelse(f1 == 0 | p1 == 1, 0,
                  f1/U*(1 - p1)^(1 - n)*(-log(p1) - sum(sapply(1:(n - 1), function(r)(1 - p1)^r/r))))
      exp(A + B)*U/n
    } else if (abs(q - round(q)) == 0){
      # Other integer orders, rescaled by (n/U)^q for incidence data.
      A <- sum(exp(lchoose(x, q) - lchoose(n, q)))
      ifelse(A == 0, NA, ((n/U)^q*A)^(1/(1 - q)))
    } else {
      # Non-integer orders: series expansion.  NOTE(review): the correction
      # term uses f1/n here (vs f1/U in the q == 1 branch), matching the
      # bootstrap code in Bootstrap.CI -- preserved as-is.
      sort.data <- sort(unique(x))
      tab <- table(x)
      term <- sapply(sort.data, function(z){
        k <- 0:(n - z)
        sum(choose(k - q, k)*exp(lchoose(n - k - 1, z - 1) - lchoose(n, z)))
      })
      A <- sum(tab*term)
      B <- ifelse(f1 == 0 | p1 == 1, 0,
                  f1/n*(1 - p1)^(1 - n)*(p1^(q - 1) - sum(choose(q - 1, r)*(p1 - 1)^r)))
      ((n/U)^q*(A + B))^(1/(1 - q))
    }
  }
  sapply(q, Sub)
}



# Dispatch to the abundance- or incidence-based Chao-Jost Hill number
# estimator according to datatype.
Chao_Hill <- function(x, q, datatype = c("abundance", "incidence")){
  datatype <- match.arg(datatype, c("abundance", "incidence"))
  est <- if (datatype == "abundance") Chao_Hill_abu(x, q) else Chao_Hill_inc(x, q)
  return(est)
}



# Empirical (maximum likelihood) Hill numbers of order(s) q.
# x: abundance vector, or incidence vector whose first entry is the number
#    of sampling units; q: vector of diversity orders.
# datatype: "abundance" (default) or "incidence".
Hill <- function(x, q, datatype = c("abundance", "incidence")){
  # match.arg resolves the default choice vector (and partial matches) to a
  # single value.  The original compared the length-2 default directly with
  # "==", which raises "condition has length > 1" in R >= 4.2 whenever
  # datatype is not supplied.
  datatype <- match.arg(datatype)
  if (datatype == "incidence") {x <- x[-1]}
  p <- x[x > 0]/sum(x)
  Sub <- function(q){
    if (q == 0) sum(p > 0)                   # observed richness
    else if (q == 1) exp(-sum(p*log(p)))     # exponential Shannon entropy
    else exp(1/(1 - q)*log(sum(p^q)))        # general Hill number of order q
  }
  sapply(q, Sub)
}



# Bootstrap community for abundance data: adjusted relative abundances of
# the observed species plus equal shares for the estimated unseen species
# (Chao & Jost 2015).
Bt_prob_abu <- function(x){
  x <- x[x > 0]
  n <- sum(x)
  f1 <- sum(x == 1)
  f2 <- sum(x == 2)
  # Sample coverage estimate.
  C <- 1 - f1/n*ifelse(f2 > 0, (n - 1)*f1/((n - 1)*f1 + 2*f2),
                       ifelse(f1 > 0, (n - 1)*(f1 - 1)/((n - 1)*(f1 - 1) + 2), 0))
  # Factor spreading the coverage deficit over the observed species.
  W <- (1 - C)/sum(x/n*(1 - x/n)^n)
  seen <- x/n*(1 - W*(1 - x/n)^n)
  # Number of unseen species via Chao1; each receives an equal share of 1 - C.
  f0 <- ceiling(ifelse(f2 > 0, (n - 1)/n*f1^2/(2*f2), (n - 1)/n*f1*(f1 - 1)/2))
  p0 <- (1 - C)/f0
  return(c(seen, rep(p0, f0)))
}



# Bootstrap community for incidence data: x[1] is the number of sampling
# units, the rest are incidence frequencies.  Returns detection
# probabilities for observed plus estimated unseen species.
Bt_prob_inc <- function(x){
  n <- x[1]
  y <- x[-1]
  U <- sum(y)         # total number of incidences
  Q1 <- sum(y == 1)   # uniques
  Q2 <- sum(y == 2)   # duplicates
  # Estimated mean detection probability of a unique species.
  A <- ifelse(Q2 > 0, 2*Q2/((n - 1)*Q1 + 2*Q2),
              ifelse(Q1 > 0, 2/((n - 1)*(Q1 - 1) + 2), 1))
  C <- 1 - Q1/U*(1 - A)   # coverage estimate
  # Factor spreading the coverage deficit over the observed species.
  W <- U/n*(1 - C)/sum(y/n*(1 - y/n)^n)
  seen <- y/n*(1 - W*(1 - y/n)^n)
  # Number of unseen species via Chao2; each receives an equal share.
  Q0 <- ceiling(ifelse(Q2 > 0, (n - 1)/n*Q1^2/(2*Q2), (n - 1)/n*Q1*(Q1 - 1)/2))
  p0 <- U/n*(1 - C)/Q0
  return(c(seen, rep(p0, Q0)))
}



# Dispatch to the abundance- or incidence-based bootstrap community
# estimator according to datatype.
Bt_prob <- function(x, datatype = c("abundance", "incidence")){
  datatype <- match.arg(datatype, c("abundance", "incidence"))
  prob <- if (datatype == "abundance") Bt_prob_abu(x) else Bt_prob_inc(x)
  return(prob)
}


# Bootstrap distributions of the empirical (mle) and Chao-Jost (pro) Hill
# numbers, used by ChaoHill() to build confidence intervals.
# x: abundance vector, or incidence vector whose first entry is the number
#    of sampling units; q: vector of diversity orders; B: number of
#    bootstrap replicates; conf: confidence level.
# Returns list(LCI, UCI, se): per-order distances below/above the bootstrap
# mean (rows: mle, pro) and the bootstrap standard errors.
Bootstrap.CI = function(x,q,B = 200,datatype = c("abundance","incidence"),conf = 0.95){
  datatype = match.arg(datatype,c("abundance","incidence"))
  # Estimated community (observed + unseen species) to resample from.
  p.new = Bt_prob(x,datatype)
  n = ifelse(datatype=="abundance",sum(x),x[1])
  # set.seed(456)
  if(datatype=="abundance"){
    data.bt = rmultinom(B,n,p.new)
  }else{
    # Incidence: each species' frequency is resampled independently.
    data.bt = rbinom(length(p.new)*B,n,p.new) 
    data.bt = matrix(data.bt,ncol=B)
    #data.bt = rbind(rep(n,B),data.bt)
    # NOTE(review): the sampling-unit header row is NOT prepended (the rbind
    # above is commented out), yet Hill(..., "incidence") below drops the
    # first entry of each column -- confirm this offset is intended.
  }
  
  # Empirical Hill numbers for every bootstrap replicate (rows: orders q).
  mle = apply(data.bt,2,function(x)Hill(x,q,datatype))
  
  ################ Abundance ###############
  if(datatype == "abundance"){
    # Pre-compute, for every frequency value occurring in any replicate, the
    # series term of the Chao-Jost estimator; shared across replicates.
    d1 = sort(unique(data.bt[data.bt>0]))
    M = max(d1)
    Sub = function(q){
      d2 = sapply(1:length(d1),function(i){
        k=0:(n-d1[i])
        sum(choose(k-q,k)*exp(lchoose(n-k-1,d1[i]-1)-lchoose(n,d1[i])))
      })
      # d3 maps a frequency value to its series term (index = frequency).
      d3 = rep(0,M)
      d3[d1] = d2
      # Chao-Jost estimate of order q for each bootstrap replicate.
      bt.pro = sapply(1:B,function(b){
        f1=sum(data.bt[,b]==1)
        f2=sum(data.bt[,b]==2)
        # A: estimated mean detection probability of a singleton species.
        if(f2>0){
          A=2*f2/((n-1)*f1+2*f2)
        }else if(f1!=0){
          A=2/((n-1)*(f1-1)+2)
        }else{
          A=1
        }
        if(q!=1){
          # Sum the pre-computed terms weighted by the frequency counts.
          t1=table(data.bt[,b][data.bt[,b]>0])
          t2=as.numeric(names(t1))
          aa=d3[t2]
          e1=sum(t1*aa)
          
          if(A==1){e2=0}else{
            r=0:(n-1)
            e2=f1/n*(1-A)^(-n+1)*(A^(q-1)-sum(choose(q-1,r)*(A-1)^r))
          }
          
          if(e1+e2!=0){
            e=(e1+e2)^(1/(1-q))
          }else{e=NA}         
          
        }else{
          # q == 1: exponential of the Chao et al. (2013) entropy estimator.
          y2=data.bt[,b][which(data.bt[,b]>0 & data.bt[,b]<=(n-1))]
          e1=sum(y2/n*(digamma(n)-digamma(y2)))
          
          if(A==1){e2=0}else{
            r=1:(n-1)
            e2=f1/n*(1-A)^(-n+1)*(-log(A)-sum((1-A)^r/r))
          }
          e=exp(e1+e2)
        }
        e
      })
      bt.pro
    }
    pro = t(sapply(q,Sub))
  }else{
    # ################ Incidence ###############
    # Same structure as the abundance branch, with incidence rescaling by
    # (n/U)^q and U/n.
    d1 = sort(unique(data.bt[data.bt>0]))
    M = max(d1)
    Sub = function(q){
      d2 = sapply(1:length(d1),function(i){
        k = 0:(n-d1[i])
        sum(choose(k-q,k)*exp(lchoose(n-k-1,d1[i]-1)-lchoose(n,d1[i])))
      })
      # d3 maps a frequency value to its series term (index = frequency).
      d3 = rep(0,M)
      d3[d1] = d2
      
      bt.pro = sapply(1:B,function(b){
        y2=data.bt[,b];y2 = y2[y2>0]
        U = sum(y2)
        Q1=sum(y2==1)
        Q2=sum(y2==2)
        # A: estimated mean detection probability of a unique species.
        if(Q2>0){
          A=2*Q2/((n-1)*Q1+2*Q2)
        }else if(Q1!=0){
          A=2/((n-1)*(Q1-1)+2)
        }else{
          A=1
        }
        if(q!=1){
          t1=table(data.bt[,b][data.bt[,b]>0])
          t2=as.numeric(names(t1))
          aa=d3[t2]
          e1=sum(t1*aa)
          
          if(A==1){e2=0}else{
            r=0:(n-1)
            e2=Q1/n*(1-A)^(-n+1)*(A^(q-1)-sum(choose(q-1,r)*(A-1)^r))
          }
          
          if(e1+e2!=0){
            e=((n/U)^q*(e1+e2))^(1/(1-q))
          }else{e=NA}         
          
        }else{
          
          e1=sum(y2/U*(digamma(n)-digamma(y2)))
          r =1:(n-1)
          e2 = ifelse(Q1==0|A==1,0,Q1/U*(1-A)^(1-n)*(-log(A)-sum((1-A)^r/r)))
          e = exp(e1+e2)*U/n
                  }
        e
      })
      bt.pro
    }
    
    
    pro = t(sapply(q,Sub))
    
  }
  
  
  #pro = apply(data.bt,2,function(x)Chao_Hill(x,q,datatype))
  
  # Bootstrap means; LCI/UCI below are quantile distances from these means,
  # re-centred on the point estimates by ChaoHill().
  mle.mean = rowMeans(mle)
  pro.mean = rowMeans(pro)
  
  LCI.mle =  -apply(mle,1,function(x)quantile(x,probs = (1-conf)/2)) + mle.mean
  UCI.mle = apply(mle,1,function(x)quantile(x,probs = 1-(1-conf)/2)) - mle.mean
  
  LCI.pro =  -apply(pro,1,function(x)quantile(x,probs = (1-conf)/2)) + pro.mean
  UCI.pro = apply(pro,1,function(x)quantile(x,probs = 1-(1-conf)/2)) - pro.mean
  
  LCI = rbind(LCI.mle,LCI.pro)
  UCI = rbind(UCI.mle,UCI.pro)
  
  # Bootstrap standard errors (NA-tolerant for the estimated profile, which
  # can produce NA when a replicate's estimator is undefined).
  sd.mle = apply(mle,1,sd)
  sd.pro = apply(pro,1,function(x)sd(x,na.rm = T))
  se = rbind(sd.mle,sd.pro)
  
  return(list(LCI=LCI,UCI=UCI,se=se))
  
}


# Hill-number profile over orders seq(from, to, interval): observed
# (empirical) and Chao-Jost estimates with bootstrap standard errors and
# confidence intervals.
# dat: abundance/incidence data as a vector, or a one-row or one-column
#      matrix / data frame; B: bootstrap replicates; conf: confidence level.
# Returns list(EST, SD, LCI, UCI), each a 2 x length(q) table with rows
# "Observed" and "Chao_2013".
ChaoHill <- function(dat, datatype=c("abundance", "incidence"), from=0, to=3, interval=0.1, B=1000, conf=0.95){ 
  datatype <- match.arg(datatype, c("abundance", "incidence"))
  # Accept a one-row or one-column matrix/data frame and flatten it.
  if (is.matrix(dat) || is.data.frame(dat)){
    if (ncol(dat) != 1 && nrow(dat) != 1)
      stop("Error: The data format is wrong.")
    dat <- if (ncol(dat) == 1) dat[, 1] else dat[1, ]
  }
  dat <- as.numeric(dat)
  q <- seq(from, to, by = interval)
  
  # Point estimates: empirical and Chao-Jost.
  MLE <- Hill(dat, q, datatype)
  qD_pro <- Chao_Hill(dat, q, datatype)
  
  # Bootstrap half-widths (distances from the bootstrap mean) and s.e.
  CI_bound <- Bootstrap.CI(dat, q, B, datatype, conf)
  se <- CI_bound$se
  
  # Re-centre the bootstrap distances on the point estimates.
  tab.est <- data.frame(rbind(MLE, qD_pro))
  LCI <- tab.est - CI_bound$LCI
  UCI <- tab.est + CI_bound$UCI
  
  colnames(tab.est) <- colnames(se) <- colnames(LCI) <- colnames(UCI) <- paste("q = ", q, sep="")    
  rownames(tab.est) <- rownames(se) <- rownames(LCI) <- rownames(UCI) <- c("Observed", "Chao_2013")
  return(list(EST = tab.est, 
              SD = se,
              LCI = LCI,
              UCI = UCI))
}




# Draw a shaded confidence region on the current plot: a polygon between
# the lower (LCL) and upper (UCL) curves, sorted along the x axis.
# Extra arguments are passed to polygon() (e.g. col, border).
conf.reg <- function(x_axis, LCL, UCL, ...) {
  ord <- order(x_axis)
  xs <- x_axis[ord]
  polygon(c(xs, rev(xs)), c(LCL[ord], rev(UCL[ord])), ...)
}


# Flatten a ChaoHill() result into a long-format data frame with one row
# per (order q, method) pair, rounding the numeric columns to 3 decimals.
reshapeChaoHill <- function(out){
  # Row-major flatten: all "Observed" values first, then all "Chao_2013".
  flatten <- function(m) c(t(m)[, 1], t(m)[, 2])
  tab <- data.frame(q = as.numeric(substring(colnames(out$EST), 5)),
                    method = rep(rownames(out$EST), each = ncol(out$EST)),
                    est = round(flatten(out$EST), 3),
                    se = round(flatten(out$SD), 3),
                    qD.95.LCL = round(flatten(out$LCI), 3),
                    qD.95.UCL = round(flatten(out$UCI), 3))
  tab
}


# Incidence-data diversity profile: Hill numbers of orders 0 to 3 in steps
# of 0.25, with Chao-Jost estimates and bootstrap standard errors (B = 50).
# X: a one-column data frame / matrix of incidence frequencies whose first
#    entry is the number of sampling units.
# Returns a "spadeDiv_Inc" object holding the HILL.NUMBERS table.
Diversity_Inc <- function(X)
{
  X <- X[, 1]
  
  tab <- reshapeChaoHill(ChaoHill(X, datatype = "incidence", from = 0, to = 3,
                                  interval = 0.25, B = 50, conf = 0.95))
  # Rows 1:13 are "Observed", rows 14:26 are "Chao_2013"; place the
  # estimates and their standard errors side by side.
  Hill <- cbind(tab[1:13, 1], tab[14:26, 3], tab[1:13, 3], tab[14:26, 4], tab[1:13, 4])
  Hill <- data.frame(round(Hill, 3))
  colnames(Hill) <- c("q", "Chao", "Empirical", "Chao(s.e.)", "Empirical(s.e.)")
  
  z <- list("HILL.NUMBERS" = Hill)
  class(z) <- c("spadeDiv_Inc")
  return(z)
}
#X=read.table("Data4a.txt")
#Y=read.table("Data4b1_t.txt")
#Diversity(datatype="Abundance",X)
#Diversity(datatype="Frequencies_of_Frequencies",Y)

# Print method for "spadeDiv_Inc" objects: displays the Hill-number profile
# table produced by Diversity_Inc(), followed by a short legend.
# x: a "spadeDiv_Inc" list; digits/... accepted for print-method
# compatibility but not used by this method.
print.spadeDiv_Inc <- function(x, digits = max(3L, getOption("digits") - 3L), ...){
  
  
  cat("\n(5)  The estimates of Hill's number at order q from 0 to 3\n\n")
  print(x$HILL.NUMBERS)
  
  # Multi-line legend is printed verbatim, indentation included.
  cat("
      Chao: see Chao and Jost (2015).
      Empirical: maximum likelihood estimator.
      ")
  
}
# JohnsonHsieh/SpadeR documentation built on May 7, 2019, 12:02 p.m.