# R/PhD.m.R In iNextPD: Interpolation and Extrapolation for Phylogenetic Diversity

#### Defines functions PhD.mPhD.m.RcppPhD.m.mleStirling2

```Stirling2 <- function(n,m)
{
## Purpose:  Stirling Numbers of the 2-nd kind
##     S^{(m)}_n = number of ways of partitioning a set of
##                      \$n\$ elements into \$m\$ non-empty subsets
## Author: Martin Maechler, Date:  May 28 1992, 23:42
## ----------------------------------------------------------------
## Abramowitz/Stegun: 24,1,4 (p. 824-5 ; Table 24.4, p.835)
## Closed Form : p.824 "C."
## ----------------------------------------------------------------

if (0 > m || m > n) stop("'m' must be in 0..n !")
k <- 0:m
sig <- rep(c(1,-1)*(-1)^m, length= m+1)# 1 for m=0; -1 1 (m=1)
## The following gives rounding errors for (25,5) :
## r <- sum( sig * k^n /(gamma(k+1)*gamma(m+1-k)) )
ga <- gamma(k+1)
round(sum( sig * k^n /(ga * rev(ga))))
}

# accumulation function for qPD(m)
PhD.m.mle <- function(p, labels=names(p), phy, q, m, datatype="abundance"){
if (!inherits(phy, "phylog"))
stop("Non convenient data")
tmp <- ExpandData_(p, labels, phy, datatype)
a <- tmp\$branch_abun
L <- tmp\$branch_length
T <- sum(L*a)
sub <- function(m){
if(q==0) sum(L*(1-(1-a)^m))
else if(q==1) {
a1 <- a[a>0 & a<1]
sub1 <- function(k) {
#sum(L[a>0 & a<1]*choose(m,k)*a1^k*(1-a1)^(m-k))
sum(L[a>0 & a<1]*exp(lchoose(m,k)+log(a1)*k+log(1-a1)*(m-k)))
}
k <- 1:m
gk <- sapply(k, sub1)
Hm <- sum(-k/m/T*log(k/m/T)*gk)
exp(Hm)
} else if(q==2) {
lam <- sum(L*((m-1)/m*a^2+1/m*a))
T/(lam/T)
} else{
sub2 <- function(x, j){
a1 <- Stirling2(q, j) * exp(lgamma(m+1) - lgamma(m-j+1) - q*log(m))
b1 <- x^j
a1*b1
}
tmp <- rep(0, length(a))
for(i in 1:length(a)){
tmp[i] <- L[i] * sum(sapply(1:q, function(j) sub2(a[i], j)))
}
lam <- sum(tmp)
T*(lam/T)^(1/(1-q))
}
}
sapply(m, sub)
}

PhD.m.Rcpp <- function(n, x, U, L, q, m){
#require("Rcpp")
# no visible binding for global variable [variable name]
RPD <- NULL
tmp <- as.matrix(cbind(U,L))
data <- x
t_bar <- sum(tmp[,1] * tmp[,2] / n)
tmp <- as.matrix(tmp)
Rcpp::cppFunction('
double RPD(NumericMatrix x , int n  , int m , int q) {
int nrow = x.nrow();
double tbar=0;
NumericVector ghat(m);
for (int i = 0; i < nrow; i++) {
tbar += x(i, 0)*x(i, 1)/n;
}
for (int k = 0; k < m ; k++) {
for (int i = 0; i < nrow; i++) {
if ( x(i,0) >= k+1 && x(i,0) <= n-m+k+1 )
{
ghat[k] +=  x(i,1)*exp(Rf_lchoose(x(i,0), k+1)+Rf_lchoose(n-x(i,0), m-k-1)-Rf_lchoose(n, m)) ;
//ghat[k] +=  x(i,1)*exp(-Rf_lbeta(x(i,0)-k, k+2)-log(x(i,0)+1)-Rf_lbeta(n-x(i,0)-m+k+2, m-k)-log(n-x(i,0)+1)+Rf_lbeta(n-m+1, m+1)+log(n+1)) ;
}
else
{
ghat[k] += 0 ;
}
}
}
double out=0;
if(q == 0){
for(int j = 0; j < m; j++) {
out += ghat[j];
}
}
if(q == 1){
for (int j = 0; j < m; j++) {
out += -( (j+1) / (m*tbar) ) * log ( (j+1) / (m*tbar) ) * ghat[j]  ;
}
out = exp(out) ;
}
if(q == 2){
for (int j = 0; j < m; j++) {
out += pow( ( (j+1) / (m*tbar) ),2) * ghat[j] ;
}
out = 1 / out ;
}
return out ;
}')

RPD_m <- RPD(tmp, n, n-1, q)
obs <- RPD(tmp, n, n, q)

#asymptotic value
Asy <- function(q){

f1 <- sum(tmp[, 1] == 1)
f2 <- sum(tmp[, 1] == 2)
g1 <- sum(tmp[tmp[, 1] == 1, 2])
g2 <- sum(tmp[tmp[, 1] == 2, 2])
if(q==0){
g0_hat <- ifelse( g2>((g1*f2)/(2*f1)) , ((n-1)/n)*(g1^2/(2*g2)) , ((n-1)/n)*(g1*(f1-1)/(2*(f2+1))) )
asy <- obs+g0_hat
}
if(q==1){
# A <- 0
# if(f2>0){
#   A <- 2 * f2 / ((n-1) * f1 + 2 * f2)
# }else if(f2 == 0&&f1 > 0){
#   A <- 2/((n-1)*(f1-1)+2)
# }else{
#   A <- 0
# }
g0.hat <- ifelse((2*g2*sum(x==1))>g1*sum(x==2),
(n-1)/n*g1^2/2/g2,
(n-1)/n*g1*(sum(x==1)-1)/2/(sum(x==2)+1))
A <- g1/(n*g0.hat+g1)

i <- 1:(n-1)
qq <- sum( ((1-A)^i)/i )
h2 <- (g1/n)*((1-A)^(-n+1))*(-log(A)-qq)
tmp2 <- tmp[which((1 <= tmp[, 1] ) & ( tmp[, 1] <= ( n-1 ) ) ),]
h1 <- sum(apply(X = tmp2, MARGIN = 1, FUN = function(x){
a <- sum( 1/(x[1]:(n-1)) )
x[2]*x[1]*a/n
}))
h <- h1+h2
asy <- t_bar*exp(h/t_bar)
}
if(q==2){
tmp3 <- tmp[tmp[, 1] >= 2, ]
asy <- (sum( tmp3[, 2]*tmp3[, 1]*(tmp3[, 1]-1) / ( ((t_bar)^2)*n*(n-1) ) ) )^(-1)
}
return(asy)
}
asy <- Asy(q)
#beta
beta <- (obs-RPD_m)/(asy-RPD_m)

#Extrapolation
EPD <- function(m,q){
m <- m-n
if( q == 0 | q == 1 ){
EPD <- obs+(asy-obs)*(1-(1-beta)^m)
}
if( q == 2 ){
g <- sum( (tmp[,2]/(t_bar)^2)*((1/(n+m))*(tmp[,1]/n)+((n+m-1)/(n+m))*(tmp[,1]*(tmp[,1]-1)/(n*(n-1)))) )
EPD <- 1/g
}
return(EPD)
}
PD <- sapply(m,FUN = function(m){
if(m<n){
if(identical(m, floor(m))) RPD(tmp,n,m,q)
else{
a <- RPD(tmp,n,floor(m),q)
b <- RPD(tmp,n,floor(m)+1,q)
dat <- data.frame(x=c(floor(m), floor(m)+1),y=c(a,b))
fit <- glm(y~x, data=dat, family=quasipoisson(link = "log"))
a[1] <- predict(fit, data.frame(x=m), type = "response")
a
}
}else if(m==n){
obs
}else{
EPD(m,q)
}
})
return(PD)
}

# iNEXTPD estimator for q = 0, 1, 2, ...
PhD.m <- function(n, x, U, L, q, m){
if(q%in%c(0,1,2)){
PhD.m.Rcpp(n, x, U, L, q, m)
}else{
L.obs <- L[U>0]
U.obs <- U[U>0]
tbar <- sum(L.obs*U.obs/n)
Funq <- function(m){
sub <- function(x, j){
a <- Stirling2(q, j) * exp(lgamma(m+1) - lgamma(m-j+1) - q*log(m))
if(x > j) {
b <- exp(lgamma(x+1) - lgamma(x-j+1) - lgamma(n+1) + lgamma(n-j+1))
} else {b <- 0}
a*b
}

tmp <- rep(0, length(U.obs))
for(i in 1:length(U.obs)){
tmp[i] <- L.obs[i] * sum(sapply(1:q, function(j) sub(U.obs[i], j)))
}
lam <- sum(tmp)
tbar*(lam/tbar)^(1/(1-q))
}
sapply(m, Funq)
}
}
```

## Try the iNextPD package in your browser

Any scripts or data that you put into this service are public.

iNextPD documentation built on May 2, 2019, 3:31 a.m.