# DataInfo.link ----------------------
#' Exhibit basic data information
#'
#' \code{DataInfo.link}: exhibits basic data information
#'
#' @param data a vector/matrix/list of species abundances or incidence frequencies.\cr If \code{datatype = "incidence"},
#' then the first entry of the input data must be total number of sampling units, followed by species incidence frequencies.
#' @param diversity a choice of three-level diversity: 'TD' = 'Taxonomic', 'PD' = 'Phylogenetic', and 'FD' = 'Functional' under certain threshold.
#' @param datatype data type of input data: individual-based abundance data (\code{datatype = "abundance"}) or species by sampling-units incidence matrix (\code{datatype = "incidence_raw"}).#' @return a data.frame of basic data information including sample size, observed species richness, sample coverage estimate, and the first ten abundance/incidence frequency counts.
#' @import tibble
#' @examples
#' \dontrun{
#' data(puerto.rico)
#' DataInfo.link(puerto.rico$data, diversity = 'TD', datatype="abundance")
#' DataInfo.link(puerto.rico$data, diversity = 'PD', datatype="abundance",
#' row.tree = puerto.rico$row.tree, col.tree = puerto.rico$col.tree)
#' }
#' @export
DataInfo.link <- function(data, diversity = 'TD', datatype = "abundance", row.tree = NULL,col.tree = NULL){
if(diversity == 'PD'){
if(!is.null(row.tree)){row.tree$tip.label = gsub('\\.', '_',row.tree$tip.label)}
if(!is.null(col.tree)){col.tree$tip.label = gsub('\\.', '_',col.tree$tip.label)}
table <- lapply(data, function(y){datainfphy(data = y, datatype = datatype,
row.tree = row.tree,col.tree = col.tree)})%>%
do.call(rbind,.)
rownames(table) <- names(data)
table = tibble::rownames_to_column(table, var = "Networks")
}else if(diversity == 'TD'){
table <- lapply(data, function(y){datainf(data = y, datatype = datatype)})%>%do.call(rbind,.)
rownames(table) <- names(data)
table = tibble::rownames_to_column(table, var = "Networks")
}
return(table)
}
# SC.link ----
#' Sample Completeness main function
#'
#' \code{SC.link} Estimation of Sample Completeness with order q
#'
#' @param data a matrix/data.frame/list/vector of abundances-based/incidences-based species data.\cr
#' @param q a integer vector for the order of Hill number\cr
#' @param datatype data type of input data: individual-based abundance data (\code{datatype = "abundance"}) or
#' species by sampling-units incidence matrix (\code{datatype = "incidence_raw"}).#'
#' @param nboot a positive integer specifying the number of bootstrap replications when assessing
#' sampling uncertainty and constructing confidence intervals. Enter 0 to skip the bootstrap procedures. Default is 50
#' @param conf positive number < 1 specifying the level of confidence interval, default is 0.95.\cr\cr
#' @return a matrix of estimated sample completeness with order q: \cr\cr
#'
#' @examples
#' data(Norfolk)
#' output = SC.link(Norfolk)
#' ggSC.link(output)
#'
#' @references
#' Chao,A.,Y.Kubota,D.Zelený,C.-H.Chiu.
#' Quantifying sample completeness and comparing diversities among assemblages.
#' @export
SC.link <- function(data, q = seq(0, 2, 0.2), datatype = "abundance", nboot = 30, conf = 0.95){
data_long <- lapply(data, function(tab){
as.matrix(tab)%>%c()}
)
res = iNEXT.4steps::SC(data = data_long, q = q, datatype = datatype, nboot = nboot, conf = conf)
return(res)
}
# ggSC.link -------------------------------------------------------------------
#' ggplot for Sample Completeness
#'
#' \code{ggSC.link} The figure for estimation of Sample Completeness with order q
#'
#' @param outcome a table generated from SC function
#' @return a figure of estimated sample completeness with order q
#'
#' @examples
#' data(Norfolk)
#' output = SC.link(Norfolk)
#' ggSC.link(output)
#' @export
ggSC.link <- function(outcome){
cbPalette <- rev(c("#999999", "#E69F00", "#56B4E9", "#009E73",
"#330066", "#CC79A7", "#0072B2", "#D55E00"))
ggplot(outcome, aes(x = Order.q, y = Estimate.SC, colour = Assemblage)) +
geom_line(size = 1.2) + scale_colour_manual(values = cbPalette) +
geom_ribbon(aes(ymin = SC.LCL, ymax = SC.UCL, fill = Assemblage),
alpha = 0.2, linetype = 0) + scale_fill_manual(values = cbPalette) +
labs(x = "Order q", y = "Sample completeness") + theme(text = element_text(size = 12)) +
theme(legend.position = "bottom", legend.box = "vertical",
legend.key.width = unit(1.2, "cm"), legend.title = element_blank())
}
# iNEXT.link -------------------------------------------------------------------
#' Interpolation (rarefaction) and extrapolation of Chao et al.’s (2021) network diversity and mean network diversity
#' Function \code{iNEXT.link} computes network diversity estimates for rarefied samples and extrapolated samples
#' along with confidence intervals and related coverage estimates based on Chao et al.’s (2021) network
#' diversity (ND)
#' @param data a matrix/data.frame of species
#' abundances (for abundance data) or species-by-site incidence raw matrix/data.frame (for incidence data).\cr
#' Abundance data: a species-by-site matrix/data.frame of species abundances. The row (species) names of
#' data must match the species names in the phylogenetic tree and thus cannot be missing.\cr
#' Incidence raw data: species-by-site raw incidence matrix/data.frame. When there are N assemblages
#' and thus N matrices, users must first merge the N matrices by species identity to obtain a large
#' merged incidence matrix, where the rows of the matrix refer to all species presented in the merged
#' data. The row (species) names of data must match the species names in the phylogenetic tree and
#' thus cannot be missing.
#' @param diversity a choice of three-level diversity: 'TD' = 'Taxonomic', 'PD' = 'Phylogenetic', and 'FD' = 'Functional' under certain threshold.
#' @param datatype data type of input data: individual-based abundance data (\code{datatype = "abundance"}) or species by sampling-units incidence matrix (\code{datatype = "incidence_raw"}).
#' @param q a nonnegative value or sequence specifying the diversity order. Default is \code{c(0,1,2)}.
#' @param type desired diversity type: \code{type = "PD"} for Chao et al. (2010) phylogenetic diversity
#' and \code{type = "meanPD"} for mean phylogenetic diversity (phylogenetic Hill number). Default is \code{"PD"}.
#' @param endpoint a positive integer specifying the endpoint for the rarefaction and extrapolation range.
#' If \code{NULL}, then \code{endpoint} = double of the reference sample size in each assemblage. It is ignored if \code{size} is given.
#' @param knots a positive integer specifying the number of equally-spaced knots between 1 and the \code{endpoint}. Default is 40.
#' @param size a sequence of positive integers specifying the sample sizes for which PD or meanPD estimates will be calculated.
#' If \code{NULL}, then estimates will be calculated for those sample sizes determined by the specified/default \code{endpoint}
#' and \code{knots}.
#' @param PDtype Select phylogenetic diversity type: \code{PDtype = "PD"} for Chao et al. (2010) phylogenetic diversity and
#' \code{PDtype = "meanPD"} for mean phylogenetic diversity (phylogenetic Hill number).
#' It will be used when \code{diversity = 'PD'}. Default is \code{"PD"}.
#' @param nboot a positive integer specifying the number of bootstrap replications when assessing
#' sampling uncertainty and constructing confidence intervals. Enter 0 to skip the bootstrap procedures. Default is 50
#' @param conf a positive number < 1 specifying the level of confidence interval. Default is 0.95.
#' @param col.tree phylogenetic tree of column assemblage in interaction matrix
#' @param row.tree phylogenetic tree of row assemblage in interaction matrix.
#' @import ape ggplot2 dplyr tidytree stats chaoUtility phytools iNEXT.3D
#' @importFrom phyclust get.rooted.tree.height
#' @return
#' \itemize{
#' \item{\code{$DataInfo}: A dataframe summarizing data information}
#' \item{\code{$iNextEst}: coverage-based diversity estimates along with confidence intervals}
#' (if \code{nboot > 0}) for showing diversity estimates for rarefied and extrapolated samples along with related statistics;
#' \item{\code{$AsyEst}: for
#' showing asymptotic diversity estimates along with related statistics.}
#' }
#'
#' @examples
#' \dontrun{
#' data(puerto.rico)
#' iNEXT.link(puerto.rico$data, diversity = 'TD', datatype="abundance")
#' iNEXT.link(puerto.rico$data, diversity = 'PD', datatype="abundance",
#' row.tree = puerto.rico$row.tree, col.tree = puerto.rico$col.tree)
#' }
#' @references
#' Chao, A., Chiu C.-H. and Jost, L. (2010). Phylogenetic diversity measures based on Hill numbers. \emph{Philosophical Transactions of the Royal Society B.}, 365, 3599-3609. \cr\cr
#' Chao, A., Chiu, C.-H., Hsieh, T. C., Davis, T., Nipperess, D., and Faith, D. (2015). Rarefaction and extrapolation of phylogenetic diversity. \emph{Methods in Ecology and Evolution}, 6, 380-388.\cr\cr
#' Chao, A., Chiu C.-H. and Jost L. (2016). Phylogenetic diversity measures and their decomposition: a framework based on Hill numbers. pp. 141-172 in Pellens R. and Grandcolas P. (eds)
#' \emph{Biodiversity Conservation and Phylogenetic Systematics: Preserving our Evolutionary Heritage in an Extinction Crisis}, Springer. \cr\cr
#' Hsieh, T. C. and Chao, A. (2017). Rarefaction and extrapolation: making fair comparison of abundance-sensitive phylogenetic diversity among multiple assemblages. \emph{Systematic Biology}, 66, 100-111.
#' @export
iNEXT.link <- function(data, diversity = 'TD', q = c(0,1,2), datatype = "abundance", size = NULL, nT = NULL,
endpoint = NULL, knots = 40, conf = 0.95, nboot = 30,
row.tree = NULL, col.tree = NULL, PDtype = 'meanPD'
){
# User interface
TYPE <- c("abundance", "incidence", "incidence_freq", "incidence_raw")
if(is.na(pmatch(datatype, TYPE)))
stop("invalid datatype")
if(pmatch(datatype, TYPE) == -1)
stop("ambiguous datatype")
datatype <- match.arg(datatype, TYPE)
class_x <- class(data)[1]
if(datatype == "incidence"){
stop('datatype="incidence" was no longer supported after v2.0.8,
please try datatype="incidence_freq".')
}
if(datatype=="incidence_freq") datatype <- "incidence"
if(datatype=="incidence_raw"){
if(class_x=="list"){
data <- lapply(data, as.incfreq)
}else{
data <- as.incfreq(data)
}
datatype <- "incidence"
}
if ( sum(!(diversity %in% c('TD', 'PD', 'FD', 'AUC')))>0 ){stop("Please select one of below diversity: 'TD', 'PD', 'FD'",
call. = FALSE)}
res = list()
if(diversity == 'TD'){
## 1. datainfo
datainfo = DataInfo.link(data = data, diversity = diversity, datatype = datatype)
## 2. iNterpolation/ Extrapolation
data_long <- lapply(data, function(tab){
as.matrix(tab)%>%c()}
)
INEXT_est <- iNEXT.3D::iNEXT3D(data_long, diversity = 'TD', q = q,conf = conf,
nboot = nboot, knots = knots, endpoint = endpoint, size = size)
res[[1]] = datainfo
res[[2]] = INEXT_est$iNextEst
res[[3]] = INEXT_est$AsyEst
names(res) = c("DataInfo", "iNextEst", "AsyEst")
}else if(diversity == 'PD'){
if(!is.null(row.tree)){row.tree$tip.label = gsub('\\.', '_',row.tree$tip.label)}
if(!is.null(col.tree)){col.tree$tip.label = gsub('\\.', '_',col.tree$tip.label)}
## 1. datainfo
datainfo = DataInfo.link(data = data, diversity = diversity, datatype = datatype,
row.tree = row.tree, col.tree = col.tree)
## 2. iNterpolation/ Extrapolation
data_long <- lapply(data, function(tab){
as.matrix(tab)%>%c()}
## 拉長
)
NetiNE <- get.netphydiv_iNE(data = data, q = q,B = nboot,row.tree = row.tree,
col.tree = col.tree,conf = conf, knots = knots, PDtype = 'PD')
## 3. empirical and asymptotic diversity
NetDiv <- get.netphydiv(data = data,q = q,B = nboot,row.tree = row.tree,col.tree = col.tree,conf = conf, PDtype = 'PD')
res[[1]] = datainfo
res[[2]] = NetiNE
res[[3]] = NetDiv
names(res) = c("DataInfo", "iNextEst", "AsyEst")
}
return(res)
}
# iNEXTbeta.link ---------------------------
#' Interpolation (rarefaction) and extrapolation of Chao et al.’s (2021) network diversity and mean network diversity
#' Function \code{iNEXTbeta.link} Interpolation and extrapolation of Beta diversity with order q
#'
#' @param data a matrix/data.frame of species abundances (for abundance data) or species-by-site incidence raw matrix/data.frame (for incidence data).\cr
#' Abundance data: a species-by-site matrix/data.frame of species abundances. The row (species) names of
#' data must match the species names in the phylogenetic tree and thus cannot be missing.\cr
#' Incidence raw data: species-by-site raw incidence matrix/data.frame. When there are N assemblages
#' and thus N matrices, users must first merge the N matrices by species identity to obtain a large
#' merged incidence matrix, where the rows of the matrix refer to all species presented in the merged
#' data. The row (species) names of data must match the species names in the phylogenetic tree and
#' thus cannot be missing.
#' @param diversity a choice of three-level diversity: 'TD' = 'Taxonomic', 'PD' = 'Phylogenetic',
#' and 'FD' = 'Functional' under certain threshold.
#' @param datatype data type of input data: individual-based abundance data (\code{datatype = "abundance"}) or species by sampling-units incidence matrix (\code{datatype = "incidence_raw"}).
#' @param q a nonnegative value or sequence specifying the diversity order. Default is \code{c(0,1,2)}.
#' @param type desired diversity type: \code{type = "PD"} for Chao et al. (2010) phylogenetic diversity
#' and \code{type = "meanPD"} for mean phylogenetic diversity (phylogenetic Hill number). Default is \code{"PD"}.
#' @param nboot a positive integer specifying the number of bootstrap replications when assessing
#' sampling uncertainty and constructing confidence intervals. Enter 0 to skip the bootstrap procedures. Default is 50
#' @param conf a positive number < 1 specifying the level of confidence interval. Default is 0.95.
#' @import tidyverse
#' @import magrittr
#' @import ggplot2
#' @import abind
#' @import ape
#' @import phytools
#' @import phyclust
#' @import tidytree
#' @import colorRamps
#' @import iNEXT.3D
#' @import future.apply
#' @import ade4
#' @import tidyr
#' @return A list of seven lists with three-diversity and four-dissimilarity.
#' @examples
#' \dontrun{
#' # example
#' data(puerto.rico)
#' beta1 = iNEXTbeta.link(data = puerto.rico$data, level = seq(0.5, 0.9, 0.4), datatype='abundance',q = c(0, 1, 2),
#' diversity = 'TD', nboot = 10, conf = 0.95)
#' beta2 = iNEXTbeta.link(networks = puerto.rico$data, level = seq(0.5, 0.9, 0.4), datatype='abundance',q = c(0, 1, 2),
#' data = 'PD', nboot = 10, conf = 0.95,
#' row.tree = puerto.rico$row.tree, col.tree = puerto.rico$col.tree)
#' }
#' @references
#' Chao, A., Chazdon, R. L., Colwell, R. K. and Shen, T.-J.(2005). A new statistical approach for assessing similarity of species composition with incidence and abundance data. Ecology Letters 8, 148-159. (pdf file) Spanish translation in pp. 85-96 of Halffter, G. Soberon, J., Koleff, P. and Melic, A. (eds) 2005 Sobre Diversidad Biologica: el Sognificado de las Diversidades Alfa, Beta y Gamma. m3m-Monografias 3ercer Milenio, vol. 4, SEA, CONABIO, Grupo DIVERSITAS & CONACYT, Zaragoza. IV +242 pp.
#' Chiu, C.-H., Jost, L. and Chao*, A. (2014). Phylogenetic beta diversity, similarity, and differentiation measures based on Hill numbers. Ecological Monographs 84, 21-44.
#' @export
iNEXTbeta.link = function(data, diversity = 'TD', level = seq(0.5, 1, 0.5), datatype=c('abundance', 'incidence_raw'),
q = c(0, 1, 2),nboot = 20, conf = 0.95,
row.tree = NULL,col.tree = NULL){
if(class(data[[1]]) == 'data.frame' ){data = list(data); }
combined_list = lapply(data, function(y){
long = iNEXT.link:::ready4beta(y)%>%filter_all(any_vars(. != 0))
rownames(long) = rownames(long)%>%gsub('\\.','_',.)
return(long)
})
if(diversity == 'TD'){
dissimilarity <- iNEXTbeta(data = combined_list, diversity = 'TD',level = level, datatype = datatype,
q =q ,nboot = nboot, conf = conf)
}
else if(diversity == 'PD'){
if(!is.null(row.tree)){row.tree$tip.label = gsub('\\.', '_',row.tree$tip.label)}
if(!is.null(col.tree)){col.tree$tip.label = gsub('\\.', '_',col.tree$tip.label)}
dissimilarity = iNEXTbeta.PDlink(data = combined_list, level = level, datatype = datatype,
q =q ,row.tree = row.tree,col.tree = col.tree, nboot = nboot)
}
return(dissimilarity)
}
# ggiNEXT.link -------------------------------------------------------------------
#' ggplot2 extension for outcome from \code{iNEXT.link}
#'
#' \code{ggiNEXT.link}: the \code{\link[ggplot2]{ggplot}} extension for \code{\link{iNEXT.link}} Object to plot sample-size- and coverage-based rarefaction/extrapolation curves along with a bridging sample completeness curve
#' @param outcome a list object computed by \code{\link{iNEXT.link}}.
#' @param type three types of plots: sample-size-based rarefaction/extrapolation curve (\code{type = 1});
#' sample completeness curve (\code{type = 2}); coverage-based rarefaction/extrapolation curve (\code{type = 3}).
#' @param se a logical variable to display confidence interval around the estimated sampling curve.
#' @param facet.var create a separate plot for each value of a specified variable:
#' no separation \cr (\code{facet.var="None"});
#' a separate plot for each diversity order (\code{facet.var="Order.q"});
#' a separate plot for each assemblage (\code{facet.var="Assemblage"});
#' a separate plot for each combination of order x assemblage (\code{facet.var="Both"}).
#' @param color.var create curves in different colors for values of a specified variable:
#' all curves are in the same color (\code{color.var="None"});
#' use different colors for diversity orders (\code{color.var="Order.q"});
#' use different colors for sites (\code{color.var="Assemblage"});
#' use different colors for combinations of order x assemblage (\code{color.var="Both"}).
#' @param grey a logical variable to display grey and white ggplot2 theme.
#' @param ... other arguments passed on to methods. Not currently used.
#' @return a ggplot2 object
#' @examples
#' \dontrun{
#' data(Norfolk)
#' out1 <- iNEXT.link(Norfolk, diversity = 'TD',datatype = "abundance", nboot = 0)
#' ggiNEXT.link(outcome = out1, type = 1)
#' ggiNEXT.link(outcome = out1, type = 2)
#' ggiNEXT.link(outcome = out1, type = 3)
#'
#' #' data(puerto.rico)
#' out2 <- iNEXT.link(puerto.rico$data, diversity = 'PD', datatype="abundance",
#' row.tree = puerto.rico$row.tree, col.tree = puerto.rico$col.tree)
#' ggiNEXT.link(outcome = out2, type = 1)
#' ggiNEXT.link(outcome = out2, type = 2)
#' ggiNEXT.link(outcome = out2, type = 3)
#' }
#' @export
ggiNEXT.link <- function(outcome, diversity = 'TD', type = 1,se = TRUE,facet.var = "Assemblage",
color.var = "Order.q", text.size = 12, stript.size = 12){
if(diversity == 'TD'){
iNEXT.3D::ggiNEXT3D(outcome, type = type)
}else if(diversity == 'PD'){
# output = outcome
# output$iNextEst$size_based = output$iNextEst$size_based%>%
# rename('qD'="PD", 'qD.UCL'="PD.UCL",'qD.LCL'="PD.LCL")
# iNEXT.3D::ggiNEXT3D(output, type = 1)
iNE <- outcome$iNextEst
iNE.sub <- iNE[iNE$method == "observed",]
iNE[iNE$method == "observed",]$method <- "interpolated"
ex <- iNE.sub
ex$method <- "extrapolated"
iNE <- rbind(iNE,ex)
iNE$method <- factor(iNE$method,levels = c("interpolated","extrapolated"))
iNE$Order.q = paste0("q = ", iNE$Order.q)
iNE.sub$Order.q = paste0("q = ", iNE.sub$Order.q)
if(type == 1){
# size-based
plot <- ggplot(iNE, aes(x = m,y = PD)) + geom_line(aes(color = Region,linetype = method),size = 1.2) +
facet_wrap(~Order.q, scales = "free") +
# facet_grid()
geom_ribbon(aes(x = m,ymax = PD.UCL ,ymin = PD.LCL,fill = Region),alpha = 0.25) + theme_bw()+
geom_point(aes(x = m,y = PD ,color = Region,shape = Region),size = 5,data = iNE.sub) +
xlab("Number of individuals")
}else if(type == 2){
# output <- outcome$size_based
# if (length(unique(output$Order.q)) > 1) output <- subset(output, Order.q == unique(output$Order.q)[1])
# output$y.lwr <- output$SC.LCL
# output$y.upr <- output$SC.UCL
# id <- match(c(x_name, "Method", "SC", "SC.LCL", "SC.UCL", "Assemblage", "Order.q", "qD", "qD.LCL", "qD.UCL"), names(output), nomatch = 0)
# output[,1:10] <- output[, id]
#
# xlab_name <- paste0("Number of ", xlab_name)
# ylab_name <- "Sample Coverage"
}else if(type == 3){
# coverage-based
plot <- ggplot(iNE) + geom_line(aes(x = SC,y = PD,color = Region,linetype = method),size = 1.2) +
facet_wrap(~Order.q, scales = "free") +
geom_ribbon(aes(x = SC,ymax = PD.UCL ,ymin = PD.LCL,fill = Region),alpha = 0.25) +
geom_point(aes(x = SC,y = PD ,color = Region,shape = Region),size = 5,data = iNE.sub) + theme_bw() +
xlab("Sample coverage")
}
plot +
theme(legend.position = "bottom",
legend.title=element_blank(), strip.text = element_text(size = stript.size),
text=element_text(size=text.size),
legend.key.width = unit(0.8,"cm")) +
labs(y = "Phylogenetic network diversity", lty = "Method")
# if(grey){
# g <- g +
# scale_fill_grey(start = 0, end = .4) +
# scale_colour_grey(start = .2, end = .2)
# }
}
}
# ggiNEXT_beta.link -------------------------------------------------------------------
#' ggplot2 extension for outcome from \code{iNEXT.beta.link}
#'
#' \code{ggiNEXTbeta.link}: ggplot for Interpolation and extrapolation of Beta diversity with order q
#'
#' @param outcome the outcome from \code{"iNEXTbeta.link"}
#' @param type selection of plot type : \code{type = 'B'} for plotting the gamma, alpha, and beta diversity ;
#' \code{type = 'D'} for plotting 4 turnover dissimilarities.
#' @param measurement character indicating the label of y-axis.
#' @param scale Are scales shared across all facets (\code{"fixed"}), or do they vary across rows (\code{"free_x"}), columns (\code{"free_y"}), or both rows and columns (\code{"free"})? Default is \code{"free"}.
#' @param main The title of the plot.
#' @param transp a value between 0 and 1 controlling transparency. \code{transp = 0} is completely transparent, default is 0.4.
#'
#' @return a figure for Beta diversity or dissimilarity diversity.
#'
#'
#' @return a ggplot2 object
#' @examples
#' \dontrun{
#' data(Norfolk)
#' beta1 = iNEXTbeta.link(networks = puerto.rico$data, level = seq(0.5, 1, 0.5), datatype='abundance',q = c(0, 1, 2),
#' diversity = 'TD', nboot = 10, conf = 0.95)
#' beta2 = iNEXTbeta.link(networks = puerto.rico$data, level = seq(0.5, 1, 0.5), datatype='abundance',q = c(0, 1, 2),
#' diversity = 'PD', nboot = 10, conf = 0.95,
#' row.tree = puerto.rico$row.tree, col.tree = puerto.rico$col.tree)
#'
#' ggiNEXTbeta.link(beta1,diversity = 'TD', type = 'B')
#' ggiNEXTbeta.link(beta1,diversity = 'TD', type = 'D')
#' ggiNEXTbeta.link(beta2,diversity = 'PD', type = 'B')
#' ggiNEXTbeta.link(beta2,diversity = 'PD', type = 'D')
#' }
#' @export
ggiNEXTbeta.link <- function(outcome, type = c('B', 'D'),
diversity = 'TD',
scale='free', main=NULL, transp=0.4, stript.size = 11, text.size = 13){
if (type == 'B'){
gamma = lapply(outcome, function(y) y[["gamma"]]) %>% do.call(rbind,.) %>% mutate(div_type = "Gamma") %>% as_tibble()
alpha = lapply(outcome, function(y) y[["alpha"]]) %>% do.call(rbind,.) %>% mutate(div_type = "Alpha") %>% as_tibble()
beta = lapply(outcome, function(y) y[["beta"]]) %>% do.call(rbind,.) %>% mutate(div_type = "Beta") %>% as_tibble()
beta = beta %>% filter(Method != 'Observed')
beta[beta == 'Observed_alpha'] = 'Observed'
df = rbind(gamma, alpha, beta)
for (i in unique(gamma$Order)) df$Order[df$Order==i] = paste0('q = ', i)
df$div_type <- factor(df$div_type, levels = c("Gamma","Alpha","Beta"))
id_obs = which(df$Method == 'Observed')
for (i in 1:length(id_obs)) {
new = df[id_obs[i],]
new$level = new$level - 0.0001
new$Method = 'Interpolated'
newe = df[id_obs[i],]
newe$level = newe$level + 0.0001
newe$Method = 'Extrapolated'
df = rbind(df, new, newe)
}
if (diversity=='TD') { ylab = "Taxonomic diversity" }
if (diversity=='PD') { ylab = "Phylogenetic diversity" }
}
if (type=='D'){
C = lapply(outcome, function(y) y[["C"]]) %>% do.call(rbind,.) %>% mutate(div_type = "1-CqN") %>% as_tibble()
U = lapply(outcome, function(y) y[["U"]]) %>% do.call(rbind,.) %>% mutate(div_type = "1-UqN") %>% as_tibble()
V = lapply(outcome, function(y) y[["V"]]) %>% do.call(rbind,.) %>% mutate(div_type = "1-VqN") %>% as_tibble()
S = lapply(outcome, function(y) y[["S"]]) %>% do.call(rbind,.) %>% mutate(div_type = "1-SqN") %>% as_tibble()
C = C %>% filter(Method != 'Observed')
U = U %>% filter(Method != 'Observed')
V = V %>% filter(Method != 'Observed')
S = S %>% filter(Method != 'Observed')
C[C == 'Observed_alpha'] = U[U == 'Observed_alpha'] = V[V == 'Observed_alpha'] = S[S == 'Observed_alpha'] = 'Observed'
df = rbind(C, U, V, S)
for (i in unique(C$Order)) df$Order[df$Order==i] = paste0('q = ', i)
df$div_type <- factor(df$div_type, levels = c("1-CqN","1-UqN","1-VqN","1-SqN"))
id_obs = which(df$Method == 'Observed')
for (i in 1:length(id_obs)) {
new = df[id_obs[i],]
new$level = new$level - 0.0001
new$Method = 'Interpolated'
newe = df[id_obs[i],]
newe$level = newe$level + 0.0001
newe$Method = 'Extrapolated'
df = rbind(df, new, newe)
}
if (diversity=='TD') { ylab = "Taxonomic dissimilarity" }
if (diversity=='PD') { ylab = "Phylogenetic dissimilarity" }
}
lty = c(Interpolated = "solid", Extrapolated = "dashed")
# lty = c(Interpolated = "solid", Extrapolated = "twodash")
df$Method = factor(df$Method, levels = c('Interpolated', 'Extrapolated', 'Observed'))
double_size = unique(df[df$Method=="Observed",]$Size)*2
double_extrapolation = df %>% filter(Method=="Extrapolated" & round(Size) %in% double_size)
point_size = 2
ggplot(data = df, aes(x = level, y = Estimate, col = Region)) +
geom_ribbon(aes(ymin = LCL, ymax = UCL, fill = Region, col = NULL), alpha=transp) +
geom_line(data = subset(df, Method!='Observed'), aes(linetype=Method), size=1.1) +
scale_linetype_manual(values = lty) +
# geom_line(lty=2) +
geom_point(data = subset(df, Method=='Observed' & div_type=="Gamma"),shape=19, size=point_size) +
geom_point(data = subset(df, Method=='Observed' & div_type!="Gamma"),shape=1, size=point_size,stroke=1.5)+
geom_point(data = subset(double_extrapolation, div_type == "Gamma"),shape=17, size=point_size) +
geom_point(data = subset(double_extrapolation, div_type!="Gamma"),shape=2, size=point_size,stroke=1.5) +
facet_grid(div_type~Order, scales = scale) +
# facet_wrap(div_type~Order, scales = scale, switch="both") +
theme_bw() +
theme(legend.position = "bottom", legend.title = element_blank()) +
labs(x='Sample coverage', y=ylab, title=main)
}
# ggiNEXT_beta.link <- function(output, type = c('B', 'D'),
# diversity = 'TD',
# scale='free', main=NULL, transp=0.4, stript.size = 11, text.size = 13){
# if (type == 'B'){
#
# gamma = lapply(output, function(y) y[["gamma"]]) %>% do.call(rbind,.) %>% mutate(div_type = "Gamma") %>% as_tibble()
# alpha = lapply(output, function(y) y[["alpha"]]) %>% do.call(rbind,.) %>% mutate(div_type = "Alpha") %>% as_tibble()
# beta = lapply(output, function(y) y[["beta"]]) %>% do.call(rbind,.) %>% mutate(div_type = "Beta") %>% as_tibble()
# beta = beta %>% filter(Method != 'Observed')
# beta[beta == 'Observed_alpha'] = 'Observed'
#
# df = rbind(gamma, alpha, beta)
# for (i in unique(gamma$Order)) df$Order[df$Order==i] = paste0('q = ', i)
# df$div_type <- factor(df$div_type, levels = c("Gamma","Alpha","Beta"))
#
# id_obs = which(df$Method == 'Observed')
#
# for (i in 1:length(id_obs)) {
#
# new = df[id_obs[i],]
# new$level = new$level - 0.0001
# new$Method = 'Interpolated'
#
# newe = df[id_obs[i],]
# newe$level = newe$level + 0.0001
# newe$Method = 'Extrapolated'
#
# df = rbind(df, new, newe)
#
# }
#
# if (diversity=='TD') { ylab = "Taxonomic diversity" }
# if (diversity=='PD') { ylab = "Phylogenetic diversity" }
#
# }
#
# if (type=='D'){
#
# C = lapply(output, function(y) y[["C"]]) %>% do.call(rbind,.) %>% mutate(div_type = "1-CqN") %>% as_tibble()
# U = lapply(output, function(y) y[["U"]]) %>% do.call(rbind,.) %>% mutate(div_type = "1-UqN") %>% as_tibble()
# V = lapply(output, function(y) y[["V"]]) %>% do.call(rbind,.) %>% mutate(div_type = "1-VqN") %>% as_tibble()
# S = lapply(output, function(y) y[["S"]]) %>% do.call(rbind,.) %>% mutate(div_type = "1-SqN") %>% as_tibble()
# C = C %>% filter(Method != 'Observed')
# U = U %>% filter(Method != 'Observed')
# V = V %>% filter(Method != 'Observed')
# S = S %>% filter(Method != 'Observed')
# C[C == 'Observed_alpha'] = U[U == 'Observed_alpha'] = V[V == 'Observed_alpha'] = S[S == 'Observed_alpha'] = 'Observed'
#
# df = rbind(C, U, V, S)
# for (i in unique(C$Order)) df$Order[df$Order==i] = paste0('q = ', i)
# df$div_type <- factor(df$div_type, levels = c("1-CqN","1-UqN","1-VqN","1-SqN"))
#
# id_obs = which(df$Method == 'Observed')
#
# for (i in 1:length(id_obs)) {
#
# new = df[id_obs[i],]
# new$level = new$level - 0.0001
# new$Method = 'Interpolated'
#
# newe = df[id_obs[i],]
# newe$level = newe$level + 0.0001
# newe$Method = 'Extrapolated'
#
# df = rbind(df, new, newe)
#
# }
#
# if (diversity=='TD') { ylab = "Taxonomic dissimilarity" }
# if (diversity=='PD') { ylab = "Phylogenetic dissimilarity" }
#
# }
#
# lty = c(Interpolated = "solid", Extrapolated = "dotted")
# df$Method = factor(df$Method, levels = c('Interpolated', 'Extrapolated', 'Observed'))
#
# double_size = unique(df[df$Method=="Observed",]$Size)*2
# double_extrepolation = df %>% filter(Method=="Extrapolated" & round(Size) %in% double_size)
#
# # ggplot(data = df, aes(x = level, y = Estimate)) +
# plot = ggplot(data = df, aes(x = level, y = Estimate, col = Region)) +
# # geom_ribbon(aes(ymin = LCL, ymax = UCL, fill = Region, col = NULL), alpha=transp) +
# geom_ribbon(aes(ymin = LCL, ymax = UCL, fill = Region), alpha=transp) +
# geom_line(data = subset(df, Method!='Observed'), aes(linetype=Method), size=1.1) + scale_linetype_manual(values = lty) +
# # geom_line(lty=2) +
# geom_point(data = subset(df, Method=='Observed' & div_type=="Gamma"),shape=19, size=3) +
# geom_point(data = subset(df, Method=='Observed' & div_type!="Gamma"),shape=1, size=3,stroke=1.5)+
# geom_point(data = subset(double_extrepolation, div_type == "Gamma"),shape=17, size=3) +
# geom_point(data = subset(double_extrepolation, div_type!="Gamma"),shape=2, size=3,stroke=1.5) +
# facet_grid(div_type~Order, scales = scale) +
# theme_bw() +
# theme(legend.position = "bottom", legend.title = element_blank(),
# strip.text = element_text(size = stript.size), text = element_text(size = text.size),
# legend.key.width = unit(1,"cm")) +
# labs(x='Sample coverage', y=ylab, title=main)
# if(length(output) == 1){
# plot = plot + guides(col=FALSE, fill = FALSE)
# }
# return(plot)
# }
# Asy.link -------------------------------------------------------------------
#' Asymptotic diversity q profile
#'
#' \code{Asy.link} The estimated and empirical diversity of order q
#'
#' @param data a matrix/data.frame of species
#' abundances (for abundance data) or species-by-site incidence raw matrix/data.frame (for incidence data).\cr
#' Abundance data: a species-by-site matrix/data.frame of species abundances. The row (species) names of
#' data must match the species names in the phylogenetic tree and thus cannot be missing.\cr
#' Incidence raw data: species-by-site raw incidence matrix/data.frame. When there are N assemblages
#' and thus N matrices, users must first merge the N matrices by species identity to obtain a large
#' merged incidence matrix, where the rows of the matrix refer to all species presented in the merged
#' data. The row (species) names of data must match the species names in the phylogenetic tree and
#' thus cannot be missing.
#' @param diversity a choice of three-level diversity: 'TD' = 'Taxonomic', 'PD' = 'Phylogenetic', and 'FD' = 'Functional' under certain threshold.
#' @param datatype data type of input data: individual-based abundance data (\code{datatype = "abundance"}) or species by sampling-units incidence matrix (\code{datatype = "incidence_raw"}).
#' @param q a nonnegative value or sequence specifying the diversity order. Default is \code{c(0,1,2)}.
#' @param nboot a positive integer specifying the number of bootstrap replications when assessing
#' sampling uncertainty and constructing confidence intervals. Enter 0 to skip the bootstrap procedures. Default is 50
#' @param conf a positive number < 1 specifying the level of confidence interval. Default is 0.95.
#' @param col.tree phylogenetic tree of column assemblage in interaction matrix
#' @param row.tree phylogenetic tree of row assemblage in interaction matrix.
#' @return a table of Asymptotic network diversity q profile
#'
#' @examples
#' \dontrun{
#' ## Ex.1
#' data(Norfolk)
#' out1 <- Asy.link(Norfolk, diversity = 'TD', datatype = "abundance", nboot = 10)
#' ggAsy.link(out1)
#' ## Ex.2
#' data(puerto.rico)
#' out2 <- Asy.link(puerto.rico$data, diversity = 'PD', datatype = "abundance", nboot = 10,
#' row.tree = puerto.rico$row.tree, col.tree = puerto.rico$col.tree)
#' ggAsy.link(out2)
#' }
#' @export
Asy.link <- function(data, diversity = 'TD', q = seq(0, 2, 0.2), datatype = "abundance", nboot = 30, conf = 0.95,
row.tree = NULL, col.tree = NULL){
if(diversity == 'TD'){
NetDiv <- lapply(1:length(data), function(i) {
x = data[[i]]
assemblage = names(data)[[i]]
# please note that nboot has to larger than 0
res = MakeTable_Proposeprofile(data = x, B = nboot, q, conf = conf)%>%
mutate(Network = assemblage, method = "Estimated")%>%
filter(Target == "Diversity")%>%select(-Target, -`s.e.`)%>%
rename("qD"="Estimate", "qD.LCL"="LCL", "qD.UCL"="UCL", 'Method' = 'method')
return(res)
})%>%do.call("rbind",.)
return(NetDiv)
}else if(diversity == 'PD'){
NetDiv <- get.netphydiv(data = data,q = q,B = nboot,row.tree = row.tree,col.tree = col.tree,conf = conf)%>%
filter(method == "Estimate")%>%
mutate(method = ifelse(method == 'Estimate','Estimated',method ))%>%
dplyr::select(Order.q, Estimate, LCL, UCL, Region, method )%>%
set_colnames(c('Order.q', 'qD', 'qD.LCL','qD.UCL', 'Network', 'Method'))
return(NetDiv)
}
}
# Obs.link -------------------------------------------------------------------
#' Empirical diversity q profile
#'
#' \code{Obs.link} The estimated and empirical diversity of order q
#'
#' @param data a matrix/data.frame of species
#' abundances (for abundance data) or species-by-site incidence raw matrix/data.frame (for incidence data).\cr
#' Abundance data: a species-by-site matrix/data.frame of species abundances. The row (species) names of
#' data must match the species names in the phylogenetic tree and thus cannot be missing.\cr
#' Incidence raw data: species-by-site raw incidence matrix/data.frame. When there are N assemblages
#' and thus N matrices, users must first merge the N matrices by species identity to obtain a large
#' merged incidence matrix, where the rows of the matrix refer to all species presented in the merged
#' data. The row (species) names of data must match the species names in the phylogenetic tree and
#' thus cannot be missing.
#' @param diversity a choice of three-level diversity: 'TD' = 'Taxonomic', 'PD' = 'Phylogenetic', and 'FD' = 'Functional' under certain threshold.
#' @param datatype data type of input data: individual-based abundance data (\code{datatype = "abundance"}) or species by sampling-units incidence matrix (\code{datatype = "incidence_raw"}).
#' @param q a nonnegative value or sequence specifying the diversity order. Default is \code{c(0,1,2)}.
#' Enter 0 to skip the bootstrap procedures. Default is 50.
#' @param conf a positive number < 1 specifying the level of confidence interval. Default is 0.95.
#' @param col.tree phylogenetic tree of column assemblage in interaction matrix
#' @param row.tree phylogenetic tree of row assemblage in interaction matrix.
#' @return a table of Empirical network diversity q profile
#'
#' @examples
#' \dontrun{
#' ## Example for abundance-based data
#' ## Ex.1
#' data(Norfolk)
#' out1 <- Obs.link(Norfolk, diversity = 'TD', datatype = "abundance", nboot = 30)
#' ggObs.link(out1)
#' ## Ex.2
#' data(puerto.rico)
#' out2 <- Obs.link(data = puerto.rico$data, diversity = 'PD', datatype = "abundance",
#" nboot = 10, row.tree = puerto.rico$row.tree, col.tree = puerto.rico$col.tree)
#' ggObs.link(out2)
#' }
#' @export
Obs.link <- function(data, diversity = 'TD', q = seq(0, 2, 0.2), datatype = "abundance", nboot = 50, conf = 0.95,
col.tree = NULL, row.tree = NULL){
if(diversity == 'TD'){
NetDiv <- lapply(1:length(data), function(i){
x = data[[i]]
assemblage = names(data)[[i]]
tmp <- c(as.matrix(x))
## nboot has to larger than 0
res = MakeTable_Empericalprofile(data = x, B = nboot, q, conf = conf)%>%
mutate(Network = assemblage, method = "Empirical")%>%
filter(Target == "Diversity")%>%select(-Target, -`s.e.`)%>%
rename("qD"="Emperical", "qD.LCL"="LCL", "qD.UCL"="UCL", 'Method' = 'method')
return(res)
})%>%do.call("rbind",.)
return(NetDiv)
}
else if(diversity == 'PD'){
NetDiv <- get.netphydiv(data = data,q = q,B = nboot,row.tree = row.tree,
col.tree = col.tree,conf = conf)%>%
filter(method == "Empirical")%>%
dplyr::select(Order.q, Estimate, LCL, UCL, Region, method )%>%
set_colnames(c('Order.q', 'qD', 'qD.LCL','qD.UCL', 'Network', 'Method'))
return(NetDiv)
}
}
# ggObs.link -------------------------------------------------------------------
#' ggplot for Empirical Network diversity
#'
#' \code{ggObs.link} Plots q-profile based on the outcome of \code{Obs.link} using the ggplot2 package.\cr
#' @param outcome the outcome of the functions \code{Obs.link} .\cr
#' @param text.size control the text size of the output plot.
#' @return a figure of estimated sample completeness with order q\cr\cr
#'
#' @examples
#' \dontrun{
#' ## Ex.1
#' data(Norfolk)
#' out1 <- Obs.link(Norfolk, diversity = 'TD', datatype = "abundance", nboot = 10)
#' ggObs.link(out1)
#' ## Ex.2
#' data(puerto.rico)
#' out2 <- Obs.link(puerto.rico$data, diversity = 'PD', datatype = "abundance",
#' nboot = 10, row.tree = puerto.rico$row.tree, col.tree = puerto.rico$col.tree)
#' ggObs.link(out2)
#' }
#' @export
ggObs.link <- function(outcome, diversity = 'TD', text.size = 14){
cbPalette <- rev(c("#999999", "#E69F00", "#56B4E9", "#009E73",
"#330066", "#CC79A7", "#0072B2", "#D55E00"))
if(diversity == 'TD'){
ylab = 'taxonomic diversity'
}else if(diversity == 'PD') {
ylab = 'phylogenetic diversity'
}
ggplot(outcome, aes(x = Order.q, y = qD, colour = Network, lty = Method)) +
geom_line(size = 1.2) + scale_colour_manual(values = cbPalette) +
geom_ribbon(data = outcome[outcome$method == "Empirical", ],
aes(ymin = qD.LCL, ymax = qD.UCL, fill = Network), alpha = 0.2, linetype = 0) +
scale_fill_manual(values = cbPalette) +
scale_linetype_manual(values = c(Estimated = 1, Empirical = 2)) +
labs(x = "Order q", y = ylab) + theme(text = element_text(size = 10)) +
theme(legend.position = "bottom", legend.box = "vertical",
legend.key.width = unit(1.1, "cm"), legend.title = element_blank(),
legend.margin = margin(0, 0, 0, 0), legend.box.margin = margin(-10,-10, -5, -10),
text = element_text(size = text.size)
)
# +labs(x = "Order q", y = "Network phylogenetic diversity", lty = "Method") + scale_linetype_manual(values=c("dashed","solid"))
}
# ggAsy.link -------------------------------------------------------------------
#' ggplot for Asymptotic Network diversity
#'
#' \code{ggAsy.link} Plots q-profile based on the outcome of \code{Asy.link} using the ggplot2 package.\cr
#'
#' @param outcome the outcome of the functions \code{Asy.link} .\cr
#' @param text.size control the text size of the output plot.
#' @return a figure of estimated sample completeness with order q\cr\cr
#'
#' @examples
#' \dontrun{
#' ## Ex.1
#' data(Norfolk)
#' out1 <- Asy.link(Norfolk, diversity = 'TD', datatype = "abundance", nboot = 10)
#' ggAsy.link(out1)
#' ## Ex.2
#' data(puerto.rico)
#' out2 <- Asy.link(puerto.rico$data, diversity = 'PD', datatype = "abundance",
#' nboot = 10, row.tree = puerto.rico$row.tree, col.tree = puerto.rico$col.tree)
#' ggAsy.link(out2)
#' }
#' @export
ggAsy.link <- function(outcome, diversity = 'TD', text.size = 14){
cbPalette <- rev(c("#999999", "#E69F00", "#56B4E9", "#009E73",
"#330066", "#CC79A7", "#0072B2", "#D55E00"))
# if (sum(unique(outcome$method) %in% c("Estimate", "Empirical")) == 0)
# stop("Please use the outcome from specified function 'AsyD'")
if(diversity == 'TD'){
ylab = 'taxonomic diversity'
}else if(diversity == 'PD') {
ylab = 'phylogenetic diversity'
}
ggplot(outcome, aes(x = Order.q, y = qD, colour = Network, lty = Method)) +
geom_line(size = 1.2) + scale_colour_manual(values = cbPalette) +
geom_ribbon(data = outcome[outcome$method == "Estimated", ],
aes(ymin = qD.LCL, ymax = qD.UCL, fill = Network), alpha = 0.2, linetype = 0) +
scale_fill_manual(values = cbPalette) +
scale_linetype_manual(values = c(Estimated = 1, Empirical = 2)) +
labs(x = "Order q", y = ylab) + theme(text = element_text(size = 10)) +
theme(legend.position = "bottom", legend.box = "vertical",
legend.key.width = unit(1.1, "cm"), legend.title = element_blank(),
legend.margin = margin(0, 0, 0, 0), legend.box.margin = margin(-10,-10, -5, -10),
text = element_text(size = text.size)
)
# +labs(x = "Order q", y = "Network phylogenetic diversity", lty = "Method") + scale_linetype_manual(values=c("dashed","solid"))
}
# estimateD.link -------------------------------------------------------------------
#' Compute species diversity with a particular of sample size/coverage
#'
#' \code{estimateD.link} computes species diversity (Hill numbers with q = 0, 1 and 2) with a particular user-specified level of sample size or sample coverage.
#'
#' @param data a \code{matrix}, \code{data.frame} (species by assemblages), or \code{list} of species abundance/incidence raw data.\cr
#' @param diversity a choice of three-level diversity: 'TD' = 'Taxonomic', 'PD' = 'Phylogenetic', and 'FD' = 'Functional' under certain threshold. Besides,'AUC' is the fourth choice which
#' integrates several threshold functional diversity to get diversity.
#' @param q a numerical vector of the order of Hill number.
#' @param datatype data type of input data: individual-based abundance data (\code{datatype = "abundance"}) or species by sampling-units incidence matrix (\code{datatype = "incidence_raw"}).
#' @param base comparison base: sample-size-based (\code{base="size"}) or coverage-based \cr (\code{base="coverage"}).
#' @param nboot a positive integer specifying the number of bootstrap replications when assessing
#' sampling uncertainty and constructing confidence intervals. Enter 0 to skip the bootstrap procedures. Default is 50
#' @param level a sequence specifying the particular sample sizes or sample coverages(between 0 and 1).
#' If \code{base="size"} and \code{level=NULL}, then this function computes the diversity estimates for the minimum sample size among all sites extrapolated to double reference sizes.
#' If \code{base="coverage"} and \code{level=NULL}, then this function computes the diversity estimates for the minimum sample coverage among all sites extrapolated to double reference sizes.
#' @param conf a positive number < 1 specifying the level of confidence interval, default is 0.95.
#' @param col.tree phylogenetic tree of column assemblage in interaction matrix
#' @param row.tree phylogenetic tree of row assemblage in interaction matrix.
#' @return a data.frame of species diversity table including the sample size, sample coverage, method (rarefaction or extrapolation), and diversity estimates with q = 0, 1, and 2 for the user-specified sample size or sample coverage.
#'
#' @examples
#' \dontrun{
#' data(Norfolk)
#' out1 <- estimateD.link(Norfolk, diversity = 'TD',datatype="abundance",
#' base="coverage", level=0.7, nboot = 30)
#' out2 <- estimateD.link(Norfolk, diversity = 'TD',datatype="abundance",
#' base="size", level=0.7, nboot = 30)
#' }
#' @export
estimateD.link = function(data, diversity = 'TD', q = c(0, 1, 2),datatype = "abundance",base = "size",
level = NULL,nboot = 50,conf = 0.95,
row.tree = NULL, col.tree = NULL){
if(diversity == 'TD'){
div = lapply(1:length(data), function(i){
x = data[[i]]
assemblage = names(data)[[i]]
long = as.matrix(x)%>%c()
iNEXT.3D::estimate3D(long, q=q,datatype=datatype, base=base,
diversity = 'TD', nboot = nboot,conf=conf)%>%
mutate(Assemblage = assemblage)
})%>%do.call("rbind",.)
return(div)
}else if(diversity == 'PD'){
if(datatype=='abundance'){
if(class(data)=="data.frame" | class(data)=="matrix"| class(data)=="integer" ) data = list(Region_1 = data)
if(class(data)== "list"){
if(is.null(names(data))) region_names = paste0("Region_", 1:length(data)) else region_names = names(data)
Ns = sapply(data, ncol)
data_list = data
}
}
if(is.null(conf)) conf = 0.95
tmp = qnorm(1 - (1 - conf)/2)
for_each_region = function(data_2d, region_name, N){
if (datatype=='abundance') {
n = sum(data_2d)
if(base == 'coverage'){
size_m = sapply(level, function(i) iNEXT.beta:::coverage_to_size(data_2d, i, datatype='abundance'))
}else if(base == 'size'){
size_m = level
}
ref= iNEXT.3D:::Coverage(data_2d,m= n, datatype = 'abundance')
#
aL_table = create.aili(data_2d, row.tree = row.tree, col.tree = col.tree) %>%
select(branch.abun, branch.length, tgroup)%>%
filter(branch.abun>0)
qPDm <- iNEXTPD2:::PhD.m.est(ai = aL_table$branch.abun,
Lis = aL_table$branch.length%>%as.matrix(),
m = size_m,
q = q,nt = n,cal = 'PD')%>%
as.vector()
## boot
tbar <- sum(aL_table$branch.length*aL_table$branch.abun)/n
if(nboot >1 ){
boot.sam <- sample.boot.phy(data_2d,nboot,row.tree = row.tree,col.tree = col.tree)
PD.sd <- lapply(boot.sam, function(aL_boot){
tmp = iNEXTPD2:::PhD.m.est(ai = aL_boot$branch.abun,
Lis = aL_boot$branch.length%>%as.matrix(),
m = size_m,
q = q,nt = n,cal = 'PD')%>%
as.vector()%>%as.data.frame()
return(tmp)
})%>%
abind(along=3) %>% apply(1:2, sd)%>%as.vector()
}else{
PD.sd = c(0,0,0)
}
##
len = length(q)
res = data.frame(Assemblage = rep(region_name,len),
goalSC = rep(level, len),
SC = rep(iNEXT.3D:::Coverage(data_2d, datatype='abundance', size_m), len),
m = rep(size_m,len),
Method = ifelse(level > ref, 'Extrapolated', 'Interpolated'),
Order.q = q,
qPD = qPDm,
qPD.UCL = qPDm+1.96*PD.sd,
qPD.LCL = qPDm-1.96*PD.sd
)
return(res)
}
}
print(length(data))
output = lapply(1:length(data), function(i) for_each_region(data_2d = data_list[[i]],
region_name = region_names[i], N = Ns[i]))%>%
do.call('rbind',.)
return(output)
}
}
### old version
# estimateD.link = function(data, diversity = 'TD', q = c(0, 1, 2),datatype = "abundance",base = "size",
# level = NULL,nboot = 50,conf = 0.95,
# row.tree = NULL, col.tree = NULL){
# if(diversity == 'TD'){
# div = lapply(1:length(data), function(i){
# x = data[[i]]
# assemblage = names(data)[[i]]
# long = as.matrix(x)%>%c()
# iNEXT.3D::estimate3D(long, q=q,datatype=datatype, base=base,
# diversity = 'TD', nboot = nboot,conf=conf)%>%
# mutate(Assemblage = assemblage)
# })%>%do.call("rbind",.)
#
# return(div)
# }else if(diversity == 'PD'){
#
# q <- unique(ceiling(q))
# ci <- qnorm(conf/2+0.5)
#
# if (is.null(level)) {
# if (datatype == "abundance") {
# level <- sapply(data, function(x) {
# ni <- sum(x)
# Coverage(data = x, datatype = datatype, m = 2 * ni, nt = ni)
# })
# }
# else if (datatype == "incidence_raw") {
# # level <- sapply(data, function(x) {
# # ni <- ncol(x)
# # Coverage(data = x, datatype = datatype, m = 2 *
# # ni, nt = ni)
# # })
# }
# level <- min(level)
# }
# # level =0.7
# res = lapply(1:length(data), function(i){
# x = data[[i]]
# assemblage = names(data)[[i]]
# long = as.matrix(x)%>%c()
# m_target = coverage_to_size(x, C = level, datatype = 'abundance')
# # if(base == "size"){m_target = level}
# inex <- function(data,m,q,B,row.tree = NULL,col.tree = NULL) {
# data <- as.matrix(data)
# n <- sum(data)
# phydata <- create.aili(data,row.tree = row.tree,col.tree = col.tree)
# ## why tbar can be calculated by this?
# ## (ai * Li) / n
# tbar <- sum(phydata$branch.length*phydata$branch.abun)/n
# boot.sam <- sample.boot.phy(data,B,row.tree = row.tree,col.tree = col.tree)
# sc <- PhD:::Coverage(data,datatype = "abundance",m,nt =n)
# # sc <- coverage(data,m)
# sc.sd <- lapply(boot.sam,function(x){
# x <- x[x$tgroup == "Tip",]$branch.abun
# PhD:::Coverage(x,datatype = "abundance",m,nt =n)
# })
# sc.sd <- do.call(cbind,sc.sd)
# sc.sd <- sapply(1:length(m), function(x){
# sd(sc.sd[x,])
# })
# sc.table <- data.frame(m=m,SC = sc, SC.UCL = sc+ci * sc.sd,SC.LCL = sc - ci * sc.sd)
# out <- lapply(q, function(x){
# PD <- lapply(m,function(y){
# my_PhD.m.est(ai = phydata$branch.abun, Lis = phydata$branch.length, m = y, q = x, nt = n, cal = 'PD')/tbar
# # PhD:::PhD.m.est(phydata,y,x,datatype = "abundance",nt = n)/tbar
# })%>%unlist()
# PD.sd <- lapply(boot.sam, function(z){
# tmp <- lapply(m,function(y){
# # PhD:::PhD.m.est(z,y,x,datatype = "abundance",nt = n)/tbar
# my_PhD.m.est(ai = z$branch.abun, Lis = z$branch.length, m = y, q = x, nt = n, cal = 'PD')/tbar
# })
# unlist(tmp)
# })
# PD.sd <- do.call(cbind,PD.sd)
# PD.sd <- sapply(1:length(m), function(x){
# sd(PD.sd[x,])
# })
# PD.table <- data.frame(m=m,method = ifelse(m<n,"interpolated",ifelse(n == m,"observed","extrapolated")),
# Order.q = x,PD = PD, PD.UCL = PD+ci * PD.sd,PD.LCL = PD - ci * PD.sd)
# out <- left_join(PD.table,sc.table)
# out
# })
# do.call(rbind,out)
# }
#
# inex(data = x,m = m_target, q,B = nboot,row.tree,col.tree)%>%
# mutate(Assemblage = assemblage)
#
# })%>%do.call("rbind",.)
# return(res)
# }
# }
# Spec.link -------------------------------------------------------------------
#' Specialization Estimation of Evenness with order q
#'
#' \code{Spec.link} computes Evenness Estimation of Evenness with order q.
#'
#' @param outcome the outcome of the functions \code{ObsND} .\cr
#' @return A list of estimated(empirical) evenness with order q.
#' Different lists represents different classes of Evenness.
#' Each list is combined with order.q and sites.
#' If "method" is estimated, then fist list will be named "C" which means the maximum standardized coverage between all double reference sample size.
#
# $summary individual summary of 4 steps of data.
#'
#' @examples
#' \dontrun{
#' data(Norfolk)
#' Est <- Spec.link(x = Norfolk, datatype = "abundance", q = c(0,1,2),
#' nboot = 30, method = "Estimated")
#' Emp <- Spec.link(x = Norfolk, datatype = "abundance", q = c(0,1,2),
#' nboot = 30, method = "Empirical")
#' Est
#' Emp
#' ggSpec(Est)
#' ggSpec(Emp)
#' }
#' @export
Spec.link <- function(data, q = seq(0, 2, 0.2),
diversity = 'TD',
datatype = "abundance",
method = "Estimated",
nboot = 30,
conf = 0.95,
E.class = c(1:5),
C = NULL){
if (diversity == 'TD'){
long = lapply(data, function(da){da%>%as.data.frame()%>%gather(key = "col_sp", value = "abundance")%>%.[,2]})
Spec <- lapply(E.class, function(e){
each_class = lapply(seq_along(long), function(i){
res = iNEXT.4steps::Evenness(long[[i]], q = q,datatype = datatype,
method = method, nboot=nboot, E.class = e, C = C)
res['Coverage'] = NULL
res = lapply(res, function(each_class){
each_class%>%
mutate(Evenness = 1-Evenness, Even.LCL = 1-Even.LCL, Even.UCL = 1-Even.UCL)%>%
rename('Specialization'='Evenness', 'Spec.LCL' ='Even.LCL', 'Spec.UCL' ='Even.UCL')%>%
mutate(Network = names(long)[[i]])
})
# if(method == "Empirical") index = 1
# if(method == "Estimated") index = 2
# return(res[[index]]%>%mutate(Assemblage = names(long)[[i]]))
return(res[[1]])
})%>%do.call("rbind",.)
each_class%>%mutate(class = paste0("E",e))
})
names(Spec) = paste0("E",E.class)
return(Spec)
}else if (diversity == 'PD'){
long = lapply(data, function(da){
da%>%as.data.frame()%>%gather(key = "col_sp", value = "abundance")%>%column_to_rownames('col_sp')
})
names(long) = names(data)
Spec <- lapply(E.class, function(e){
each_class = lapply(seq_along(data), function(i){
res = Spec.PD(data[[i]], q = q,datatype = datatype,
method = method, nboot=nboot, E.class = e, C = C)
res['Coverage'] = NULL
res = lapply(res, function(each_class){
each_class%>%
mutate(Evenness = 1-Evenness, Even.LCL = 1-Even.LCL, Even.UCL = 1-Even.UCL)%>%
rename('Specialization'='Evenness', 'Spec.LCL' ='Even.LCL', 'Spec.UCL' ='Even.UCL')%>%
mutate(Network = names(long)[[i]])
})
# if(method == "Empirical") index = 1
# if(method == "Estimated") index = 2
# return(res[[index]]%>%mutate(Assemblage = names(long)[[i]]))
return(res[[1]])
})%>%do.call("rbind",.)
each_class%>%mutate(class = paste0("E",e))
})
spec_PD <- function(){
if (datatype == "abundance") {
qD <- Evenness.profile(data, q, "abundance", method,
E.class, C)
qD <- map(qD, as.vector)
if (nboot > 1) {
Prob.hat <- lapply(1:length(data), function(i) iNEXT.3D:::EstiBootComm.Ind(data[[i]]))
Abun.Mat <- lapply(1:length(data), function(i) rmultinom(nboot,
sum(data[[i]]), Prob.hat[[i]]))
error = apply(matrix(sapply(1:nboot, function(b) {
dat = lapply(1:length(Abun.Mat), function(j) Abun.Mat[[j]][,
b])
names(dat) = paste("Site", 1:length(dat), sep = "")
dat.qD = Evenness.profile(dat, q, "abundance",
method, E.class, C)
unlist(dat.qD)
}), nrow = length(q) * length(E.class) * length(Abun.Mat)),
1, sd, na.rm = TRUE)
error = matrix(error, ncol = length(E.class))
se = split(error, col(error))
}
else {
se = lapply(1:length(E.class), function(x) NA)
}
out <- lapply(1:length(E.class), function(k) {
tmp = data.frame(Order.q = rep(q, length(data)),
Evenness = as.vector(qD[[k]]), s.e. = as.vector(se[[k]]),
Even.LCL = as.vector(qD[[k]] - qnorm(1 - (1 -
conf)/2) * se[[k]]), Even.UCL = as.vector(qD[[k]] +
qnorm(1 - (1 - conf)/2) * se[[k]]), Assemblage = rep(names(data), each = length(q)), Method = rep(method, length(q) *
length(data)))
tmp$Even.LCL[tmp$Even.LCL < 0] <- 0
tmp
})
if (is.null(C) == TRUE)
C = unique(estimate3D(data, diversity = "TD", q = 0,
datatype = "abundance", base = "coverage", nboot = 0)$goalSC)
if (method == "Estimated") {
out <- append(C, out)
}
}
}
Spec <- lapply(E.class, function(e){
each_class = lapply(seq_along(long), function(i){
res = iNEXT.4steps::Evenness(long[[i]], q = q,datatype = datatype,
method = method, nboot=nboot, E.class = e, C = C)
res['Coverage'] = NULL
res = lapply(res, function(each_class){
each_class%>%
mutate(Evenness = 1-Evenness, Even.LCL = 1-Even.LCL, Even.UCL = 1-Even.UCL)%>%
rename('Specialization'='Evenness', 'Spec.LCL' ='Even.LCL', 'Spec.UCL' ='Even.UCL')%>%
mutate(Assemblage = names(long)[[i]])
})
# if(method == "Empirical") index = 1
# if(method == "Estimated") index = 2
# return(res[[index]]%>%mutate(Assemblage = names(long)[[i]]))
return(res[[1]])
})%>%do.call("rbind",.)
each_class%>%mutate(class = paste0("E",e))
})
# ### not finished yet
# long = lapply(data, function(da){da%>%as.data.frame()%>%gather(key = "col_sp", value = "abundance")%>%.[,2]})
#
# Spec <- lapply(E.class, function(e){
# each_class = lapply(seq_along(long), function(i){
# res = iNEXT.4steps::Evenness(long[[i]], q = q,datatype = datatype,
# method = method, nboot=nboot, E.class = e, C = C)
# res['Coverage'] = NULL
# res = lapply(res, function(each_class){
# each_class%>%
# mutate(Evenness = 1-Evenness, Even.LCL = 1-Even.LCL, Even.UCL = 1-Even.UCL)%>%
# rename('Specialization'='Evenness', 'Spec.LCL' ='Even.LCL', 'Spec.UCL' ='Even.UCL')%>%
# mutate(Assemblage = names(long)[[i]])
# })
# # if(method == "Empirical") index = 1
# # if(method == "Estimated") index = 2
# # return(res[[index]]%>%mutate(Assemblage = names(long)[[i]]))
# return(res[[1]])
# })%>%do.call("rbind",.)
#
# each_class%>%mutate(class = paste0("E",e))
# })
# names(Spec) = paste0("E",E.class)
# return(Spec)
}
}
# ggSpec.link -------------------------------------------------------------------
#' ggplot for Evenness
#
#' \code{ggSpec.link} The figure for estimation of Evenness with order q\cr
#'
#' @param outcome a table generated from \code{Spec.link} function\cr
#' @return a figure of estimated sample completeness with order q\cr
#'
#' @examples
#' \dontrun{
#' data(Norfolk)
#' Est <- Spec.link(data = Norfolk, diversity = 'TD', datatype = "abundance", q = c(0,1,2),
#' nboot = 30, method = "Estimated")
#' Emp <- Spec.link(data = Norfolk, diversity = 'TD', datatype = "abundance", q = c(0,1,2),
#' nboot = 30, method = "Empirical")
#' ggSpec(output = Est)
#' ggSpec(output = Emp)
#' }
#' @references
#' Chao,A.and Ricotta,C.(2019).Quantifying evenness and linking it to diversity, beta diversity, and similarity.
#' @export
ggSpec.link <- function(outcome){
cbPalette <- rev(c("#999999", "#E69F00", "#56B4E9", "#009E73",
"#330066", "#CC79A7", "#0072B2", "#D55E00"))
classdata = cbind(do.call(rbind, outcome))
fig = classdata%>%
ggplot(aes(x=Order.q, y=Specialization, colour=Network, lty = Method)) +
geom_line(size=1.2) +
scale_colour_manual(values = cbPalette) +
geom_ribbon(data = classdata %>% filter(Method=="Estimated"),
aes(ymin=Spec.LCL, ymax=Spec.UCL, fill=Network),
alpha=0.2, linetype=0) +
geom_ribbon(data = classdata %>% filter(Method=="Empirical"),
aes(ymin=Spec.LCL, ymax=Spec.UCL, fill=Network),
alpha=0.2, linetype=0) +
scale_fill_manual(values = cbPalette) +
labs(x="Order q", y="Specialization") +
# theme_bw(base_size = 18) +
theme(text=element_text(size=18)) +
theme(legend.position = "bottom", legend.box = "vertical",
legend.key.width = unit(1.2,"cm"),
legend.title = element_blank(),
legend.margin = margin(0,0,0,0),
legend.box.margin = margin(-10,-10,-5,-10),
text = element_text(size=12),
plot.margin = unit(c(5.5,5.5,5.5,5.5), "pt")
)
if (length(outcome) != 1) fig = fig +
facet_wrap(~class) +
theme(strip.text.x = element_text(size=12, colour = "purple", face="bold"))
return(fig)
}
even.class = function (q, qD, S, E.class)
{
tmp = c()
if (E.class == 1)
tmp = ifelse(q != 1, (1 - qD^(1 - q))/(1 - S^(1 - q)),
log(qD)/log(S))
if (E.class == 2)
tmp = ifelse(q != 1, (1 - qD^(q - 1))/(1 - S^(q - 1)),
log(qD)/log(S))
if (E.class == 3)
tmp = (qD - 1)/(S - 1)
if (E.class == 4)
tmp = (1 - 1/qD)/(1 - 1/S)
if (E.class == 5)
tmp = log(qD)/log(S)
return(tmp)
}
# iNEXTbeta.PDlink ----
iNEXTbeta.PDlink <- function(data, level, datatype='abundance', q = c(0, 1, 2),
nboot = 20, conf = 0.95,
row.tree = NULL,col.tree = NULL){
max_alpha_coverage = F
if(datatype=='abundance'){
if(class(data)=="data.frame" | class(data)=="matrix" ) data = list(Region_1 = data)
if(class(data)== "list"){
if(is.null(names(data))) region_names = paste0("Region_", 1:length(data)) else region_names = names(data)
Ns = sapply(data, ncol)
data_list = data
}
}
if(datatype=='incidence_raw'){
if(is.null(names(data))) region_names = paste0("Region_", 1:length(data)) else region_names = names(data)
Ns = sapply(data, length)
data_list = data
}
if(is.null(conf)) conf = 0.95
tmp = qnorm(1 - (1 - conf)/2)
for_each_region = function(data, region_name, N){
#data
if (datatype=='abundance') {
n = sum(data)
data_gamma = rowSums(data)
data_gamma = data_gamma[data_gamma>0]
data_alpha = as.matrix(data) %>% as.vector
ref_gamma = iNEXT.3D:::Coverage(data_gamma, n, datatype = 'abundance')
ref_alpha = iNEXT.3D:::Coverage(data_alpha, n, datatype = 'abundance')
ref_alpha_max = iNEXT.3D:::Coverage(data_gamma, n*2, datatype = 'abundance')
ref_gamma_max = iNEXT.3D:::Coverage(data_alpha, n*2, datatype = 'abundance')
level = level[level<1]
level = c(level, ref_gamma, ref_alpha, ref_alpha_max, ref_gamma_max) %>% sort %>% unique
m_gamma = sapply(level, function(i) coverage_to_size(x = data_gamma, C = i, datatype='abundance'))
m_alpha = sapply(level, function(i) coverage_to_size(data_alpha, i, datatype='abundance'))
}
if (datatype=='abundance') {
### 1. aL_table_gamma
data_gamma_2d = long_to_wide(data_gamma)
aL_table_gamma = create.aili(data_gamma_2d, row.tree = row.tree, col.tree = col.tree) %>%
select(branch.abun, branch.length, tgroup)%>%
filter(branch.abun>0)
### 2. aL_table_alpha
plan(multisession)
aL_table_alpha = future_lapply(1:N, function(i){
x = data[data[,i]>0,i]
names(x) = rownames(data)[data[,i]>0]
aL_table = create.aili(data = x%>%long_to_wide(),row.tree = row.tree, col.tree=col.tree )%>%
select(branch.abun, branch.length, tgroup)%>%
filter(branch.abun>0)
return(aL_table)
}, future.seed = TRUE)%>%do.call("rbind",.)
get_phylogenetic_alpha_gamma <- function(aL_table_gamma, aL_table_alpha, n, m_gamma, m_alpha){
## 1. gamma
qPDm <- iNEXTPD2:::PhD.m.est(ai = aL_table_gamma$branch.abun,
Lis = aL_table_gamma$branch.length%>%as.matrix(),m = m_gamma,
q = q,nt = n,cal = 'PD')
gamma = qPDm %>% t %>%as.data.frame %>%
set_colnames(q) %>% gather(Order, Estimate) %>%
mutate(level=rep(level, 3), Coverage_real=rep(iNEXT.3D:::Coverage(data_gamma, m_gamma, datatype = 'abundance'), 3),
Size=rep(m_gamma, 3))%>%
mutate(Method = ifelse(level>=ref_gamma, ifelse(level==ref_gamma, 'Observed', 'Extrapolated'), 'Interpolated'))
## 2. alpha
qPDm = iNEXTPD2:::PhD.m.est(ai = aL_table_alpha$branch.abun, Lis = aL_table_alpha$branch.length%>%as.matrix(),
m = m_alpha,q = q,nt = n,cal = 'PD')
qPDm = qPDm/N
alpha = qPDm %>% t %>% as.data.frame %>%
set_colnames(q) %>% gather(Order, Estimate) %>%
mutate(level=rep(level, 3), Coverage_real=rep(iNEXT.3D:::Coverage(data_alpha, m_alpha, datatype = 'abundance'), 3), Size=rep(m_alpha, 3))%>%
mutate(Method = ifelse(level>=ref_gamma, ifelse(level==ref_alpha, 'Observed', 'Extrapolated'), 'Interpolated'))
res = list()
res[['gamma']] = gamma
res[['alpha']] = alpha
return(res)
}
PD_results = get_phylogenetic_alpha_gamma(aL_table_gamma, aL_table_alpha,
n = sum(data_gamma), m_gamma, m_alpha)
gamma = PD_results$gamma
alpha = PD_results$alpha
}
gamma = (gamma %>%
mutate(Method = ifelse(level>=ref_gamma,
ifelse(level==ref_gamma, 'Observed', 'Extrapolated'), 'Interpolated')))[,c(2,1,6,3,4,5)] %>%
set_colnames(c('Estimate', 'Order', 'Method', 'level', 'Coverage_real', 'Size'))
if (max_alpha_coverage==T) {
under_max_alpha = !((gamma$Order==0) & (gamma$level>ref_alpha_max))
}else{
under_max_alpha = level>0
}
gamma = gamma[under_max_alpha,]
gamma$Order = as.numeric(gamma$Order)
alpha = (alpha %>%
mutate(Method = ifelse(level>=ref_alpha, ifelse(level==ref_alpha, 'Observed', 'Extrapolated'), 'Interpolated')))[,c(2,1,6,3,4,5)] %>%
set_colnames(c('Estimate', 'Order', 'Method', 'level', 'Coverage_real', 'Size'))
alpha = alpha[under_max_alpha,]
alpha$Order = as.numeric(alpha$Order)
beta = alpha
beta$Estimate = gamma$Estimate/alpha$Estimate
beta[beta == "Observed"] = "Observed_alpha"
beta = beta %>% rbind(., cbind(gamma %>% filter(Method == "Observed") %>% select(Estimate) / alpha %>% filter(Method == "Observed") %>% select(Estimate),
Order = q, Method = "Observed", level = NA, Coverage_real = NA, Size = beta[beta$Method == "Observed_alpha", 'Size']))
C = beta %>% mutate(Estimate = ifelse(Order==1, log(Estimate)/log(N), (Estimate^(1-Order) - 1)/(N^(1-Order)-1)))
U = beta %>% mutate(Estimate = ifelse(Order==1, log(Estimate)/log(N), (Estimate^(Order-1) - 1)/(N^(Order-1)-1)))
V = beta %>% mutate(Estimate = (Estimate-1)/(N-1))
S = beta %>% mutate(Estimate = (1/Estimate-1)/(1/N-1))
if(nboot>1){
### step0. get information from data (across samples)
get_f0_hat = function(dat){
dat <- as.matrix(dat)
n <- sum(dat)
f1 <- sum(dat == 1)
f2 <- sum(dat == 2)
f0 <- ceiling(ifelse(f2 > 0 ,(n-1)/n*f1^2/2/f2,(n-1)/n*f1*(f1-1)/2))
return(f0)
}
get_g0_hat <- function(data){
n = sum(data)
f1 = sum(data==1)
f2 = sum(data==2)
aL = iNEXT.link:::create.aili(data%>%iNEXT.link:::long_to_wide(), row.tree, col.tree)%>%
select(branch.abun,branch.length)
g1 = aL$branch.length[aL$branch.abun==1] %>% sum
g2 = aL$branch.length[aL$branch.abun==2] %>% sum
g0_hat = ifelse( g2>((g1*f2)/(2*f1)) , ((n-1)/n)*(g1^2/(2*g2)) ,
((n-1)/n)*(g1*(f1-1)/(2*(f2+1))))
return(g0_hat)
}
f0_pool = get_f0_hat(data_gamma)
plan(multisession)
datainfo_each_k = future_lapply(1:ncol(data), function(k){
kth_net = data[,k]
names(kth_net) = rownames(data)
n = sum(kth_net)
g0_k = get_g0_hat(kth_net)
f1 <- sum(kth_net == 1)
f2 <- sum(kth_net == 2)
f0_k <- ceiling(ifelse(f2 > 0 ,(n-1)/n*f1^2/2/f2,(n-1)/n*f1*(f1-1)/2))
C.hat <- 1-f1/n*ifelse(f2 ==0,ifelse(f1 == 0,0,2/((n-1)*(f1-1)+2)),2*f2/((n-1)*f1+2*f2))
lambda <- ifelse(C.hat != 1,(1-C.hat)/sum(kth_net/n*(1-kth_net/n)^n),0)
p.seen <- kth_net/n*(1-lambda*(1-kth_net/n)^n)
pi_matrix = p.seen%>%iNEXT.link:::long_to_wide()
res = list()
res[['pi_matrix']] = pi_matrix
res[['f0_k']] = f0_k
res[['C.hat']] = C.hat
res[['g0_k']] = g0_k
return(res)
}, future.seed = T)
# plan(multisession)
# se = future_lapply(1:nboot, function(b){
se = future_lapply(1:nboot, function(b){
## for each bootstrap samples
### 1. get population (random assign position from candidates)
aiLi_k = lapply(1:ncol(data), function(k){
pi_matrix_k = datainfo_each_k[[k]]$pi_matrix
f0_k = datainfo_each_k[[k]]$f0_k
C.hat = datainfo_each_k[[k]]$C.hat
## pi_matrix: S1*S2 (42*98)
## S1*S2 -> B1*B2 (8736)
seen_interaction_aili = iNEXT.link:::create.aili(pi_matrix_k,row.tree,col.tree)
seen_interaction_aili%>%filter(tgroup == 'Tip')%>%summarise(sum(branch.abun))
# unseen_interaction_aili = data.frame(branch.abun = rep(pi$unseen,f0_k), branch.length = g0 / f0_k,
# tgroup = "Tip",interaction = "unseen")
unseen_interaction_aili = data.frame(branch.abun = rep(0,f0_pool), branch.length = rep(0,f0_pool),
tgroup = "Tip",interaction = "unseen")%>%
mutate(interaction = paste0(interaction, row_number()))
seen_unseen = rbind(seen_interaction_aili, unseen_interaction_aili)
## 8268 candidates out of 8977
candidates = which(seen_unseen$branch.abun == 0)
###
assigned_position = sample(candidates, size = min(f0_k, length(candidates)) , replace = F)
seen_unseen$branch.abun[assigned_position] = (1-C.hat) / f0_k
# seen_unseen <- seen_unseen%>%filter(branch.abun != 0)
seen_unseen_unique = seen_unseen%>%
group_by(interaction)%>%
summarise(tgroup = first(tgroup),
branch.abun = first(branch.abun),
branch.length = first(branch.length))
return(seen_unseen_unique)
})
length_bt = lapply(aiLi_k, function(aili){
aili[, c("branch.length", "interaction")]%>%column_to_rownames("interaction")
})%>%do.call("cbind",.)
colnames(length_bt) = paste0("net_", 1:ncol(length_bt))
p_bt = lapply(aiLi_k, function(aili){
aili[, c("branch.abun", "interaction")]%>%column_to_rownames("interaction")
})%>%do.call("cbind",.)
colnames(p_bt) = paste0("net_", 1:ncol(p_bt))
p_bt2 = p_bt%>%
cbind(tgroup = aiLi_k[[1]]$tgroup)%>%
filter(tgroup == 'Tip')%>%select(-tgroup)%>%
filter_all(any_vars(. != 0))
### 2. generate samples(x_bt)
x_bt = sapply(1:ncol(p_bt2), function(k){
n = sum(data[,k])
sapply(p_bt2[,k], function(p){rbinom(1,size=n, prob = p)})
})
p_star_bt = p_bt%>%
cbind(tgroup = aiLi_k[[1]]$tgroup)%>%
filter_all(any_vars(. != 0))
x_star_bt = sapply(1:(ncol(p_star_bt)-1), function(k){
n = sum(data[,k])
sapply(p_star_bt[,k], function(p){rbinom(1,size=n, prob = p)})
})
rownames(x_bt) = rownames(p_bt2)
### 3. estimate unkwown branch length
L0_hat = sapply(1:ncol(data), function(k){
# compare = data.frame(interaction = names(data_gamma), raw = data_gamma)%>%
compare = data.frame(interaction = rownames(data), raw = data[,k])%>%
inner_join(x_bt[,k]%>%as.data.frame()%>%rownames_to_column('interaction'),
by = 'interaction')%>%rename('sample'=".")
# compare$sample[c(5,14)] = 3
known_unseen_interaction = which(and((compare$raw==0) ,(compare$sample!=0)))
# print(paste0("length of know_unseen_interaction", length(known_unseen_interaction)))
if(length(known_unseen_interaction) == 0){ used_length = 0}else{
used_length = compare%>%filter(raw == 0, sample != 0 )%>%
## note: length_bt have same value over all k
left_join(cbind(length_bt[,c('net_1')], interaction = rownames(length_bt))%>%
as.data.frame()%>%rename("length"="V1"),
by = 'interaction')%>%pull(length)%>%as.numeric()%>%sum()
}
L0_hat_k = datainfo_each_k[[k]][['g0_k']] - used_length
# L0_hat_k = g0_hat[k] - used_length
return(L0_hat_k)
})
for(k in 1:ncol(data)){
length_bt[,k] = c(length_bt[,k]%>%.[1:(length(.)-f0_pool)],
rep(L0_hat[k]/ f0_pool, f0_pool))
}
length_bt_average = length_bt%>%rowMeans()%>%as.data.frame()%>%rownames_to_column("interaction")
###############
### With obtained sample, we can start to calculate gamma, alpha, beta diversities with data_gamma and data_alpha
### 1. aL_table_gamma
bootstrap_data_gamma = rowSums(x_bt)
bootstrap_data_gamma = bootstrap_data_gamma[bootstrap_data_gamma>0]
bt_table_data_gamma = bootstrap_data_gamma%>%
as.data.frame()%>%rownames_to_column("interaction")
bt_aL_table_gamma = bt_table_data_gamma%>%
left_join(length_bt_average , by = 'interaction')%>%
set_colnames(c("interaction", 'branch.abun', 'branch.length'))
### 2. aL_table_alpha
plan(multisession)
bt_aL_table_alpha = future_lapply(1:N, function(k){
bootstrap_data_alpha = cbind(interaction = rownames(x_bt)[x_bt[,k]>0],
branch.abun = x_bt[x_bt[,k]>0,k])%>%as.data.frame()%>%
mutate(branch.abun = as.integer(branch.abun))
aL_table = bootstrap_data_alpha%>%
left_join(length_bt_average , by = 'interaction')%>%
set_colnames(c("interaction", 'branch.abun', 'branch.length'))
return(aL_table)
}, future.seed = TRUE)%>%do.call("rbind",.)
bootstrap_data_alpha = as.matrix(x_bt) %>% as.vector
bootstrap_data_alpha = bootstrap_data_alpha[bootstrap_data_alpha>0]
m_gamma_bt = sapply(level, function(i) coverage_to_size(bootstrap_data_gamma, i, datatype='abundance'))
m_alpha_bt = sapply(level, function(i) coverage_to_size(bootstrap_data_alpha, i, datatype='abundance'))
get_phylogenetic_alpha_gamma_bootstrap <- function(aL_table_gamma, aL_table_alpha,
n, m_gamma, m_alpha){
## 1. gamma
gamma <- iNEXTPD2:::PhD.m.est(ai = aL_table_gamma$branch.abun,
Lis = aL_table_gamma$branch.length%>%as.matrix(),m = m_gamma,
q = q,nt = n,cal = 'PD')%>%t%>% as.vector()
## 2. alpha
alpha =(PhD:::PhD.m.est(ai = aL_table_alpha$branch.abun, Lis = aL_table_alpha$branch.length%>%as.matrix(),
m = m_alpha,q = q,nt = n,cal = 'PD')/N)%>%t%>% as.vector()
## 3. beta
beta_obs = (iNEXT.3D:::PD.Tprofile(ai=aL_table_gamma$branch.abun,
Lis=as.matrix(aL_table_gamma$branch.length),
q=q, nt=n, cal="PD") /
(iNEXT.3D:::PD.Tprofile(ai=aL_table_alpha$branch.abun,
Lis=as.matrix(aL_table_alpha$branch.length),
q=q, nt=n, cal="PD") / N)) %>% unlist()%>%as.vector()
res = list()
res[['gamma']] = gamma
res[['alpha']] = alpha
res[['beta_obs']] = beta_obs
return(res)
}
PD_bootstrap_results = get_phylogenetic_alpha_gamma_bootstrap(aL_table_gamma = bt_aL_table_gamma, aL_table_alpha = bt_aL_table_alpha,
n = sum(bootstrap_data_gamma), m_gamma_bt, m_alpha_bt)
gamma = PD_bootstrap_results$gamma
alpha = PD_bootstrap_results$alpha
beta_obs = PD_bootstrap_results$beta_obs
##
gamma = gamma[under_max_alpha]
alpha = alpha[under_max_alpha]
beta = gamma/alpha
gamma = c(gamma, rep(0, length(q)))
alpha = c(alpha, rep(0, length(q)))
beta = c(beta, beta_obs)
order = rep(q, each=length(level) + 1)[under_max_alpha]
beta = data.frame(Estimate=beta, order)
C = (beta %>% mutate(Estimate = ifelse(order==1,log(Estimate)/log(N),(Estimate^(1-order) - 1)/(N^(1-order)-1))))$Estimate
U = (beta %>% mutate(Estimate = ifelse(order==1,log(Estimate)/log(N),(Estimate^(order-1) - 1)/(N^(order-1)-1))))$Estimate
V = (beta %>% mutate(Estimate = (Estimate-1)/(N-1)))$Estimate
S = (beta %>% mutate(Estimate = (1/Estimate-1)/(1/N-1)))$Estimate
beta = beta$Estimate
diversity = cbind(gamma, alpha, beta, C, U, V, S) %>% as.matrix
# diversity = cbind(gamma, alpha, beta, level= c(rep(level,2), NA,NA)) %>% as.matrix
return(diversity)
# })%>%
# })%>%
}, future.seed = T)%>%
abind(along=3) %>% apply(1:2, sd)
} else {
se = matrix(0, ncol = 7, nrow = nrow(gamma))
colnames(se) = c("gamma", "alpha", "beta", "C", "U", 'V', 'S')
se = as.data.frame(se)
}
se = as.data.frame(se)
gamma = gamma %>% mutate(LCL = Estimate - tmp * se$gamma[1:(nrow(se)-length(q))],
UCL = Estimate + tmp * se$gamma[1:(nrow(se)-length(q))],
Region = region_name)
alpha = alpha %>% mutate(LCL = Estimate - tmp * se$alpha[1:(nrow(se)-length(q))],
UCL = Estimate + tmp * se$alpha[1:(nrow(se)-length(q))],
Region = region_name)
beta = beta %>% mutate( LCL = Estimate - tmp * se$beta,
UCL = Estimate + tmp * se$beta,
Region = region_name)
C = C %>% mutate( LCL = Estimate - tmp * se$C,
UCL = Estimate + tmp * se$C,
Region = region_name)
U = U %>% mutate( LCL = Estimate - tmp * se$U,
UCL = Estimate + tmp * se$U,
Region = region_name)
V = V %>% mutate( LCL = Estimate - tmp * se$V,
UCL = Estimate + tmp * se$V,
Region = region_name)
S = S %>% mutate( LCL = Estimate - tmp * se$S,
UCL = Estimate + tmp * se$S,
Region = region_name)
list(gamma = gamma, alpha = alpha, beta = beta, C = C, U = U, V = V, S = S)
}
output = lapply(1:length(data), function(i) for_each_region(data = data_list[[i]],
region_name = region_names[i], N = Ns[i]))
names(output) = region_names
return(output)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.