#' Diversification-based PD and PE
#'
#' @details This function computes two modified versions of Phylogenetic Diversity (PD) and Phylogenetic
#' endemism by considering in the calculation the in-situ diversification. the PD~in-situ~ and PE~in-situ~ are calculated
#' by coupling in the calculation an ancetral area reconstruction that divide the phylogenetic tree in two parts,
#' one that correspond to dispersion events and another that correspond to in-situ diversification events.
#' We use the in-situ diversification part to calculate both Db-PD and Db-PE
#'
#' @param W Occurrence matrix, rows are assemblages and columns are species
#' @param tree Phylogenetic hipothesis in newick format
#' @param ancestral.area One column data frame with nodes in rows and one column indicating the occurrence area of nodes
#' @param biogeo One columns data frame with rows indicating assemblages and one column indicating the biome/ecoregion of each assemblage
#' @param PD Logical, if TRUE (default) PD~in-situ~ will be computed
#' @param PE Logical, if TRUE (default) PE~in-situ~ will be computed
#'
#' @return A data frame containing the values of original PD and PE and also their in-situ diversification
#' based version
#'
#' @author Gabriel Nakamura <gabriel.nakamura.souza@@gmail.com>
#'
#' @export
#'
#' @examples
#' W_toy<- matrix(c(0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0),
#' nrow= 3,
#' ncol= 5,
#' dimnames=list(c("Comm 1", "Comm 2", "Comm 3"),
#' c(paste("s", 1:5, sep=""))))
#' data("toy_treeEx")
#' biogeo_toy <- data.frame(Ecoregion= c("A", "B", "C"))
#' ancestral_area_toy <- data.frame(state= c("ABC", "B", "C", "ABC"))
#' assemblage_phylo_metrics <- calc_insitu_metrics(W_toy,
#' toy_treeEx,
#' ancestral_area_toy,
#' biogeo_toy)
calc_insitu_metrics <- function(W,
tree,
ancestral.area,
biogeo,
PD = TRUE,
PE = TRUE){
# total phylogenetic diversity and endemism -------------------------------
if(!is.matrix(W) == TRUE){
if(is.data.frame(W) == TRUE){
W <- as.matrix(W)
rownames(W) <- 1:nrow(W)
} else{
stop("W must be a occurrence matrix with presences (1) or absences (0)")
}
}
PDt <- picante::pd(samp = W, tree = tree)$PD # faster option
PEt <- phyloregion::phylo_endemism(x = W , phy = tree, weighted = TRUE) #Phylogenetic endemism sensu Rosauer
# basic information from nodes and ancestral reconstruction ---------------
nodes.list <- get_nodes_info_core(W = W, tree = tree, ancestral.area = ancestral.area, biogeo = biogeo)
names_spp_noNull<- lapply(
lapply(
lapply(nodes.list,
function(x){
is.na(x$nodes_species)
}
),
function(x){
which(x == FALSE)
}
),
names)
nodes_species_noNull <- vector(mode = "list", length = nrow(W))
for(i in 1:length(names_spp_noNull)){
nodes_species_noNull[[i]] <- nodes.list[[i]]$nodes_species[names_spp_noNull[[i]]]
}
####organize node matrix
nodes_species_noNull_org <- nodes_species_noNull
list_matrix_nodes <- vector(mode = "list", length = length(nodes_species_noNull_org))
for(i in 1:length(nodes_species_noNull)){
if(length(nodes_species_noNull[[i]]) == 0){
list_matrix_nodes[[i]] <- NA
} else{
names_spp <- names(nodes_species_noNull[[i]])
list_nodes_org <- vector(mode = "list", length = length(names_spp))
for(j in 1:length(names_spp)){
#j= 1
matrix_nodesSpp_nonull <- matrix(NA, nrow = round(length(unlist(nodes_species_noNull[[i]][j]))), ncol= 2)
nodes_org <- c(sort(nodes_species_noNull[[i]][j][[1]],
decreasing = FALSE),
which(tree$tip.label == names_spp[j]))
for(k in 1:nrow(matrix_nodesSpp_nonull)){
matrix_nodesSpp_nonull[k, ] <- c(nodes_org[k], nodes_org[k + 1])
}
list_nodes_org[[j]]<- matrix_nodesSpp_nonull
}
list_matrix_nodes[[i]]<- list_nodes_org
}
}
# extracting all internal insitu branch length ----------------------------
list_matrix_edges <- vector(mode = "list", length= length(list_matrix_nodes))
for(i in 1:length(list_matrix_nodes)){
if(any(is.na(list_matrix_nodes[[i]])) == TRUE){
list_matrix_edges[[i]] <- NA
} else{
list_matrix_edges[[i]] <- unique(do.call(rbind,
list_matrix_nodes[[i]]))
}
}
list_brLength_divLocal <- vector(mode = "list", length = length(list_matrix_edges))
for(i in 1:length(list_matrix_edges)){
if(any(is.na(list_matrix_edges[[i]])) == TRUE){
list_brLength_divLocal[[i]] <- NA
} else{
pos_insitu_nodes<- apply(tree$edge, MARGIN = 1, function(x){
which(x == apply(as.matrix(list_matrix_edges[[i]]), MARGIN = 1, function(l) l))
})
list_brLength_divLocal[[i]]<- tree$edge.length[which(unlist(lapply(pos_insitu_nodes,
function(x) length(x) > 1)
) == TRUE)
]
}
}
# computing diversification-based PD --------------------------------------
if(PD == TRUE){
PDinsitu <- unlist(lapply(list_brLength_divLocal,
function(x){
sum(x)
}
)
)
names(PDinsitu) <- rownames(W) #PDinsitu
}
# computing diversification-based PE --------------------------------------
if(PE == TRUE){
names(list_matrix_edges) <- paste("site", 1:nrow(W), sep= "_")
all_comb <- gtools::permutations(length(list_matrix_edges), 2,
names(list_matrix_edges))
list_occurence <- vector(mode= "list", length= length(list_matrix_edges))
#calculating the number of branches co-occurence
list_matrix_edges_allTips <- vector(mode = "list", length = length(list_matrix_edges))
names(list_matrix_edges_allTips) <- names(list_matrix_edges)
for(i in 1:length(list_matrix_edges)){
# i = 718
tip_edges <- do.call(rbind, lapply(ape::nodepath(tree)[which(W[i,] == 1)], function(x){
x[(length(x) - 1):length(x)]
}))
if(all(is.na(list_matrix_edges[[i]]))){
list_matrix_edges_allTips[[i]]<- unique(tip_edges) # only tip edges
} else{
list_matrix_edges_allTips[[i]]<- unique(rbind(list_matrix_edges[[i]], tip_edges)) # tip and internal branches
}
}
for(i in 1:length(list_matrix_edges_allTips)){
posit <- which(all_comb[, 1] == names(list_matrix_edges_allTips)[i])
comb_occ <- matrix(NA, nrow = nrow(list_matrix_edges_allTips[[i]]), ncol = length(list_matrix_edges_allTips)-1)
for(j in 1:length(posit)){
if (any(is.na(list_matrix_edges_allTips[[all_comb[posit[j], 2]]])) == TRUE){
comb_occ[, j] <- FALSE
} else {
comb_occ[,j] <- list_matrix_edges_allTips[[all_comb[posit[j], 1]]][, 1] %in% list_matrix_edges_allTips[[all_comb[posit[j], 2]]][, 1] &
list_matrix_edges_allTips[[all_comb[posit[j], 1]]][, 2] %in% list_matrix_edges_allTips[[all_comb[posit[j], 2]]][, 2]
}
}
list_occurence[[i]]<- comb_occ
}
# naming list_occurrence
for(i in 1:length(list_occurence)){
if(any(is.na(list_occurence[[i]]))){
list_occurence[[i]]<- NA
} else{
colnames(list_occurence[[i]]) <- biogeo[-i, 1]
}
}
# binding occurrence and branch length
list_edge_brLen_insitu <- vector(mode = "list", length = length(list_occurence))
for(i in 1:length(list_occurence)){
# i = 30
list_edge_brLen_insitu[[i]]<- suppressWarnings(cbind(list_brLength_divLocal[[i]], list_occurence[[i]]))
}
####correcting denominator of PE for ancestral state occurrence
ancestral_nodes <- cbind(ancestral.area, node = rownames(ancestral.area)) # binding nodes name with ancestral state
### ancestral state for each node that compose each community
ancestral_comm_temp<- lapply(
lapply(list_matrix_edges_allTips,
function(x){
if(any(is.null(dim(x))) == TRUE){
NA
} else {
apply(x, MARGIN = 1, function(l){
cbind(ancestral_nodes[match(l[1], ancestral_nodes[, 2]), 1], ancestral_nodes[match(l[2], ancestral_nodes[, 2]), 1])
}
)
}
}
),
function(m) t(m)
)
### binding nodes with their respective ancestral state
list_matrix_edges_AS<- lapply(list_matrix_edges_allTips, function(x){
if(any(is.na(x)) == TRUE){
NA
} else{
cbind(x, rep(NA, nrow(x)), rep(NA, nrow(x)))
}
}
)
for(i in 1:length(list_matrix_edges_AS)){
if(any(is.null(dim(list_matrix_edges_AS[[i]]))) == TRUE){
list_matrix_edges_AS[[i]]<- NA
} else{
list_matrix_edges_AS[[i]][, c(3,4)] <- ancestral_comm_temp[[i]]
}
}
# adding ancestral occurrence and branch length
list_all_DbInfo <- vector(mode = "list", length= length(list_brLength_divLocal))
for(i in 1:length(list_matrix_edges_AS)){
if(any(is.na(list_brLength_divLocal[[i]])) == TRUE){
list_edge_brLen_insitu[[i]][, 1] <- tree$edge.length[tree$edge[, 1] %in% list_matrix_edges_allTips[[i]][, 1] &
tree$edge[, 2] %in% list_matrix_edges_allTips[[i]][, 2]
] # obtaining branch lengths for terminal tips
denom_tmp2<- rowSums(list_occurence[[i]]) # denominator for terminal tips
list_all_DbInfo[[i]]<- cbind(list_edge_brLen_insitu[[i]][,1], ((denom_tmp2) + 1)) # joining information for terminal tips
} else{ # correcting the denominator for occurrence of internal branch lengths
a<- strsplit(list_matrix_edges_AS[[i]][, 3], split= "")
b<- strsplit(list_matrix_edges_AS[[i]][, 4], split= "")
denom_tmp2<- numeric()
for(j in 1:length(a)){
if(any(is.na(b[[j]])) == TRUE){
denom_tmp2[j] <- sum(list_occurence[[i]][j,]) # denominator for tip edges
} else{
denom_tmp2[j]<- sum(list_edge_brLen_insitu[[i]][j, ][!is.na(match(names(list_edge_brLen_insitu[[i]][j, ]),
a[[j]][which(a[[j]] == b[[j]])]
)
)
]
) # denominator that counts only for bioregions that match with current occurrence
}
}
}
list_all_DbInfo[[i]] <- cbind(list_edge_brLen_insitu[[i]][,1], ((denom_tmp2) + 1)) # binding denominator and branch length for each edge
}
#### Calculating PEinsitu
PEinsitu_res <- lapply(
lapply(list_all_DbInfo,
function(x){
if(any(is.null(dim(x))) == TRUE){
NA
} else{
apply(x, MARGIN = 1,
function(l){
if(any(is.na(l)) == T){
NA
} else{
l[1]/l[2]
}
}
)
}
}
),
sum)
res_insitu_PE <- matrix(unlist(PEinsitu_res), nrow= length(list_matrix_edges_AS), ncol= 1,
dimnames = list(names(list_matrix_edges_AS), "PEinsitu"))
}
# Output for community metrics ---------------------------------------
if(PD == TRUE & PE == TRUE){
insitu_comm_metrics <- data.frame(PE = PEt, PEinsitu = res_insitu_PE, PD = PDt, PDinsitu = PDinsitu)
}
if(PD == TRUE & PE == FALSE){
insitu_comm_metrics <- data.frame(PD = PDt, PDinsitu = PDinsitu)
}
if(PE == TRUE & PD == FALSE){
insitu_comm_metrics <- data.frame(PE= PEt, PEinsitu = res_insitu_PE)
}
return(insitu_comm_metrics)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.