###################################################
## detect_closest_out_for_GD
###################################################
#' @title Detect the existence of the closest outgroups for each gd
#'
#' @description \code{detect_closest_out_for_GD} is to find out whether each gd
#' has tips of its sister clade as the closest outgroup, in order to verify
#' the support of gd. The returning object contains a simple summary for the
#' number and percent of gd that has the closest outgroup. This function will
#' also produce pdf files plotting subclade of each gd, with tips colored,
#' for easier manual investigation when required.
#'
#' @param phyto the phyto id for the analysis.
#' @param node a numeric indicating the target node id as in \code{ape}.
#' An alternative arg of \code{phyto}.
#' @param trees a list of objects of class "\code{phylo}".
#' A list of gene trees to be examined. Can be unrooted.
#' @param gdtable a \code{data.frame} of the gd table file provided by Tree2GD.
#' @param sptree the species tree used to do tree reconciliation.
#' @param split a character. The symbol used to separate species name and the
#' sequence number in the gene family trees.
#' @param tree_id_tab a table for the tree id during Tree2GD and the name of the
#' corresponding tree file. The first column is the id, and the second column
#' is the tree names.
#' @param up_one_node logical. Whether to choose tips of one node deeper as the
#' closest outgroup, as a second step of investigation. Default \code{FALSE}.
#' @param pdfwid the width of pdf file for subtrees. Default \code{10}.
#' @param pdflen the length of pdf file for subtrees. Default \code{30}.
#' @param mc.cores the number of cores used for mclapply. Defaults to \code{4}.
#' @export
#' @importFrom parallel mclapply
#' @details For each gd, detect whether there are closest outgroups
#' exactly next to the duplicated node, that can actually define
#' the gd at the indicated node. Otherwise, gd will be possible
#' to be at other (deeper) nodes as well.
#' @examples
#' \dontrun{
#' res<-
#' detect_closest_out_of_GD(
#' phyto,trees = trees,gdtable = gdtable,
#' sptree = sptree,split=".",tree_id_tab = tree_id_tab,
#' up_one_node=FALSE,pdfwid = 20,pdflen = 100)
#'
#' res
#' }
# only for lapply
detect_closest_out_for_GD <- function(
phyto, node, trees, gdtable, sptree, split,
tree_id_tab, up_one_node = FALSE,
pdfwid = 10, pdflen = 30, mc.cores = 2){
# check if tree names matched
t1 <- names(trees)
t2 <- tree_id_tab[ ,2]
m <- match(t1, t2)
if(all(is.na(m))){
stop("Names in tree list are different from the tree_id_tab.")
}
if(missing(phyto) && missing(node)){
stop("Missing both phyto ids and node ids.\n")
}
if(hasArg(node)){
phyto <- NULL
len <- length(node)
}
if(hasArg(phyto)){
node <- NULL
len <- length(phyto)
}
sum<-parallel::mclapply(c(1:len), function(z) {
.detect_out(phyto = phyto[z],
node = node[z],
trees = trees,
gdtable = gdtable,
sptree = sptree,
split = split,
tree_id_tab = tree_id_tab,
up_one_node = up_one_node,
pdfwid = pdfwid,
pdflen = pdflen)
}, mc.cores = mc.cores)
return(sum)
}
# main function but internal for lapply
#' @title Internal. Detect the existence of the closest outgroups for each gd
#'
#' @description Main internal function for \code{detect_closest_out_for_GD}
#' that is to find out whether each gd
#' has tips of its sister clade as the closest outgroup, in order to verify
#' the support of gd. The returning object contains a simple summary for the
#' number and percent of gd that has the closest outgroup. This function will
#' also produce pdf files plotting subclade of each gd, with tips colored,
#' for easier manual investigation when required.
#'
#' @param phyto the phyto id for the analysis.
#' @param node a numeric indicating the target node id as in \code{ape}.
#' An alternative arg of \code{phyto}.
#' @param trees a list of objects of class "\code{phylo}".
#' A list of gene trees to be examined.
#' @param gdtable a \code{data.frame} of the gd table file provided by Tree2GD.
#' @param sptree the species tree used to do tree reconciliation.
#' @param split a character. The symbol used to separate species name and the
#' sequence number in the gene family trees.
#' @param tree_id_tab a table for the tree id during Tree2GD and the name of the
#' corresponding tree file. The first column is the id, and the second column
#' is the tree names.
#' @param up_one_node logical. Whether to choose tips of one node deeper as the
#' closest outgroup, as a second step of investigation. Default \code{FALSE}.
#' @param pdfwid the width of pdf file for subtrees. Default \code{10}.
#' @param pdflen the length of pdf file for subtrees. Default \code{30}.
#' @export
#' @details For each gd, detect whether there are closest outgroups
#' exactly next to the duplicated node, that can actually define
#' the gd at the indicated node. Otherwise, gd will be possible
#' to be at other (depper) nodes as well.
#' @importFrom ape plot.phylo
#' @importFrom ape nodelabels
#' @importFrom ape extract.clade
#' @importFrom ape ladderize
#' @importFrom ape getMRCA
#' @importFrom phangorn Ancestors
#' @importFrom phangorn Descendants
#' @importFrom phangorn getRoot
#' @importFrom grDevices pdf
#' @importFrom grDevices dev.off
#' @importFrom graphics title
#' @keywords internal function
#' @seealso \code{\link{detect_closest_out_for_GD}}
.detect_out<-function(
phyto, node, trees, gdtable, sptree,
split, tree_id_tab, up_one_node = up_one_node,
pdfwid = pdfwid, pdflen = pdflen){
# prepare node id table
phyto_tab <- make.idTable.phyto(sptree)
if(!is.null(phyto) && !is.null(node)){
cat("Both \"phyto\" and \"node\" are given. Use \"phyto\" for the analysis.\n")
}
if(!is.null(node)){
wgdt <- phyto_tab[which(as.numeric(phyto_tab[ ,2]) == node), 1]
wnode <- node
}
if(!is.null(phyto)){
wgdt <- phyto
wnode <- phyto_tab[which(phyto_tab[,1] == phyto), 2]
wnode <- as.numeric(wnode)
}
w <- which(gdtable[ ,3] == wgdt)
length(w)
p109 <- gdtable[w, ]
# grep trees
ttrees <- p109[ ,1]
ttrees <- unique(ttrees)
target.order <- ttrees
m <- match(target.order, tree_id_tab[ ,1])
target <- tree_id_tab[m,2]
treenames <- names(trees)
m <- match(target, treenames)
ntreenames <- treenames[m]
target.trees <- lapply(m, function(xx)
return(trees[[xx]]))
names(target.trees) <- ntreenames
# list of gd pairs
# should find mrca of gd pairs but not all pairs in the tree
pp <- p109[ ,2] # gd
tt <- p109[ ,1] # tree
dup <- duplicated(pp)
pp <- pp[!dup]
tt <- tt[!dup]
plist<-vector("list",length(pp)) # gd
names(plist)<-pp
for(i in 1:length(pp)){
w<-which(p109[,2] == pp[i])
t<-lapply(w,function(xx) c(p109[xx,5],p109[xx,6]))
t<-unlist(t)
plist[[i]]<-t
}
# get basal tips
bnode<-as.numeric(wnode)
anc<-phangorn::Ancestors(sptree,node = bnode,type = "parent")
des<-phangorn::Descendants(sptree,node = anc,type = "children")
des<-des[-which(des == bnode)]
if(des <= length(sptree$tip.label)){
btips<-sptree$tip.label[des]
} else {
st<-ape::extract.clade(sptree,node = des)
btips<-st$tip.label
}
if(up_one_node){
bnode<-anc
anc<-phangorn::Ancestors(sptree,node = bnode,type = "parent")
des<-phangorn::Descendants(sptree,node = anc,type = "children")
des<-des[-which(des == bnode)]
if(des <= length(sptree$tip.label)){
btips<-sptree$tip.label[des]
} else {
st<-ape::extract.clade(sptree,node = des)
btips<-st$tip.label
}
}
cat("basal_tips:\n")
cat(btips,sep=", ")
cat("\n")
# to find gd pair in the target trees
# ntreenames
# target.order
ress<-c()
ntrees<-list()
grDevices::pdf(paste("color_tip_trees_",wgdt,".pdf",sep=""),pdfwid,pdflen)
for(i in 1:length(target.order)){
options(warn = 1)
t<-target.order[i]
tr<-NULL
tr<-target.trees[[i]]
if(is.null(tr)){
warning("A tree in gdtable is not present in trees object: ",t,"\n")
} else {
# required gd
w<-which(tt == t)
pairs<-lapply(w,function(xx)
return(plist[[xx]]))
names(pairs)<-names(plist)[w]
for(y in 1:length(pairs)){
pair<-unique(pairs[[y]])
pn<-names(pairs)[y]
# root by the most distant node
# options(warn=-1)
tr<-root_dist(tr,tar = pair)
tr<-rewrite.tree(ape::ladderize(tr,right=FALSE))
options(warn=1)
mrca<-ape::getMRCA(tr,tip = pair);mrca
if(mrca != phangorn::getRoot(tr)){
anc<-phangorn::Ancestors(tr,node = mrca,type = "parent");anc
des<-phangorn::Descendants(tr,node = anc,type = "children");des
sis<-des[-which(des == mrca)];sis
if(sis <= length(tr$tip.label)){ # tips
stips<-tr$tip.label[sis]
} else { # nodes
str<-ape::extract.clade(tr,node = sis)
stips<-str$tip.label
}
# see if contain basal tips
stips<-lapply(stips,function(xx)
unlist(strsplit(xx,split = split,fixed = TRUE))[1])
stips<-unlist(stips)
check<-stips %in% btips
if(any(check)){ # having basal tips
res<-c(ntreenames[i],"TRUE")
} else {
res<-c(ntreenames[i],"FALSE")
}
ress<-rbind(ress,res)
# plot trees
# to extract subtree include up-two-nodes
if(anc != phangorn::getRoot(tr))
anc<-phangorn::Ancestors(tr,node = anc,type = "parent")
atree<-ape::extract.clade(phy = tr,node = anc)
# color pairs in red
tipcol<-rep("black",length(atree$tip.label))
w<-which(atree$tip.label %in% pair);w
if(length(w) > 0){tipcol[w]<-"red"}
tips<-atree$tip.label
tips<-lapply(tips,function(xx)
unlist(strsplit(xx,split = split,fixed = TRUE))[1])
tips<-unlist(tips)
# color basal tips in green
w<-which(tips %in% btips);w
if(length(w) > 0){tipcol[w]<-"green"}
# plot tree
ape::plot.phylo(atree,tip.color = tipcol,use.edge.length = FALSE,
node.depth = 2)
ape::nodelabels(text = atree$node.label,bg = "white")
graphics::title(paste("tree = ",ntreenames[i],", gd = ",pn,sep=""))
atree$tip.col<-tipcol
alist<-list(atree)
names(alist)<-paste(ntreenames[i],", gd = ",pn,sep="")
ntrees<-append(ntrees,alist)
} else {
war<-paste(ntreenames[i],", gd ",pn," has mrca == root.",sep="")
warning(war)
res<-c(war," ")
ress<-rbind(ress,res)
trr<-list(tr)
names(trr)<-paste(ntreenames[i],", gd = ",pn,sep="")
ntrees<-append(ntrees,trr)
}
} # for gd
} # if tree is not NULL
} # for trees
grDevices::dev.off()
#
saveRDS(ntrees,paste("ntrees_",wgdt,".rds",sep=""))
write.table(ress,paste("Include_or_not_",wgdt,".txt",sep=""),
quote = FALSE,col.names = TRUE,row.names = FALSE)
# ress contains, for each gd, TRUE or FALSE to have the outgroup
# the first column is the tree id, the second column is TREU/FALSE
# thus the first column may be duplicated
tar<-ress[,2]
w<-which(tar == "TRUE")
lw<-length(w) # length of having closest outgroups
lt<-length(tar) # length of all gd
sum<-c(wgdt,lw,lt,round(lw/lt,3))
names(sum)<-c("wgd_node","having_out","total","percent")
cat(sum,sep=", ")
cat("\n")
r<-ress[w,1]
# write a table
page<-w
trn<-r
dd<-data.frame(page=page,tree=trn)
write.table(dd,paste("Trees_have_basal_tips_",wgdt,".txt",sep=""),
sep="\t",quote = FALSE,col.names = TRUE,
row.names = FALSE)
return(sum)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.