Nothing
#' @title Testing search.conv overfit
#' @description Testing the robustness of \code{\link{search.conv}}
#' (\cite{Castiglione et al. 2019b}) results to sampling effects and
#' phylogenetic uncertainty.
#' @usage overfitSC(RR,y.list,phylo.list,s=0.25,
#' nodes=NULL,state=NULL,declust=FALSE,
#' aces=NULL,x1=NULL,aces.x1=NULL,cov=NULL,rootV=NULL, clus=0.5)
#' @param RR an object produced by \code{\link{RRphylo}}.
#' @param y.list a list of multivariate phenotype related to the phylogenetic
#' trees provided as \code{phylo.list} (see Details).
#' @param phylo.list a list of phylogenetic trees. The phylogenies in
#' \code{phylo.list} can either be generated by \code{\link{resampleTree}} or
#' provided by the user. In this latter case, the function is meant to test
#' the robustness of results on alternative topologies, thus the phylogenies
#' must have the same species arranged differently.
#' @param s the percentage of tips to be cut off. It is set at 25\% by default.
#' If \code{phylo.list} is provided, this argument is ignored.
#' @param nodes the argument \code{nodes} as passed to \code{\link{search.conv}}.
#' Please notice, the arguments \code{nodes} and \code{state} can be indicated
#' at the same time.
#' @param state the argument \code{state} as passed to \code{\link{search.conv}}.
#' Please notice, the arguments \code{nodes} and \code{state} can be indicated
#' at the same time.
#' @param declust the argument \code{declust} as passed to \code{\link{search.conv}}.
#' @param aces if used to produce the \code{RR} object, the vector of those
#' ancestral character values at nodes known in advance must be specified.
#' Names correspond to the nodes in the tree.
#' @param x1 the additional predictor to be specified if the RR object has been
#' created using an additional predictor (i.e. multiple version of
#' \code{\link{RRphylo}}). \code{'x1'} vector must be as long as the number of nodes
#' plus the number of tips of the tree, which can be obtained by running
#' \code{\link{RRphylo}} on the predictor as well, and taking the vector of ancestral
#' states and tip values to form the \code{x1}.
#' @param aces.x1 a named vector of ancestral character values at nodes for
#' \code{x1}. It must be indicated if the RR object has been created using
#' both \code{aces} and \code{x1}. Names correspond to the nodes in the tree.
#' @param cov if used to produce the \code{RR} object, the covariate must be
#' specified. As in \code{\link{RRphylo}}, the covariate vector must be as long as
#' the number of nodes plus the number of tips of the tree, which can be
#' obtained by running \code{\link{RRphylo}} on the covariate as well, and taking the
#' vector of ancestral states and tip values to form the covariate.
#' @param rootV if used to produce the \code{RR} object, the phenotypic value at
#' the tree root must be specified.
#' @param clus the proportion of clusters to be used in parallel computing. To
#' run the single-threaded version of \code{overfitSC} set \code{clus} = 0.
#' @return The function returns a 'RRphyloList' object containing:
#' @return \strong{$RR.list} a 'RRphyloList' including the results of each
#' \code{\link{RRphylo}} performed within \code{overfitSC}
#' @return \strong{$SCnode.list} a 'RRphyloList' including the results of each
#' \code{search.conv - clade condition} performed within \code{overfitSC}
#' @return \strong{$SCstate.list} a 'RRphyloList' including the results of each
#' \code{search.conv - state condition} performed within \code{overfitSC}
#' @return \strong{$conv.results} a list including results for
#' \code{\link{search.conv}} performed under \code{clade} and \code{state}
#' conditions. If a node pair is specified within \code{conv.args}, the
#' \code{$clade} object contains the percentage of simulations producing
#' significant p-values for convergence between the clades, and the proportion
#' of tested trees (i.e. where the clades identity was preserved; always 1 if
#' no \code{phylo.list} is supplied). If a state vector is supplied within
#' \code{conv.args}, the object \code{$state} contains the percentage of
#' simulations producing significant p-values for convergence within (single
#' state) or between states (multiple states).
#' @return The output always has an attribute "Call" which returns an unevaluated call to the function.
#' @author Silvia Castiglione, Giorgia Girardi, Carmela Serio
#' @details Methods using a large number of parameters risk being overfit. This
#' usually translates in poor fitting with data and trees other than the those
#' originally used. With \code{\link{RRphylo}} methods this risk is usually very low.
#' However, the user can assess how robust the results of \code{\link{search.conv}}
#' are by running \code{\link{resampleTree}} and \code{overfitSC}. The former is used
#' to subsample the tree according to a \code{s} parameter (that is the
#' proportion of tips to be removed from the tree) and to alter tree topology
#' by means of \code{\link{swapONE}}. Once a list of new phylogenetic trees
#' (\code{phylo.list}) is generated, in case the shape data to feed to
#' \code{\link{search.conv}} are reduced (e.g. via SVD), it is necessary to recompute
#' data reduction, thus obtaining a list of multivariate phenotypes related to
#' the phylogenetic trees (\code{y.list}). Finally, \code{overfitSC} performs
#' \code{\link{RRphylo}} and \code{\link{search.conv}} on each new set of tree and data.
#' Thereby, both the potential for overfit and phylogenetic uncertainty are
#' accounted for straight away.
#'
#'
#' Otherwise, a list of alternative phylogenies can be supplied to
#' \code{overfitSC}. In this case subsampling and swapping arguments are
#' ignored, and robustness testing is performed on the alternative topologies
#' as they are. If a clade has to be tested in \code{\link{search.conv}}, the
#' function scans each alternative topology searching for the corresponding
#' clade. If the species within such clade on the alternative topology differ
#' more than 10\% from the species within the clade in the original tree, the
#' identity of the clade is considered disrupted and the test is not
#' performed.
#' @export
#' @seealso \href{../doc/search.conv.html}{\code{search.conv} vignette};
#' \href{../doc/overfit.html}{\code{overfit} vignette};
#' \href{../doc/Alternative-trees.html}{\code{Alternative-trees} vignette}
#' @references Castiglione, S., Serio, C., Tamagnini, D., Melchionna, M.,
#' Mondanaro, A., Di Febbraro, M., Profico, A., Piras, P.,Barattolo, F., &
#' Raia, P. (2019b). A new, fast method to search for morphological
#' convergence with shape data. \emph{PLoS ONE}, 14, e0226949.
#' https://doi.org/10.1371/journal.pone.0226949
#' @examples
#' \dontrun{
#' require(phytools)
#' require(Morpho)
#' require(ape)
#'
#' cc<- 2/parallel::detectCores()
#'
#' DataFelids$treefel->treefel
#' DataFelids$statefel->statefel
#' DataFelids$landfel->feldata
#'
#' # perform data reduction via Procrustes superimposition (in this case) and RRphylo
#' procSym(feldata)->pcafel
#' pcafel$PCscores->PCscoresfel
#'
#' RRphylo(treefel,PCscoresfel,clus=cc)->RRfelids
#'
#' # apply search.conv under nodes and state condition
#' search.conv(RR=RRfelids, y=PCscoresfel, min.dim=5, min.dist="time38", clus=cc)->sc.clade.time
#'
#' search.conv(tree=treefel, y=PCscoresfel, state=statefel, declust=TRUE, clus=cc)->sc.state
#'
#' # select converging clades returned in sc.clade.time
#' felnods<-rbind(c(85,155),c(85,145))
#'
#' ## overfitSC routine
#'
#' # generate a list of subsampled and swapped phylogenies to test for search.conv
#' # robustness. Use as reference tree the phylogeny returned by RRphylo.
#' # Set the nodes and the categories under testing as arguments of
#' # resampleTree so that it maintains no less than 5 species at least in each
#' # clade/state.
#' treefel.list<-resampleTree(RRfelids$tree,s=0.15,nodes=unique(c(felnods)),categories=statefel,
#' nsim=15,swap.si=0.1,swap.si2=0.1)
#'
#' # match the original data with each subsampled-swapped phylogeny in treefel.list
#' # and repeat data reduction
#' y.list<-lapply(treefel.list,function(k){
#' treedataMatch(k,feldata)[[1]]->ynew
#' procSym(ynew)$PCscores
#' })
#'
#' # test for robustness of search.conv results by overfitSC
#' oSC<-overfitSC(RR=RRfelids,phylo.list=treefel.list,y.list=y.list,
#' nodes = felnods,state=statefel,clus=cc)
#'
#' }
overfitSC<-function(RR,y.list,phylo.list,s=0.25,
nodes=NULL,state=NULL,declust=FALSE,
aces=NULL,x1=NULL,aces.x1=NULL,cov=NULL,rootV=NULL,
clus=0.5){
if (!requireNamespace("ddpcr", quietly = TRUE)) {
stop("Package \"ddpcr\" needed for this function to work. Please install it.",
call. = FALSE)
}
if(is.null(nodes)&is.null(state)) stop("One of nodes or state must be provided")
funcall<-match.call()
nsim<-length(phylo.list)
RR$tree->tree
RR$aces->y.ace
tree$node.label<-rownames(y.ace)
pb = txtProgressBar(min = 0, max = nsim, initial = 0)
node.match<-RR.list<-SCcut<-SCcutS<-list()
for(k in 1:nsim){
setTxtProgressBar(pb,k)
phylo.list[[k]]->treecut
y.list[[k]]->ycut
y.ace[which(rownames(y.ace)%in%treecut$node.label),,drop=FALSE]->y.acecut
if(!is.null(nodes)){
if(is.null(ncol(nodes))) matrix(nodes,ncol=2)->nodes
if(is.data.frame(nodes)) as.matrix(nodes)->nodes
nodec<-array()
for(i in 1:length(unique(c(nodes)))){
getMRCA(treecut,tips(tree,unique(c(nodes))[i])[which(tips(tree,unique(c(nodes))[i])%in%treecut$tip.label)])->scN
if(Ntip(phylo.list[[1]])==Ntip(tree)){
length(tips(treecut,scN))/length(tips(tree,unique(c(nodes))[i]))->sh.tips
if(sh.tips<=1.1&sh.tips>=0.9) scN->nodec[i] else NA->nodec[i]
} else scN->nodec[i]
# if(any(is.na(nodec))) nodec<-NULL
}
names(nodec)<-unique(c(nodes))
node.cut<-matrix(nodec[match(nodes,names(nodec))],ncol=2)
node.match[[k]]<-apply(node.cut,1,function(k) c(paste(k,collapse="-"),paste(rev(k),collapse = "-")),simplify = FALSE)
node.cut<-node.cut[which(!apply(node.cut,1,function(x) any(is.na(x)))),,drop=FALSE]
# getMRCA(treecut,tips(tree,nodes[i])[which(tips(tree,nodes[i])%in%treecut$tip.label)])->node.cut[i]
# if(nrow(node.cut)>0) data.frame(unique(c(nodes)),nodec)->node.match[[k]] else NULL->node.match[[k]]
}
if(!is.null(state)) {
state<-treedataMatch(tree,state)[[1]][,1]
state.cut<-treedataMatch(treecut,state)[[1]][,1]
}
if(!is.null(cov)){
treedataMatch(treecut,cov)$y->covcut
c(RRphylo(treecut,covcut,clus=clus)$aces[,1],covcut)->covcut
# cov[match(c(rownames(y.acecut),rownames(ycut)),names(cov))]->covcut
# names(covcut)[1:Nnode(treecut)]<-seq((Ntip(treecut)+1),(Ntip(treecut)+Nnode(treecut)))
}else covcut<-NULL
if(!is.null(x1)) {
as.matrix(x1)->x1
treedataMatch(treecut,x1)$y->x1cut
rbind(RRphylo(treecut,x1cut,clus=clus)$aces,x1cut)->x1cut
# x1[match(c(rownames(y.acecut),rownames(ycut)),rownames(x1)),,drop=FALSE]->x1cut
# rownames(x1cut)[1:Nnode(treecut)]<-seq((Ntip(treecut)+1),(Ntip(treecut)+Nnode(treecut)))
}else x1cut<-NULL
if(!is.null(aces)){
if(is.vector(aces)) as.matrix(aces)->aces
aces->acescut
dropace<-c()
for(i in 1:nrow(aces)) {
if(length(which(tips(tree,rownames(aces)[i])%in%treecut$tip.label))>1){
getMRCA(treecut,tips(tree,rownames(aces)[i])[which(tips(tree,rownames(aces)[i])%in%treecut$tip.label)])->newN
if(Ntip(phylo.list[[1]])==Ntip(tree)){
length(tips(treecut,newN))/length(tips(tree,rownames(aces)[i]))->sh.tips
if(sh.tips<=1.1&sh.tips>=0.9) newN->rownames(acescut)[i] else c(dropace,i)->dropace
}else newN->rownames(acescut)[i]
# getMRCA(treecut,tips(tree,rownames(aces)[i])[which(tips(tree,rownames(aces)[i])%in%treecut$tip.label)])->rownames(acescut)[i]
}else c(dropace,i)->dropace
}
if(length(dropace>0)) acescut[-dropace,,drop=FALSE]->acescut
if(is.null(nrow(acescut))) acescut<-NULL
}else acescut<-NULL
if(!is.null(aces.x1)){
if(is.vector(aces.x1)) as.matrix(aces.x1)->aces.x1
aces.x1->aces.x1cut
dropace<-c()
for(i in 1:nrow(aces.x1)) {
if(length(which(tips(tree,rownames(aces.x1)[i])%in%treecut$tip.label))>1){
getMRCA(treecut,tips(tree,rownames(aces.x1)[i])[which(tips(tree,rownames(aces.x1)[i])%in%treecut$tip.label)])->newN1
if(Ntip(phylo.list[[1]])==Ntip(tree)){
length(tips(treecut,newN1))/length(tips(tree,rownames(aces.x1)[i]))->sh.tips
if(sh.tips<=1.1&sh.tips>=0.9) newN1->rownames(aces.x1cut)[i] else c(dropace,i)->dropace
}else newN1->rownames(aces.x1cut)[i]
# getMRCA(treecut,tips(tree,rownames(aces.x1)[i])[which(tips(tree,rownames(aces.x1)[i])%in%treecut$tip.label)])->rownames(aces.x1cut)[i]
}else c(dropace,i)->dropace
}
if(length(dropace>0)) aces.x1cut[-dropace,,drop=FALSE]->aces.x1cut
if(is.null(nrow(aces.x1cut))) aces.x1cut<-NULL
}else aces.x1cut<-NULL
if(!is.null(rootV)) rootV->rootVcut else rootVcut<-NULL
RRphylo(treecut,ycut,aces=acescut,x1=x1cut,aces.x1=aces.x1cut,cov=covcut,rootV = rootVcut,clus=clus)->RRcut->RR.list[[k]]
if(!is.null(nodes)&&nrow(node.cut)>0) ddpcr::quiet(search.conv(RR=RRcut,y=ycut,nodes=node.cut,aceV=acescut,clus=clus)->sccut->SCcut[[k]],all=TRUE)
# if(!is.null(nodes)&&any(!is.na(node.cut))) ddpcr::quiet(search.conv(RR=RRcut,y=ycut,nodes=na.omit(node.cut),aceV=acescut,clus=clus)->sccut->SCcut[[k]],all=TRUE)
if(!is.null(state)) ddpcr::quiet(search.conv(tree=treecut,y=ycut,state=state.cut,aceV=acescut,declust=declust,clus=clus)->sccut->SCcutS[[k]],all=TRUE)
}
close(pb)
if(!is.null(nodes)){
lapply(SCcut,"[[",1)->scres
scres[which(!sapply(SCcut,is.null))]->scres
scnodes<-mapply(a=node.match,b=scres,function(a,b){
lapply(a,function(aa) b[which(paste(rownames(b),b[,1],sep="-")%in%aa),,drop=FALSE])
},SIMPLIFY =FALSE)
t(sapply(1:length(scnodes[[1]]),function(rr){
do.call(rbind,lapply(scnodes,"[[",rr))->resn
c(sum(resn[,8]<=0.05)/nrow(resn),sum(resn[,9]<=0.05)/nrow(resn),nrow(resn)/nsim)
}))->p.convC
colnames(p.convC)<-c("p.ang.bydist","p.ang.conv","tested.trees")
rownames(p.convC)<-apply(nodes,1,paste,collapse="-")
}else SCcut<-p.convC<-NULL
if(!is.null(state)){
lapply(SCcutS,"[[",1)->scresS
p.convS<-matrix(ncol=2,nrow=nrow(scresS[[1]]))
if("nostate"%in%state&length(unique(state)[-which(unique(state)=="nostate")])){
c(length(which(sapply(scresS,function(x) x[1,3])<=0.05))/nsim,
length(which(sapply(scresS,function(x) x[1,4])<=0.05))/nsim)->p.convS[1,]
rownames(p.convS)<-rownames(scresS[[1]])
colnames(p.convS)<-colnames(scresS[[1]])[3:4]
}else{
for(i in 1:nrow(scresS[[1]])){
c(length(which(sapply(scresS,function(x) x[i,5])<=0.05))/nsim,
length(which(sapply(scresS,function(x) x[i,6])<=0.05))/nsim)->p.convS[i,]
}
rownames(p.convS)<-apply(scresS[[1]][,1:2],1,function(x) paste(x[1],x[2],sep="-"))
colnames(p.convS)<-colnames(scresS[[1]])[5:6]
}
}else SCcutS<-p.convS<-NULL
list(p.convC,p.convS)->conv.res
names(conv.res)<-c("clade","state")
class(RR.list)<-"RRphyloList"
if(!is.null(SCcut)) class(SCcut)<-"RRphyloList"
if(!is.null(SCcutS)) class(SCcutS)<-"RRphyloList"
res<-structure(list(RR.list=RR.list,
SCnode.list=SCcut,
SCstate.list=SCcutS,
conv.results=conv.res),
class = "RRphyloList")
attr(res,"Call")<-funcall
res
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.