## ------------------------------------------------------------------------
## read *PQTREE* from ANGES output (do not use *PQRTREE* and do not use
## *DOUBLED*) and convert it into a table-like format with columns $marker,
## $orientation, $car, and at least two additional columns with node $type*
## and $elem*
## ------------------------------------------------------------------------
#' Convert linearly encoded \emph{PQ-trees} into a two-dimensional
#' \emph{PQ-structure}
#'
#' Convert linearly encoded \emph{PQ-trees} of an ancestral genome
#' reconstruction, for example as output by the software ANGES (Jones \emph{et
#' al.} 2012), into a two-dimensional \emph{PQ-structure} that can be used as
#' \code{compgenome} input for the functions \code{\link{computeRearrs}},
#' \code{\link{summarizeBlocks}}, and \code{\link{genomeRearrPlot}}
#'
#' @param rawtree Data frame with a single column that encodes the structure of
#' \emph{PQ-trees} as strings. See Details.
#'
#' @details
#'
#' \code{rawtree} is a data frame with a single column encoding an ancestral
#' genome reconstruction as strings, for example as generated by the software
#' ANGES (Chauve & Tannier 2008; Jones \emph{et al.} 2012). The first row
#' gives the name of the ancestor (preceded by \code{>}), followed by two rows
#' for each ancestral genome segment (CAR). The first row of each set gives
#' the CAR ID (preceded by \code{#CAR}), and the second row provides the
#' \emph{PQ-tree} structure of that CAR. The children of each node in the
#' \emph{PQ-tree} are enclosed by \code{_P} and \code{P_} markups for
#' \emph{P-nodes}, or by \code{_Q} and \code{Q_} markups for \emph{Q-nodes}.
#' Markers (i.e., ortholog IDs) that belong to a particular node are located
#' between its corresponding markups. Markers with reversed orientation are
#' preceded by a \code{-} sign. The opening (\code{_P} or \code{_Q}) and
#' closing (\code{P_} or \code{Q_}) markups can be nested to allow the
#' representation of the hierarchical structure of the \emph{PQ-tree}. For
#' details on \emph{PQ-trees} see Booth & Lueker 1976, Chauve & Tannier 2008,
#' or the package vignette.
#'
#' Example of a \code{rawtree} object, corresponding to
#' \code{\link{TOY24_compgenome}}:\cr
#'
#' \code{>TOY24}\cr
#' \code{#CAR1}\cr
#' \code{_Q 1 2 3 4 5 -6 7 8 Q_}\cr
#' \code{#CAR2}\cr
#' \code{_Q 9 _Q 10 Q_ _Q 11 12 13 Q_ Q_}\cr
#' \code{#CAR3}\cr
#' \code{_Q 14 Q_}\cr
#' \code{#CAR4}\cr
#' \code{_Q _P 15 16 17 18 P_ _Q _Q -19 20 21 Q_ _Q -22 -23 24 Q_ Q_ Q_}
#'
#' \strong{Important:} The function was designed to work with the ANGES
#' \code{*PQTREE*} output file. It was not desiged to work with the
#' ancillary \code{*PQRTREE*} or the \code{*DOUBLED*} output files.
#'
#' @section References:
#'
#' Booth, K.S. & Lueker, G.S. (1976). Testing for the consecutive ones
#' property, interval graphs, and graph planarity using \emph{PQ}-Tree
#' algorithms. \emph{Journal of Computer and System Sciences}, \strong{13},
#' 335--379. doi:
#' \href{https://doi.org/10.1016/S0022-0000(76)80045-1}{10.1016/S0022-0000(76)80045-1}.
#'
#' Chauve, C. & Tannier, E. (2008). A methodological framework for the
#' reconstruction of contiguous regions of ancestral genomes and its
#' application to mammalian genomes. \emph{PLOS Computational Biology},
#' \strong{4}, e1000234. doi:
#' \href{https://doi.org/10.1371/journal.pcbi.1000234}{10.1371/journal.pcbi.1000234}.
#'
#' Jones, B. R. \emph{et al.} (2012). ANGES: reconstructing ANcestral GEnomeS
#' maps. \emph{Bioinformatics}, \strong{28}, 2388--2390. doi:
#' \href{https://doi.org/10.1093/bioinformatics/bts457}{10.1093/bioinformatics/bts457}
#'
#' @return A data frame encoding \emph{PQ-trees} of an ancestral genome
#' reconstruction as a two-dimensional \emph{PQ-structure}.
#'
#' See the description of the \code{"compgenome"} class in the Details section
#' of the \code{\link{checkInfile}} function for more information on the
#' output format.
#'
#' @seealso \code{\link{checkInfile}}, \code{\link{computeRearrs}},
#' \code{\link{summarizeBlocks}}, \code{\link{genomeRearrPlot}}.
#'
#' @examples
#' \dontrun{
#'
#' ## Read a text file with an ancestral genome reconstruction
#' ## from the software ANGES into R:
#' MSSYE_PQTREE_HEUR <- read.table("PATH/TO/MSSYE_PQTREE_HEUR",
#' sep = ",", comment.char = "",
#' as.is = TRUE)
#'
#' ## Convert linearly encoded PQ-trees into PQ-structure:
#' MSSYE_compgenome <- convertPQtree(MSSYE_PQTREE_HEUR)
#' }
#'
#' @export
#' @importFrom utils tail
convertPQtree<-function(rawtree){
tree<-character()
car<-NA
elem<-NA
ori<-NA
nodestack<-character()
countstack<-integer()
maxnodes<-0
converted<-character()
for(i in 1:nrow(rawtree)){
if(grepl("^>",rawtree[i,])){
cat(paste0("\nconverting data for ",sub(">","",rawtree[i,])))
}else if(grepl("^#CAR",rawtree[i,])){
car<-as.integer(sub("#CAR","",rawtree[i,]))
}else{
tree<-unlist(strsplit(rawtree[i,]," +"))
if(length(tree) == 0){
stop(paste0("CAR",car," does not match the required format"))
}
for(j in 1:length(tree)){
elem<-tree[j]
if(length(countstack)>0){
## already node(s)
## -> increment node counter
countstack[length(countstack)]<-countstack[length(countstack)]+1
}
if(is.element(elem,c("_P","_Q"))){
## node opens
elem<-sub("_","",elem)
nodestack<-c(nodestack,elem)
countstack<-c(countstack,0)
}else if(is.element(elem,c("P_","Q_"))){
## node closes
elem<-sub("_","",elem)
if(length(nodestack) == 0){
stop(paste0("something in the hierarchy of CAR",car," is wrong"))
}else if(tail(nodestack,n=1L) != elem){
stop(paste0("something in the hierarchy of CAR",car," is wrong"))
}else{
nodestack<-nodestack[-length(nodestack)]
countstack<-countstack[-length(countstack)]
}
}else{
## marker
if(grepl("^-",elem)){
ori<-"-"
elem<-sub("^-","",elem)
}else{
ori<-"+"
}
tmp<-c(elem,ori,car)
if(length(nodestack) == 0){
stop(paste0("something in the hierarchy of CAR",car," is wrong"))
}else if(length(nodestack) != length(countstack)){
stop(paste0("something in the hierarchy of CAR",car," is wrong"))
}
for(k in 1:length(nodestack)){
tmp<-c(tmp,nodestack[k],countstack[k])
}
maxnodes<-max(maxnodes,length(nodestack))
converted<-c(converted,paste(tmp,collapse=" "))
}
}
}
}
if(is.na(car)){
cat("\n... no CARs found ...\n\n")
return(NA)
}else if(car == 1){
cat(paste("\n... processed",car,"CAR ...\n\n"))
}else{
cat(paste("\n... processed",car,"CARs ...\n\n"))
}
## convert into data frame
convtree<-matrix(NA,nrow=length(converted),ncol=maxnodes*2+3)
for(i in 1:length(converted)){
tmp<-unlist(strsplit(converted[i]," "))
convtree[i,1:length(tmp)]<-tmp
}
convtree<-as.data.frame(convtree,stringsAsFactors=FALSE)
for(i in seq(1,ncol(convtree),2)){
convtree[,i]<-as.integer(convtree[,i])
}
colnames(convtree)<-c("marker","orientation","car",
paste0(rep(c("type","elem"),
times=(ncol(convtree)-3)/2),
rep(1:((ncol(convtree)-3)/2),each=2)))
return(convtree)
}
## ------------------------------------------------------------------------
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.