R/ancestral_generics.R

Defines functions as.data.frame.ancestral highest_state as.phyDat.ancestral print.ancestral extract_prob extract_states as.ancestral write.ancestral

Documented in as.ancestral as.data.frame.ancestral as.phyDat.ancestral print.ancestral write.ancestral

#' Export and convenience functions for  ancestral reconstructions
#'
#' \code{write.ancestral} allows to export ancestral reconstructions. It writes
#' out the tree, a tab delimited text file with the probabilities and the
#' alignment. \code{ancestral} generates an object of class ancestral.
#'
#' This allows also to read in reconstruction  made by iqtree to use the
#' plotting capabilities of R.
#' @param x an object of class ancestral.
#' @param file a file name. File endings are added.
#' @param ... Further arguments passed to or from other methods.
#' @returns \code{write.ancestral}  returns the input x invisibly.
#' @seealso \code{\link{anc_pml}}, \code{\link{plotAnc}}
#' @examples
#' data(Laurasiatherian)
#' fit <- pml_bb(Laurasiatherian[,1:100], "JC", rearrangement = "none")
#' anc_ml <- anc_pml(fit)
#' write.ancestral(anc_ml)
#' # Can be also results from iqtree
#' align <- read.phyDat("ancestral_align.fasta", format="fasta")
#' tree <- read.tree("ancestral_tree.nwk")
#' df <- read.table("ancestral.state", header=TRUE)
#' anc_ml_disc <- as.ancestral(tree, df, align)
#' plotAnc(anc_ml_disc, 20)
#' unlink(c("ancestral_align.fasta", "ancestral_tree.nwk", "ancestral.state"))
#' @rdname write.ancestral
#' @export
write.ancestral <- function(x, file="ancestral"){
  stopifnot(inherits(x, "ancestral"))
  write.phyDat(x$data, file=paste0(file, "_align.fasta"), format="fasta")
  tab <- as.data.frame(x)
  fname <- paste0(file, ".state")
  tname <- paste0(file, "_tree.nwk")
  cat("# Ancestral state reconstruction generated by phangorn",
      packageDescription("phangorn", fields = "Version"), "\n#\n", file = fname)
  cat("# Columns are tab-separated with following meaning:\n", file = fname,
      append=TRUE)
  cat("#  Node:  Node name in the tree\n", file = fname, append=TRUE)
  cat("#  Site:  Alignment site ID\n", file = fname, append=TRUE)
  cat("#  State: Most likely state assignment\n", file = fname, append=TRUE)
  cat("#  p_X:   Posterior probability for state X (empirical Bayesian method)\n#\n",
      file = fname, append=TRUE)
  cat("# This file and the accompanying tree can be read in R with command:\n",
      file = fname, append=TRUE)
  cat("# library(phangorn)\n", file = fname, append=TRUE)
  cat("# tree <- read.tree(\"",tname,"\")\n", sep="", file = fname, append=TRUE)
  cat("# df <- read.table(\"",tname,"\", header=TRUE) \n", sep="", file = fname,
      append=TRUE)
  cat("# anc <- as.ancestral(tree, df) \n", file = fname, append=TRUE)
  cat("# plotAnc(anc) \n#\n", file = fname, append=TRUE)
  cat("# For more information type ?plotAnc or\n", file = fname, append=TRUE)
  cat("# vignette(\"Ancestral\", package=\"phangorn\") in R.\n#\n",
      file = fname, append=TRUE)
  cat(colnames(tab), "\n", sep="\t", file = fname, append=TRUE)
  write.table(tab, file=fname, quote=FALSE, row.names=FALSE, col.names=FALSE,
              sep="\t", append=TRUE)
  write.tree(x$tree, file=tname)
  invisible(x)
}


#' @param tree an object of class phylo.
#' @param align an object of class phyDat.
#' @param prob an data.frame containing a matrix of posterior probabilities for
#' each state and site.
#' @importFrom utils head
#' @rdname write.ancestral
#' @export
as.ancestral <- function(tree, prob, align=NULL){
  stopifnot(inherits(tree, "phylo"))
  if(!is.null(align))stopifnot(inherits(align, "phyDat"))
  stopifnot(inherits(prob, "data.frame"))
  if(is.null(tree$node.label))stop("tree needs node.label")
  state <- extract_states(prob, attr(align, "type"),
                          levels=attr(align, "levels"))
  prob <- extract_prob(prob, align, tree$node.label)
  erg <- list(tree=tree, data=align[tree$tip.label], prob=prob,
              state=state[tree$node.label])
  class(erg) <- "ancestral"
  erg
}


extract_states <- function(x, type, levels=NULL){
  node_label <- unique(x$"Node")
  y <- matrix(x$"State", nrow=length(node_label), byrow=TRUE,
              dimnames = list(node_label, NULL))
  if(type %in% c("DNA", "AA")) return( phyDat(y, type=type) )
  phyDat(y, type=type, levels=levels)
}

extract_prob <- function(x, align, node_label){
  index <- attr(align, "index")
  idx <- which(!duplicated(index))
  att <- attributes(align)
  res <- vector("list", length(node_label))
  for(i in seq_along(node_label)){
    tmp <- x[x$"Node"==node_label[i], -c(1:3)] |> as.matrix()
    dimnames(tmp) <- NULL
    res[[i]] <- as.matrix(tmp)[idx,]
  }
  att$names <- node_label
  attributes(res) <- att
  res
}

#' @rdname write.ancestral
#' @export
print.ancestral <- function(x, ...){
  stopifnot(inherits(x, "ancestral"))
  print(x$tree)
  cat("\n")
  print(x$data)
  #  cat("\n")
  #  print(head(x$prob))
}

#' @rdname write.ancestral
#' @param x an object of class ancestral
#' @export
as.phyDat.ancestral <- function(x, ...) {
  rbind(x$data, x$state)
}



highest_state <- function(x, ...) {
  type <- attr(x, "type")
  fun2 <- function(x) {
    x <- p2dna(x)
    fitchCoding2ambiguous(x)
  }
  if (type == "DNA" && !has_gap_state(x)) {
    res <- lapply(x, fun2)
  }
  else {
    eps <- 1.0e-5
    contr <- attr(x, "contrast")
    attr <- attributes(x)
    pos <- match(attr$levels, attr$allLevels)
    res <- lapply(x, function(x, pos) pos[max.col(x)], pos)
  }
  attributes(res) <- attributes(x)
  class(res) <- "phyDat"
  return(res)
}


#' @rdname write.ancestral
#' @export
as.data.frame.ancestral <- function(x, ...) {
  stopifnot(inherits(x, "ancestral"))
  y <- rbind(x$data, x$state)
  contrast <- attr(x$data, "contrast")
  Y <- unlist(as.data.frame(y))
  n_node <- Nnode(x$tree)
  n_tip <- Ntip(x$tree)
  l <- length(y)
  nr <- attr(x$data, "nr")
  nc <- attr(x$data, "nc")
  index <- attr(x$data, "index")
  nr <- length(index)
  nam <- names(y)
  X <- matrix(0, l*length(index), nc)
  j <- 0
  for(i in seq_len(n_tip)){
    X[(j+1):(j+nr), ] <- contrast[x$data[[i]][index], ]
    j <- j + nr
  }
  for(i in seq_len(n_node)){
    X[(j+1):(j+nr), ] <- x$prob[[i]][index, ]
    j <- j + nr
  }
  res <- data.frame(Node=rep(nam, each=nr), Site=rep(seq_len(nr), l), Y, X)
  colnames(res) <- c("Node", "Site", "State",
                     paste0("p_", attr(x$data, "levels")))
  rownames(res) <- NULL
  res
}
KlausVigo/phangorn documentation built on Nov. 5, 2024, 11 a.m.