inst/doc/Introduction_to_treeSummarizedExperiment.R

## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE)

## ----strTSE, echo=FALSE, fig.cap= "The structure of the TreeSummarizedExperiment class."----
knitr::include_graphics("tse.png")

## -----------------------------------------------------------------------------
library(TreeSummarizedExperiment)

# assays data (typically, representing observed data from an experiment)
assay_data <- rbind(rep(0, 4), matrix(1:20, nrow = 5))
colnames(assay_data) <- paste0("sample", 1:4)
rownames(assay_data) <- paste("entity", seq_len(6), sep = "")
assay_data

## -----------------------------------------------------------------------------
# row data (feature annotations)
row_data <- data.frame(Kingdom = "A",
                       Phylum = rep(c("B1", "B2"), c(2, 4)),
                       Class = rep(c("C1", "C2", "C3"), each = 2),
                       OTU = paste0("D", 1:6),
                       row.names = rownames(assay_data),
                       stringsAsFactors = FALSE)

row_data
# column data (sample annotations)
col_data <- data.frame(gg = c(1, 2, 3, 3),
                       group = rep(LETTERS[1:2], each = 2), 
                       row.names = colnames(assay_data),
                       stringsAsFactors = FALSE)
col_data

## -----------------------------------------------------------------------------
library(ape)

# The first toy tree 
set.seed(12)
row_tree <- rtree(5)

# The second toy tree 
set.seed(12)
col_tree <- rtree(4)

# change node labels
col_tree$tip.label <- colnames(assay_data)
col_tree$node.label <- c("All", "GroupA", "GroupB")

## ----plot-rtree, fig.cap="\\label{plot-rtree} The structure of the row tree. The node labels and the node numbers are in orange and blue text, respectively.", out.width="90%"----
library(ggtree)
library(ggplot2)

# Visualize the row tree
ggtree(row_tree, size = 2, branch.length = "none") +
    geom_text2(aes(label = node), color = "darkblue",
               hjust = -0.5, vjust = 0.7, size = 5.5) +
    geom_text2(aes(label = label), color = "darkorange",
               hjust = -0.1, vjust = -0.7, size = 5.5) 

## ----plot-ctree, fig.cap="\\label{plot-ctree} The structure of the column tree. The node labels and the node numbers are in orange and blue text, respectively.", out.width="90%"----
# Visualize the column tree
ggtree(col_tree, size = 2, branch.length = "none") +
    geom_text2(aes(label = node), color = "darkblue",
               hjust = -0.5, vjust = 0.7, size = 5.5) +
    geom_text2(aes(label = label), color = "darkorange",
               hjust = -0.1, vjust = -0.7, size = 5.5)+
    ylim(c(0.8, 4.5)) +
    xlim(c(0, 2.2))

## -----------------------------------------------------------------------------
# all column names could be found in the node labels of the column tree
all(colnames(assay_data) %in% c(col_tree$tip.label, col_tree$node.label))

# provide the node labels in rowNodeLab
tip_lab <- row_tree$tip.label
row_lab <- tip_lab[c(1, 1:5)]
row_lab

both_tse <- TreeSummarizedExperiment(assays = list(Count = assay_data),
                                     rowData = row_data,
                                     colData = col_data,
                                     rowTree = row_tree,
                                     rowNodeLab = row_lab,
                                     colTree = col_tree)

## -----------------------------------------------------------------------------
both_tse

## -----------------------------------------------------------------------------
# to get the first table in the assays
(count <- assays(both_tse)[[1]])

## -----------------------------------------------------------------------------
# to get row data
rowData(both_tse)

## -----------------------------------------------------------------------------
# to get column data
colData(both_tse)

## -----------------------------------------------------------------------------
# to get metadata: it's empty here
metadata(both_tse)

## -----------------------------------------------------------------------------
# access trees
rowTree(both_tse)
colTree(both_tse)

## -----------------------------------------------------------------------------
# access the link data
(r_link <- rowLinks(both_tse))
(c_link <- colLinks(both_tse))

## -----------------------------------------------------------------------------
class(r_link)
showClass("LinkDataFrame")

## -----------------------------------------------------------------------------
sub_tse <- both_tse[1:2, 1]
sub_tse

## -----------------------------------------------------------------------------
# the row data
rowData(sub_tse)

# the row link data
rowLinks(sub_tse)


## -----------------------------------------------------------------------------
# The first four columns are from colLinks data and the others from colData
cbind(colLinks(sub_tse), colData(sub_tse))

## -----------------------------------------------------------------------------
node_tse <- subsetByNode(x = both_tse, rowNode = "t3")

rowLinks(node_tse)

## -----------------------------------------------------------------------------
node_tse <- subsetByNode(x = both_tse, rowNode = "t3", 
                         colNode = c("sample1", "sample2"))
assays(node_tse)[[1]]

## ----plot-taxa2phylo, fig.cap="\\label{plot-taxa2phylo} The structure of the taxonomic tree that is generated from the taxonomic table.", out.width="90%"----
# The toy taxonomic table
(taxa <- rowData(both_tse))

# convert it to a phylo tree
taxa_tree <- toTree(data = taxa)

# Viz the new tree
ggtree(taxa_tree)+
    geom_text2(aes(label = node), color = "darkblue",
               hjust = -0.5, vjust = 0.7, size = 5.5) +
    geom_text2(aes(label = label), color = "darkorange",
               hjust = -0.1, vjust = -0.7, size = 5.5) +
    geom_point2()

## -----------------------------------------------------------------------------
taxa_tse <- changeTree(x = both_tse, rowTree = taxa_tree, 
                       rowNodeLab = taxa[["OTU"]])

taxa_tse
rowLinks(taxa_tse)

## -----------------------------------------------------------------------------
# use node labels to specify colLevel
agg_col <- aggValue(x = taxa_tse, 
                    colLevel = c("GroupA", "GroupB"),
                    FUN = sum)
# or use node numbers to specify colLevel
agg_col <- aggValue(x = taxa_tse, colLevel = c(6, 7), FUN = sum)

## -----------------------------------------------------------------------------
assays(agg_col)[[1]]

## -----------------------------------------------------------------------------
# before aggregation
colData(taxa_tse)
# after aggregation
colData(agg_col)

## -----------------------------------------------------------------------------
# the link data is updated
colLinks(agg_col)

## -----------------------------------------------------------------------------
# the phylum level
taxa <- c(taxa_tree$tip.label, taxa_tree$node.label)
(taxa_one <- taxa[startsWith(taxa, "Phylum:")])

# aggregation
agg_taxa <- aggValue(x = taxa_tse, rowLevel = taxa_one, FUN = sum)
assays(agg_taxa)[[1]]

## -----------------------------------------------------------------------------
# A mixed level
taxa_mix <- c("Class:C3", "Phylum:B1")
agg_any <- aggValue(x = taxa_tse, rowLevel = taxa_mix, FUN = sum)
rowData(agg_any)

## -----------------------------------------------------------------------------
agg_both <- aggValue(x = both_tse, colLevel = c(6, 7), 
                     rowLevel = 7:9, FUN = sum)

## -----------------------------------------------------------------------------
assays(agg_both)[[1]]

## ----plot-exTree, fig.cap= "\\label{plot-exTree} An example tree with node labels and numbers in black and orange texts, respectively.", out.width="90%"----
data("tinyTree")
ggtree(tinyTree, branch.length = "none") +
    geom_text2(aes(label = label), hjust = -0.1, size = 5.5) +
    geom_text2(aes(label = node), vjust = -0.8,
               hjust = -0.2, color = 'orange', size = 5.5) 

## -----------------------------------------------------------------------------
convertNode(tree = tinyTree, node = c(12, 1, 4))

## -----------------------------------------------------------------------------
convertNode(tree = tinyTree, node = c("t4", "Node_18"))

## -----------------------------------------------------------------------------
# only the leaf nodes
findDescendant(tree = tinyTree, node = 17, only.leaf = TRUE)

## -----------------------------------------------------------------------------
# all descendant nodes
findDescendant(tree = tinyTree, node = 17, only.leaf = FALSE)

## -----------------------------------------------------------------------------
# tse: a TreeSummarizedExperiment object
# rowLeaf: specific leaves
subsetByLeaf <- function(tse, rowLeaf) {
  # if rowLeaf is provided as node labels, convert them to node numbers
  if (is.character(rowLeaf)) {
    rowLeaf <- convertNode(tree = rowTree(tse), node = rowLeaf)
  }
  
  # subset data by leaves
  sse <- subsetByNode(tse, rowNode = rowLeaf)
  
  # update the row tree
    ## -------------- new tree: drop leaves ----------
    oldTree <- rowTree(sse)
    newTree <- ape::keep.tip(phy = oldTree, tip = rowLeaf)
    
    ## -------------- update the row link ----------
    # track the tree
    track <- trackNode(oldTree)
    track <- ape::keep.tip(phy = track, tip = rowLeaf)
    
    # row links
    rowL <- rowLinks(sse)
    rowL <- DataFrame(rowL)
    
    # update the row links: 
    #   1. use the alias label to track and updates the nodeNum
    #   2. the nodeLab should be updated based on the new tree using the new
    #      nodeNum
    #   3. lastly, update the nodeLab_alias
    rowL$nodeNum <- convertNode(tree = track, node = rowL$nodeLab_alias,
                              message = FALSE)
    rowL$nodeLab <- convertNode(tree = newTree, node = rowL$nodeNum, 
                              use.alias = FALSE, message = FALSE)
    rowL$nodeLab_alias <- convertNode(tree = newTree, node = rowL$nodeNum, 
                                    use.alias = TRUE, message = FALSE)
    rowL$isLeaf <- isLeaf(tree = newTree, node = rowL$nodeNum)

    rowNL <- new("LinkDataFrame", rowL)
    
    ## update the row tree and links
    BiocGenerics:::replaceSlots(sse,
                              rowLinks = rowNL,
                              rowTree = list(phylo = newTree))
}


## -----------------------------------------------------------------------------
(both_sse <- subsetByLeaf(tse = both_tse, rowLeaf = c("t2", "t3")))
rowLinks(both_sse)

## -----------------------------------------------------------------------------
sessionInfo()

Try the TreeSummarizedExperiment package in your browser

Any scripts or data that you put into this service are public.

TreeSummarizedExperiment documentation built on Dec. 8, 2020, 2 a.m.