Nothing
#' Run differential TE analysis
#'
#' Creates a total of 3 DESeq models (given x is design argument input
#' (usually stage or condition) and libraryType is RNA-seq and Ribo-seq):\cr
#' 1. Ribo-seq model: design = ~ x (differences between the x groups in Ribo-seq)\cr
#' 2. RNA-seq model: design = ~ x (differences between the x groups in RNA-seq)\cr
#' 3. TE model: design = ~ x + libraryType + libraryType:x
#' (differences between the x and libraryType groups and the interaction between them)\cr
#' Using an equal reimplementation of the deltaTE algorithm (see reference).
#' You need at least 2 groups and 2 replicates per group. The Ribo-seq counts will
#' be over CDS and RNA-seq over mRNAs, per transcript. \cr
#'
#' #' If you do not need isoform variants, subset to longest isoform in
#' the returned object (See examples). If you do not have RNA-seq controls,
#' you can still use DESeq on Ribo-seq alone.
#' \cr The LFC values are shrunken by lfcShrink(type = "normal").\cr \cr
#' What the deltaTE plot calls intensified is here called mRNA abundance and
#' forwarded is called Buffering.\cr Remember that DESeq by default can not
#' do global change analysis, it can only find subsets with change in LFC.
#' @inheritParams DTEG.plot
#' @param df.rfp a \code{\link{experiment}} of Ribo-seq or 80S from TCP-seq.
#' @param df.rna a \code{\link{experiment}} of RNA-seq
#' @param design a character vector, default "stage". The columns in the
#' ORFik experiment that represent the comparison contrasts. Usually found
#' in "stage", "condition" or "fraction" column.
#' @param output.dir output.dir directory to save plots,
#' plot will be named "TE_between.png". If NULL, will not save.
#' @param RFP_counts a SummarizedExperiment, default:
#' countTable(df.rfp, "cds", type = "summarized"), all transcripts.
#' Assign a subset if you don't want to analyze all genes.
#' It is recommended to not subset, to give DESeq2 data for variance analysis.
#' @param RNA_counts a SummarizedExperiment, default:
#' countTable(df.rna, "mrna", type = "summarized"), all transcripts.
#' Assign a subset if you don't want to analyze all genes.
#' It is recommended to not subset, to give DESeq2 data for variance analysis.
#' @param batch.effect, logical, default FALSE. If you believe you might have batch effects,
#' set to TRUE, will use replicate column to represent batch effects.
#' Batch effect usually means that you have a strong variance between
#' biological replicates. Check PCA plot on count tables to verify if
#' you need to set it to TRUE.
#' @references doi: 10.1002/cpmb.108
#' @return a data.table with 9 columns.
#' (log fold changes, p.ajust values, group, regulation status and gene id)
#' @family TE
#' @export
#' @import DESeq2
#' @importFrom data.table rbindlist
#' @examples
#' ## Simple example
#' #df.rfp <- read.experiment("Riboseq")
#' #df.rna <- read.experiment("RNAseq")
#' #dt <- DTEG.analysis(df.rfp, df.rna)
#' ## Restrict DTEGs by log fold change (LFC):
#' ## subset to abs(LFC) < 1.5 for both rfp and rna
#' #dt[abs(rfp) < 1.5 & abs(rna) < 1.5, Regulation := "No change"]
#'
#' ## Only longest isoform per gene:
#' #tx_longest <- filterTranscripts(df.rfp, 0, 1, 0)
#' #dt <- dt[id %in% tx_longest,]
#' ## Convert to gene id
#' #dt[, id := txNamesToGeneNames(id, df.rfp)]
#' ## To get by gene symbol, use biomaRt conversion
DTEG.analysis <- function(df.rfp, df.rna,
output.dir = paste0(dirname(df.rfp$filepath[1]),
"/QC_STATS/"),
design = "stage", p.value = 0.05,
RFP_counts = countTable(df.rfp, "cds", type = "summarized"),
RNA_counts = countTable(df.rna, "mrna", type = "summarized"),
batch.effect = FALSE,
plot.title = "", width = 6,
height = 6, dot.size = 0.4,
relative.name = "DTEG_plot.png") {
if (!is(df.rfp, "experiment") | !is(df.rna, "experiment"))
stop("df.rfp and df.rna must be ORFik experiments!")
if (length(unique(unlist(df.rfp[, design]))) == 1)
stop("Design column needs at least 2 unique values!")
if (nrow(df.rfp) < 4)
stop("Experiment needs at least 4 rows, with minimum 2 per design group!")
if (p.value > 1 | p.value <= 0)
stop("p.value must be in interval (0,1]")
if (!is(RFP_counts, "SummarizedExperiment") |
!is(RNA_counts, "SummarizedExperiment")) stop("counts must be of type SummarizedExperiment")
if (nrow(RFP_counts) != nrow(RNA_counts)) stop("counts must have equall number of rows!")
# Designs
be <- ifelse(batch.effect, "replicate + ", "")
te.design <- as.formula(paste0("~ libtype + ", be, design, "+ libtype:", design))
main.design <- as.formula(paste0("~ ", be, design))
# TE
se <- cbind(assay(RFP_counts), assay(RNA_counts))
colData <- rbind(colData(RFP_counts), colData(RNA_counts))
combined_se <- SummarizedExperiment(se,
rowRanges = rowRanges(RFP_counts),
colData = colData)
#combined_se <- combined_se[rowMeans(assay(combined_se)) > 1,]
message("----------------------"); message("Model 1/3: TE")
message("----------------------")
combined_DESEQ <- DESeqDataSet(combined_se, design = te.design)
dds.te <- DESeq2::DESeq(combined_DESEQ)
resultsNames(dds.te)
# Ribo
message("----------------------"); message("Model 2/3: Ribo-seq")
message("----------------------")
ddsMat_ribo <- DESeqDataSet(se = RFP_counts, design = main.design)
ddsMat_ribo <- DESeq(ddsMat_ribo)
# RNA
message("----------------------"); message("Model 3/3: RNA-seq")
message("----------------------")
ddsMat_rna <- DESeqDataSet(se = RNA_counts, design = main.design)
ddsMat_rna <- DESeq(ddsMat_rna)
message("----------------------")
# Create contrasts
pairs <- combn.pairs(unlist(df.rfp[, design]))
# Per contrast
dt.between <- data.table()
#dt.between <- bplapply(pairs[1:2], FUN = function(i, dds.te, ddsMat_ribo, ddsMat_rna, design) {
for(i in pairs) {
name <- paste("Comparison:", i[1], "vs", i[2])
message(name)
# Results
current.contrast <- c(design, i[1], i[2])
res_te <- results(dds.te, contrast = current.contrast)
res_ribo <- results(ddsMat_ribo, contrast=current.contrast)
suppressMessages(res_ribo <- lfcShrink(ddsMat_ribo, contrast=current.contrast,
res=res_ribo, type = "normal"))
res_rna <- results(ddsMat_rna, contrast = current.contrast)
suppressMessages(res_rna <- lfcShrink(ddsMat_rna, contrast = current.contrast,
res = res_rna, type = "normal"))
# The differential regulation groupings (padj is padjusted)
both <- which(res_te$padj < p.value & res_ribo$padj < p.value & res_rna$padj < p.value)
## The 4 classes of genes
forwarded <- rownames(res_te)[which(res_te$padj > p.value & res_ribo$padj < p.value & res_rna$padj < p.value)]
exclusive <- rownames(res_te)[which(res_te$padj < p.value & res_ribo$padj < p.value & res_rna$padj > p.value)]
intensified <- rownames(res_te)[both[which(res_te[both, 2]*res_rna[both, 2] > 0)]]
buffered <- rownames(res_te)[both[which(res_te[both, 2]*res_rna[both, 2] < 0)]]
buffered <- c(rownames(res_te)[which(res_te$padj < p.value & res_ribo$padj > p.value & res_rna$padj < p.value)],
buffered)
n <- rownames(res_te)
Regulation <- rep("No change", nrow(res_te))
Regulation[n %in% forwarded] <- "Buffering"
Regulation[n %in% exclusive] <- "Translation"
Regulation[n %in% intensified] <- "mRNA abundance"
Regulation[n %in% buffered] <- "Buffering"
print(table(Regulation))
dt.between <-
rbindlist(list(dt.between,
data.table(variable = name,
Regulation = Regulation,
id = rownames(ddsMat_rna),
rna = res_rna$log2FoldChange,
rfp = res_ribo$log2FoldChange,
te = res_te$log2FoldChange,
rna.padj = res_rna$padj,
rfp.padj = res_ribo$padj,
te.padj = res_te$padj
)))
}#, dds.te = dds.te, ddsMat_ribo = ddsMat_ribo, ddsMat_rna = ddsMat_rna, design = design)
#dt.between <- rbindlist(dt.between)
# Plot
dt.between[, Regulation :=
factor(Regulation,
levels = c("No change", "Translation", "Buffering", "mRNA abundance"),
ordered = TRUE)]
plot <- DTEG.plot(dt.between, output.dir, p.value, plot.title, width, height,
dot.size, relative.name = relative.name)
return(dt.between)
}
#' Create a TE table
#'
#' Creates a data.table with 6 columns, column names are:\cr
#' variable, rfp_log2, rna_log2, rna_log10, TE_log2, id
#' @inheritParams DTEG.analysis
#' @inheritParams countTable
#' @param filter.rfp numeric, default 1. What is the minimum fpkm value?
#' @param filter.rna numeric, default 1. What is the minimum fpkm value?
#' @return a data.table with 6 columns
#' @family TE
#' @export
#' @examples
#' #df.rfp <- read.experiment("Riboseq")
#' #df.rna <- read.experiment("RNAseq")
#' #te.table(df.rfp, df.rna)
te.table <- function(df.rfp, df.rna,
filter.rfp = 1, filter.rna = 1,
collapse = FALSE) {
if (!is(df.rfp, "experiment") | !is(df.rna, "experiment"))
stop("df.rfp and df.rna must be ORFik experiments!")
RNA_MRNA_FPKM <- countTable(df.rna, "mrna", type = "fpkm", collapse = collapse)
RNA_MRNA_FPKM <- data.table(id = rownames(RNA_MRNA_FPKM), RNA_MRNA_FPKM)
RFP_CDS_FPKM <- countTable(df.rfp, "cds", type = "fpkm", collapse = collapse)
RFP_CDS_FPKM <- data.table(id = rownames(RFP_CDS_FPKM), RFP_CDS_FPKM)
if (!identical(nrow(RNA_MRNA_FPKM), nrow(RFP_CDS_FPKM)))
stop("Not equal rows in count tables, did you match rfp and rna from different genome builds?")
single.rna <- length(bamVarName(df.rna, skip.libtype = TRUE)) == 1
if (!single.rna & !identical(length(bamVarName(df.rfp, skip.libtype = TRUE)),
length(bamVarName(df.rna, skip.libtype = TRUE))))
stop("Not equal samples in rfp and rna, did you subset or reorder one of the experiments?")
if ((filter.rfp < 0) | (filter.rna < 0))
stop("filter value is < 0, not allowed!")
dt <- data.table::merge.data.table(RNA_MRNA_FPKM, RFP_CDS_FPKM, by = "id")
filtered <- rowMin(as.matrix(dt[,-1])) > max(filter.rna, filter.rfp)
txNames <- dt$id; dt$id <- NULL
#dt <- dt[RNA_MRNA_FPKM > filter.rfp & RFP_CDS_FPKM > filter.rna, ]
dt <- dt[filtered, ]
dt.log <- pseudo.transform(dt)
dt.log10 <- pseudo.transform(dt, log10)
dt.melt.rna <- melt(dt.log[, colnames(dt.log) %in% colnames(RNA_MRNA_FPKM)[-1], with = FALSE])
dt.melt.rna.10 <- melt(dt.log10[, colnames(dt.log10) %in% colnames(RNA_MRNA_FPKM)[-1], with = FALSE])
dt.melt.rfp <- melt(dt.log[, colnames(dt.log) %in% colnames(RFP_CDS_FPKM)[-1], with = FALSE])
dt.final <- cbind(dt.melt.rfp, dt.melt.rna$value, dt.melt.rna.10$value)
filter.names <- paste("RFP", "LSU", paste0(df.rfp@experiment, "_"), sep = "|")
dt.final[, variable := gsub(filter.names, "", variable)]
dt.final[, variable := gsub("^_|_$", "", variable)]
colnames(dt.final) <- c("variable", "rfp_log2","rna_log2", "rna_log10")
dt.final[, TE_log2 := rfp_log2 - rna_log2]
dt.final$id <- rep(txNames[filtered], length(unique(dt.final$variable)))
message(paste("Filter kept", round((nrow(dt) / length(txNames)) *100, 1), "% of transcripts"))
return(dt.final)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.