View source: R/merge_rse_metrics.R
merge_rse_metrics | R Documentation |
This function merges the sample metadata from a RangedSummarizedExperiment object for some of the variables that we (at Jaffelab) typically create with our RNA-seq processing pipeline for samples that are sequenced in more than one lane.
merge_rse_metrics(rse)
rse |
A RangedSummarizedExperiment-class object with 'concordMapRate', 'overallMapRate', 'mitoRate', 'rRNA_rate', 'totalAssignedGene', 'numMapped', 'numReads', 'numUnmapped', 'mitoMapped', 'totalMapped', 'ERCCsumLogErr' as either NumericList() or IntegerList() objects in the colData() columns. |
A RangedSummarizedExperiment-class with merged columns for 'concordMapRate', 'overallMapRate', 'mitoRate', 'rRNA_rate', 'totalAssignedGene', 'numMapped', 'numReads', 'numUnmapped', 'mitoMapped', 'totalMapped', 'ERCCsumLogErr'
Leonardo Collado-Torres, Andrew E Jaffe
## Taken from ?SummarizedExperiment
library("SummarizedExperiment")
nrows <- 200
ncols <- 6
set.seed(20181116)
counts <- matrix(runif(nrows * ncols, 1, 1e4), nrows)
rowRanges <- GRanges(rep(c("chr1", "chr2"), c(50, 150)),
IRanges(floor(runif(200, 1e5, 1e6)), width = 100),
strand = sample(c("+", "-"), 200, TRUE),
feature_id = sprintf("ID%03d", seq_len(200))
)
## Function for generating some data
rand_gen <- function(mu = 1e6, sd = 1e3, digits = 0) {
IRanges::NumericList(lapply(seq_len(6), function(x) {
round(rnorm(2, mu, sd), digits)
}))
}
colData <- DataFrame(
Treatment = rep(c("ChIP", "Input"), 3),
row.names = LETTERS[seq_len(6)],
concordMapRate = rand_gen(0.5, 0.05, 10),
overallMapRate = rand_gen(0.7, 0.05, 10),
mitoRate = rand_gen(0.2, 0.05, 10),
rRNA_rate = rand_gen(0.1, 0.01, 10),
totalAssignedGene = rand_gen(0.5, 0.05, 10),
numMapped = rand_gen(),
numReads = rand_gen(1e6 + 6e5),
numUnmapped = rand_gen(5e5),
mitoMapped = rand_gen(1e5),
totalMapped = rand_gen(1e6 + 1e5),
ERCCsumLogErr = rand_gen(-28.5, 4, 6)
)
rse <- SummarizedExperiment(
assays = SimpleList(counts = counts),
rowRanges = rowRanges, colData = colData
)
colData(rse)
rse_merged <- merge_rse_metrics(rse)
colData(rse_merged)
## Use the BSP2 real data
local_bsp2 <- tempfile("BSP2_rse_gene.Rdata")
download.file(
"https://s3.us-east-2.amazonaws.com/libd-brainseq2/rse_gene_unfiltered.Rdata",
destfile = local_bsp2,
mode = "wb"
)
load(local_bsp2, verbose = TRUE)
# lobstr::obj_size(rse_gene)
# 872.12 MB
bsp2_rse_merged <- merge_rse_metrics(rse_gene)
colData(bsp2_rse_merged)
## Compare the ERCCsumLogErr approximation against the mean of the
## error values. Note that you would actually have to load the actual
## ERCC quantification output files in order to compute the correct
## ERCCsumLogErr values.
bsp2_rse_merged$mean_ERCCsumLogErr <-
sapply(rse_gene$ERCCsumLogErr, mean)
with(
colData(bsp2_rse_merged)[lengths(rse_gene$ERCCsumLogErr) > 1, ],
plot(
mean_ERCCsumLogErr,
ERCCsumLogErr,
ylab = "Approximate ERCCsumLogErr"
)
)
abline(a = 0, b = 1, col = "red")
with(
colData(bsp2_rse_merged)[lengths(rse_gene$ERCCsumLogErr) > 1, ],
summary(mean_ERCCsumLogErr - ERCCsumLogErr)
)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.