merge_rse_metrics: Merge sample metadata across several columns

View source: R/merge_rse_metrics.R

merge_rse_metricsR Documentation

Merge sample metadata across several columns

Description

This function merges the sample metadata from a RangedSummarizedExperiment object for some of the variables that we (at Jaffelab) typically create with our RNA-seq processing pipeline for samples that are sequenced in more than one lane.

Usage

merge_rse_metrics(rse)

Arguments

rse

A RangedSummarizedExperiment-class object with 'concordMapRate', 'overallMapRate', 'mitoRate', 'rRNA_rate', 'totalAssignedGene', 'numMapped', 'numReads', 'numUnmapped', 'mitoMapped', 'totalMapped', 'ERCCsumLogErr' as either NumericList() or IntegerList() objects in the colData() columns.

Value

A RangedSummarizedExperiment-class with merged columns for 'concordMapRate', 'overallMapRate', 'mitoRate', 'rRNA_rate', 'totalAssignedGene', 'numMapped', 'numReads', 'numUnmapped', 'mitoMapped', 'totalMapped', 'ERCCsumLogErr'

Author(s)

Leonardo Collado-Torres, Andrew E Jaffe

Examples


## Taken from ?SummarizedExperiment
library("SummarizedExperiment")
nrows <- 200
ncols <- 6
set.seed(20181116)
counts <- matrix(runif(nrows * ncols, 1, 1e4), nrows)
rowRanges <- GRanges(rep(c("chr1", "chr2"), c(50, 150)),
    IRanges(floor(runif(200, 1e5, 1e6)), width = 100),
    strand = sample(c("+", "-"), 200, TRUE),
    feature_id = sprintf("ID%03d", seq_len(200))
)

## Function for generating some data
rand_gen <- function(mu = 1e6, sd = 1e3, digits = 0) {
    IRanges::NumericList(lapply(seq_len(6), function(x) {
        round(rnorm(2, mu, sd), digits)
    }))
}
colData <- DataFrame(
    Treatment = rep(c("ChIP", "Input"), 3),
    row.names = LETTERS[seq_len(6)],
    concordMapRate = rand_gen(0.5, 0.05, 10),
    overallMapRate = rand_gen(0.7, 0.05, 10),
    mitoRate = rand_gen(0.2, 0.05, 10),
    rRNA_rate = rand_gen(0.1, 0.01, 10),
    totalAssignedGene = rand_gen(0.5, 0.05, 10),
    numMapped = rand_gen(),
    numReads = rand_gen(1e6 + 6e5),
    numUnmapped = rand_gen(5e5),
    mitoMapped = rand_gen(1e5),
    totalMapped = rand_gen(1e6 + 1e5),
    ERCCsumLogErr = rand_gen(-28.5, 4, 6)
)
rse <- SummarizedExperiment(
    assays = SimpleList(counts = counts),
    rowRanges = rowRanges, colData = colData
)
colData(rse)
rse_merged <- merge_rse_metrics(rse)
colData(rse_merged)

## Use the BSP2 real data
local_bsp2 <- tempfile("BSP2_rse_gene.Rdata")
download.file(
    "https://s3.us-east-2.amazonaws.com/libd-brainseq2/rse_gene_unfiltered.Rdata",
    destfile = local_bsp2,
    mode = "wb"
)
load(local_bsp2, verbose = TRUE)
# lobstr::obj_size(rse_gene)
# 872.12 MB
bsp2_rse_merged <- merge_rse_metrics(rse_gene)
colData(bsp2_rse_merged)

## Compare the ERCCsumLogErr approximation against the mean of the
## error values. Note that you would actually have to load the actual
## ERCC quantification output files in order to compute the correct
## ERCCsumLogErr values.
bsp2_rse_merged$mean_ERCCsumLogErr <-
    sapply(rse_gene$ERCCsumLogErr, mean)
with(
    colData(bsp2_rse_merged)[lengths(rse_gene$ERCCsumLogErr) > 1, ],
    plot(
        mean_ERCCsumLogErr,
        ERCCsumLogErr,
        ylab = "Approximate ERCCsumLogErr"
    )
)
abline(a = 0, b = 1, col = "red")
with(
    colData(bsp2_rse_merged)[lengths(rse_gene$ERCCsumLogErr) > 1, ],
    summary(mean_ERCCsumLogErr - ERCCsumLogErr)
)


LieberInstitute/jaffelab documentation built on Dec. 15, 2024, 10:44 p.m.