inst/doc/tximport.R

## ---- echo=FALSE--------------------------------------------------------------
library(knitr)
opts_chunk$set(tidy=TRUE,message=FALSE)

## -----------------------------------------------------------------------------
library(tximportData)
dir <- system.file("extdata", package="tximportData")
list.files(dir)

## -----------------------------------------------------------------------------
samples <- read.table(file.path(dir,"samples.txt"), header=TRUE)
samples
files <- file.path(dir, "salmon", samples$run, "quant.sf.gz")
names(files) <- paste0("sample",1:6)
all(file.exists(files))

## -----------------------------------------------------------------------------
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
k <- keys(txdb, keytype="TXNAME")
tx2gene <- select(txdb, k, "GENEID", "TXNAME")

## -----------------------------------------------------------------------------
library(readr)
tx2gene <- read_csv(file.path(dir, "tx2gene.gencode.v27.csv"))
head(tx2gene)

## -----------------------------------------------------------------------------
library(tximport)
txi <- tximport(files, type="salmon", tx2gene=tx2gene)
names(txi)
head(txi$counts)

## -----------------------------------------------------------------------------
txi.tx <- tximport(files, type="salmon", txOut=TRUE)

## -----------------------------------------------------------------------------
txi.sum <- summarizeToGene(txi.tx, tx2gene)
all.equal(txi$counts, txi.sum$counts)

## -----------------------------------------------------------------------------
files <- file.path(dir,"salmon", samples$run, "quant.sf.gz")
names(files) <- paste0("sample",1:6)
txi.salmon <- tximport(files, type="salmon", tx2gene=tx2gene)
head(txi.salmon$counts)

## -----------------------------------------------------------------------------
tx2knownGene <- read_csv(file.path(dir, "tx2gene.csv"))
files <- file.path(dir,"sailfish", samples$run, "quant.sf")
names(files) <- paste0("sample",1:6)
txi.sailfish <- tximport(files, type="sailfish", tx2gene=tx2knownGene)
head(txi.sailfish$counts)

## ----eval=FALSE---------------------------------------------------------------
#  txi <- tximport("quant.sf", type="none", txOut=TRUE,
#                  txIdCol="Name", abundanceCol="TPM",
#                  countsCol="NumReads", lengthCol="Length",
#                  importer=function(x) read_tsv(x, skip=8))

## -----------------------------------------------------------------------------
files <- file.path(dir,"salmon_gibbs", samples$run, "quant.sf.gz")
names(files) <- paste0("sample",1:6)
txi.inf.rep <- tximport(files, type="salmon", txOut=TRUE)
names(txi.inf.rep)
names(txi.inf.rep$infReps)
dim(txi.inf.rep$infReps$sample1)

## -----------------------------------------------------------------------------
files <- file.path(dir, "kallisto_boot", samples$run, "abundance.h5")
names(files) <- paste0("sample",1:6)
txi.kallisto <- tximport(files, type="kallisto", txOut=TRUE)
head(txi.kallisto$counts)

## -----------------------------------------------------------------------------
names(txi.kallisto)
names(txi.kallisto$infReps)
dim(txi.kallisto$infReps$sample1)

## -----------------------------------------------------------------------------
files <- file.path(dir, "kallisto", samples$run, "abundance.tsv.gz")
names(files) <- paste0("sample",1:6)
txi.kallisto.tsv <- tximport(files, type="kallisto", tx2gene=tx2gene,
                             ignoreAfterBar=TRUE)
head(txi.kallisto.tsv$counts)

## -----------------------------------------------------------------------------
files <- file.path(dir,"rsem", samples$run, paste0(samples$run, ".genes.results.gz"))
names(files) <- paste0("sample",1:6)
txi.rsem <- tximport(files, type="rsem", txIn=FALSE, txOut=FALSE)
head(txi.rsem$counts)

## -----------------------------------------------------------------------------
files <- file.path(dir,"rsem", samples$run, paste0(samples$run, ".isoforms.results.gz"))
names(files) <- paste0("sample",1:6)
txi.rsem <- tximport(files, type="rsem", txIn=TRUE, txOut=TRUE)
head(txi.rsem$counts)

## ---- eval=FALSE--------------------------------------------------------------
#  tmp <- read_tsv(files[1])
#  tx2gene <- tmp[,c("t_name","gene_name")]
#  txi <- tximport(files, type="stringtie", tx2gene=tx2gene)

## ---- eval=FALSE--------------------------------------------------------------
#  files <- "path/to/alevin/quants_mat.gz"
#  txi <- tximport(files, type="alevin")

## ---- results="hide", messages=FALSE------------------------------------------
library(edgeR)
library(csaw)

## -----------------------------------------------------------------------------
cts <- txi$counts
normMat <- txi$length

# Obtaining per-observation scaling factors for length,
# adjusted to avoid changing the magnitude of the counts.
normMat <- normMat / exp(rowMeans(log(normMat)))
normCts <- cts / normMat

# Computing effective library sizes from scaled counts,
# to account for composition biases between samples.
library(edgeR)
eff.lib <- calcNormFactors(normCts) * colSums(normCts)

# Combining effective library sizes with the length factors,
# and calculating offsets for a log-link GLM.
normMat <- sweep(normMat, 2, eff.lib, "*")
normMat <- log(normMat)

# Creating a DGEList object for use in edgeR.
y <- DGEList(cts)
y <- scaleOffset(y, normMat)
# filtering
keep <- filterByExpr(y)
y <- y[keep,]
# y is now ready for estimate dispersion functions
# see edgeR User's Guide

## -----------------------------------------------------------------------------
se <- SummarizedExperiment(assays=list(counts=y$counts, offset=y$offset))
se$totals <- y$samples$lib.size
library(csaw)
cpms <- calculateCPM(se, use.offsets=TRUE, log=FALSE)

## ---- results="hide", messages=FALSE------------------------------------------
library(DESeq2)

## -----------------------------------------------------------------------------
sampleTable <- data.frame(condition=factor(rep(c("A","B"),each=3)))
rownames(sampleTable) <- colnames(txi$counts)

## -----------------------------------------------------------------------------
dds <- DESeqDataSetFromTximport(txi, sampleTable, ~ condition)
# dds is now ready for DESeq()
# see DESeq2 vignette

## -----------------------------------------------------------------------------
files <- file.path(dir,"salmon", samples$run, "quant.sf.gz")
names(files) <- paste0("sample",1:6)
txi <- tximport(files, type="salmon",
                tx2gene=tx2gene,
                countsFromAbundance="lengthScaledTPM")
library(limma)
y <- DGEList(txi$counts)
# filtering
keep <- filterByExpr(y)
y <- y[keep,]
y <- calcNormFactors(y)
design <- model.matrix(~ condition, data=sampleTable)
v <- voom(y, design)
# v is now ready for lmFit()
# see limma User's Guide

## -----------------------------------------------------------------------------
sessionInfo()

Try the tximport package in your browser

Any scripts or data that you put into this service are public.

tximport documentation built on Nov. 8, 2020, 8:20 p.m.