inst/doc/tximeta.R

## -----------------------------------------------------------------------------
dir <- system.file("extdata/salmon_dm", package="tximportData")
files <- file.path(dir, "SRR1197474", "quant.sf") 
file.exists(files)
coldata <- data.frame(files, names="SRR1197474", condition="A", stringsAsFactors=FALSE)
coldata

## ----eval=FALSE---------------------------------------------------------------
#  library(tximeta)
#  se <- tximeta(coldata)

## -----------------------------------------------------------------------------
indexDir <- file.path(dir, "Dm.BDGP6.22.98_salmon-0.14.1")
fastaFTP <- c("ftp://ftp.ensembl.org/pub/release-98/fasta/drosophila_melanogaster/cdna/Drosophila_melanogaster.BDGP6.22.cdna.all.fa.gz",
              "ftp://ftp.ensembl.org/pub/release-98/fasta/drosophila_melanogaster/ncrna/Drosophila_melanogaster.BDGP6.22.ncrna.fa.gz")
gtfPath <- file.path(dir,"Drosophila_melanogaster.BDGP6.22.98.gtf.gz")
suppressPackageStartupMessages(library(tximeta))
makeLinkedTxome(indexDir=indexDir,
                source="Ensembl",
                organism="Drosophila melanogaster",
                release="98",
                genome="BDGP6.22",
                fasta=fastaFTP,
                gtf=gtfPath,
                write=FALSE)

## -----------------------------------------------------------------------------
library(tximeta)
se <- tximeta(coldata)

## ----echo=FALSE---------------------------------------------------------------
dir2 <- system.file("extdata", package="tximeta")
tab <- read.csv(file.path(dir2, "hashtable.csv"),
                stringsAsFactors=FALSE)
release.range <- function(tab, source, organism) {
  tab.idx <- tab$organism == organism & tab$source == source
  rels <- tab$release[tab.idx]
  if (organism == "Mus musculus" & source == "GENCODE") {
    paste0("M", range(as.numeric(sub("M","",rels))))
  } else if (source == "RefSeq") {
    paste0("p", range(as.numeric(sub(".*p","",rels))))
  } else {
    range(as.numeric(rels))
  }
}
dat <- data.frame(
  source=rep(c("GENCODE","Ensembl","RefSeq"),c(2,3,2)),
  organism=c("Homo sapiens","Mus musculus",
             "Drosophila melanogaster")[c(1:2,1:3,1:2)]
)
rng <- t(sapply(seq_len(nrow(dat)), function(i)
  release.range(tab, dat[i,1], dat[i,2])))
dat$releases <- paste0(rng[,1], "-", rng[,2])
knitr::kable(dat)

## -----------------------------------------------------------------------------
suppressPackageStartupMessages(library(SummarizedExperiment))
colData(se)

## -----------------------------------------------------------------------------
assayNames(se)

## -----------------------------------------------------------------------------
rowRanges(se)

## -----------------------------------------------------------------------------
seqinfo(se)

## -----------------------------------------------------------------------------
edb <- retrieveDb(se)
class(edb)

## -----------------------------------------------------------------------------
se.exons <- addExons(se)
rowRanges(se.exons)[[1]]

## -----------------------------------------------------------------------------
gse <- summarizeToGene(se)
rowRanges(gse)

## -----------------------------------------------------------------------------
library(org.Dm.eg.db)
gse <- addIds(gse, "REFSEQ", gene=TRUE)
mcols(gse)

## -----------------------------------------------------------------------------
suppressPackageStartupMessages(library(DESeq2))
# here there is a single sample so we use ~1.
# expect a warning that there is only a single sample...
suppressWarnings({dds <- DESeqDataSet(gse, ~1)})
# ... see DESeq2 vignette

## -----------------------------------------------------------------------------
suppressPackageStartupMessages(library(edgeR))
y <- makeDGEList(gse)
# ... see edgeR User's Guide for further steps

## -----------------------------------------------------------------------------
cts <- assays(gse)[["counts"]]
normMat <- assays(gse)[["length"]]
normMat <- normMat / exp(rowMeans(log(normMat)))
o <- log(calcNormFactors(cts/normMat)) + log(colSums(cts/normMat))
y <- DGEList(cts)
y <- scaleOffset(y, t(t(log(normMat)) + o))
# ... see edgeR User's Guide for further steps

## ----eval=FALSE---------------------------------------------------------------
#  y <- se # rename the object to 'y'
#  library(fishpond)
#  # if inferential replicates existed in the data,
#  # analysis would begin with:
#  #
#  # y <- scaleInfReps(y)
#  # ... see Swish vignette in the fishpond package

## -----------------------------------------------------------------------------
gse <- summarizeToGene(se, countsFromAbundance="lengthScaledTPM")
library(limma)
y <- DGEList(assays(gse)[["counts"]])
# see limma User's Guide for further steps

## -----------------------------------------------------------------------------
metadata(gse)$countsFromAbundance 

## -----------------------------------------------------------------------------
names(metadata(se))
str(metadata(se)[["quantInfo"]])
str(metadata(se)[["txomeInfo"]])
metadata(se)[["tximetaInfo"]]
metadata(se)[["txdbInfo"]]

## -----------------------------------------------------------------------------
library(BiocFileCache)
bfc <- BiocFileCache()

## -----------------------------------------------------------------------------
file <- file.path(dir, "SRR1197474.plus", "quant.sf")
file.exists(file)
coldata <- data.frame(files=file, names="SRR1197474", sample="1",
                      stringsAsFactors=FALSE)

## -----------------------------------------------------------------------------
se <- tximeta(coldata)

## -----------------------------------------------------------------------------
indexDir <- file.path(dir, "Dm.BDGP6.22.98.plus_salmon-0.14.1")

## -----------------------------------------------------------------------------
fastaFTP <- c("ftp://ftp.ensembl.org/pub/release-98/fasta/drosophila_melanogaster/cdna/Drosophila_melanogaster.BDGP6.22.cdna.all.fa.gz",
              "ftp://ftp.ensembl.org/pub/release-98/fasta/drosophila_melanogaster/ncrna/Drosophila_melanogaster.BDGP6.22.ncrna.fa.gz",
              "extra_transcript.fa.gz")
#gtfFTP <- "ftp://path/to/custom/Drosophila_melanogaster.BDGP6.22.98.plus.gtf.gz"

## -----------------------------------------------------------------------------
gtfPath <- file.path(dir,"Drosophila_melanogaster.BDGP6.22.98.plus.gtf.gz")

## -----------------------------------------------------------------------------
tmp <- tempdir()
jsonFile <- file.path(tmp, paste0(basename(indexDir), ".json"))
makeLinkedTxome(indexDir=indexDir,
                source="Ensembl", organism="Drosophila melanogaster",
                release="98", genome="BDGP6.22",
                fasta=fastaFTP, gtf=gtfPath,
                jsonFile=jsonFile)

## -----------------------------------------------------------------------------
se <- tximeta(coldata)

## -----------------------------------------------------------------------------
rowRanges(se)
seqinfo(se)

## -----------------------------------------------------------------------------
library(BiocFileCache)
if (interactive()) {
  bfcloc <- getTximetaBFC()
} else {
  bfcloc <- tempdir()
}
bfc <- BiocFileCache(bfcloc)
bfcinfo(bfc)
bfcremove(bfc, bfcquery(bfc, "linkedTxomeTbl")$rid)
bfcinfo(bfc)

## -----------------------------------------------------------------------------
jsonFile <- file.path(tmp, paste0(basename(indexDir), ".json"))
loadLinkedTxome(jsonFile)

## -----------------------------------------------------------------------------
se <- tximeta(coldata)

## -----------------------------------------------------------------------------
if (interactive()) {
  bfcloc <- getTximetaBFC()
} else {
  bfcloc <- tempdir()
}
bfc <- BiocFileCache(bfcloc)
bfcinfo(bfc)
bfcremove(bfc, bfcquery(bfc, "linkedTxomeTbl")$rid)
bfcinfo(bfc)

## -----------------------------------------------------------------------------
library(devtools)
session_info()

Try the tximeta package in your browser

Any scripts or data that you put into this service are public.

tximeta documentation built on April 13, 2021, 6:01 p.m.