## Source file: inst/doc/Organism.dplyr.R

## ---- echo=FALSE--------------------------------------------------------------
# Attach the packages used by the rest of the script while hiding their
# startup messages. The attach order decides which package masks which on
# the search path, so it is kept as written.
suppressPackageStartupMessages({
    library(Organism.dplyr)
    library(GenomicRanges)
    library(ggplot2)
})

# Chunks marked eval=FALSE are shown for illustration only; their code is
# commented out and is not run by this script.

## ---- eval=FALSE--------------------------------------------------------------
#  library(Organism.dplyr)

# Not evaluated: create a src from the name of a TxDb package.
## ---- eval=FALSE--------------------------------------------------------------
#  src <- src_organism("TxDb.Hsapiens.UCSC.hg38.knownGene")

# Not evaluated: same call with an explicit sqlite file path as second argument.
## ---- eval=FALSE--------------------------------------------------------------
#  path <- "path/to/my.sqlite"
#  src <- src_organism("TxDb.Hsapiens.UCSC.hg38.knownGene", path)

# Evaluated: print what supportedOrganisms() reports.
# NOTE(review): presumably the organisms/TxDb packages accepted by
# src_organism() and src_ucsc() -- confirm against the package docs.
## -----------------------------------------------------------------------------
supportedOrganisms()

# Not evaluated: create a src from an organism name instead of a TxDb name.
## ---- eval=FALSE--------------------------------------------------------------
#  src <- src_ucsc("human", path)

# Evaluated: this is the `src` object every later chunk uses. It is opened
# from the database path returned by hg38light().
# NOTE(review): hg38light() presumably points at a reduced demo database
# shipped with the package -- confirm.
## -----------------------------------------------------------------------------
src <- src_organism(dbpath = hg38light())
src

## -----------------------------------------------------------------------------
# Names of the tables reachable through `src`.
src_tbls(src)

## -----------------------------------------------------------------------------
# Reference the "id" table and print it.
tbl(src, "id")

## -----------------------------------------------------------------------------
# Column names of the "id" table.
tbl(src, "id") %>% colnames()

## -----------------------------------------------------------------------------
# Identifiers for every gene whose symbol starts with "SNORD", de-duplicated
# and sorted by symbol, pulled into memory with collect().
tbl(src, "id") %>%
    filter(symbol %like% "SNORD%") %>%
    dplyr::select(entrez, map, ensembl, symbol) %>%
    distinct() %>%
    arrange(symbol) %>%
    collect()

## -----------------------------------------------------------------------------
# GO annotations for the gene with symbol "ADA". The join has no `by`
# argument, so it uses whatever columns "id" and "id_go" have in common.
tbl(src, "id") %>%
    inner_join(tbl(src, "id_go")) %>%
    filter(symbol == "ADA") %>%
    dplyr::select(entrez, ensembl, symbol, go, evidence, ontology)

## -----------------------------------------------------------------------------
# Number of transcripts per gene symbol, largest first. collect(n = Inf)
# retrieves every row rather than a preview.
txcount <- tbl(src, "id") %>%
    inner_join(tbl(src, "ranges_tx")) %>%
    dplyr::select(symbol, tx_id) %>%
    group_by(symbol) %>%
    summarize(count = n()) %>%
    dplyr::select(symbol, count) %>%
    arrange(desc(count)) %>%
    collect(n = Inf)

txcount

# Bar chart of the counts, one bar per symbol, with slanted x-axis labels.
library(ggplot2)
ggplot(txcount, aes(x = symbol, y = count)) +
    geom_bar(stat = "identity") +
    labs(title = "Transcript count", x = "Symbol", y = "Count") +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))

## -----------------------------------------------------------------------------
# Gene coordinates for ADA and NAT2, collected locally and converted to a
# GRanges object; symbol and map are carried along as extra columns.
tbl(src, "id") %>%
    inner_join(tbl(src, "ranges_gene")) %>%
    filter(symbol %in% c("ADA", "NAT2")) %>%
    dplyr::select(
        gene_chrom, gene_start, gene_end, gene_strand, symbol, map
    ) %>%
    collect() %>%
    GenomicRanges::GRanges()

## -----------------------------------------------------------------------------
# Key types that can be used to query `src`.
keytypes(src)

## -----------------------------------------------------------------------------
# Columns that can be retrieved from `src`.
columns(src)

## -----------------------------------------------------------------------------
# First few keys when no key type is given.
keys(src) %>% head()

## -----------------------------------------------------------------------------
# First few keys of type "symbol".
keys(src, keytype = "symbol") %>% head()

## -----------------------------------------------------------------------------
# Query inputs reused by the select_tbl() and mapIds() calls below: two gene
# symbols as keys and the columns to retrieve for them.
keytype <- "symbol"
keys <- c("ADA", "NAT2")
columns <- c("entrez", "tx_id", "tx_name", "exon_id")
select_tbl(src, keys, columns, keytype)

## -----------------------------------------------------------------------------
# Map each symbol to transcript names: first with the default handling of
# multiple matches, then requesting a CharacterList via `multiVals`.
mapIds(src, keys, column = "tx_name", keytype = keytype)
mapIds(
    src, keys,
    column = "tx_name", keytype = keytype, multiVals = "CharacterList"
)

## -----------------------------------------------------------------------------
# Filters that can be applied to `src`.
supportedFilters(src)

## -----------------------------------------------------------------------------
# Build two filter objects and print them: one on a single Ensembl gene id,
# one on symbols using the "startsWith" condition with prefix "SNORD".
EnsemblFilter("ENSG00000196839")
SymbolFilter("SNORD", condition = "startsWith")

## -----------------------------------------------------------------------------
# The same symbol filter passed to transcripts_tbl() in three forms: as a
# single filter object, wrapped in an AnnotationFilterList, and as a formula.
# `smbl` is reused by later chunks, so its name is kept.
smbl <- SymbolFilter("SNORD", "startsWith")
transcripts_tbl(src, filter = smbl)
# Named `smbl_list` rather than `filter` so the object does not reuse the name
# of the dplyr filter() verb called earlier in this script.
smbl_list <- AnnotationFilterList(smbl)
transcripts_tbl(src, filter = smbl_list)
transcripts_tbl(src, filter = ~smbl)

## -----------------------------------------------------------------------------
# Combine the symbol filter with a genomic-range filter in a formula.
# NOTE(review): transcripts() (no `_tbl` suffix) presumably returns a GRanges
# rather than a tbl -- confirm against the package docs.
gr <- GRangesFilter(GenomicRanges::GRanges("chr15:25062333-25065121"))
transcripts(src, filter = ~smbl & gr)

## -----------------------------------------------------------------------------
# Two filters joined with a logical AND through an explicit
# AnnotationFilterList.
transcripts_tbl(
    src,
    filter = AnnotationFilterList(
        SymbolFilter("ADA"),
        TxStartFilter(44619810, condition = "<"),
        logicOp = "&"
    )
)
## Equivalent to
transcripts_tbl(src, filter = ~symbol == "ADA" & tx_start < 44619810)

## -----------------------------------------------------------------------------
# A formula can mix field expressions with an existing filter object (txend),
# here joined with OR.
txend <- TxEndFilter(243843236, condition = "==")
transcripts_tbl(src, filter = ~symbol == "ADA" | txend)

## -----------------------------------------------------------------------------
# Negation. R parses `!tx_start < x` as `!(tx_start < x)` because `!` binds
# more loosely than the comparison operators.
transcripts_tbl(src, filter = ~symbol == "ADA" & !tx_start < 44618910)

## -----------------------------------------------------------------------------
# Nested expression: parentheses group the two alternatives of the OR; `smbl`
# is the SymbolFilter object created earlier in this script.
transcripts_tbl(
    src,
    filter = ~(symbol == "ADA" &
                   !(tx_start >= 44619810 | tx_end < 44651742)) |
        (smbl & !tx_end > 25056954)
)

## -----------------------------------------------------------------------------
# Print the R version and the attached/loaded packages used for this run.
sessionInfo()

## Try the Organism.dplyr package in your browser.

## Any scripts or data that you put into this service are public.

## Organism.dplyr documentation built on Nov. 8, 2020, 5:45 p.m.