# inst/doc/BiocSet.R

## ----setup, include = FALSE---------------------------------------------------
# Set the knitr chunk options `collapse` and `comment` used when rendering.
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

## ----bioconductor, eval = FALSE-----------------------------------------------
#  if(!requireNamespace("BiocManager", quietly = TRUE))
#      install.packages("BiocManager")
#  BiocManager::install("BiocSet")

## ----github, eval = FALSE-----------------------------------------------------
#  BiocManager::install("Kayla-Morrell/BiocSet")

## ----load, message = FALSE----------------------------------------------------
library(BiocSet)

## ----constructor--------------------------------------------------------------
# Create a BiocSet with two sets, `set1` (lower-case letters) and `set2`
# (upper-case letters), then display it.
tbl <- BiocSet(
    set1 = letters,
    set2 = LETTERS
)
tbl

## ----gmt----------------------------------------------------------------------
# Locate the example GMT file shipped in the package's "extdata" directory,
# import it, and display the result.
gmtFile <- system.file(
    "extdata", "hallmark.gene.symbol.gmt",
    package = "BiocSet"
)
tbl2 <- import(gmtFile)
tbl2

## ----export, tidy = TRUE------------------------------------------------------
# Write the imported object back out to a temporary ".gmt" file and show
# what `export()` returns.
fl <- tempfile(fileext = ".gmt")
gmt <- export(tbl2, fl)
gmt

## ----obo, eval = FALSE--------------------------------------------------------
#  download.file("http://current.geneontology.org/ontology/go.obo", "obo_file.obo")
#  
#  foo <- import("obo_file.obo", extract_tag = "everything")
#  
#  small_tst <- es_element(foo)[1,] %>%
#      unnest("ancestors") %>%
#      select("element", "ancestors") %>%
#      unlist() %>%
#      unique()
#  
#  small_oboset <- foo %>% filter_elementset(element %in% small_tst)
#  
#  fl <- tempfile(fileext = ".obo")
#  export(small_oboset, fl)

## ----small_obo----------------------------------------------------------------
# Locate the sample OBO file shipped in the package's "extdata" directory,
# import it, and display the result.
oboFile <- system.file(
    "extdata", "sample_go.obo",
    package = "BiocSet"
)
obo <- import(oboFile)
obo

## ----activate-----------------------------------------------------------------
# Rebuild the two-set example object and display it.
tbl <- BiocSet(set1 = letters, set2 = LETTERS)
tbl

# Keep only the elements "a" and "A".
tbl %>% filter_element(element == "a" | element == "A")

# Attach a random value to each of the two sets. `rnorm()` expects a count
# as its first argument, so request exactly two draws.
tbl %>% mutate_set(pval = rnorm(2))

# Order the element/set pairs by element, descending.
tbl %>% arrange_elementset(desc(element))

## ----set_ops------------------------------------------------------------------
# Union of two BiocSet objects whose sets partially overlap.
es1 <- BiocSet(set1 = letters[1:3], set2 = LETTERS[1:3])
es2 <- BiocSet(set1 = letters[2:4], set2 = LETTERS[2:4])
union(es1, es2)

# Union within a single BiocSet object.
es3 <- BiocSet(set1 = letters[1:10], set2 = letters[4:20])
union_single(es3)

## ----airway, message = FALSE--------------------------------------------------
library(airway)
# Load the `airway` dataset and keep a working copy under the name `se`.
data(airway)
se <- airway

## ----go_sets, message = FALSE-------------------------------------------------
library(org.Hs.eg.db)
# Build GO sets from the org.Hs.eg.db annotation, keyed by ENSEMBL ids.
go <- go_sets(org.Hs.eg.db, "ENSEMBL")
go

# An example of subsetting by evidence type.
go_sets(org.Hs.eg.db, "ENSEMBL", evidence = c("IPI", "TAS"))

## ----drop_assays--------------------------------------------------------------
# Drop rows whose assay values sum to zero, then keep only the GO elements
# that are still present among the remaining row names.
se1 <- se[rowSums(assay(se)) != 0, ]
filter_element(go, element %in% rownames(se1))

## ----count--------------------------------------------------------------------
# Number of rows per set.
dplyr::count(group_by(go, set))

## ----empty--------------------------------------------------------------------
# Collect the sets whose per-set count is zero, then remove them from `go`.
# NOTE(review): counting after grouping yields a row only for sets that
# actually occur, so `n == 0` may never match -- confirm this finds empty
# sets as intended.
drop <- go %>%
    es_activate(elementset) %>%
    group_by(set) %>%
    dplyr::count() %>%
    filter(n == 0) %>%
    pull(set)
filter_set(go, !(set %in% drop))

## ----map_unique---------------------------------------------------------------
# Map the elements of `go` from ENSEMBL ids to SYMBOLs via org.Hs.eg.db.
map_unique(go, org.Hs.eg.db, "ENSEMBL", "SYMBOL")

## ----adding, message = FALSE--------------------------------------------------
library(GO.db)
# Look up the GO.db DEFINITION for each GOID set and attach it to the sets
# as a new `definition` column.
map <- map_add_set(go, GO.db, "GOID", "DEFINITION")
mutate_set(go, definition = map)

## ----file_cache, message = FALSE----------------------------------------------
library(BiocFileCache)
# Cache the KEGG "hsa" sets as a GMT file so they are fetched only when no
# cache entry with this resource name exists yet.
rname <- "kegg_hsa"
in_cache <- NROW(bfcquery(query = rname, field = "rname")) != 0L
if (!in_cache) {
    # Fetch once and export that same object; calling `kegg_sets()` a
    # second time for the export would repeat the retrieval needlessly.
    kegg <- kegg_sets("hsa")
    fl <- bfcnew(rname = rname, ext = ".gmt")
    export(kegg, fl)
}
# Always read the sets back from the cached file.
kegg <- import(bfcrpath(rname = rname))

## ----kegg_filter--------------------------------------------------------------
# Map each KEGG element from ENTREZID to ENSEMBL and store the result as an
# `ensembl` column on the elements.
map <- map_add_element(kegg, org.Hs.eg.db, "ENTREZID", "ENSEMBL")
kegg <- mutate_element(kegg, ensembl = map)

## ----subset-------------------------------------------------------------------
# Keep only the set "hsa05310".
asthma <- filter_set(kegg, set == "hsa05310")

# Restrict `se` to the rows whose names appear among that set's ENSEMBL ids.
se <- se[rownames(se) %in% es_element(asthma)$ensembl, ]

se

## ----multiple-----------------------------------------------------------------
# Keep several sets at once by matching against a vector of set ids.
pathways <- c("hsa05310", "hsa04110", "hsa05224", "hsa04970")
multipaths <- filter_set(kegg, set %in% pathways)

multipaths

## ----airway2, message = FALSE-------------------------------------------------
# Reload the `airway` dataset and make "untrt" the reference level of `dex`.
data(airway)
airway$dex <- relevel(airway$dex, ref = "untrt")

## ----DE-----------------------------------------------------------------------
library(DESeq2)
library(tibble)
# Build the DESeq2 dataset with the design `~ cell + dex`, run DESeq(), and
# extract the results table.
des <- DESeq(DESeqDataSet(airway, design = ~ cell + dex))
res <- results(des)

# Convert the results to a tibble, moving the row names into an ENSEMBL
# column.
tbl <- as_tibble(as.data.frame(res), rownames = "ENSEMBL")

## ----ENTREZ-------------------------------------------------------------------
# Add an ENTREZID column by mapping each ENSEMBL id through org.Hs.eg.db;
# the names are stripped from the mapIds() result before it is stored.
tbl <- mutate(
    tbl,
    ENTREZID = unname(
        mapIds(
            org.Hs.eg.db,
            keys = ENSEMBL,
            column = "ENTREZID",
            keytype = "ENSEMBL"
        )
    )
)

# Discard rows lacking an adjusted p-value or an ENTREZID.
tbl <- filter(tbl, !is.na(padj), !is.na(ENTREZID))
tbl

## ----goana--------------------------------------------------------------------
library(limma)
# Run goana() on the ENTREZIDs with padj < 0.05, using every ENTREZID in
# `tbl` as the universe, then convert the result to a tibble with the row
# names stored in a GOALL column.
go_ids <- goana(
    tbl$ENTREZID[tbl$padj < 0.05],
    universe = tbl$ENTREZID,
    species = "Hs"
)
go_ids <- as_tibble(as.data.frame(go_ids), rownames = "GOALL")
go_ids

## ----final_tibble-------------------------------------------------------------
# Look up the GOALL annotations for every ENTREZID in `tbl`, drop the
# evidence column and duplicate rows, then keep only the "BP" ontology rows
# and drop the ontology column.
foo <- AnnotationDbi::select(
    org.Hs.eg.db,
    keys = tbl$ENTREZID,
    columns = "GOALL",
    keytype = "ENTREZID"
) %>%
    as_tibble() %>%
    dplyr::select(-EVIDENCEALL) %>%
    distinct() %>%
    filter(ONTOLOGYALL == "BP") %>%
    dplyr::select(-ONTOLOGYALL)
foo

## ----BiocSet_from_elementset--------------------------------------------------
# Rename the id columns to `element` and `set` in the three tibbles, then
# combine them into a single BiocSet.
foo <- dplyr::rename(foo, element = ENTREZID, set = GOALL)
tbl <- dplyr::rename(tbl, element = ENTREZID)
go_ids <- dplyr::rename(go_ids, set = GOALL)
es <- BiocSet_from_elementset(foo, tbl, go_ids)
es

## ----tibble_or_data.frame-----------------------------------------------------
# Convert the BiocSet to a tibble via its elements.
tibble_from_element(es)

# Convert the BiocSet to a data.frame via its elementset and show the first
# rows.
es %>%
    data.frame_from_elementset() %>%
    head()

## ----url----------------------------------------------------------------------
# Call url_ref() on the GO sets and display the result.
go %>% url_ref()

## -----------------------------------------------------------------------------
# Record the R session details used for this run.
utils::sessionInfo()

# Try the BiocSet package in your browser

# Any scripts or data that you put into this service are public.

# BiocSet documentation built on Nov. 8, 2020, 7:48 p.m.