inst/doc/AnnotationDbi.R

## ----style, eval=TRUE, echo=FALSE, results='asis'--------------------------
BiocStyle::latex()

## ----include=FALSE---------------------------------------------------------
library(knitr)
opts_chunk$set(tidy=FALSE)

## ----<available schemas, results='hide'------------------------------------
library(DBI)
library(org.Hs.eg.db)
library(AnnotationForge)
available.dbschemas()

## ----setup0, results='hide', echo=FALSE-------------------------------
options(continue=" ", prompt="R> ", width=72L)

## ----setup, results='hide'--------------------------------------------
library(hgu95av2.db)

## ----objects----------------------------------------------------------
ls("package:hgu95av2.db")

## ----Question #1, echo=FALSE, results='hide'--------------------------
library(hgu95av2.db)
search()
hgu95av2_dbschema()
org.Hs.eg_dbschema()

## ----QAlisting--------------------------------------------------------
qcdata = capture.output(hgu95av2())
head(qcdata, 20)

## ----mapcounts, eval=FALSE--------------------------------------------
#  hgu95av2MAPCOUNTS

## ----envApiDemo1------------------------------------------------------
all_probes <- ls(hgu95av2ENTREZID)
length(all_probes)

set.seed(0xa1beef)
probes <- sample(all_probes, 5)
probes

## ----envApiDemo2------------------------------------------------------
hgu95av2ENTREZID[[probes[1]]]
hgu95av2ENTREZID$"31882_at"

syms <- unlist(mget(probes, hgu95av2SYMBOL))
syms

## ----helpDemo, eval= FALSE, results='hide'----------------------------
#  ?hgu95av2CHRLOC

## ----Question #2, echo=FALSE, results='hide'--------------------------
mget(probes, hgu95av2CHRLOC, ifnotfound=NA)[1:2]

## ----as.list, eval=FALSE----------------------------------------------
#  system.time(as.list(hgu95av2SYMBOL)[1:10])
#  
#  ## vs:
#  
#  system.time(as.list(hgu95av2SYMBOL[1:10]))

## ----show.revmap------------------------------------------------------
unlist(mget(syms, revmap(hgu95av2SYMBOL)))

## ----thisworks--------------------------------------------------------
as.list(revmap(hgu95av2PATH)["00300"])

## ----revmap2----------------------------------------------------------
x <- hgu95av2PATH
## except for the name, this is exactly revmap(x)
revx <- hgu95av2PATH2PROBE
revx2 <- revmap(x, objName="PATH2PROBE")
revx2
identical(revx, revx2)

as.list(revx["00300"])

## ----revmap2b---------------------------------------------------------
Term("GO:0000018")
Definition("GO:0000018")

## ----Question #3, echo=FALSE, results='hide'--------------------------
rs = ls(revmap(org.Hs.egREFSEQ))[4:6]
EGs = mget(rs, revmap(org.Hs.egREFSEQ), ifnotfound=NA)
##Then get the GO terms.
GOs = mget(unlist(EGs), org.Hs.egGO, ifnotfound=NA)
GOs
##Extract the GOIDs from this list:
GOIDs = as.character(unique(sapply(GOs, names)))
##Then look up what these terms are:
Term(GOIDs)

## ----toTable----------------------------------------------------------
head(toTable(hgu95av2GO[probes]))

## ----undirectedMethod-------------------------------------------------
toTable(x)[1:6, ]
toTable(revx)[1:6, ]

## ----directedMethods--------------------------------------------------
length(x)
length(revx)
allProbeSetIds <- keys(x)
allKEGGIds <- keys(revx)

## ----moreUndirectedMethods--------------------------------------------
junk <- Lkeys(x)        # same for all maps in hgu95av2.db (except pseudo-map
                        # MAPCOUNTS)
Llength(x)              # nb of Lkeys
junk <- Rkeys(x)        # KEGG ids for PATH/PATH2PROBE maps, GO ids for
                        # GO/GO2PROBE/GO2ALLPROBES maps, etc...
Rlength(x)              # nb of Rkeys

## ----moreKeysMethods--------------------------------------------------
x = hgu95av2ENTREZID[1:10]
## Directed methods
mappedkeys(x)           # mapped keys
count.mappedkeys(x)     # nb of mapped keys
## Undirected methods
mappedLkeys(x)          # mapped left keys
count.mappedLkeys(x)    # nb of mapped Lkeys

## ----isNA-------------------------------------------------------------
y = hgu95av2ENTREZID[isNA(hgu95av2ENTREZID)]     # usage like is.na()
Lkeys(y)[1:4]

## ----Question #4, echo=FALSE, results='hide'--------------------------

count.mappedLkeys(hgu95av2GO)
Llength(hgu95av2GO) - count.mappedLkeys(hgu95av2GO)
mappedLkeys(hgu95av2GO)[1]
toTable(hgu95av2GO["1000_at"])

## ----revmapUseCases---------------------------------------------------
x <- hgu95av2CHR
Rkeys(x)
chroms <- Rkeys(x)[23:24]
chroms
Rkeys(x) <- chroms
toTable(x)

## ----easy-------------------------------------------------------------
z <- as.list(revmap(x)[chroms])
names(z)
z[["Y"]]

## ----evilUnlist-------------------------------------------------------
chrs = c("12","6")
mget(chrs, revmap(hgu95av2CHR[1:30]), ifnotfound=NA)

## ----evilUnlist2------------------------------------------------------
unlist(mget(chrs, revmap(hgu95av2CHR[1:30]), ifnotfound=NA))

## ----evilUnlist3------------------------------------------------------
unlist2(mget(chrs, revmap(hgu95av2CHR[1:30]), ifnotfound=NA))

## ----cytogenetic2-----------------------------------------------------
x <- hgu95av2MAP
pbids <- c("38912_at", "41654_at", "907_at", "2053_at", "2054_g_at",
           "40781_at")
x <- subset(x, Lkeys=pbids, Rkeys="18q11.2")
toTable(x)

## ----coerce-----------------------------------------------------------
  pb2cyto <- as.character(x)
  pb2cyto[pbids]

## ----coercWarnings----------------------------------------------------
  cyto2pb <- as.character(revmap(x))

## ----multiProbes------------------------------------------------------
  ## How many probes?
  dim(hgu95av2ENTREZID)
  ## Make a mapping with multiple probes exposed
  multi <- toggleProbes(hgu95av2ENTREZID, "all")
  ## How many probes?
  dim(multi)

## ----multiProbes2-----------------------------------------------------
  ## Make a mapping with ONLY multiple probes exposed
  multiOnly <- toggleProbes(multi, "multiple")
  ## How many probes?
  dim(multiOnly)

  ## Then make a mapping with ONLY single mapping probes
  singleOnly <- toggleProbes(multiOnly, "single")
  ## How many probes?
  dim(singleOnly)

## ----multiProbes3-----------------------------------------------------
  ## Test the multiOnly mapping
  hasMultiProbes(multiOnly)
  hasSingleProbes(multiOnly)

  ## Test the singleOnly mapping
  hasMultiProbes(singleOnly)
  hasSingleProbes(singleOnly)

## ----orgSchema, results='hide'----------------------------------------
org.Hs.eg_dbschema()

## ----connObj, results='hide'------------------------------------------
org.Hs.eg_dbconn()

## ----connObj2, results='hide'-----------------------------------------
query <- "SELECT gene_id FROM genes LIMIT 10;"
result = dbGetQuery(org.Hs.eg_dbconn(), query)
result

## ----Question #5, echo=FALSE, results='hide'--------------------------
sql <- "SELECT gene_id, chromosome FROM genes AS g, chromosomes AS c WHERE g._id=c._id;"
dbGetQuery(org.Hs.eg_dbconn(),sql)[1:10,]

##OR
toTable(org.Hs.egCHR)[1:10,]

## ----complexEnv, eval=FALSE-------------------------------------------
#  ## Obtain SYMBOLS with at least one GO BP
#  ## annotation with evidence IMP, IGI, IPI, or IDA.
#  system.time({
#  bpids <- eapply(hgu95av2GO, function(x) {
#      if (length(x) == 1 && is.na(x))
#        NA
#      else {
#          sapply(x, function(z) {
#              if (z$Ontology == "BP")
#                z$GOID
#              else
#                NA
#              })
#      }
#  })
#  bpids <- unlist(bpids)
#  bpids <- unique(bpids[!is.na(bpids)])
#  g2p <- mget(bpids, hgu95av2GO2PROBE)
#  wantedp <- lapply(g2p, function(x) {
#      x[names(x) %in% c("IMP", "IGI", "IPI", "IDA")]
#  })
#  wantedp <- wantedp[sapply(wantedp, length) > 0]
#  wantedp <- unique(unlist(wantedp))
#  ans <- unlist(mget(wantedp, hgu95av2SYMBOL))
#  })
#  length(ans)
#  ans[1:10]

## ----schema, results='hide'-------------------------------------------
hgu95av2_dbschema()

## ----schema2, results='hide'------------------------------------------
hgu95av2ORGPKG

## ----schema3, results='hide'------------------------------------------
org.Hs.eg_dbschema()

## ----hgu95av2_org_join, tidy=FALSE------------------------------------
orgDBLoc = system.file("extdata", "org.Hs.eg.sqlite", package="org.Hs.eg.db")
attachSQL = paste("ATTACH '", orgDBLoc, "' AS orgDB;", sep = "")
dbGetQuery(hgu95av2_dbconn(), attachSQL)

## ----complexDb--------------------------------------------------------
system.time({
SQL <- "SELECT DISTINCT probe_id,symbol FROM probes, orgDB.gene_info AS gi, orgDB.genes AS g, orgDB.go_bp AS bp WHERE bp._id=g._id AND gi._id=g._id AND probes.gene_id=g.gene_id AND bp.evidence IN ('IPI', 'IDA', 'IMP', 'IGI')"
zz <- dbGetQuery(hgu95av2_dbconn(), SQL)
})
#its a good idea to always DETACH your database when you are finished...
dbGetQuery(hgu95av2_dbconn(), "DETACH orgDB"         )

## ----Question #6, echo=FALSE, results='hide'--------------------------
sql <- "SELECT gene_id, start_location, end_location, cytogenetic_location FROM genes AS g, chromosome_locations AS c, cytogenetic_locations AS cy WHERE g._id=c._id AND g._id=cy._id"
dbGetQuery(org.Hs.eg_dbconn(),sql)[1:10,]

## ----Question #7, echo=FALSE, results='hide'--------------------------
orgDBLoc = system.file("extdata", "org.Hs.eg.sqlite", package="org.Hs.eg.db")
attachSQL = paste("ATTACH '", orgDBLoc, "' AS orgDB;", sep = "")
dbGetQuery(hgu95av2_dbconn(), attachSQL)

goDBLoc = system.file("extdata", "GO.sqlite", package="GO.db")
attachSQL = paste("ATTACH '", goDBLoc, "' AS goDB;", sep = "")
dbGetQuery(hgu95av2_dbconn(), attachSQL)

SQL <- "SELECT DISTINCT p.probe_id, gi.symbol, gt.go_id, gt.definition
    FROM probes 
        AS p, orgDB.gene_info AS gi, orgDB.genes AS g, orgDB.go_bp 
        AS bp, goDB.go_term AS gt  
    WHERE bp._id=g._id AND gi._id=g._id AND p.gene_id=g.gene_id 
        AND bp.evidence IN ('IPI', 'IDA', 'IMP', 'IGI') AND gt.go_id=bp.go_id"
zz <- dbGetQuery(hgu95av2_dbconn(), SQL)

dbGetQuery(hgu95av2_dbconn(), "DETACH orgDB")
dbGetQuery(hgu95av2_dbconn(), "DETACH goDB")

## ----SessionInfo, echo=FALSE------------------------------------------
sessionInfo()

Try the AnnotationDbi package in your browser

Any scripts or data that you put into this service are public.

AnnotationDbi documentation built on Nov. 8, 2020, 4:50 p.m.