## inst/unitTests/test_select.R

## unit tests for the "meta-select"
## These tests are testing the software and not the indiv. packages
## Will base testing on humans for now
## Load the packages the tests below rely on.  library() is used instead of
## require() so that a missing package stops the file immediately with a
## clear error, rather than returning FALSE and failing later on.
library(RSQLite)
library("Homo.sapiens")
## Shared human OrganismDb object used by most tests in this file.
x <- Homo.sapiens
## debug(OrganismDbi:::.select)
## debug(OrganismDbi:::.mergeSelectResults)
library("Rattus.norvegicus")
## Shared rat OrganismDb object used by test_rattus().
r <- Rattus.norvegicus

## .keytypes() on the human object must report (at least) the GOID, TXID
## and ENTREZID keytypes.
test_keytypes <- function(){
  keytypes <- OrganismDbi:::.keytypes(x)
  for (expected in c("GOID", "TXID", "ENTREZID")) {
    checkTrue(expected %in% keytypes)
  }
}

## .makekeytypeMapping() returns keytypes named by the package that provides
## them; check one representative keytype per package.
test_makekeytypeMapping <- function(){
  mapping <- OrganismDbi:::.makekeytypeMapping(x)
  ## expected providing package, indexed by keytype
  providers <- c(GOID = "GO.db",
                 TXID = "TxDb.Hsapiens.UCSC.hg19.knownGene",
                 ENTREZID = "org.Hs.eg.db")
  for (keytype in names(providers)) {
    checkTrue(providers[[keytype]] == names(mapping)[mapping == keytype])
  }
}

## .lookupDbFromKeytype() must hand back the database object that owns the
## requested keytype.  The class is tested with methods::is() rather than
## class(res) == "...", so that an object of a subclass is still accepted.
test_lookupDbFromKeytype <- function(){
  ## expected class of the returned database object, indexed by keytype
  expectedClass <- c(GOID = "GODb", TXID = "TxDb", ENTREZID = "OrgDb")
  for (keytype in names(expectedClass)) {
    res <- OrganismDbi:::.lookupDbFromKeytype(x, keytype)
    checkTrue(methods::is(res, expectedClass[[keytype]]))
  }
}

## Despite its name, this test exercises .keys(): for each keytype the keys
## must be character and there must be more of them than a lower bound.
test_lookupDbFromKeytype2 <- function(){
  ## lower bound on the number of keys, indexed by keytype
  minCounts <- c(GOID = 33000, TXID = 70000, ENTREZID = 42000)
  for (keytype in names(minCounts)) {
    found <- OrganismDbi:::.keys(x, keytype)
    checkTrue(is.character(head(found)))
    checkTrue(length(found) > minCounts[[keytype]])
  }
}

## .mkeys() reports the key(s) used to join two of the underlying packages:
## key="tbl1" / key="tbl2" give the key on one side, key="both" gives a
## vector named by package.
test_mkeys <- function(){
  txdbName <- "TxDb.Hsapiens.UCSC.hg19.knownGene"
  orgdbName <- "org.Hs.eg.db"
  godbName <- "GO.db"

  ## TxDb joined to OrgDb
  txSide <- OrganismDbi:::.mkeys(x, txdbName, orgdbName, key="tbl1")
  checkTrue("GENEID"==txSide)
  orgSide <- OrganismDbi:::.mkeys(x, txdbName, orgdbName, key="tbl2")
  checkTrue("ENTREZID"==orgSide)

  ## GO.db joined to OrgDb
  goSide <- OrganismDbi:::.mkeys(x, godbName, orgdbName, key="tbl1")
  checkTrue("GOID"==goSide)
  orgSide <- OrganismDbi:::.mkeys(x, godbName, orgdbName, key="tbl2")
  checkTrue("GO"==orgSide)

  ## both sides at once
  bothSides <- OrganismDbi:::.mkeys(x, godbName, orgdbName, key="both")
  expected <- c("GO.db" = "GOID", "org.Hs.eg.db" = "GO")
  checkEquals(bothSides, expected)
}


## Exercise .getSelects() for three starting points in the package graph.
## The steps needed to build its arguments are identical in every scenario,
## so they live in one local helper instead of being pasted three times.
test_getSelects <- function(){
  allCols <- OrganismDbi:::.colsByNodes(x)

  ## Build the arguments for .getSelects() from the requested columns,
  ## keytype and keys, then return the result of .getSelects().
  runGetSelects <- function(cols, keytype, k) {
    subgr <- OrganismDbi:::.getRelevantSubgraph(x, cols=cols, k,
                                                keytype=keytype)
    root <- OrganismDbi:::.lookupDbNameFromKeytype(x, keytype)
    fKeys <- OrganismDbi:::.getForeignKeys(x, subgr)
    selectCols <- unique(c(keytype, fKeys, cols))
    needCols <- OrganismDbi:::.getColsByNodes(subgr, selectCols, allCols)
    visitNodes <- OrganismDbi:::.bfs(subgr, root)
    OrganismDbi:::.getSelects(x, keytype, k, needCols, visitNodes)
  }

  ## start at one end case
  keytype <- "GENEID"
  k <- head(keys(x, keytype), n=2)
  res <- runGetSelects(c("TERM", "ALIAS"), keytype, k)

  checkTrue(length(res)==3)
  checkTrue(inherits(res, "list"))
  checkTrue("GENEID" %in% colnames(res[[1]]))
  checkTrue("GO" %in% colnames(res[[2]]))
  checkTrue("TERM" %in% colnames(res[[3]]))

  ## The very simple case
  keytype <- "OMIM"
  k <- head(keys(x, keytype), n=2)
  res <- runGetSelects(c("SYMBOL"), keytype, k)

  checkTrue(length(res)==1)
  checkTrue(inherits(res, "list"))
  checkTrue("OMIM" %in% colnames(res[[1]]))
  checkTrue("SYMBOL" %in% colnames(res[[1]]))

  ## Then there is this case (start in the middle case)
  keytype <- "ENTREZID"
  k <- head(keys(x, "ENTREZID"))
  res <- runGetSelects(c("GOID", "SYMBOL", "TXNAME"), keytype, k)

  checkTrue(length(res)==3)
  checkTrue(inherits(res, "list"))
  checkTrue("SYMBOL" %in% colnames(res[[1]]))
  checkTrue("TXNAME" %in% colnames(res[[2]]))
  checkTrue("GOID" %in% colnames(res[[3]]))
}

## Run the per-package selects for a "start in the middle" query and check
## that .mergeSelectResults() joins them into a single data.frame.
test_mergeSelectResults <- function(){
  allCols <- OrganismDbi:::.colsByNodes(x)

  cols <- c("GOID" ,  "SYMBOL", "TXNAME")
  keytype <- "ENTREZID"
  ## named `k` (not `keys`) so the keys() generic is not shadowed
  k <- head(keys(x, "ENTREZID"))
  subgr <- OrganismDbi:::.getRelevantSubgraph(x, cols=cols, k,
                                              keytype=keytype)
  root <- OrganismDbi:::.lookupDbNameFromKeytype(x, keytype)
  fKeys <- OrganismDbi:::.getForeignKeys(x, subgr)
  selectCols <- unique(c(keytype, fKeys, cols))
  needCols <- OrganismDbi:::.getColsByNodes(subgr, selectCols, allCols)
  visitNodes <- OrganismDbi:::.bfs(subgr, root)
  selected <- OrganismDbi:::.getSelects(x, keytype, k, needCols, visitNodes)
  res <- OrganismDbi:::.mergeSelectResults(x, selected, visitNodes)

  checkTrue(ncol(res)==8)
  checkTrue(inherits(res, "data.frame"))
  checkTrue("GO" %in% colnames(res))
  checkTrue("ENTREZID" %in% colnames(res))
  checkTrue("TXNAME" %in% colnames(res))
}


## MANY more tests
## End-to-end checks of .select() (and the exported select()) on the human
## object.  Every scenario asserts the same three things: the result has
## rows, it has the expected number of columns, and it contains the expected
## column names.  Those assertions live in one local helper.
test_select <- function(){
  ## Assert that `res` is non-empty, has `nCols` columns and contains every
  ## column named in `expected`.
  checkResult <- function(res, nCols, expected) {
    checkTrue(nrow(res) > 0)
    checkTrue(ncol(res) == nCols)
    for (col in expected) {
      checkTrue(col %in% colnames(res))
    }
  }

  ## named `k` (not `keys`) so the keys() generic is not shadowed
  k <- head(keys(x, "ENTREZID"))
  keytype <- "ENTREZID"

  cls <- c("GO","ALIAS")
  res <- OrganismDbi:::.select(x, k, cls, keytype)
  checkTrue(inherits(res, "data.frame"))
  checkResult(res, 5, c("GO", "EVIDENCE", "ENTREZID", "ALIAS"))

  cls <- c("IPI", "ALIAS", "CDSSTART")
  res <- OrganismDbi:::.select(x, k, cls, keytype)
  checkResult(res, 4, c("IPI", "ENTREZID", "ALIAS", "CDSSTART"))

  cls <- c("GOID","ENTREZID")
  res <- OrganismDbi:::.select(x, k, cls, keytype)
  checkResult(res, 4, c("GOID", "ENTREZID"))

  cls <- c("ALIAS","TXCHROM","EXONNAME")
  res <- OrganismDbi:::.select(x, k, cls, keytype)
  checkResult(res, 4, c("ALIAS", "ENTREZID", "EXONNAME"))

  cls <- c("ACCNUM","CDSSTART")
  res <- OrganismDbi:::.select(x, k, cls, keytype)
  checkResult(res, 3, c("ENTREZID", "ACCNUM", "CDSSTART"))

  cls <- c("ACCNUM", "ALIAS")
  res <- OrganismDbi:::.select(x, k, cls, keytype)
  checkResult(res, 3, c("ENTREZID", "ACCNUM", "ALIAS"))

  cls <- c("CDSSTART","CDSEND")
  res <- OrganismDbi:::.select(x, k, cls, keytype)
  checkResult(res, 3, c("ENTREZID", "CDSSTART", "CDSEND"))

  cls <- c("CDSSTART")
  res <- OrganismDbi:::.select(x, k, cls, keytype)
  checkResult(res, 2, c("ENTREZID", "CDSSTART"))

  cls <- c("ENTREZID")
  res <- OrganismDbi:::.select(x, k, cls, keytype)
  checkResult(res, 1, c("ENTREZID"))

  ## same kind of query, but through the exported select() method
  k <- head(keys(x, "ENTREZID"))
  keytype <- "ENTREZID"
  cls <- c("GOID" ,  "SYMBOL", "TXNAME")
  res <- select(x, k, cls, keytype)
  checkResult(res, 6, c("ENTREZID", "GOID", "SYMBOL", "TXNAME"))

##   ## This tests for fields that are not in the final Homo.sapiens pkg
##   ## I am keeping it because it may be of use if I decide to add hom pkgs
##   cls <- c("ALIAS", "ORYZA_SATIVA")
##   res <- select(Homo.sapiens, keys, cls, keytype)
##   checkTrue(dim(res)[1] >0)
##   checkTrue(dim(res)[2]==3)
##   checkTrue("ENTREZID" %in% colnames(res)) 
##   checkTrue("ALIAS" %in% colnames(res)) 
##   checkTrue("ORYZA_SATIVA" %in% colnames(res))

  ## Getting an error here 
  k <- head(keys(x, "TXNAME"))
  keytype <- "TXNAME"
  cls <- c("ENTREZID" , "TXNAME")
  res <- select(x, k, cls, keytype)
  checkResult(res, 2, c("ENTREZID", "TXNAME"))
}


## Also need to test a species with other keys to join DBs

## The same style of checks as test_select(), but on the rat object, and
## starting from several different keytypes.
test_rattus <- function(){
  ## Run .select() on the rat object and assert that the result is
  ## non-empty, has `nCols` columns and contains every column in `expected`.
  checkSelect <- function(k, cls, keytype, nCols, expected) {
    res <- OrganismDbi:::.select(r, k, cls, keytype)
    checkTrue(nrow(res) > 0)
    checkTrue(ncol(res) == nCols)
    for (col in expected) {
      checkTrue(col %in% colnames(res))
    }
  }

  keytype <- "ENTREZID"
  k <- head(keys(r, "ENTREZID"))

  checkSelect(k, c("GO","ALIAS"), keytype, 5,
              c("ENTREZID", "GO", "ONTOLOGY", "ALIAS"))

  checkSelect(k, c("GO","ALIAS","TXCHROM","TXNAME"), keytype, 7,
              c("ENTREZID", "GO", "ALIAS", "TXCHROM", "TXNAME"))

  checkSelect(k, c("TXCHROM","TXNAME"), keytype, 3,
              c("ENTREZID", "TXCHROM", "TXNAME"))

  ## now test different keytype
  keytype <- "ENSEMBL"
  k <- head(keys(r, keytype="ENSEMBL"))
  checkSelect(k, c("GO","ALIAS","TXCHROM","TXNAME"), keytype, 7,
              c("ENSEMBL", "GO", "ONTOLOGY", "TXCHROM", "ALIAS", "TXNAME"))

  ## now test key that starts us from TxDb
  keytype <- "TXNAME"
  k <- head(keys(r, keytype="TXNAME"))
  checkSelect(k, c("GO","ALIAS","TXCHROM"), keytype, 6,
              c("TXNAME", "GO", "EVIDENCE", "TXCHROM", "ALIAS"))

  ## now test key that starts us from Go
  ## TODO: A cleanup bug??? <- Row of NAs in 1st line
  keytype <- "GOID"
  k <- head(keys(r, keytype="GOID"))
  checkSelect(k, c("GOID","ALIAS","TXCHROM"), keytype, 5,
              c("GOID", "ONTOLOGY", "TXCHROM", "ALIAS"))

  ## what happens when we use a key from the middle?
  keytype <- "ENTREZID"
  k <- head(keys(r, keytype="ENTREZID"))
  checkSelect(k, c("GOID","SYMBOL","TXNAME"), keytype, 6,
              c("ENTREZID", "GOID", "SYMBOL", "TXNAME"))
}






## dbconn() and dbfile() must point at the same database: the metadata table
## read through the first connection must match the one read by opening the
## first file directly.
test_dbconn_and_dbfile <- function(){
    resc <- dbconn(x)[[1]]
    m <- dbGetQuery(resc, "SELECT * FROM metadata")
    checkTrue(dim(m)[2] ==2)
    checkTrue(dim(m)[1] > 6)

    resf <- dbfile(x)[[1]]
    ## This connection is opened by the test itself, so the test must close
    ## it again; on.exit() guarantees that even when a check fails.
    con <- dbConnect(SQLite(), resf)
    on.exit(dbDisconnect(con), add = TRUE)
    mf <- dbGetQuery(con, "SELECT * FROM metadata")
    checkTrue(all(mf == m))
}


## selectByRanges() annotates query ranges with the requested columns.  Two
## overlapping ranges on chr11 are queried against transcripts and 5' UTRs.
test_selectByRanges <- function(){
    ranges <- GRanges(seqnames=Rle(c('chr11'), c(2)),
                      IRanges(start=c(107899550, 108025550),
                              end=c(108291889, 108050000)),
                      strand='+', seqinfo=seqinfo(x))

    res <- selectByRanges(x, ranges, 'SYMBOL', 'tx')
    checkTrue(length(res) > 2)
    checkTrue('ACAT1' %in% res$SYMBOL[[1]])
    ## is() rather than class(res) == "GRanges", so a subclass still passes
    checkTrue(methods::is(res, "GRanges"))

    res2 <- selectByRanges(x, ranges, c('SYMBOL','PATH'), '5utr')
    checkTrue(length(res2) > 1)
    checkTrue('ACAT1' %in% res2$SYMBOL[[1]])
    checkTrue('04110' %in% res2$PATH[[2]])
    checkTrue(length(res2$PATH[[2]]) > 1)
    checkTrue(methods::is(res2, "GRanges"))
}


## selectRangesById() returns ranges grouped by key; the result is indexed
## by key name below.
test_selectRangesById <- function(){
    ## notice one of my keys is 'bad' (but things still work)
    res <- selectRangesById(x, c('bob','A1BG'), columns='SYMBOL',
                            keytype='SYMBOL')
    checkTrue(length(res[[1]]) > 1)
    checkTrue('A1BG' %in% res[[1]]$SYMBOL[[1]])
    checkTrue('A1BG' %in% names(res))
    ## is() rather than class(...) == "GRanges", so a subclass still passes
    checkTrue(methods::is(res[[1]], "GRanges"))

    ## In the case of bad keys it should still work, but some will be filtered
    ## head() instead of [1:10]: no NA padding if there are fewer than 10 keys
    symbols <- head(keys(x, 'SYMBOL'), 10)
    res2 <- selectRangesById(x,keys=symbols,keytype='SYMBOL')
    checkTrue(methods::is(res2[[1]], "GRanges"))
    checkTrue('A1BG' %in% names(res2))
}











## Fast testing: BiocGenerics:::testPackage(pattern="^test_select.*\\.R$")


## TODO: add something to fix the cosmetic bug where the GOID ends up to the right of the columns that come with it (like EVIDENCE and/or ONTOLOGY).  This should really be handled in a general way (even though it ONLY happens with GOID).
## Bioconductor/OrganismDbi documentation built on March 19, 2024, 12:40 p.m.