# test_GeneHancerDB.R
#------------------------------------------------------------------------------------------------------------------------
library(ghdb)
library(RUnit)
#------------------------------------------------------------------------------------------------------------------------
if(!exists("ghdb")){
ghdb <- GeneHancerDB()
} # creating for several tests below
#------------------------------------------------------------------------------------------------------------------------
runTests <- function()
{
test_ctor()
test_listTissues()
test_.eliminateDupsCollapseTissues()
test_foxo6()
test_klf1()
test_failureByTissue()
test_queryUnknownGene()
test_getEnhancers()
test_queryByRegion()
#test_to.hg19()
} # runTests
#------------------------------------------------------------------------------------------------------------------------
test_ctor <- function()
{
message(sprintf("--- test_ctor"))
checkTrue("GeneHancerDB" %in% is(ghdb))
} # test_ctor
#------------------------------------------------------------------------------------------------------------------------
test_listTissues <- function()
{
message(sprintf("--- test_listTissues"))
tissues <- listTissues(ghdb, "MEF2C")
checkTrue(length(tissues) > 100)
checkTrue(all(c("placenta", "Placenta") %in% tissues))
tissues.gata2 <- listTissues(ghdb, targetGene="GATA2")
# 120-130 with gh 411, 152 with v50
checkTrue(length(tissues.gata2) > 150)
checkTrue(length(tissues.gata2) < 160)
} # test_listTissues
#------------------------------------------------------------------------------------------------------------------------
test_.eliminateDupsCollapseTissues <- function()
{
message(sprintf("--- test_.eliminateDupsCollapseTissues"))
load(system.file(package="TrenaProjectHG38", "extdata", "testing", "tbl.gh.results.toTestReductionPreservingTissues.RData"))
checkEquals(dim(tbl), c(956, 17))
tbl.trimmed <- ghdb:::.eliminateDupsCollapseTissues(tbl)
checkEquals(dim(tbl.trimmed), c(50, 16))
# do a rough assay of the collapsed tissue column
spread <- fivenum(nchar(tbl.trimmed$tissue)) # [1] 16 62 156 338 2213
checkTrue(spread[1] < 20)
checkTrue(spread[5] > 2000)
} # test_.eliminateDupsCollapseTissues
#------------------------------------------------------------------------------------------------------------------------
# a simple test case, chosen for historical (and no longer important) reasons
test_foxo6 <- function()
{
message(sprintf("--- test_foxo6"))
# ENCODE and Ensembl use different capitalization
tissues <- c("placenta", "Placenta")
tbl <- retrieveEnhancersFromDatabase(ghdb, "FOXO6", tissues)
checkTrue(is.data.frame(tbl))
checkTrue(nrow(tbl) > 10) # 14 for 5.16, 19 for v54, 23 for v50, 13 for v411
checkTrue(all(tissues %in% tbl$tissue))
checkEquals(length(which(duplicated(tbl$sig))), 0) # no duplicated regions
checkTrue(length(grep(";", tbl$tissue)) > 0) # 4 instances of "Placenta;placenta"
tbl.all <- retrieveEnhancersFromDatabase(ghdb, "FOXO6", tissues="all")
checkTrue(is.data.frame(tbl.all))
checkTrue(nrow(tbl.all) > 40) # 44 for v5.16 3 for v411, 71 for v50, 70 for v54
checkEquals(length(which(duplicated(tbl.all$sig))), 0) # no duplicated regions
checkTrue(max(nchar(tbl.all$tissue)) > 1200) # 2151 for v54
checkTrue(length(grep(";", tbl.all$tissue)) > 40) # most tissue values are semicolo separated multiples
} # test_foxo6
#------------------------------------------------------------------------------------------------------------------------
# another sample, with KLF1 important in erythropoiesis
test_klf1 <- function()
{
message(sprintf("--- test_klf1"))
# ENCODE and Ensembl use different capitalization
allTissues <- listTissues(ghdb, "KLF1")
tissues <- c(grep("myeloid", allTissues, ignore.case=TRUE, v=TRUE),
grep("eryth", allTissues, ignore.case=TRUE, v=TRUE),
grep("blood", allTissues, ignore.case=TRUE, v=TRUE))
tbl <- retrieveEnhancersFromDatabase(ghdb, "KLF1", tissues)
dim(tbl)
checkTrue(nrow(tbl) >= 3)
} # test_klf1
#------------------------------------------------------------------------------------------------------------------------
# a simple test case, chosen for historical (and no longer important) reasons
test_failureByTissue <- function()
{
message(sprintf("--- test_failureByTissue"))
trem2.tissues <- retrieveEnhancersFromDatabase(ghdb, "TREM2", tissues="all")$tissue
tissues <- c("brain", "Brain")
checkTrue(length(intersect(tissues, trem2.tissues)) == 0)
suppressWarnings(
tbl <- retrieveEnhancersFromDatabase(ghdb, "TREM2", tissues)
)
checkEquals(nrow(tbl), 0)
# double check now that at least one tissue reports for TREM2
tbl <- retrieveEnhancersFromDatabase(ghdb, "TREM2", "placenta")
checkTrue(nrow(tbl) > 0)
} # test_failureByTissue
#------------------------------------------------------------------------------------------------------------------------
test_queryUnknownGene <- function()
{
message(sprintf("--- test_queryUnknownGene"))
symbol <- "LINC00982"
suppressWarnings(
tbl <- retrieveEnhancersFromDatabase(ghdb, symbol, tissues="all")
)
checkEquals(dim(tbl), c(0,0))
} # test_queryUnknownGene
#------------------------------------------------------------------------------------------------------------------------
test_getEnhancers <- function()
{
message(sprintf("--- test_getEnhancers"))
all.tissues <- getEnhancerTissues(ghdb, "TREM2")
tbl.trem2.all <- getEnhancers(ghdb, "TREM2")
checkTrue(all(tbl.trem2.all$geneSymbol == "TREM2"))
dim(tbl.trem2.all)
checkTrue(nrow(tbl.trem2.all) > 8) # gh50: 16 gh54: 16
tbl.mef2c <- getEnhancers(ghdb, "MEF2C")
checkTrue(all(tbl.mef2c$geneSymbol == "MEF2C"))
checkTrue(nrow(tbl.mef2c) > 10) # 5.16: 13, gh411: 12 , gh50: 201, gh54 206
suppressWarnings(tbl.bogus <- getEnhancers(ghdb, "bogus99"))
checkEquals(nrow(tbl.bogus), 0)
#----------------------------------------------------------------------------------
# hoxa5 has a nonsensical promoter, 86kb in size
# simon fishilevich calls this an outlier, worth elminating
# comes from ENCODE. he suggests this filter:
# drop all elements >=10kb AND (overlap a gene TSS OR overlap >=3 gene exons)
# i am implementing just the first clause for now
#----------------------------------------------------------------------------------
tbl.hoxa5 <- getEnhancers(ghdb, "HOXA5", maxSize=100000)
checkTrue(nrow(tbl.hoxa5) > 10)
sizes <- with(tbl.hoxa5, 1 + end - start)
checkEquals(length(which(sizes > 10000)), 2) # gh411: 1 gh50: 2, gh54: 2
tbl.hoxa5 <- getEnhancers(ghdb, "HOXA5", maxSize=10000)
checkTrue(nrow(tbl.hoxa5) > 8) # 9 for gh5.16, 10 with gh411
sizes <- with(tbl.hoxa5, 1 + end - start)
checkEquals(length(which(sizes > 10000)), 0)
} # test_getEnhancers
#------------------------------------------------------------------------------------------------------------------------
test_queryByRegion <- function()
{
message(sprintf("--- test_queryByRegion"))
chrom <- 'chr1'
start <- 173595097
end <- 173692966
tbl <- queryByRegion(ghdb, chrom, start, end)
checkTrue(nrow(tbl) > 50) # gh54: 76 gh59: 80
checkTrue(all(tbl$chrom == chrom))
checkTrue(all(tbl$start >= start))
checkTrue(all(tbl$end <= end))
tbl.best <- subset(tbl, combinedscore > 250)
checkTrue(nrow(tbl.best) > 10) # 19 in gh5.16, 15 with gh version 5.0 > 500, 19 >250 in gh54
checkTrue(nrow(tbl.best) < 30)
} # test_queryByRegion
#------------------------------------------------------------------------------------------------------------------------
test_to.hg19 <- function()
{
message(sprintf("--- test_to.hg19"))
tbl.all <- retrieveEnhancersFromDatabase(ghdb, "FOXO6", tissues="all")
checkEquals(nrow(tbl.all), 70) # gh50=71, gh54=70
tbl.hg19 <- to.hg19(ghdb, tbl.all)
checkEquals(dim(tbl.all), dim(tbl.hg19))
checkTrue(length(intersect(tbl.all$start, tbl.hg19$start)) == 0)
checkTrue(all(tbl.all$chrom == tbl.hg19$chrom))
} # test_to.hg19
#------------------------------------------------------------------------------------------------------------------------
test_brca1 <- function()
{
message(sprintf("--- test_brca1"))
tbl.brca1.tissue <- retrieveEnhancersFromDatabase(ghdb, "BRCA1", "mammary epithelial cell")
checkTrue(nrow(tbl.brca1.tissue) < 5)
tbl.brca1.all <- retrieveEnhancersFromDatabase(ghdb, "BRCA1", "all")
checkTrue(nrow(tbl.brca1.all) > 15)
} # test_to.hg19
#------------------------------------------------------------------------------------------------------------------------
if(!interactive())
runTests()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.